urllib是Python內置的最基本的HTTP請求庫,有以下四個模塊:
urllib.request 請求模塊
urllib.error 異常處理模塊
urllib.parse url解析模塊
urllib.robotparser robots.txt解析模塊
urllib.request請求模塊:
urllib.request.urlopen(url,data=None,[timeout,]*,cafile=None,capath=None,cadefault=False,context=None)
# urlopen(): send a request to a URL and get back a file-like response.
import urllib.parse
import urllib.request

# Plain GET request. read() returns bytes, so decode before printing.
# The response is used as a context manager so it is closed when done.
with urllib.request.urlopen("http://www.baidu.com") as response:
    print(response.read().decode("utf-8"))

# Supplying `data` makes the request a POST. The payload must be bytes, so
# the form fields are URL-encoded first and then encoded as UTF-8.
data = bytes(urllib.parse.urlencode({'word': 'hello'}), encoding='utf-8')
# httpbin.org is a site provided specifically for testing HTTP requests.
with urllib.request.urlopen("http://httpbin.org/post", data=data) as response:
    print(response.read())

# `timeout` limits how long the request may block before giving up.
with urllib.request.urlopen("http://httpbin.org/get", timeout=1) as response:
    print(response.read())
# Detecting a timeout raised by urlopen().
import socket
import urllib.error
import urllib.request

try:
    # A deliberately tiny timeout so the request is likely to time out.
    urllib.request.urlopen("http://httpbin.org/get", timeout=0.1).close()
except urllib.error.URLError as e:
    # Errors raised while connecting/sending are wrapped in URLError; the
    # underlying cause is available as e.reason.
    if isinstance(e.reason, socket.timeout):
        print('TIME OUT')
except socket.timeout:
    # A timeout while waiting for the response is raised directly, without
    # being wrapped in URLError, so it needs its own handler.
    print('TIME OUT')
# Response type: inspects the `response` object bound by the preceding request.
print(type(response))

# Status code and response headers. The response is closed on leaving the
# `with` block.
with urllib.request.urlopen("https://www.python.org") as response:
    print(response.status)  # status code of the response
    print(response.getheaders())  # all response headers
    print(response.getheader("Server"))  # value of one header, looked up by name
# Request(): when urlopen() needs to send headers or other extra information,
# build a Request object and pass it to urlopen() in place of the URL.
import urllib.parse
import urllib.request

url = "http://httpbin.org/post"
headers = {
    # A 'User-Agent' entry could also be given here instead of through
    # add_header() below.
    'Host': 'httpbin.org',
}
# Named `form_data` rather than `dict` so the builtin type is not shadowed.
form_data = {
    'name': 'Germey',
}
data = bytes(urllib.parse.urlencode(form_data), encoding='utf-8')
req = urllib.request.Request(url=url, data=data, headers=headers, method='POST')
# Headers can also be added individually after the Request is created.
req.add_header('User-Agent', 'Mozilla/4.0(compatible;MSIE 5.5;Windows NT)')
with urllib.request.urlopen(req) as response:
    print(response.read().decode('utf-8'))
# Cookies: collect the cookies received while opening a URL and print them.
import http.cookiejar
import urllib.request

cookie = http.cookiejar.MozillaCookieJar()
handler = urllib.request.HTTPCookieProcessor(cookie)
opener = urllib.request.build_opener(handler)
response = opener.open("http://www.baidu.com")
response.close()  # only the cookies are needed, not the body
for item in cookie:
    # format() instead of "+" so a cookie whose value is None does not raise
    # TypeError during concatenation.
    print("{}=: {}".format(item.name, item.value))
# Saving cookies: a jar created with a file name can write itself to that file.
filename = "cookieLWP.txt"
cookie = http.cookiejar.LWPCookieJar(filename=filename)
handler = urllib.request.HTTPCookieProcessor(cookiejar=cookie)
opener = urllib.request.build_opener(handler)
response = opener.open("http://www.baidu.com")
# Both flags are set so that cookies marked to be discarded and cookies that
# have expired are written out too.
cookie.save(
    ignore_expires=True,
    ignore_discard=True,
)
# Loading cookies: read them back with the same jar class that saved them.
cookie = http.cookiejar.LWPCookieJar()
cookie.load(
    'cookieLWP.txt',
    ignore_expires=True,
    ignore_discard=True,
)
handler = urllib.request.HTTPCookieProcessor(cookiejar=cookie)
opener = urllib.request.build_opener(handler)
# Requests made through this opener go through the cookie processor built
# from the loaded jar.
response = opener.open("http://www.baidu.com")
print(response.read().decode('utf-8'))
urllib.error異常處理模塊:
# Exception handling for urlopen().
import urllib.request
from urllib import error

try:
    response = urllib.request.urlopen("https://www.cnblogs.com/wisir/index.html")
except error.HTTPError as e:
    # HTTPError is a subclass of URLError, so it has to be caught first; it
    # additionally carries the status code and the response headers.
    print(e.reason, e.code, e.headers, sep='\n')
except error.URLError as e:
    print(e.reason)
else:
    print("Request Successfully")
    response.close()  # the body is not needed; release the connection
# Telling a timeout apart from other URL errors.
import socket
import urllib.error
import urllib.request

try:
    # A deliberately tiny timeout so the request is likely to time out.
    response = urllib.request.urlopen("https://www.baidu.com", timeout=0.01)
except urllib.error.URLError as e:
    print(e.reason)
    # e.reason holds the underlying exception; check whether it is a timeout.
    if isinstance(e.reason, socket.timeout):
        print('TIME OUT')
except socket.timeout as e:
    # A timeout while waiting for the response is raised directly rather than
    # wrapped in URLError, so report it the same way here.
    print(e)
    print('TIME OUT')
else:
    response.close()
urllib.parse URL解析模塊:
# urlparse: split a URL string into its components.
# Signature: urllib.parse.urlparse(urlstring, scheme="", allow_fragments=True)
from urllib.parse import urlparse

result = urlparse(
    "http://www.baidu.com/index.html;user?id=5#comment"
)
# Show both the type of the parsed result and its contents.
print(type(result), result)
# urlunparse: the inverse of urlparse. It combines the six URL components
# (scheme, netloc, path, params, query, fragment) into one complete URL.
from urllib.parse import urlunparse

data = [
    'http',
    'www.baidu.com',
    'index.html',
    'user',
    'a=6',
    'comment',
]
print(urlunparse(data))
# urljoin: resolve the second argument against the first (the base URL).
# Parts missing from the second argument are filled in from the base.
from urllib.parse import urljoin

# A relative reference takes its scheme and host from the base.
print(urljoin("http://www.baidu.com", "FAQ.html"))
# A second argument that is already a full URL is used as it is.
print(urljoin("http://www.baidu.com", "https://www.cnblogs.com/wisir/"))
# urlencode: turn a dict into the query-string parameters of a GET request.
from urllib.parse import urlencode

params = {'name': 'germey', 'age': 22}
base_url = "http://www.baidu.com?"
# The base URL already ends with "?", so the encoded pairs go straight after it.
url = "".join((base_url, urlencode(params)))
print(url)
python3 urllib庫官方文檔:https://docs.python.org/3/library/urllib.html