urllib庫

python內置的最基本的HTTP請求庫，有以下四個模塊：

urllib.request　　請求模塊

urllib.error　　　異常處理模塊

urllib.parse　　　url解析模塊

urllib.robotparser　robots.txt解析模塊

urllib.request請求模塊：

urllib.request.urlopen(url,data=None,[timeout,]*,cafile=None,capath=None,cadefault=False,context=None)

# urlopen(): send a request to a URL and get a response object back.

import urllib.request

# The response works as a context manager; leaving the `with` block closes
# the underlying connection instead of leaking it.
with urllib.request.urlopen("http://www.baidu.com") as response:
    # read() returns bytes, so decode before printing as text.
    print(response.read().decode("utf-8"))

import urllib.parse

# Supplying `data` makes this a POST request; the payload must be the bytes
# of the URL-encoded key/value pairs.
data = bytes(urllib.parse.urlencode({'word':'hello'}),encoding='utf-8')
# httpbin.org is a site dedicated to HTTP testing.
# Use `with` so the response is closed once the body has been printed.
with urllib.request.urlopen("http://httpbin.org/post",data=data) as response:
    print(response.read())

# `timeout` limits how long (in seconds) the request may block.

with urllib.request.urlopen("http://httpbin.org/get",timeout=1) as response:
    print(response.read())

import socket
import urllib.error

try:
    # Deliberately tiny timeout to provoke a timeout; close the response in
    # case the request unexpectedly succeeds.
    urllib.request.urlopen("http://httpbin.org/get", timeout=0.1).close()
except urllib.error.URLError as e:
    # Timeouts hit while connecting/sending arrive wrapped in URLError.
    if isinstance(e.reason,socket.timeout):
        print('TIME OUT')
except socket.timeout:
    # A timeout hit while waiting for the response may be raised directly,
    # without the URLError wrapper, so handle it as well.
    print('TIME OUT')

# Response type, status code and headers.
# Everything is inspected on a response opened here, so this snippet does not
# depend on a `response` left over from an earlier example, and the
# connection is closed when the `with` block ends.
with urllib.request.urlopen("https://www.python.org") as response:
    print(type(response))                # class of the response object
    print(response.status)               # HTTP status code of the response
    print(response.getheaders())         # all response headers
    print(response.getheader("Server"))  # value of a single header, looked up by name

# Request(): when urlopen() has to carry extra information such as headers,
# build a Request object and pass it to urlopen() in place of the URL.
import urllib.parse
import urllib.request

url = "http://httpbin.org/post"
headers = {
    # A 'User-Agent' entry could be supplied here instead of via add_header().
    'Host':'httpbin.org'
}
# Named `form_fields` rather than `dict` so the builtin `dict` is not shadowed
# for the rest of the module.
form_fields = {
    'name':'Germey'
}
data = bytes(urllib.parse.urlencode(form_fields),encoding='utf-8')
req = urllib.request.Request(url=url,data=data,headers=headers,method='POST')
# Headers can also be added one at a time after the Request is built.
req.add_header('User-Agent','Mozilla/4.0(compatible;MSIE 5.5;Windows NT)')
# Close the response once its body has been printed.
with urllib.request.urlopen(req) as response:
    print(response.read().decode('utf-8'))

# Cookies: capture the cookies a site sets while a request is handled.
import http.cookiejar
import urllib.request

cookie = http.cookiejar.MozillaCookieJar()
handler = urllib.request.HTTPCookieProcessor(cookie)
opener = urllib.request.build_opener(handler)
response = opener.open("http://www.baidu.com")
# Only the cookies collected by the handler are needed; release the connection.
response.close()
for item in cookie:
    # Cookie.value may be None; format() prints it instead of raising the
    # TypeError that string concatenation would.
    print("{}=: {}".format(item.name, item.value))

# Save cookies to a file.
filename = "cookieLWP.txt"
cookie = http.cookiejar.LWPCookieJar(filename)
handler = urllib.request.HTTPCookieProcessor(cookie)
opener = urllib.request.build_opener(handler)
# The request is made only to populate the jar; close the response right away.
opener.open("http://www.baidu.com").close()
# Also write cookies marked as discardable or already expired.
cookie.save(ignore_discard=True,ignore_expires=True)

# Load cookies back: read the file with the same jar class that wrote it.
cookie = http.cookiejar.LWPCookieJar()
# Reuse `filename` so the save and load paths cannot drift apart.
cookie.load(filename,ignore_discard=True,ignore_expires=True)
handler = urllib.request.HTTPCookieProcessor(cookie)
opener = urllib.request.build_opener(handler)
with opener.open("http://www.baidu.com") as response:
    print(response.read().decode('utf-8'))



urllib.error異常處理模塊：

# Exception handling.
from urllib import error
try:
    response = urllib.request.urlopen("https://www.cnblogs.com/wisir/index.html")
except error.HTTPError as e:
    # HTTPError is a subclass of URLError, so it has to be caught first.
    print(e.reason,e.code,e.headers,sep='\n')
except error.URLError as e:
    print(e.reason)
else:
    print("Request Successfully")
    # The body is not needed here; close the response so it is not leaked.
    response.close()

# A very small timeout is used on purpose to demonstrate timeout handling.
try:
    response = urllib.request.urlopen("https://www.baidu.com",timeout=0.01)
except urllib.error.URLError as e:
    # Timeouts hit while connecting/sending arrive wrapped in URLError.
    print(e.reason)
    if isinstance(e.reason,socket.timeout):
        print('TIME OUT')
except socket.timeout as e:
    # A timeout hit while waiting for the response may be raised directly,
    # without the URLError wrapper; report it the same way.
    print(e)
    print('TIME OUT')
else:
    # The request unexpectedly succeeded; do not leak the connection.
    response.close()



urllib.parse URL解析模塊：

# urlparse: split a URL string into its components.
# Signature: urllib.parse.urlparse(urlstring, scheme="", allow_fragments=True)
from urllib.parse import urlparse

sample_url = "http://www.baidu.com/index.html;user?id=5#comment"
result = urlparse(sample_url)
# Show both the type of the parsed value and its contents.
print(type(result), result)

# urlunparse: the inverse of urlparse; it assembles six URL components into
# one complete URL.
from urllib.parse import urlunparse

# Components in order: scheme, netloc, path, params, query, fragment.
data = [
    'http',
    'www.baidu.com',
    'index.html',
    'user',
    'a=6',
    'comment',
]
rebuilt_url = urlunparse(data)
print(rebuilt_url)

# urljoin: resolve the second argument against the first (base) URL. Parts the
# second argument lacks are taken from the base; an absolute second URL is
# returned as-is.
from urllib.parse import urljoin

base = "http://www.baidu.com"
for reference in ("FAQ.html", "https://www.cnblogs.com/wisir/"):
    print(urljoin(base, reference))

# urlencode: turn a dict into the query string of a GET request.
from urllib.parse import urlencode

params = {'name': 'germey', 'age': 22}
base_url = "http://www.baidu.com?"
# Encode the parameters first, then append them after the trailing "?".
query_string = urlencode(params)
url = base_url + query_string
print(url)


python3 urllib庫官方文檔：https://docs.python.org/3/library/urllib.html

轉載於：https://www.cnblogs.com/wisir/p/9969833.html

本文來自互聯網用戶投稿，該文觀點僅代表作者本人，不代表本站立場。本站僅提供信息存儲空間服務，不擁有所有權，不承擔相關法律責任。
如若轉載，請注明出處：http://www.pswp.cn/news/250240.shtml
繁體地址，請注明出處：http://hk.pswp.cn/news/250240.shtml
英文地址，請注明出處：http://en.pswp.cn/news/250240.shtml

如若內容造成侵權/違法違規/事實不符，請聯繫多彩編程網進行投訴反饋email:809451989@qq.com，一經查實，立即刪除！