# Baidu Translate: use a crawler to simulate a manual English-word lookup and save the retrieved information locally.
import json

import requests

# 1. Endpoint that the lookup is POSTed to.
post_url = 'https://fanyi.baidu.com/sug'

# 2. Browser-like User-Agent sent with the request.
headers = {
    "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
}

# 3. POST parameters. The "payload" here is the body of the HTTP request,
#    i.e. the data being transmitted (it can be form data, JSON, files, ...).
#    The word typed by the user is sent as the 'kw' form field.
word = input('請輸入中文字詞:')
data = {'kw': word}

# 4. Send the request. The timeout keeps the script from hanging forever on
#    an unresponsive server; raise_for_status() surfaces HTTP errors directly
#    instead of failing later while decoding a non-JSON error page.
response = requests.post(url=post_url, headers=headers, data=data, timeout=10)
response.raise_for_status()

# 5. Decode the JSON response body and show it.
result_obi = response.json()
print(result_obi)

# 6. Persist the result as <word>.json. The `with` block guarantees the file
#    is closed even if serialisation fails; ensure_ascii=False keeps
#    non-ASCII text readable in the saved file.
filename = word+'.json'
with open('D:/Pythonstudy/python爬蟲/百度翻譯/'+filename, 'w', encoding='utf-8') as f:
    json.dump(result_obi, f, ensure_ascii=False)
print("success")
# Web page collector: given a keyword, fetch the target site's page of information about that keyword.
import requests

# Browser-like User-Agent sent in the request headers.
headers = {
    "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
}

# Search URL; the query string is supplied separately through `params`.
url = 'https://sogou.com/web?'

# Parameters carried in the URL.
parms = {'query': '賈康康'}

# The timeout prevents an indefinite hang; raise_for_status() stops an HTTP
# error page from being silently saved as if it were the search result.
response = requests.get(url, params=parms, headers=headers, timeout=10)
response.raise_for_status()
page_text = response.text

# Save the returned page text to a local HTML file.
with open('sogou.html', 'w', encoding='utf-8') as f:
    f.write(page_text)
# KFC: an application of POST requests - given a target location, fetch information about the stores there.
import requests

# Store-list endpoint that the search form is POSTed to.
url = 'https://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=keyword'

# Form fields sent in the POST body. 'keyword' is the location searched for;
# 'pageIndex' / 'pageSize' presumably select the result page and its size
# (TODO confirm against the site's API).
data = {
    'cname': '',
    'pid': '',
    'keyword': '蘭州',
    'pageIndex': '1',
    'pageSize': '20',
}

# Browser-like User-Agent sent with the request.
headers = {
    "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
}

# The timeout prevents an indefinite hang; raise_for_status() reports HTTP
# errors directly instead of failing later while decoding the body as JSON.
response = requests.post(url=url, headers=headers, data=data, timeout=10)
response.raise_for_status()

# Decode the JSON response body and show it.
result = response.json()
print(result)
# Douban movie chart scraper: fetch the desired range of the chart by specifying the request parameters.
import json

import requests

# 1. Endpoint that is queried for the chart data.
douban_url = 'https://movie.douban.com/j/chart/top_list'

# 2. Query parameters sent with the request. 'start' / 'limit' presumably
#    select the offset and number of entries returned, and 'type' /
#    'interval_id' the chart category and range (TODO confirm against the
#    site's API).
data = {
    'type': '25',
    'interval_id': '100:90',
    'action': '',
    'start': '1',
    'limit': '20',
}

# 3. Browser-like User-Agent sent with the request.
headers = {
    "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
}

# 4. Send the request.
#    Note: if the browser's address bar changes, the request is not an AJAX
#    request.
#    The timeout prevents an indefinite hang; raise_for_status() reports HTTP
#    errors directly instead of failing later while decoding the body as JSON.
response = requests.get(url=douban_url, headers=headers, params=data, timeout=10)
response.raise_for_status()

# 5. Decode the JSON response body and show it.
result = response.json()
print(result)

# 6. Persist the data. The `with` block guarantees the file is closed even if
#    serialisation fails; ensure_ascii=False keeps non-ASCII text readable.
with open('D:/Pythonstudy/python爬蟲/豆瓣電影/douban.json', 'w', encoding='utf-8') as f:
    json.dump(result, f, ensure_ascii=False)