爬蟲之初級實戰項目:爬取知乎任一作者的文章練手
在正式上代碼之前,先過一遍之前所學知識的框架內容,溫故而知新!!!
接下來我們直接上代碼,一定要手敲代碼、手敲代碼、手敲代碼!!!
import requests,csv
# Crawl the "included articles" list of one Zhihu author page by page and
# store the title, excerpt and link of every article in a CSV file.
url ='https://www.zhihu.com/api/v4/members/zhang-jia-wei/included-articles?'
headers={'user-agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'}

# newline='' (an empty string, not a space) keeps the csv module from
# producing blank lines between rows; encoding='utf-8' avoids encoding
# errors or garbled text for the Chinese content.
# The `with` block guarantees the file is closed even if a request fails.
with open('知乎-收錄.csv', 'w', newline='', encoding='utf-8') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(['標題','摘要','鏈接'])

    offset = 0
    while True:
        params = {
            'include': 'data[*].comment_count,suggest_edit,is_normal,thumbnail_extra_info,thumbnail,can_comment,comment_permission,admin_closed_comment,content,voteup_count,created,updated,upvoted_followees,voting,review_info,is_labeled,label_info;data[*].author.badge[?(type=best_answerer)].topics',
            'offset': str(offset),
            'limit': '10',
            'sort_by': 'included'
        }
        # The query parameters must actually be sent; without `params=` every
        # iteration would request the same URL and ignore `offset`.
        res = requests.get(url, headers=headers, params=params)
        # Fail loudly on an HTTP error instead of on a confusing JSON/KeyError.
        res.raise_for_status()
        js_zh = res.json()
        zhihu = js_zh['data']
        for i in zhihu:
            writer.writerow([i['title'], i['excerpt'], i['url']])

        # `offset` drives the pagination and bounds the loop: pages are
        # requested at offsets 0, 10, ..., 50 and then the loop stops.
        offset = offset + 10
        if offset > 50:
            break
標籤:練手,comment,知乎,Python,代碼,writer,offset,csv