# 親自使用過,太好用了。
# 導入requests模塊,模擬發送請求
import requests
# 導入json
import json
# 導入re
import re# 定義請求頭
# Default HTTP headers sent with the requests made by this script.
# The User-Agent is a desktop Chrome (macOS) browser string.
headers = {
    'Accept': '*/*',
    'Accept-Language': 'en-US,en;q=0.5',
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/63.0.3239.84 Safari/537.36'
    ),
}
# Regular-expression helper: extract the value matching a given pattern.
def my_match(text, pattern):
    """Extract the JSON captured by ``pattern`` from ``text`` and decode it.

    Args:
        text: Text to search (in this script, the HTML of a video page).
        pattern: Regular expression whose first capture group encloses a
            JSON document.

    Returns:
        The decoded JSON value taken from capture group 1.

    Raises:
        ValueError: If ``pattern`` does not match ``text``, or if the captured
            text is not valid JSON (``json.JSONDecodeError`` is a
            ``ValueError`` subclass).
    """
    match = re.search(pattern, text)
    if match is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError('pattern %r not found in text' % (pattern,))
    payload = match.group(1)
    # Echo the raw payload followed by a blank line, as the script always did.
    print(payload)
    print()
    return json.loads(payload)


# Timeout (seconds) passed to every requests call so a stalled server cannot
# hang the script forever.
_REQUEST_TIMEOUT = 30
# Number of bytes written to disk per chunk while streaming a response body.
_CHUNK_SIZE = 1024 * 1024


def _safe_filename(name):
    """Return ``name`` with path separators and NUL bytes replaced by ``_``."""
    return re.sub(r'[\\/\x00]', '_', name)


def _remote_size(url, request_headers):
    """Return the Content-Length reported for ``url`` without reading the body."""
    response = requests.get(url, headers=request_headers, stream=True,
                            timeout=_REQUEST_TIMEOUT)
    try:
        response.raise_for_status()
        return int(response.headers['content-length'])
    finally:
        # stream=True defers the body; closing here discards it unread.
        response.close()


def _download_to_file(url, request_headers, path, total_size):
    """Download ``url`` into ``path`` using ranged requests until complete.

    Each request asks for everything from the current offset onward; if the
    server delivers less than the remainder, the loop asks again from the new
    offset until ``total_size`` bytes have been written.

    Raises:
        OSError: If the server ignores a resume request or returns no data.
    """
    received = 0
    # 'wb' (not 'ab'): the offset starts at 0, so any existing file from an
    # earlier run must be replaced rather than appended to.
    with open(path, 'wb') as output:
        while received < total_size:
            # Build the Range header on a per-request copy so the caller's
            # headers are never modified.
            ranged_headers = dict(request_headers)
            ranged_headers['Range'] = 'bytes=' + str(received) + '-'
            response = requests.get(url, headers=ranged_headers, stream=True,
                                    timeout=_REQUEST_TIMEOUT)
            try:
                response.raise_for_status()
                if received and response.status_code != 206:
                    # A non-partial reply to a resume request would restart
                    # from byte 0 and corrupt the file.
                    raise OSError('server ignored Range request for %s' % url)
                written = 0
                for chunk in response.iter_content(chunk_size=_CHUNK_SIZE):
                    output.write(chunk)
                    written += len(chunk)
            finally:
                response.close()
            if not written:
                # Without this guard an empty reply would loop forever.
                raise OSError('no data received for %s at offset %d'
                              % (url, received))
            received += written


def download_video(old_video_url, video_url, audio_url, video_name):
    """Download the video and audio streams of one video to the working directory.

    The streams are saved as ``<name>_video.mp4`` and ``<name>_audio.mp4``,
    where ``<name>`` is ``video_name`` with path separators replaced by ``_``
    so that the title cannot redirect the write to another directory.

    Args:
        old_video_url: Address of the video page; sent as the ``Referer``.
        video_url: Address of the video stream.
        audio_url: Address of the audio stream.
        video_name: Title of the video, used in messages and file names.

    Returns:
        ``video_name``, unchanged.

    Raises:
        requests.RequestException: On network errors or HTTP error statuses.
        KeyError: If a response carries no ``content-length`` header.
        OSError: If a download cannot make progress or a file cannot be written.
    """
    # Work on a copy: the module-level ``headers`` must not accumulate the
    # Referer or Range values used for this download.
    request_headers = dict(headers)
    request_headers['Referer'] = old_video_url

    print("開始下載視頻:%s" % video_name)
    video_size = _remote_size(video_url, request_headers)
    print('%s視頻大小:' % video_name, video_size)
    audio_size = _remote_size(audio_url, request_headers)
    print('%s音頻大小:' % video_name, audio_size)

    file_stem = _safe_filename(video_name)
    _download_to_file(video_url, request_headers,
                      '%s_video.mp4' % file_stem, video_size)
    _download_to_file(audio_url, request_headers,
                      '%s_audio.mp4' % file_stem, audio_size)
    return video_name


def main():
    """Fetch one video page, locate its streams and download them."""
    # Replace with the address of the video you want to download.
    url = 'https://www.bilibili.com/video/BV1zK4y1B7Z8/?share_source=copy_web'
    # Request the page and fail early on an HTTP error status.
    res = requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT)
    res.raise_for_status()
    # JSON describing the available streams.
    playinfo = my_match(res.text, '__playinfo__=(.*?)</script><script>')
    # JSON describing the video itself (title and so on).
    initial_state = my_match(res.text, r'__INITIAL_STATE__=(.*?);\(function\(\)')
    # Several formats are listed; entry 0 is taken. The original author notes
    # it is the highest resolution (1080p) -- not verified here.
    video_url = playinfo['data']['dash']['video'][0]['baseUrl']
    # Address of the audio stream.
    audio_url = playinfo['data']['dash']['audio'][0]['baseUrl']
    video_name = initial_state['videoData']['title']
    print('視頻名字為:', video_name)
    print('視頻地址為:', video_url)
    print('音頻地址為:', audio_url)
    download_video(url, video_url, audio_url, video_name)


if __name__ == '__main__':
    main()
# 它可以爬取並下載視頻文件和音頻文件!