先從深交所開始:直接上傳源碼:
from bs4 import BeautifulSoup
from lxml import etree
import pandas as pd
import akshare as ak
import datetime
import requests
import csv
from contextlib import closing
import time
from urllib.request import urlopen
import requests
from urllib import request
from io import BytesIO
import gzip
import random#設定獲取數據的日期
# --- SZSE: download the option risk-indicator report (xlsx) for every trade
# --- date since 2019-01-01 and accumulate the rows into szse.csv.
date = ak.tool_trade_date_hist_sina()
date = date.loc[date['trade_date'] >= '2019-01-01']

df1 = pd.DataFrame()
for j in date['trade_date']:
    print(j)
    # The endpoint returns one xlsx workbook per trade date.
    url = ('http://www.szse.cn/api/report/ShowReport?SHOWTYPE=xlsx'
           '&CATALOGID=option_hyfxzb&TABKEY=tab1'
           '&txtSearchDate=%(j)s&random=0.5379373345285146' % {'j': j})
    print(url)
    response = requests.get(url)
    # BUG FIX: the original APPENDED ('ab') each response's raw bytes to a
    # single xlsx file on disk and re-read that file every iteration. An xlsx
    # is a zip archive, so concatenating responses corrupts it — that was the
    # "garbled data" the author could not explain. Parsing each response
    # straight from memory avoids the temp file entirely.
    df = pd.read_excel(BytesIO(response.content))
    df['trade_date'] = j
    # DataFrame.append was removed in pandas 2.x; concat is the replacement.
    df1 = pd.concat([df1, df], ignore_index=True)
df1.to_csv('szse.csv')
爬取上交所
import csv
from contextlib import closing
import time
from urllib.request import urlopen
# --- SSE: download the derivative risk CSV for every trade date since
# --- 2019-01-01 and accumulate the rows into sse.csv.
# Example request:
# http://query.sse.com.cn/derivative/downloadRisk.do?trade_date=20201207&productType=0
date = ak.tool_trade_date_hist_sina()
date = date.loc[date['trade_date'] >= '2019-01-01']

df1 = pd.DataFrame()
for j in date['trade_date']:
    d = datetime.datetime.strptime(j, '%Y-%m-%d')
    url = ('http://query.sse.com.cn/derivative/downloadRisk.do'
           '?trade_date=%s&productType=0' % d.strftime('%Y%m%d'))
    # Stream the response and close it deterministically when done.
    with closing(requests.get(url, stream=True)) as r:
        # The exchange serves the CSV in GBK, not UTF-8.
        lines = (line.decode('gbk') for line in r.iter_lines())
        # BUG FIX: the original passed quotechar=',' — the same character as
        # the delimiter — which mis-parses any quoted field. Use the standard
        # double-quote character.
        reader = csv.reader(lines, delimiter=',', quotechar='"')
        for row in reader:
            print(row)
            # Each CSV row becomes one DataFrame row (transpose of a column).
            df1 = pd.concat([df1, pd.DataFrame(row).T], ignore_index=True)
# Write once after the loop; the original rewrote the whole file per row.
df1.to_csv('sse.csv')
爬取中金所
import datetime
import requests
from lxml import etree
import pandas as pd
import akshare as ak
import time
# --- CFFEX: download the daily quote XML for every trade date since
# --- 2019-01-01 and accumulate the records into cffex.csv.
date = ak.tool_trade_date_hist_sina()
date = date.loc[date['trade_date'] >= '2019-01-01']

# One column per <dailydata> child element, in the original output order.
# (etree.HTML lower-cases tag names, so the lowercase paths match regardless
# of the feed's original casing.)
FIELDS = ['instrumentid', 'tradingday', 'openprice', 'highestprice',
          'lowestprice', 'closeprice', 'preopeninterest', 'openinterest',
          'presettlementprice', 'settlementpriceif', 'settlementprice',
          'volume', 'turnover', 'productid', 'delta', 'expiredate']

df1 = pd.DataFrame()
for j in date['trade_date']:
    d = datetime.datetime.strptime(j, '%Y-%m-%d')
    url = ('http://www.cffex.com.cn/sj/hqsj/rtj/%s/%s/index.xml?id=39'
           % (d.strftime('%Y%m'), d.strftime('%d')))
    response = requests.get(url)
    p = etree.HTML(response.content)
    df = pd.DataFrame()
    # XPath positions are 1-based. NOTE(review): range(1, len(...)) also skips
    # the LAST record (index len(...)) — kept to match the original output,
    # but range(1, len(...) + 1) is probably what was intended; confirm.
    for i in range(1, len(p.xpath('//dailydata'))):
        for field in FIELDS:
            # BUG FIX: the original's copy-pasted block for 'expiredate' used
            # the literal path '//dailydata[i]/...' (no {} placeholder), so
            # .format(i) never substituted the index and the wrong node was
            # queried. Driving every field through one formatted path fixes
            # that and removes sixteen duplicated try/except blocks.
            try:
                df.loc[i, field] = p.xpath(
                    '//dailydata[{}]/{}/text()'.format(i, field))
            except Exception:  # missing element for this record → placeholder 0
                df.loc[i, field] = 0
    df1 = pd.concat([df1, df])
df1.to_csv('cffex.csv')
以上是爬取三大交易所期權數據的源代碼,可以直接使用,也可以修改保存至數據庫。