先從深交所開始:直接上傳源碼:
from bs4 import BeautifulSoup
from lxml import etree
import pandas as pd
import akshare as ak
import datetime
import requests
import csv
from contextlib import closing
import time
from urllib.request import urlopen
import requests
from urllib import request
from io import BytesIO
import gzip
import random#設定獲取數據的日期
# --- SZSE: download the option risk-indicator report (xlsx) for every trade
# --- date since 2019-01-01 and accumulate the rows into szse.csv.
date = ak.tool_trade_date_hist_sina()
date = date.loc[date['trade_date'] >= '2019-01-01']

df1 = pd.DataFrame()
for j in date['trade_date']:
    print(j)
    # The endpoint returns one xlsx workbook per trade date.
    url = ('http://www.szse.cn/api/report/ShowReport?SHOWTYPE=xlsx'
           '&CATALOGID=option_hyfxzb&TABKEY=tab1'
           '&txtSearchDate=%(j)s&random=0.5379373345285146' % {'j': j})
    print(url)
    response = requests.get(url)
    # BUG FIX: the original APPENDED ('ab') each response's raw bytes to a
    # single xlsx file on disk and re-read that file every iteration. An xlsx
    # is a zip archive, so concatenating responses corrupts it — that was the
    # "garbled data" the author could not explain. Parsing each response
    # straight from memory avoids the temp file entirely.
    df = pd.read_excel(BytesIO(response.content))
    df['trade_date'] = j
    # DataFrame.append was removed in pandas 2.x; concat is the replacement.
    df1 = pd.concat([df1, df], ignore_index=True)
df1.to_csv('szse.csv')
爬取上交所
import csv
from contextlib import closing
import time
from urllib.request import urlopen
# --- SSE: download the derivative risk CSV for every trade date since
# --- 2019-01-01 and accumulate the rows into sse.csv.
# Example request:
# http://query.sse.com.cn/derivative/downloadRisk.do?trade_date=20201207&productType=0
date = ak.tool_trade_date_hist_sina()
date = date.loc[date['trade_date'] >= '2019-01-01']

df1 = pd.DataFrame()
for j in date['trade_date']:
    d = datetime.datetime.strptime(j, '%Y-%m-%d')
    url = ('http://query.sse.com.cn/derivative/downloadRisk.do'
           '?trade_date=%s&productType=0' % d.strftime('%Y%m%d'))
    # Stream the response and close it deterministically when done.
    with closing(requests.get(url, stream=True)) as r:
        # The exchange serves the CSV in GBK, not UTF-8.
        lines = (line.decode('gbk') for line in r.iter_lines())
        # BUG FIX: the original passed quotechar=',' — the same character as
        # the delimiter — which mis-parses any quoted field. Use the standard
        # double-quote character.
        reader = csv.reader(lines, delimiter=',', quotechar='"')
        for row in reader:
            print(row)
            # Each CSV row becomes one DataFrame row (transpose of a column).
            df1 = pd.concat([df1, pd.DataFrame(row).T], ignore_index=True)
# Write once after the loop; the original rewrote the whole file per row.
df1.to_csv('sse.csv')
爬取中金所
import datetime
import requests
from lxml import etree
import pandas as pd
import akshare as ak
import time
# --- CFFEX: download the daily quote XML for every trade date since
# --- 2019-01-01 and accumulate the records into cffex.csv.
date = ak.tool_trade_date_hist_sina()
date = date.loc[date['trade_date'] >= '2019-01-01']

# One column per <dailydata> child element, in the original output order.
# (etree.HTML lower-cases tag names, so the lowercase paths match regardless
# of the feed's original casing.)
FIELDS = ['instrumentid', 'tradingday', 'openprice', 'highestprice',
          'lowestprice', 'closeprice', 'preopeninterest', 'openinterest',
          'presettlementprice', 'settlementpriceif', 'settlementprice',
          'volume', 'turnover', 'productid', 'delta', 'expiredate']

df1 = pd.DataFrame()
for j in date['trade_date']:
    d = datetime.datetime.strptime(j, '%Y-%m-%d')
    url = ('http://www.cffex.com.cn/sj/hqsj/rtj/%s/%s/index.xml?id=39'
           % (d.strftime('%Y%m'), d.strftime('%d')))
    response = requests.get(url)
    p = etree.HTML(response.content)
    df = pd.DataFrame()
    # XPath positions are 1-based. NOTE(review): range(1, len(...)) also skips
    # the LAST record (index len(...)) — kept to match the original output,
    # but range(1, len(...) + 1) is probably what was intended; confirm.
    for i in range(1, len(p.xpath('//dailydata'))):
        for field in FIELDS:
            # BUG FIX: the original's copy-pasted block for 'expiredate' used
            # the literal path '//dailydata[i]/...' (no {} placeholder), so
            # .format(i) never substituted the index and the wrong node was
            # queried. Driving every field through one formatted path fixes
            # that and removes sixteen duplicated try/except blocks.
            try:
                df.loc[i, field] = p.xpath(
                    '//dailydata[{}]/{}/text()'.format(i, field))
            except Exception:  # missing element for this record → placeholder 0
                df.loc[i, field] = 0
    df1 = pd.concat([df1, df])
df1.to_csv('cffex.csv')
以上是爬取三大交易所期權數據的源代碼,可以直接使用,也可以修改保存至數據庫。