CAPM模型經歷了大量的實證和應用之後,有證據表明,市場風險溢酬並不能充分解釋個別風險資產的收益率。於是很多研究者開始探索其他的因素,比如公司市值、PE、槓桿比例、賬面市值比等。Fama和French兩個人對於各種因素進行了全面的組合分析,當單獨使用Beta或者用Beta分別與其他幾個因子相結合時,Beta的解釋能力很弱;市值、PE(市盈率)、槓桿比例、BM(賬面市值比,PB的倒數)單獨來用時,對於收益率的解釋能力都很強,但是組合起來時,市值、BM會弱化槓桿比例和PE的解釋能力。後來Fama和French兩人提取了3個重要因子:市場風險溢酬因子、市值因子和賬面市值比因子,以收益率作為因變量構建了一個類似CAPM的線性模型,即著名的三因子模型。
import tushare as ts
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
import datetime
import pandas as pd
# Apply seaborn's default plotting theme.
sns.set()
# Select a CJK-capable font; the chart title further down is written in Chinese.
# NOTE(review): this is set after sns.set(), presumably because the theme call
# would otherwise overwrite the font list -- confirm before reordering.
mpl.rcParams['font.sans-serif'] = 'WenQuanYi Micro Hei'
# Tushare Pro client used for every download in this script.
# NOTE(review): presumably the API token has been registered beforehand
# (e.g. via ts.set_token) -- confirm.
pro = ts.pro_api()


def _value_weighted_return(group):
    """Return the ``circ_mv``-weighted mean of ``pct_chg`` as a fraction.

    ``pct_chg`` is divided by 100, i.e. it is treated as a percentage.
    An empty (or zero-weight) group yields NaN.
    """
    total_weight = group['circ_mv'].sum()
    if total_weight == 0:
        return float('nan')
    return (group['pct_chg'] * group['circ_mv'] / 100).sum() / total_weight


def cal_smb_hml(df):
    """Compute the SMB and HML factor returns for one cross-section of stocks.

    Args:
        df: DataFrame with one row per stock and the columns ``circ_mv``
            (used both for the size split and as the portfolio weight),
            ``pb`` (price-to-book) and ``pct_chg`` (return in percent).
            The helper columns ``SB``, ``BM`` and ``HML`` are added to
            ``df`` in place.

    Returns:
        Tuple ``(smb, hml)`` of floats. Either value is NaN when one of the
        portfolios it is built from is empty.
    """
    # Size split: at or above the median is Big, everything else (including
    # rows with a missing circ_mv) is Small. The median is computed once.
    size_median = df['circ_mv'].median()
    df['SB'] = 'S'
    df.loc[df['circ_mv'] >= size_median, 'SB'] = 'B'

    # Book-to-market ratio is the reciprocal of price-to-book.
    df['BM'] = 1 / df['pb']

    # Value split at the 30th / 70th percentiles of book-to-market.
    border_down, border_up = df['BM'].quantile([0.3, 0.7])
    df['HML'] = 'M'
    df.loc[df['BM'] >= border_up, 'HML'] = 'H'
    # Assigned last so that 'L' wins if both thresholds coincide.
    df.loc[df['BM'] <= border_down, 'HML'] = 'L'

    # Value-weighted return of each of the six size/value portfolios,
    # keyed 'SL', 'SM', 'SH', 'BL', 'BM', 'BH'.
    returns = {
        size + value: _value_weighted_return(
            df[(df['SB'] == size) & (df['HML'] == value)]
        )
        for size in 'SB'
        for value in 'LMH'
    }

    # SMB: mean of the three small portfolios minus mean of the three big ones.
    smb = (returns['SL'] + returns['SM'] + returns['SH']
           - returns['BL'] - returns['BM'] - returns['BH']) / 3
    # HML: mean of the two high-BM portfolios minus mean of the two low-BM
    # ones. Only two portfolios sit on each side, so the divisor is 2.
    hml = (returns['SH'] + returns['BH'] - returns['SL'] - returns['BL']) / 2
    return smb, hml


# Collects one [trade_date, SMB, HML] row per trading day.
data = []
# Trading calendar for the sample window, narrowed to the rows where the
# exchange is "SSE" and the market was open.
df_cal = pro.trade_cal(start_date='20170101', end_date='20190110')
is_open_sse_day = (df_cal['exchange'] == 'SSE') & (df_cal['is_open'] == 1)
df_cal = df_cal[is_open_sse_day]

# For every trading day: pull the daily quotes and the daily fundamentals,
# join them per stock, and record that day's SMB / HML.
for date in df_cal['cal_date']:
    df_daily = pro.daily(trade_date=date)
    df_basic = pro.daily_basic(trade_date=date)
    df = df_daily.merge(df_basic, on='ts_code', how='inner')
    smb, hml = cal_smb_hml(df)
    data.append([date, smb, hml])
    print(date, smb, hml)

# Assemble the factor table, index it by date and persist it to disk.
df_tfm = (
    pd.DataFrame(data, columns=['trade_date', 'SMB', 'HML'])
    .assign(trade_date=lambda frame: pd.to_datetime(frame['trade_date']))
    .set_index('trade_date')
)
df_tfm.to_csv('df_three_factor_model.csv')
df_tfm.head()
# Fetch daily bars for five stocks and one index over the sample window.
wanke = pro.daily(ts_code='000002.SZ', start_date='20170101', end_date='20190110')
pingan = pro.daily(ts_code='601318.SH', start_date='20170101', end_date='20190110')
maotai = pro.daily(ts_code='600519.SH', start_date='20170101', end_date='20190110')
# NOTE(review): 600309.SH is Wanhua Chemical, matching the variable name and
# the display name used in the regression report; 002415.SZ is Hikvision.
wanhua = pro.daily(ts_code='600309.SH', start_date='20170101', end_date='20190110')
keda = pro.daily(ts_code='002230.SZ', start_date='20170101', end_date='20190110')
# Index series; it is used further down as the market regressor 'gzA'.
gzA = pro.index_daily(ts_code='399317.SZ', start_date='20170101', end_date='20190110')

# Keep only the return column of each frame, indexed by trade date,
# then sort by date in ascending order.
stock_list = [wanke, pingan, maotai, wanhua, keda, gzA]
for stock in stock_list:
    stock.index = pd.to_datetime(stock.trade_date)
# pct_chg is in percent; convert to fractional returns.
df_stock = pd.concat([stock.pct_chg / 100 for stock in stock_list], axis=1)
# Column labels must match the keys used by the regression loop ('keda').
df_stock.columns = ['wanke', 'pingan', 'maotai', 'wanhua', 'keda', 'gzA']
df_stock = df_stock.sort_index(ascending=True)
df_stock.head()

# Align the returns with the SMB / HML factor table on trade date.
df = pd.merge(df_stock, df_tfm, left_index=True, right_index=True, how='inner')
# Replace missing values with a zero return.
df = df.fillna(0)
# Daily risk-free rate derived from a 3.2% annual rate over 360 days.
rf = 1.032 ** (1 / 360) - 1
# Convert the stock and index columns to excess returns. SMB and HML are
# differences between portfolio returns, so the risk-free rate is not
# subtracted from them.
excess_cols = [col for col in df.columns if col not in ('SMB', 'HML')]
df[excess_cols] = df[excess_cols] - rf
# Keep a copy of the full sample before narrowing the window.
df2 = df.copy()
# Restrict the analysis to 2018-01-01 onwards.
df = df.loc['20180101':]
df.head()

# Correlation heatmap of all series.
sns.heatmap(df.corr(), cmap='bwr')

# Cumulative return of each series over the window.
plt.figure(figsize=(10, 5))
for col in df.columns:
    plt.plot((df[col] + 1).cumprod() - 1, label=col)
# The title year matches the start of the plotted window (2018-01-01).
plt.title('累計收益率時序圖(2018至今)', fontsize=20)
plt.legend()
import statsmodels.api as sm
# Display names for the regression report, keyed by the column name in df.
stock_names = {
    'wanke': '萬科A',
    'pingan': '中國平安',
    'maotai': '貴州茅臺',
    'wanhua': '萬華化學',
    'keda': '科大訊飛',
}

# Design matrix shared by every regression: intercept plus the three factors.
# Passing the DataFrame (not .values) keeps the column names, so the summary
# labels the coefficients const / gzA / SMB / HML.
exog = sm.add_constant(df[['gzA', 'SMB', 'HML']])

# Fit one OLS regression per stock and print its summary.
for stock in stock_names:
    model = sm.OLS(df[stock], exog)
    result = model.fit()
    print(stock_names[stock] + '\n')
    print(result.summary())
    print('\n\n')