3.6,3.7pandas合并concat&merge
頭文件:
import pandas as pd
import numpy as np
concat基礎合并用法
# Three 3x4 frames sharing the same column labels, filled with 0, 1 and 2.
df1 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['a', 'b', 'c', 'd'])
df2 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['a', 'b', 'c', 'd'])
df3 = pd.DataFrame(np.ones((3, 4)) * 2, columns=['a', 'b', 'c', 'd'])
# axis=0 stacks the frames vertically; ignore_index=True renumbers the rows 0..n-1.
res = pd.concat([df1, df2, df3], axis=0, ignore_index=True)

# Redefine df1/df2 with only partially overlapping column labels
# ('a'-'d' versus 'b'-'e') for the join example that follows.
df1 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['a', 'b', 'c', 'd'])
df2 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['b', 'c', 'd', 'e'])
concat中join用法
# join='outer' keeps the union of the column labels; labels a frame lacks are filled with NaN.
# join='inner' keeps only the column labels present in every frame.
res = pd.concat(
    objs=[df1, df2],
    join='outer',
    ignore_index=True,
)
join_axes (已在 pandas 1.0 移除,可改用 reindex 達到相同效果)
join_axes= [df1.index]設置合并后按照df1的索引進行保留
# res = pd.concat([df1,df2],axis=1,join_axes = [df1.index])
append添加數據
# Append whole DataFrames.
df1 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['a', 'b', 'c', 'd'])
df2 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['a', 'b', 'c', 'd'])
df3 = pd.DataFrame(np.ones((3, 4)) * 2, columns=['a', 'b', 'c', 'd'])
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the frames
# vertically and ignore_index=True renumbers the rows.
res = pd.concat([df1, df2, df3], ignore_index=True)

# Append a single row.
s1 = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
# Turn the Series into a one-row frame so its index labels line up with
# df1's columns before concatenating.
res = pd.concat([df1, s1.to_frame().T], ignore_index=True)
merge按照key合并
# Two frames that share a 'key' column and otherwise carry distinct columns.
left = pd.DataFrame({
    'key': ['K0', 'K1', 'K2', 'K3'],
    'A': ['A0', 'A1', 'A2', 'A3'],
    'B': ['B0', 'B1', 'B2', 'B3'],
})
right = pd.DataFrame({
    'key': ['K0', 'K1', 'K2', 'K3'],
    'C': ['C0', 'C1', 'C2', 'C3'],
    'D': ['D0', 'D1', 'D2', 'D3'],
})
# Combine the frames row by row wherever the 'key' values match.
res = left.merge(right, on='key')
print(res)
merge考慮兩個key,根據index進行數據合并
# Merging on two key columns needs frames that carry both 'key1' and 'key2';
# these sample frames share some key pairs and differ on others.
left = pd.DataFrame({
    'key1': ['K0', 'K0', 'K1', 'K2'],
    'key2': ['K0', 'K1', 'K0', 'K1'],
    'A': ['A0', 'A1', 'A2', 'A3'],
    'B': ['B0', 'B1', 'B2', 'B3'],
})
right = pd.DataFrame({
    'key1': ['K0', 'K1', 'K1', 'K2'],
    'key2': ['K0', 'K0', 'K0', 'K0'],
    'C': ['C0', 'C1', 'C2', 'C3'],
    'D': ['D0', 'D1', 'D2', 'D3'],
})
# The default how='inner' keeps only (key1, key2) pairs present in both frames.
res = pd.merge(left, right, on=['key1', 'key2'])
# how='outer' keeps every pair from either frame; missing values become NaN.
res = pd.merge(left, right, on=['key1', 'key2'], how='outer')
merge中indicator
indicator=True時,能展示合并后,哪部分是有某標簽數據,哪部分是沒有該標簽數據的
# Both frames must provide 'key1' and 'key2' columns for this merge.
# indicator=True adds a '_merge' column marking each row as 'left_only',
# 'right_only' or 'both'.
res = pd.merge(left, right, on=['key1', 'key2'], how='outer', indicator=True)
merge中left_index和right_index
根據數據索引進行結合
# Join on the row index of both frames instead of on a column;
# how='outer' keeps index labels found in either frame.
res = pd.merge(left, right, left_index=True, right_index=True, how='outer')
merge中suffixes
suffixes來區分標簽相同但是數值不同的數據
# Two frames with the same key column 'K' and a clashing 'age' column.
boys = pd.DataFrame({'K': ['K0', 'K1', 'K2'], 'age': [1, 2, 3]})
girls = pd.DataFrame({'K': ['K0', 'K1', 'K2'], 'age': [4, 5, 6]})
# suffixes renames the overlapping 'age' columns so each side stays identifiable.
res = pd.merge(boys, girls, on='K', suffixes=['_boy', '_girl'], how='inner')
3.8pandas plot畫圖
頭文件:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
Series的數據(類似數組)
# Draw 1000 standard-normal samples indexed 0..999 and accumulate them
# into a running sum before plotting.
data = pd.Series(
    np.random.randn(1000),
    index=np.arange(1000),
).cumsum()
data.plot()
plt.show()
DataFrame數據(類似矩陣)
折線圖:
# A 1000x4 frame of standard-normal samples with columns A-D, turned into
# per-column running sums and drawn as one line per column.
data = pd.DataFrame(
    np.random.randn(1000, 4),
    index=np.arange(1000),
    columns=list("ABCD"),
).cumsum()
data.plot()
plt.show()
散點圖
# First scatter series (A against B); keep the returned axes so the second
# series can be drawn on the same plot.
ax = data.plot(kind='scatter', x='A', y='B', color='DarkBlue', label='Class 1')
# Second scatter series (A against C), drawn on the axes created above.
data.plot(kind='scatter', x='A', y='C', color='DarkGreen', label='Class 2', ax=ax)
plt.show()