介紹[1]
R和python是兩種不同的編程語言,前者是統計學家發明并且服務數學統計計算,后者則是最萬能的膠水語言。隨著大數據時代的到來,兩者在數據分析領域存在越來越多的共同點且可以相互使用,為了破解二者的編程壁壘,CRAN收錄了具有R接口的python包,從而使得兩類語言的數據能共同使用。
reticulate[2] 是用于Python和R之間協同操作的全套工具,在R和RStudio中均可使用;要求RStudio必須在1.2版本以上:
install.packages("reticulate")
library(reticulate)
特性
- reticulate 在R中支持多種方式調用python;
- 實現R和python對象之間的轉換;
- 隨意切換不同版本的python;
- R內使用 `$` 調用python對象;
- python內使用 `.` 調用R對象;
- 使用import函數導入python模塊,例如 `import("os")`;
- source_python()獲取任何Python腳本;
- 使用repl_python()交互使用python;
范例
R和python對同一數據進行可視化,可視化圖形包括scatterplot、boxplot、barplot和heatmap等
散點圖
R代碼
# Scatter plot of the built-in iris data: sepal width vs. petal width,
# coloured by species.
library(dplyr)
library(ggplot2)

iris %>%
  # Pin the factor level order so the manual colours below map to
  # setosa / versicolor / virginica in that order.
  mutate(
    Species = factor(Species, levels = c("setosa", "versicolor", "virginica"))
  ) %>%
  ggplot(aes(x = Sepal.Width, y = Petal.Width, color = Species)) +
  geom_point() +
  # Empty legend title and compact legend keys.
  guides(color = guide_legend("", keywidth = .5, keyheight = .5)) +
  labs(title = "Scatter plot") +
  theme_bw() +
  scale_color_manual(values = c("red", "green", "blue")) +
  theme(
    plot.title = element_text(
      size = 10, color = "black", face = "bold", hjust = 0.5
    ),
    axis.title = element_text(size = 10, color = "black", face = "bold"),
    axis.text = element_text(size = 9, color = "black"),
    text = element_text(size = 8, color = "black"),
    strip.text = element_text(size = 9, color = "black", face = "bold"),
    panel.grid = element_blank(),
    # Anchor the legend's top-right corner to the panel's top-right corner.
    legend.position = c(1, 1),
    legend.justification = c(1, 1),
    legend.background = element_rect(fill = "white", color = "black")
  )
Python代碼
# Scatter plot of iris (sepal width vs. petal width), one colour per species.
# Runs inside a reticulate Python session: objects in the R session are
# reachable through the `r` object, e.g. r.iris.
import numpy as np
import matplotlib.pyplot as plt

dat = r.iris
# Encode the species names as integers so rows can be selected per group.
species_map = {'setosa': 1, 'versicolor': 2, 'virginica': 3}
dat['Species'] = dat['Species'].map(species_map)

# Alternative kept from the original: a single scatter call coloured by the
# integer species code.
# plt.scatter(dat['Sepal.Width'], dat['Petal.Width'], c=dat['Species'],
#             alpha=0.8, edgecolors='none', s=30, label=["1", "2", "3"])
# plt.title('Scatter plot in iris')
# plt.xlabel('Sepal.Width (cm)')
# plt.ylabel('Petal.Width (cm)')
# plt.legend(loc=1)
# plt.show()

# One (x, y) pair of arrays per species.
dat1 = (np.array(dat[dat.Species == 1]['Sepal.Width']),
        np.array(dat[dat.Species == 1]['Petal.Width']))
dat2 = (np.array(dat[dat.Species == 2]['Sepal.Width']),
        np.array(dat[dat.Species == 2]['Petal.Width']))
dat3 = (np.array(dat[dat.Species == 3]['Sepal.Width']),
        np.array(dat[dat.Species == 3]['Petal.Width']))

mdat = (dat1, dat2, dat3)
colors = ("red", "green", "blue")
groups = ("setosa", "versicolor", "virginica")

# step1 build figure background
fig = plt.figure()

# step2 build axis
ax = fig.add_subplot(1, 1, 1, facecolor='1.0')

# step3 build figure: one scatter layer per species so each gets its own
# legend entry.
for data, color, group in zip(mdat, colors, groups):
    x, y = data
    ax.scatter(x, y, alpha=0.8, c=color, edgecolors='none', s=30, label=group)

plt.title('Scatter plot')
plt.legend(loc=1)

# step4 show figure in the screen
plt.show()
箱形圖
R代碼
# Box plot of iris sepal width per species, with whisker end caps.
library(dplyr)
library(ggplot2)

iris %>%
  # Pin the factor level order so the manual fill colours map to
  # setosa / versicolor / virginica in that order.
  mutate(
    Species = factor(Species, levels = c("setosa", "versicolor", "virginica"))
  ) %>%
  ggplot(aes(x = Species, y = Sepal.Width, fill = Species)) +
  # Draw error bars first so the boxes are painted on top of them,
  # leaving only the horizontal caps at the whisker ends visible.
  stat_boxplot(geom = "errorbar", width = .12) +
  geom_boxplot(width = .3, outlier.shape = 3, outlier.size = 1) +
  # No legend title and compact legend keys.
  guides(fill = guide_legend(NULL, keywidth = .5, keyheight = .5)) +
  xlab("") +
  theme_bw() +
  scale_fill_manual(values = c("red", "green", "blue")) +
  theme(
    plot.title = element_text(
      size = 10, color = "black", face = "bold", hjust = 0.5
    ),
    axis.title = element_text(size = 10, color = "black", face = "bold"),
    axis.text = element_text(size = 9, color = "black"),
    text = element_text(size = 8, color = "black"),
    strip.text = element_text(size = 9, color = "black", face = "bold"),
    panel.grid = element_blank(),
    # Anchor the legend's top-right corner to the panel's top-right corner.
    legend.position = c(1, 1),
    legend.justification = c(1, 1),
    legend.background = element_rect(fill = "white", color = "black")
  )
Python代碼
# Box plot of iris sepal width per species.
# Runs inside a reticulate Python session: objects in the R session are
# reachable through the `r` object, e.g. r.iris.
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

dat = r.iris
# Encode the species names as integers so rows can be selected per group.
species_map = {'setosa': 1, 'versicolor': 2, 'virginica': 3}
dat['Species'] = dat['Species'].map(species_map)

# One array of sepal widths per species.
dat11 = np.array(dat[dat.Species == 1]['Sepal.Width'])
dat21 = np.array(dat[dat.Species == 2]['Sepal.Width'])
dat31 = np.array(dat[dat.Species == 3]['Sepal.Width'])

mdat2 = (dat11, dat21, dat31)
colors = ("red", "green", "blue")
groups = ("setosa", "versicolor", "virginica")

fig = plt.figure()
axes = fig.add_subplot(facecolor='1.0')
# patch_artist=True makes the boxes fillable patches so they can be coloured.
bplot = axes.boxplot(
    mdat2, patch_artist=True, notch=False, sym='+', vert=True, whis=1.5,
    whiskerprops=dict(linestyle='--', linewidth=1.2, color='black'))

# color
for patch, color in zip(bplot['boxes'], colors):
    patch.set_facecolor(color)

# axes labels
plt.setp(axes, xticks=[1, 2, 3],
         xticklabels=["setosa", "versicolor", "virginica"])

# Legend built from proxy patches, one per species colour.
red_patch = mpatches.Patch(color='red', label='setosa')
green_patch = mpatches.Patch(color='green', label='versicolor')
blue_patch = mpatches.Patch(color='blue', label='virginica')
plt.legend(handles=[red_patch, green_patch, blue_patch], loc=1)

plt.show()
條形圖
R代碼
# Bar plot of mean iris sepal width per species, with +/- 1 SD error bars.
library(dplyr)
library(ggplot2)

iris %>%
  # Pin the factor level order so the manual fill colours map to
  # setosa / versicolor / virginica in that order.
  mutate(
    Species = factor(Species, levels = c("setosa", "versicolor", "virginica"))
  ) %>%
  select(Species, Sepal.Width) %>%
  group_by(Species) %>%
  # Per-species mean, count, sample SD and standard error.
  summarize(
    avg = mean(Sepal.Width),
    n = n(),
    sd = sd(Sepal.Width),
    se = sd / sqrt(n)
  ) %>%
  ungroup() %>%
  ggplot(aes(x = Species, y = avg, fill = Species)) +
  geom_bar(stat = "identity", width = .4, color = "black") +
  geom_errorbar(
    aes(ymin = avg - sd, ymax = avg + sd),
    width = .15, position = position_dodge(.9), size = 1
  ) +
  # No legend title and compact legend keys.
  guides(fill = guide_legend(NULL, keywidth = .5, keyheight = .5)) +
  xlab("") +
  ylab("Sepal.Width") +
  # Bars start exactly at y = 0 (no axis expansion); head-room up to 4.4
  # leaves space for the legend.
  scale_y_continuous(
    breaks = seq(0, 3.5, 0.5), limits = c(0, 4.4), expand = c(0, 0)
  ) +
  theme_bw() +
  scale_fill_manual(values = c("red", "green", "blue")) +
  theme(
    axis.title = element_text(size = 10, color = "black", face = "bold"),
    axis.text = element_text(size = 9, color = "black"),
    text = element_text(size = 8, color = "black"),
    strip.text = element_text(size = 9, color = "black", face = "bold"),
    panel.grid = element_blank(),
    # Anchor the legend's top-right corner to the panel's top-right corner.
    legend.position = c(1, 1),
    legend.justification = c(1, 1),
    legend.background = element_rect(fill = "white", color = "black")
  )
Python代碼
# Bar plot of mean iris sepal width per species with standard-deviation
# error bars.
# Runs inside a reticulate Python session: objects in the R session are
# reachable through the `r` object, e.g. r.iris.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Needed for the legend proxy patches below; imported here so this snippet
# does not depend on an earlier snippet having been run.
import matplotlib.patches as mpatches

dat = r.iris
# Encode the species names as integers; groupby then orders groups 1, 2, 3.
species_map = {'setosa': 1, 'versicolor': 2, 'virginica': 3}
dat['Species'] = dat['Species'].map(species_map)

mean = list(dat['Sepal.Width'].groupby(dat['Species']).mean())
# ddof=0 gives the population standard deviation.
# NOTE(review): the R version of this plot uses sd(), i.e. the sample SD
# (ddof=1), so the error bars differ slightly between the two figures.
sd = list(dat.groupby('Species')['Sepal.Width'].std(ddof=0))

df = pd.DataFrame({'mean': mean},
                  index=["setosa", "versicolor", "virginica"])
df.plot(kind='bar', alpha=0.75, rot=0, edgecolor='black', yerr=sd,
        align='center', ecolor='black', capsize=5,
        color=("red", "green", "blue"),
        ylim=(0.0, 4.4),
        yticks=list(np.arange(0, 4.0, 0.5)))

# xlabel
plt.xlabel('')
plt.ylabel('Sepal.Width')

# legend
red_patch = mpatches.Patch(color='red', label='setosa')
green_patch = mpatches.Patch(color='green', label='versicolor')
blue_patch = mpatches.Patch(color='blue', label='virginica')
plt.legend(handles=[red_patch, green_patch, blue_patch],  # color and group
           loc=1,                                          # location
           prop={'size': 8})                               # size
plt.show()
心得
初次使用reticulate的感覺還不錯,可以比較完美地串聯R和Python,尤其是在Rmarkdown文件內使用R和Python代碼,但缺點也很明顯:
- 運行Python cell沒有詳細報錯信息;
- 粗略的報錯提示行信息不以Rmd文件整體行作為開始;
- 無法兼容帶有漢字的注釋信息;
- 無法像R一樣查看python環境下變量;
- 出錯后有時無任何報錯信息
根據Visual Studio Code的最新python插件公布情況看,以后VS Code可以完美兼容Jupyter notebook格式文件,因此如果想單獨使用python但無較好的交互編輯器,可以使用VS Code的python插件讀取ipynb文件[3]。
參考
[1] https://zhuanlan.zhihu.com/p/35049732
[2] https://github.com/rstudio/reticulate
[3] https://code.visualstudio.com/docs/python/jupyter-support