Kaggle競賽之Titanic存活預測2

在上一個 baseline 的基礎上進行改進，提高代碼的規範性。

import pandas as pd
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split#數據劃分方法
from sklearn.ensemble import RandomForestClassifier#隨機森林
from sklearn.model_selection import GridSearchCV  # grid search with cross-validation


# Tip: run `pd.read_csv(file_path)` on its own and print the result to inspect
# the raw data before cleaning it.
def data_clean(file_path):
    """Load a CSV file and turn it into an all-numeric feature table.

    Parameters:
        file_path: path of the CSV data file.

    Returns:
        A DataFrame in which the ``PassengerId``, ``Name``, ``Ticket`` and
        ``Cabin`` columns are removed, missing ``Age`` / ``Embarked`` values
        are filled, ``Sex`` is binarized, the remaining non-numeric columns
        are one-hot encoded and ``Fare`` is standardized.
    """
    data = pd.read_csv(file_path)

    # Drop the columns that are not used as features.
    data = data.drop(["PassengerId", "Name", "Ticket", "Cabin"], axis=1)

    # Fill missing values by plain assignment: calling fillna(inplace=True) on
    # a selected column is chained assignment and may not update `data`.
    data["Age"] = data["Age"].fillna(data["Age"].mean())
    # mode() is a method returning a Series; take its first (most frequent) value.
    data["Embarked"] = data["Embarked"].fillna(data["Embarked"].mode()[0])

    # Preprocessing: binarize Sex, then one-hot encode what is still non-numeric.
    # The transformers return an (n, 1) array, so flatten it to a 1-D column.
    data["Sex"] = LabelBinarizer().fit_transform(data["Sex"]).ravel()
    data = pd.get_dummies(data)

    # Scale Fare to zero mean / unit variance.
    # NOTE(review): the scaler is fitted on every row of the file, i.e. before
    # any train/test split performed by the caller.
    fare = data["Fare"].values.reshape(-1, 1)
    data["Fare"] = StandardScaler().fit_transform(fare).ravel()
    return data


# Data splitting
def data_split(data, test_size=0.2, random_state=None):
    """Split a cleaned table into train/test features and labels.

    Parameters:
        data: DataFrame to split; must contain a ``Survived`` column, which
            is used as the label.
        test_size: fraction of rows placed in the test set (default 0.2).
        random_state: optional seed forwarded to ``train_test_split`` to make
            the split reproducible (default ``None`` = different every run).

    Returns:
        Tuple ``(x_train, x_test, y_train, y_test)``.
    """
    x = data.drop(["Survived"], axis=1)
    y = data["Survived"]
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=test_size, random_state=random_state
    )
    return (x_train, x_test, y_train, y_test)


# Model building
def model_fit(x, y):
    """Tune a random forest with a cross-validated grid search.

    Parameters:
        x: feature table.
        y: labels.

    Returns:
        Tuple ``(best_params, best_estimator)`` taken from the fitted
        ``GridSearchCV`` object.
    """
    # Candidate hyper-parameters: one grid with the classifier's default
    # bootstrap setting and a smaller one with bootstrap disabled.
    param_grid = [
        {"n_estimators": [3, 10, 30], "max_features": [2, 4, 6, 8]},
        {"bootstrap": [False], "n_estimators": [3, 10], "max_features": [2, 4, 6]},
    ]
    # Model plus the number of cross-validation folds.
    grid_search = GridSearchCV(RandomForestClassifier(), param_grid, cv=5)
    grid_search.fit(x, y)
    return (grid_search.best_params_, grid_search.best_estimator_)


def main():
    """Clean the data, tune the model and report accuracy on the held-out set."""
    data = data_clean("data/train.csv")
    x_train, x_test, y_train, y_test = data_split(data)

    # Use the estimator chosen by the grid search instead of re-typing its
    # hyper-parameters by hand, which silently goes stale when the grid or
    # the data changes.
    best_params, best_model = model_fit(x_train, y_train)
    print("best params:", best_params)
    print("test accuracy:", best_model.score(x_test, y_test))


# Program entry point: only train when run as a script, not on import.
if __name__ == '__main__':
    main()

模型融合（ensemble）的做法，可參考 scikit-learn 官方文檔中 ensemble 模組的說明。

本文來自互聯網用戶投稿，該文觀點僅代表作者本人，不代表本站立場。本站僅提供信息存儲空間服務，不擁有所有權，不承擔相關法律責任。
如若轉載，請注明出處：http://www.pswp.cn/news/714699.shtml
繁體地址，請注明出處：http://hk.pswp.cn/news/714699.shtml
英文地址，請注明出處：http://en.pswp.cn/news/714699.shtml

如若內容造成侵權/違法違規/事實不符，請聯繫多彩編程網進行投訴反饋email:809451989@qq.com，一經查實，立即刪除！