"""Train and evaluate a logistic-regression classifier on a breast-cancer CSV.

The script downloads the data file, drops rows with missing values,
splits the rows into train/test sets, standardizes the features, fits a
``LogisticRegression`` model, and prints the fitted estimator followed by
its score on the held-out test split.
"""
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

# --- Load the data ---------------------------------------------------------
# The file is read without a header row, so column names are supplied here.
# NOTE(review): a few names look misspelled ('Hpithelial', 'Nucle',
# 'Mitomeos'); they are runtime column labels, so they are left unchanged —
# confirm against the dataset documentation before renaming.
names = ['Sample code number', 'Clump Thickness', 'Uniformity of Cell Size',
         'Uniformity of Cell Shape', 'Marginal Adhesion',
         'Single Hpithelial Cell Size', 'Bare Nucle', 'Bland Chromatin',
         'Normal Nucleoli', 'Mitomeos', 'Class']
data = pd.read_csv(
    "https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/breast-cancer-wisconsin.data",
    names=names,
)

# --- Clean the data --------------------------------------------------------
# Missing values are encoded as "?"; turn them into NaN so that dropna()
# can remove the affected rows.
data = data.replace(to_replace="?", value=np.nan)
data = data.dropna()

# --- Separate features and target ------------------------------------------
# Features: every column except the first (sample code number) and the
# last (class label). Target: the "Class" column.
x = data.iloc[:, 1:-1]
y = data["Class"]

# --- Split into train and test sets ----------------------------------------
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=20
)

# --- Standardize the features ----------------------------------------------
# Fit the scaler on the training split only, then apply the SAME mean and
# variance to the test split. Re-fitting on the test data (fit_transform)
# would scale the two splits differently and leak test statistics into
# the evaluation.
transfer = StandardScaler()
x_train = transfer.fit_transform(x_train)
x_test = transfer.transform(x_test)

# --- Train the model -------------------------------------------------------
estimator = LogisticRegression()
ret = estimator.fit(x_train, y_train)
print(ret)

# --- Evaluate the model ----------------------------------------------------
# score() reports the classifier's accuracy on the held-out test split.
print(estimator.score(x_test, y_test))