機器學習第一篇線性回歸

數據集：公開的World Happiness Report | Kaggle中的happiness dataset2017.

目標：基于GDP值預測幸福指數。（單特征預測）

代碼：

文件一：prepare_for_traning.py

"""用于科學計算的一個庫，提供了多維數組對象以及操作函數"""
from utils.features import prepare_for_training
"""數據預處理的一個私庫"""class LinearRegression:def __init__(self,data,labels,polynomial_degree = 0,sinusoid_degree = 0,normalize_data = True):"""進行預處理操作:param data::param labels::param polynomial_degree::param sinusoid_degree::param normalize_data:"""(data_processed,features_mean,features_deviation) = prepare_for_training(data,polynomial_degree = 0,sinusoid_degree = 0,normalize_data = True)self.data = data_processedself.labels = labelsself.features_mean = features_meanself.features_deviation = features_deviationself.polynomial_degree = polynomial_degreeself.sinusoid_degree = sinusoid_degreeself.normalize_data = normalize_datanum_features = self.data.shape[1]self.theta = np.zeros((num_features,1))""""數據，學習率，訓練次數"""def train(self,alpha,num_iterations = 500):"""訓練模塊：梯度下降"""cost_history = self.gradient_descent(alpha,num_iterations)return self.theta,cost_historydef gradient_descent(self,alpha,num_iterations):"""迭代模塊"""cost_history = []for _ in range(num_iterations):self.gradient_step(alpha)cost_history.append(self.cost_function(self.data,self.labels))return cost_historydef gradient_step(self,alpha):"""梯度下降參數更新算法，矩陣計算，使用小批量梯度下降算法:param self::param alpha::return:"""num_examples = self.data.shape[0]prediction = LinearRegression.hypothesis(self.data,self.theta)delta = prediction - self.labelstheta = self.thetatheta = theta - alpha*(1/num_examples)*(np.dot(delta.T,self.data)).Tself.theta = thetadef cost_function(self,data,labels):"""損失計算模塊:param self::param data::param labels::return:"""num_examples = data.shape[0]delta = LinearRegression.hypothesis(self.data,self.theta) - labelscost = (1/2)*np.dot(delta.T,delta)/num_examples"""print(cost.shape)"""return cost[0][0]"""裝飾器"""@staticmethoddef hypothesis(data,theta):prediction = np.dot(data,theta)return predictiondef get_cost(self,data,labels):data_processed = prepare_for_training(data,self.polynomial_degree,self.sinusoid_degree,self.normalize_data)[0]return self.cost_function(data_processed,labels)def predict(self,data):data_processed = prepare_for_training(data,self.polynomial_degree,self.sinusoid_degree,self.normalize_data)[0]predictions = LinearRegression.hypothesis(data_processed,self.theta)return predictions

文件2：Linear_regression.py?

import numpy as np
"""用于科學計算的一個庫，提供了多維數組對象以及操作函數"""
import pandas as pd
"""一個用于數據導入、導出、清洗和分析的庫，本文中導入csv格式數據等等"""
import matplotlib.pyplot as plt
"""pyplot提供了繪圖接口"""
import matplotlib
"""一個強大的繪圖庫"""# 設置matplotlib正常顯示中文和負號
matplotlib.rcParams['font.family'] = 'SimHei'  # 指定默認字體為黑體
matplotlib.rcParams['axes.unicode_minus'] = False  # 正確顯示負號from prepare_for_training import LinearRegressiondata = pd.read_csv("D:/machine_learning/archive/2017.csv")
train_data = data.sample(frac = 0.8)
test_data = data.drop(train_data.index)input_param_name = 'Economy..GDP.per.Capita.'
output_param_name = 'Happiness.Score'x_train = train_data[[input_param_name]].values
y_train = train_data[[output_param_name]].valuesx_test = test_data[[input_param_name]].values
y_test = test_data[[output_param_name]].valuesplt.scatter(x_train,y_train,label ='Train data')
plt.scatter(x_test,y_test,label ='Test data')
plt.xlabel(input_param_name)
plt.ylabel(output_param_name)
plt.title('Happy')
plt.legend()
plt.show()"""訓練次數，學習率"""
num_iterations = 500
learning_rate = 0.01linear_regression = LinearRegression(x_train,y_train)
(theta,cost_history) = linear_regression.train(learning_rate,num_iterations)
print('開始時的損失',cost_history[0])
print('訓練后的損失',cost_history[-1])plt.plot(range(num_iterations),cost_history)
plt.xlabel('Iter')
plt.ylabel('cost')
plt.title('損失值')
plt.show()predictions_num = 100
x_predictions = np.linspace(x_train.min(),x_train.max(),predictions_num).reshape(predictions_num,1)
y_predictions = linear_regression.predict(x_predictions)plt.scatter(x_train,y_train,label ='Train data')
plt.scatter(x_test,y_test,label ='Test data')
plt.plot(x_predictions,y_predictions,'r',label = 'Prediction')
plt.xlabel(input_param_name)
plt.ylabel(output_param_name)
plt.title('Happy')
plt.legend()
plt.show()效果圖：

本文來自互聯網用戶投稿，該文觀點僅代表作者本人，不代表本站立場。本站僅提供信息存儲空間服務，不擁有所有權，不承擔相關法律責任。
如若轉載，請注明出處：http://www.pswp.cn/pingmian/78363.shtml
繁體地址，請注明出處：http://hk.pswp.cn/pingmian/78363.shtml
英文地址，請注明出處：http://en.pswp.cn/pingmian/78363.shtml

如若內容造成侵權/違法違規/事實不符，請聯系多彩編程網進行投訴反饋email:809451989@qq.com，一經查實，立即刪除！