一、線性回歸原理
二、python實現線性回歸
1.基本矩陣運算
pratice1.py:
# Author:WYC
# Demo of basic numpy matrix operations: product, transpose, reshape,
# inverse, and row/column slicing.
import numpy as np
from numpy.linalg import inv
from numpy import dot
from numpy import mat
print('-------------給定矩陣A,B----------')
A = mat([[1, 1]])                 # 1x2 row vector
print ('A:\n',A)
B = np.mat([[1, 2], [2, 3]])      # 2x2 symmetric matrix
print ('B:\n',B)
print('--------------矩陣乘法-----------')
print('A.B:\n', A * B)            # for np.matrix, * is matrix multiplication
print('--------------矩陣變形----------')
print('A.T:\n', A.transpose())
print('A.reshape(2,1):\n', A.reshape((2, 1)))
print('B.reshape(1,4):\n', B.reshape((1, 4)))
print('B的逆:\n', inv(B))
print('B[0,:]:\n', B[0, :])       # first row
print('B[:,0]:\n', B[:, 0])       # first column
# print('A.B:', dot(B, A))        # would fail: shapes (2,2) x (1,2) not aligned
2.實現最小二乘法
pratice2.py:
# Author:WYC
# Ordinary least squares for the noiseless model y = 2x, solved in closed
# form via the normal equation.
import numpy as np
from numpy.linalg import inv
from numpy import dot
from numpy import mat
# Single feature column x = [1, 2, 3]^T.
X = mat([[1], [2], [3]])
# Targets generated exactly from y = 2x.
Y = 2 * X
# Normal equation: theta = (X^T X)^{-1} X^T Y; expected result is 2.
gram_inv = inv(X.T * X)
theta = gram_inv * X.T * Y
print(theta)
3.實現梯度下降法
pratice3.py:
# Author:WYC
# Batch gradient descent for the one-parameter model y = theta * x
# (true theta = 2), trained on three noiseless samples.
import numpy as np
from numpy.linalg import inv
from numpy import dot
from numpy import mat
# Training data generated from y = 2x.
X = mat([1, 2, 3]).reshape(3, 1)
Y = 2 * X
# Update rule: theta += alpha * sum((y_i - theta*x_i) * x_i) / m
theta = 1.
alpha = 0.1
m = X.shape[0]
for i in range(100):
    residual = Y - dot(X, theta)   # (3,1) prediction errors
    # np.multiply is elementwise. The original wrote
    # `residual * X.reshape(1,3)`, and with np.matrix operands `*` is a
    # matrix (outer) product, so the summed "gradient" was
    # sum(residual) * sum(x) instead of sum(residual * x).
    theta = theta + alpha * np.sum(np.multiply(residual, X)) / m
print(theta)
4.回歸分析實戰
注:從筆記上copy了一位網友的數據生成代碼,但列數不夠,缺少y和x0部分,故進行了修改。後面多次試驗用梯度下降方法求解theta時都得到NaN的結果;經過調試,發現可能是小數保留位數太多所致,所以用round函數保留一位小數,做到和講解的數據一致:
data.py:
# Author:WYC
# Generate a synthetic linear-regression dataset and write it to data.csv.
import random


def Y(X0, X1, X2, X3):
    """Ground-truth target y = 0.65*x1 + 0.70*x2 - 0.55*x3 + 1.95.

    X0 is the sample index and is intentionally unused; it is accepted
    only so the call site can pass all four columns uniformly.
    """
    return 0.65 * X1 + 0.70 * X2 - 0.55 * X3 + 1.95


def Produce():
    """Write 200 samples to data.csv, one decimal place per value.

    Rounding to one decimal keeps the data consistent with the tutorial
    and avoids numeric blow-ups in the gradient-descent demo.
    """
    filename = 'data.csv'
    with open(filename, 'w') as file:
        # NOTE(review): the trailing comma produces an extra, empty
        # trailing column when read back with pandas — kept as-is because
        # the downstream scripts index columns positionally.
        file.write('X0,Y,X1,X2,X3,\n')
        for i in range(200):
            random.seed()
            x0 = i
            # Features drawn uniformly from [0, 2), one decimal place.
            x1 = round(random.random() * 2, 1)
            x2 = round(random.random() * 2, 1)
            x3 = round(random.random() * 2, 1)
            y = round(Y(x0, x1, x2, x3), 1)
            try:
                file.write(str(x0) + ',' + str(y) + ',' + str(x1) + ',' + str(x2) + ',' + str(x3) + '\n')
            except Exception as e:
                # The original `except e:` is invalid: `e` is undefined at
                # handling time and raises NameError instead of reporting.
                print ('Write Error')
                print (str(e))


if __name__ == '__main__':
    Produce()
    # Preview the generated csv; per the author, these lines are optional.
    import pandas as pd
    dataset = pd.read_csv('data.csv')
    print(dataset)
獲得x
獲得y
通過最小二乘法計算theta值
# Author:WYC
# Fit theta for data.csv with the closed-form least-squares solution
# (normal equation). Expected result (per data.py): roughly
# [1.95, 0.65, 0.70, -0.55] for [X0, X1, X2, X3].
import numpy as np
from numpy.linalg import inv
from numpy import dot
from numpy import mat
import pandas as pd

dataset = pd.read_csv('data.csv')
# print(dataset)
# Feature columns X1..X3. .copy() makes `temp` an independent frame:
# the original chained assignment on an iloc slice triggers
# SettingWithCopyWarning in pandas.
temp = dataset.iloc[:, 2:5].copy()
temp['X0'] = 1  # bias (intercept) column
# Reorder to [X0, X1, X2, X3] so theta[0] is the intercept.
X = temp.iloc[:, [3, 0, 1, 2]]
# Target as an (m, 1) column vector; reshape(-1, 1) adapts to any row
# count (the original hard-coded 200).
Y = dataset.iloc[:, 1].values.reshape(-1, 1)
# Normal equation: theta = (X^T X)^{-1} X^T Y
theta = dot(dot(inv(dot(X.T, X)), X.T), Y)
print(theta)
通過梯度下降法計算theta值
pratice4.py全部代碼如下:
# Author:WYC
# Fit theta for data.csv two ways: closed-form normal equation, then
# batch gradient descent. Both should approach [1.95, 0.65, 0.70, -0.55].
import numpy as np
from numpy.linalg import inv
from numpy import dot
from numpy import mat
import pandas as pd

dataset = pd.read_csv('data.csv')
# Feature columns X1..X3 plus a bias column, ordered [X0, X1, X2, X3].
# .copy() avoids pandas' SettingWithCopyWarning on the slice assignment.
temp = dataset.iloc[:, 2:5].copy()
temp['X0'] = 1
X = temp.iloc[:, [3, 0, 1, 2]]
# Target as an (m, 1) column vector; -1 adapts to any row count
# (the original hard-coded 200).
Y = dataset.iloc[:, 1].values.reshape(-1, 1)

# --- Closed-form least squares (normal equation), for reference ---
theta = dot(dot(inv(dot(X.T, X)), X.T), Y)
print(theta)

# --- Batch gradient descent ---
# The original kept `temp = theta` as a "cache" intending a simultaneous
# update of all four components, but that is an alias, not a copy, so the
# per-component updates were actually applied sequentially in place. The
# vectorized rule below is the intended simultaneous update:
#   theta += alpha * X^T (Y - X theta) / m
Xm = X.values                                   # (m, 4) ndarray
m = len(Y)
theta = np.array([1., 1., 1., 1.]).reshape(4, 1)
alpha = 0.1
for i in range(1000):
    theta = theta + alpha * dot(Xm.T, Y - dot(Xm, theta)) / m
print(theta)
(完結)