Comparison with classic linear regression
import matplotlib.pyplot as plt
%matplotlib inline
import tensorflow as tf
import numpy as np
from sklearn.linear_model import LinearRegression
#from sklearn.datasets.samples_generator import make_regression
Xdata = np.array([4.0, 4.5, 5.0, 5.5, 6.0, 6.5, 7.0])
Ydata = np.array([33, 42, 45, 51, 53, 61, 62])
plt.figure(figsize=(8,5))
plt.scatter(Xdata,Ydata, s = 80)
plt.xlabel('x', fontsize= 16)
plt.tick_params(labelsize=16)
These points look like they lie on a straight line, so linear regression might be useful.
Construction phase
Let's build the neural network. It has a single neuron with a linear activation function.
# Define the model function
def model(x, w, b):
    return tf.multiply(x, w) + b

# Mean squared error loss
def loss_fun(x, y, w, b):
    err = model(x, w, b) - y
    squared_err = tf.square(err)
    return tf.reduce_mean(squared_err)

# Gradients of the loss with respect to w and b
def grad(x, y, w, b):
    with tf.GradientTape() as tape:
        loss_ = loss_fun(x, y, w, b)
    return tape.gradient(loss_, [w, b])
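As a side note, the single-neuron model just defined by hand could also be written with the Keras API; this is only a sketch for comparison and is not used in the rest of the notebook:
# A Keras sketch of the same model: one Dense unit with a linear activation.
# Not used below; the notebook keeps the explicit tf.GradientTape version.
keras_model = tf.keras.Sequential([
    tf.keras.layers.Dense(1, activation='linear', input_shape=(1,))
])
keras_model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.03), loss='mse')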
We need a few additional pieces:
- the loss function J
- an update rule that minimizes the loss function
Both are written out below.
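In formulas, loss_fun above is the mean squared error over the m training points, and each training step moves w and b against its gradient, scaled by the learning rate $\gamma$:

$$J(w,b)=\frac{1}{m}\sum_{i=1}^{m}\bigl(w x_i + b - y_i\bigr)^2,\qquad w \leftarrow w-\gamma\,\frac{\partial J}{\partial w},\qquad b \leftarrow b-\gamma\,\frac{\partial J}{\partial b}$$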
Reshaping the dataset
We want X and Y to be tensors with a single row. Let's check:
print(Xdata.shape)
print(Ydata.shape)
That is not what we want, so let's reshape them:
x = Xdata.reshape(1,-1)
y = Ydata.reshape(1,-1)
print(x.shape)
print(y.shape)
Now both shapes are (1, 7), which is what we wanted. To compare different learning rates, let's wrap the training in a function.
def run_linear_model(learning_rate, train_epochs, x, y, debug=False):
    # Slope and intercept of the linear model
    w = tf.Variable(np.random.randn(), dtype=tf.float32)
    b = tf.Variable(0.0, dtype=tf.float32)
    loss = []
    count = 0
    display_count = 10  # print the loss every 10 training steps
    # Training loop: train_epochs epochs of (stochastic) gradient descent
    for epoch in range(train_epochs):
        for xs, ys in zip(x, y):
            # Compute the loss and record it
            loss_ = loss_fun(xs, ys, w, b)
            loss.append(loss_)
            # Gradients of the loss with respect to [w, b]
            delta_w, delta_b = grad(xs, ys, w, b)
            change_w = delta_w * learning_rate
            change_b = delta_b * learning_rate
            w.assign_sub(change_w)
            b.assign_sub(change_b)
            # One more training step done
            count = count + 1
            if debug and count % display_count == 0:
                print('train epoch : ', '%02d' % (epoch + 1), 'step:%03d' % (count),
                      'loss= ', '{:.9f}'.format(loss_))
    return loss
ch = run_linear_model(0.1, 1000, x, y, True)
Interesting... Let's try a smaller learning rate.
ch1 = run_linear_model(1e-3, 1000, x, y, True)
ch2 = run_linear_model(1e-3, 5000, x, y, True)
Check J... it keeps decreasing.
Plotting the loss function
plt.rc('font', family='arial')
plt.rc('xtick', labelsize='x-small')
plt.rc('ytick', labelsize='x-small')
plt.tight_layout()
fig = plt.figure(figsize=(10, 7))
ax = fig.add_subplot(1, 1, 1)
ax.plot(ch1, ls='solid', color = 'black')
ax.plot(ch2, ls='solid', color = 'red')
ax.set_xlabel('epochs', fontsize = 16)
ax.set_ylabel('Cost function $J$ (MSE)', fontsize = 16)
plt.xlim(0,1000)
plt.tick_params(labelsize=16)
You can hardly see any difference, so let's zoom in.
plt.rc('font', family='arial')
plt.rc('xtick', labelsize='x-small')
plt.rc('ytick', labelsize='x-small')
plt.tight_layout()
fig = plt.figure(figsize=(10, 7))
ax = fig.add_subplot(1, 1, 1)
ax.plot(ch2, ls='solid', color = 'red')
ax.plot(ch1, ls='solid', color = 'black')
ax.set_ylim(3,3.6)
ax.set_xlabel('epochs', fontsize = 16)
ax.set_ylabel('Cost function $J$ (MSE)', fontsize = 16)
plt.xlim(0,5000)
plt.tick_params(labelsize=16)
Notice that the smaller the learning rate, the slower the convergence... Let's try a faster one...
ch3 = run_linear_model(1e-2, 5000, x, y, True)
plt.rc('font', family='arial')
plt.rc('xtick', labelsize='x-small')
plt.rc('ytick', labelsize='x-small')
plt.tight_layout()
fig = plt.figure(figsize=(10, 7))
ax = fig.add_subplot(1, 1, 1)
ax.plot(ch3, ls='solid', lw = 3, color = 'blue', label = r"$\gamma = 10^{-2}$ up to 5000 epochs")
ax.plot(ch2, ls='solid', lw = 3, color = 'red', label = r"$\gamma = 10^{-3}$ up to 5000 epochs")
ax.plot(ch1, ls='--', lw = 5, color = 'black',label = r"$\gamma = 10^{-3}$, up to 1000 epochs")
ax.set_ylim(2.6,3.6)
ax.set_xlabel('epochs', fontsize = 16)
ax.set_ylabel('Cost function $J$ (MSE)', fontsize = 16)
plt.xlim(0,5000)
plt.legend(fontsize = 16)
plt.tick_params(labelsize=16)
Now the curve is almost flat...
Trying to find the best parameters
ch5 = run_linear_model(0.03, 5000, x, y, True)
# Slope and intercept of the linear function
w = tf.Variable(np.random.randn(), dtype=tf.float32)
b = tf.Variable(0.0, dtype=tf.float32)
# Number of epochs and learning rate
train_epochs = 5000
learning_rate = 0.03
loss = []
count = 0
display_count = 10  # print the loss every 10 training steps
# Training loop: train_epochs epochs of (stochastic) gradient descent
for epoch in range(train_epochs):
    for xs, ys in zip(x, y):
        # Compute the loss and record it
        loss_ = loss_fun(xs, ys, w, b)
        loss.append(loss_)
        # Gradients of the loss with respect to [w, b]
        delta_w, delta_b = grad(xs, ys, w, b)
        change_w = delta_w * learning_rate
        change_b = delta_b * learning_rate
        w.assign_sub(change_w)
        b.assign_sub(change_b)
        # One more training step done
        count = count + 1
        if count % display_count == 0:
            print('train epoch : ', '%02d' % (epoch + 1), 'step:%03d' % (count),
                  'loss= ', '{:.9f}'.format(loss_))
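As a quick optional check (not in the original cell), the last recorded loss value shows where this run ended up:
# Each entry of `loss` is a TensorFlow scalar; .numpy() converts it for printing
print('final loss:', loss[-1].numpy())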
plt.rc('font', family='arial')
plt.rc('xtick', labelsize='x-small')
plt.rc('ytick', labelsize='x-small')
plt.tight_layout()
fig = plt.figure(figsize=(10, 7))
ax = fig.add_subplot(1, 1, 1)
ax.plot(ch5, ls='solid', lw = 3, color = 'green', label = r"$\gamma = 0.03$ up to 5000 epochs")
ax.plot(ch3, ls='solid', lw = 3, color = 'blue', label = r"$\gamma = 10^{-2}$ up to 5000 epochs")
ax.plot(ch2, ls='solid', lw = 3, color = 'red', label = r"$\gamma = 10^{-3}$ up to 5000 epochs")
ax.plot(ch1, ls='--', lw = 5, color = 'black',label = r"$\gamma = 10^{-3}$, up to 1000 epochs")
ax.set_ylim(2.6,3.6)
ax.set_xlabel('epochs', fontsize = 16)
ax.set_ylabel('Cost function $J$ (MSE)', fontsize = 16)
plt.xlim(0,5000)
plt.legend(fontsize = 16)
plt.tick_params(labelsize=16)
It looks like the values reached by the green and blue curves are attainable... so those learning rates are good candidates...
pred_y = model(x, w.numpy(), b.numpy())
mse_y = tf.reduce_mean(tf.square(pred_y - y))
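The original cell never displays mse_y; an optional one-line check prints it:
# Mean squared error of the fitted model on the training points
print('MSE:', mse_y.numpy())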
plt.rc('font', family='arial')
plt.rc('xtick', labelsize='x-small')
plt.rc('ytick', labelsize='x-small')
plt.tight_layout()
fig = plt.figure(figsize=(10, 7))
ax = fig.add_subplot(1, 1, 1)
ax.scatter(y, pred_y, lw = 5)
ax.plot([y.min(), y.max()], [y.min(), y.max()], 'k--', lw = 5)
ax.set_xlabel('Measured Target Value', fontsize = 16)
ax.set_ylabel('Predicted Target Value', fontsize = 16)
plt.tick_params(labelsize=16)
How do we find the weights?
Usually, with a linear regression, we are interested in the parameters themselves. With neural networks we normally care only about the predictions, because there are far too many parameters, but it is instructive to see how the parameters can be read off the model. Our linear equation is

y = wx + b

where the weight is stored in the tf.Variable w and the bias in the tf.Variable b, so to obtain them we simply ask TensorFlow for their current values.

The parameters are
# Show the training results
print('w: ',w.numpy())
print('b: ',b.numpy())
So we can plot the data together with the best fit:
x_ = np.arange(4, 7, 0.05).reshape(1,-1)
yfit_ = model(x_ ,w.numpy(),b.numpy())
plt.rc('font', family='arial')
plt.rc('xtick', labelsize='x-small')
plt.rc('ytick', labelsize='x-small')
fig = plt.figure(figsize=(8, 5))
ax = fig.add_subplot(1, 1, 1)
ax.plot(x_[0], yfit_[0], label = "Linear Regression")
ax.scatter (x,y, color = 'red', s = 80, label = "True Data")
ax.set_xlabel('x', fontsize = 16)
ax.set_ylabel('y', fontsize = 16)
plt.legend(fontsize = 16)
plt.tick_params(labelsize=16)
Comparison with the classic linear regression
Let's compare with the result of a classic linear regression done with sklearn.
xt = x.reshape(7,-1)
yt = y.reshape(7,-1)
reg = LinearRegression().fit(xt,yt)
reg.score(xt,yt)
reg.coef_
reg.intercept_
xt_ = x_[0].reshape(60,-1)
yfitsk_ = reg.predict(xt_.reshape(60,-1))
plt.rc('font', family='arial')
plt.rc('xtick', labelsize='x-small')
plt.rc('ytick', labelsize='x-small')
fig = plt.figure(figsize=(8, 5))
ax = fig.add_subplot(1, 1, 1)
ax.plot(x_[0], yfit_[0], label = "Linear Regression")
ax.plot(x_[0], yfitsk_, label = "sklearn Linear Regression")
ax.scatter (x,y, color = 'red', s = 80, label = "True Data")
ax.set_xlabel('x', fontsize = 16)
ax.set_ylabel('y', fontsize = 16)
plt.legend(fontsize = 16)
plt.tick_params(labelsize=16)
ch4 = run_linear_model(1e-2, 15000, x, y, True)
# Slope and intercept of the linear function
w = tf.Variable(np.random.randn(), dtype=tf.float32)
b = tf.Variable(0.0, dtype=tf.float32)
# Number of epochs and learning rate
train_epochs = 15000
learning_rate = 1e-2
loss = []
count = 0
display_count = 10  # print the loss every 10 training steps
# Training loop: train_epochs epochs of (stochastic) gradient descent
for epoch in range(train_epochs):
    for xs, ys in zip(x, y):
        # Compute the loss and record it
        loss_ = loss_fun(xs, ys, w, b)
        loss.append(loss_)
        # Gradients of the loss with respect to [w, b]
        delta_w, delta_b = grad(xs, ys, w, b)
        change_w = delta_w * learning_rate
        change_b = delta_b * learning_rate
        w.assign_sub(change_w)
        b.assign_sub(change_b)
        # One more training step done
        count = count + 1
        if count % display_count == 0:
            print('train epoch : ', '%02d' % (epoch + 1), 'step:%03d' % (count),
                  'loss= ', '{:.9f}'.format(loss_))
print('w: ',w.numpy())
print('b: ',b.numpy())
The classic linear regression gives

9.5, -2.67857143

so the two results are very close!
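As an extra sanity check (not part of the original notebook), the same numbers follow from the closed-form least-squares formulas for a single feature:
# Closed-form ordinary least squares for one feature:
#   w = sum((x - x_mean) * (y - y_mean)) / sum((x - x_mean)**2)
#   b = y_mean - w * x_mean
x_mean, y_mean = Xdata.mean(), Ydata.mean()
w_ols = ((Xdata - x_mean) * (Ydata - y_mean)).sum() / ((Xdata - x_mean) ** 2).sum()
b_ols = y_mean - w_ols * x_mean
print(w_ols, b_ols)   # 9.5  -2.678571... matching sklearn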