import torch
from torch import nn
import numpy as np
import matplotlib.pyplot as plt

# torch.manual_seed(1)    # reproducible

# Hyper Parameters
INPUT_SIZE = 1      # rnn input size
LR = 0.02           # learning rate


class RNN(nn.Module):
    def __init__(self):
        super(RNN, self).__init__()

        self.rnn = nn.RNN(
            input_size=1,
            hidden_size=32,     # rnn hidden units
            num_layers=1,       # number of rnn layers
            batch_first=True,   # input & output have batch size as the 1st dimension, e.g. (batch, time_step, input_size)
        )
        self.out = nn.Linear(32, 1)

    def forward(self, x, h_state):
        # x (batch, time_step, input_size)
        # h_state (n_layers, batch, hidden_size)
        # r_out (batch, time_step, hidden_size)
        r_out, h_state = self.rnn(x, h_state)

        outs = []    # this is where you can see that torch is dynamic
        for time_step in range(r_out.size(1)):    # calculate output for each time step
            outs.append(self.out(r_out[:, time_step, :]))
        return torch.stack(outs, dim=1), h_state


rnn = RNN()
print(rnn)

optimizer = torch.optim.Adam(rnn.parameters(), lr=LR)   # optimize all rnn parameters
loss_func = nn.MSELoss()                                # mean squared error for this regression task

h_state = None      # for initial hidden state

plt.figure(1, figsize=(12, 5))
plt.ion()           # continuously plot

######################## Below is different #########################

################ static time steps ##########
# for step in range(60):
#     start, end = step * np.pi, (step + 1) * np.pi   # time steps
#     # use sin to predict cos
#     steps = np.linspace(start, end, 10, dtype=np.float32)

################ dynamic time steps #########
step = 0
for i in range(60):
    dynamic_steps = np.random.randint(1, 4)                      # random number of time steps
    start, end = step * np.pi, (step + dynamic_steps) * np.pi    # different time step lengths
    step += dynamic_steps

    # use sin to predict cos
    steps = np.linspace(start, end, 10 * dynamic_steps, dtype=np.float32)

    ####################### Above is different ###########################

    print(len(steps))       # print how many time steps are fed to the RNN

    x_np = np.sin(steps)    # float32 for converting to torch FloatTensor
    y_np = np.cos(steps)

    x = torch.from_numpy(x_np[np.newaxis, :, np.newaxis])    # shape (batch, time_step, input_size)
    y = torch.from_numpy(y_np[np.newaxis, :, np.newaxis])

    prediction, h_state = rnn(x, h_state)   # rnn output
    # !! next step is important !!
    h_state = h_state.data                  # repack the hidden state, break the connection from the last iteration

    loss = loss_func(prediction, y)         # MSE loss
    optimizer.zero_grad()                   # clear gradients for this training step
    loss.backward()                         # backpropagation, compute gradients
    optimizer.step()                        # apply gradients

    # plotting
    plt.plot(steps, y_np.flatten(), 'r-')
    plt.plot(steps, prediction.data.numpy().flatten(), 'b-')
    plt.draw()
    plt.pause(0.05)

plt.ioff()
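The line h_state = h_state.data is what stops back-propagation from reaching back into previous iterations. Below is a minimal, self-contained sketch of the same idea; the name toy_rnn and the tensor sizes are illustrative only, and h.detach() is used here as the equivalent of .data for this purpose.

import torch
from torch import nn

toy_rnn = nn.RNN(input_size=1, hidden_size=32, num_layers=1, batch_first=True)   # hypothetical small RNN
x = torch.randn(1, 5, 1)     # (batch, time_step, input_size)
h = None                     # initial hidden state

for i in range(3):
    r_out, h = toy_rnn(x, h)     # r_out: (1, 5, 32), h: (1, 1, 32)
    h = h.detach()               # same purpose as h_state.data: cut the graph between iterations
    print(i, r_out.shape, h.shape)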
5.3 Overfitting and Dropout
import torch
import matplotlib.pyplot as plt

# torch.manual_seed(1)    # reproducible

N_SAMPLES = 20
N_HIDDEN = 300

# training data
x = torch.unsqueeze(torch.linspace(-1, 1, N_SAMPLES), 1)
y = x + 0.3 * torch.normal(torch.zeros(N_SAMPLES, 1), torch.ones(N_SAMPLES, 1))

# test data
test_x = torch.unsqueeze(torch.linspace(-1, 1, N_SAMPLES), 1)
test_y = test_x + 0.3 * torch.normal(torch.zeros(N_SAMPLES, 1), torch.ones(N_SAMPLES, 1))

# show data
plt.scatter(x.data.numpy(), y.data.numpy(), c='magenta', s=50, alpha=0.5, label='train')
plt.scatter(test_x.data.numpy(), test_y.data.numpy(), c='cyan', s=50, alpha=0.5, label='test')
plt.legend(loc='upper left')
plt.ylim((-2.5, 2.5))
plt.show()

net_overfitting = torch.nn.Sequential(
    torch.nn.Linear(1, N_HIDDEN),
    torch.nn.ReLU(),
    torch.nn.Linear(N_HIDDEN, N_HIDDEN),
    torch.nn.ReLU(),
    torch.nn.Linear(N_HIDDEN, 1),
)

net_dropped = torch.nn.Sequential(
    torch.nn.Linear(1, N_HIDDEN),
    torch.nn.Dropout(0.5),   # drop 50% of the neurons
    torch.nn.ReLU(),
    torch.nn.Linear(N_HIDDEN, N_HIDDEN),
    torch.nn.Dropout(0.5),   # drop 50% of the neurons
    torch.nn.ReLU(),
    torch.nn.Linear(N_HIDDEN, 1),
)

print(net_overfitting)   # net architecture
print(net_dropped)

optimizer_ofit = torch.optim.Adam(net_overfitting.parameters(), lr=0.01)
optimizer_drop = torch.optim.Adam(net_dropped.parameters(), lr=0.01)
loss_func = torch.nn.MSELoss()

plt.ion()   # interactive plotting

for t in range(500):
    pred_ofit = net_overfitting(x)
    pred_drop = net_dropped(x)
    loss_ofit = loss_func(pred_ofit, y)
    loss_drop = loss_func(pred_drop, y)

    optimizer_ofit.zero_grad()
    optimizer_drop.zero_grad()
    loss_ofit.backward()
    loss_drop.backward()
    optimizer_ofit.step()
    optimizer_drop.step()

    if t % 10 == 0:
        # change to eval mode in order to fix the dropout effect;
        # whenever we evaluate on the test set, we must call net.eval() to turn dropout off
        net_overfitting.eval()
        net_dropped.eval()   # parameters for dropout differ from train mode

        # plotting
        plt.cla()
        test_pred_ofit = net_overfitting(test_x)
        test_pred_drop = net_dropped(test_x)
        plt.scatter(x.data.numpy(), y.data.numpy(), c='magenta', s=50, alpha=0.3, label='train')
        plt.scatter(test_x.data.numpy(), test_y.data.numpy(), c='cyan', s=50, alpha=0.3, label='test')
        plt.plot(test_x.data.numpy(), test_pred_ofit.data.numpy(), 'r-', lw=3, label='overfitting')
        plt.plot(test_x.data.numpy(), test_pred_drop.data.numpy(), 'b--', lw=3, label='dropout(50%)')
        plt.text(0, -1.2, 'overfitting loss=%.4f' % loss_func(test_pred_ofit, test_y).data.numpy(), fontdict={'size': 20, 'color': 'red'})
        plt.text(0, -1.5, 'dropout loss=%.4f' % loss_func(test_pred_drop, test_y).data.numpy(), fontdict={'size': 20, 'color': 'blue'})
        plt.legend(loc='upper left')
        plt.ylim((-2.5, 2.5))
        plt.pause(0.1)

        # change back to train mode
        net_overfitting.train()
        net_dropped.train()

plt.ioff()
plt.show()
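As the comments above stress, dropout must be switched off with net.eval() before measuring the test loss and switched back on with net.train() afterwards. The following is a minimal sketch of what that mode switch changes for a single nn.Dropout layer; the input tensor is illustrative only.

import torch

drop = torch.nn.Dropout(0.5)
v = torch.ones(1, 10)

drop.train()        # training mode: about half the activations are zeroed, the rest scaled by 1 / (1 - 0.5)
print(drop(v))      # e.g. tensor([[2., 0., 2., 0., ...]])

drop.eval()         # evaluation mode: dropout acts as the identity
print(drop(v))      # tensor([[1., 1., 1., ...]])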
5.4 Batch Normalization
import torch
from torch import nn
from torch.nn import init
import torch.utils.data as Data
import matplotlib.pyplot as plt
import numpy as np

# torch.manual_seed(1)    # reproducible
# np.random.seed(1)

# Hyper parameters
N_SAMPLES = 2000
BATCH_SIZE = 64
EPOCH = 12
LR = 0.03
N_HIDDEN = 8
ACTIVATION = torch.tanh
B_INIT = -0.2   # use a bad bias constant initializer

# training data
x = np.linspace(-7, 10, N_SAMPLES)[:, np.newaxis]
noise = np.random.normal(0, 2, x.shape)
y = np.square(x) - 5 + noise

# test data
test_x = np.linspace(-7, 10, 200)[:, np.newaxis]
noise = np.random.normal(0, 2, test_x.shape)
test_y = np.square(test_x) - 5 + noise

train_x, train_y = torch.from_numpy(x).float(), torch.from_numpy(y).float()
test_x = torch.from_numpy(test_x).float()
test_y = torch.from_numpy(test_y).float()

train_dataset = Data.TensorDataset(train_x, train_y)
train_loader = Data.DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)

# show data
plt.scatter(train_x.numpy(), train_y.numpy(), c='#FF9359', s=50, alpha=0.2, label='train')
plt.legend(loc='upper left')


class Net(nn.Module):
    def __init__(self, batch_normalization=False):
        super(Net, self).__init__()
        self.do_bn = batch_normalization
        self.fcs = []
        self.bns = []
        self.bn_input = nn.BatchNorm1d(1, momentum=0.5)   # for input data

        for i in range(N_HIDDEN):                # build hidden layers and BN layers
            input_size = 1 if i == 0 else 10
            fc = nn.Linear(input_size, 10)
            setattr(self, 'fc%i' % i, fc)        # IMPORTANT: register the layer on the Module
            self._set_init(fc)                   # parameter initialization
            self.fcs.append(fc)
            if self.do_bn:
                bn = nn.BatchNorm1d(10, momentum=0.5)
                setattr(self, 'bn%i' % i, bn)    # IMPORTANT: register the layer on the Module
                self.bns.append(bn)

        self.predict = nn.Linear(10, 1)          # output layer
        self._set_init(self.predict)             # parameter initialization

    def _set_init(self, layer):
        init.normal_(layer.weight, mean=0., std=.1)
        init.constant_(layer.bias, B_INIT)

    def forward(self, x):
        pre_activation = [x]
        if self.do_bn: x = self.bn_input(x)      # input batch normalization
        layer_input = [x]
        for i in range(N_HIDDEN):
            x = self.fcs[i](x)
            pre_activation.append(x)
            if self.do_bn: x = self.bns[i](x)    # batch normalization
            x = ACTIVATION(x)
            layer_input.append(x)
        out = self.predict(x)
        return out, layer_input, pre_activation


nets = [Net(batch_normalization=False), Net(batch_normalization=True)]
# print(*nets)    # print net architectures

opts = [torch.optim.Adam(net.parameters(), lr=LR) for net in nets]
loss_func = torch.nn.MSELoss()


def plot_histogram(l_in, l_in_bn, pre_ac, pre_ac_bn):
    for i, (ax_pa, ax_pa_bn, ax, ax_bn) in enumerate(zip(axs[0, :], axs[1, :], axs[2, :], axs[3, :])):
        [a.clear() for a in [ax_pa, ax_pa_bn, ax, ax_bn]]
        if i == 0:
            p_range = (-7, 10); the_range = (-7, 10)
        else:
            p_range = (-4, 4); the_range = (-1, 1)
        ax_pa.set_title('L' + str(i))
        ax_pa.hist(pre_ac[i].data.numpy().ravel(), bins=10, range=p_range, color='#FF9359', alpha=0.5)
        ax_pa_bn.hist(pre_ac_bn[i].data.numpy().ravel(), bins=10, range=p_range, color='#74BCFF', alpha=0.5)
        ax.hist(l_in[i].data.numpy().ravel(), bins=10, range=the_range, color='#FF9359')
        ax_bn.hist(l_in_bn[i].data.numpy().ravel(), bins=10, range=the_range, color='#74BCFF')
        for a in [ax_pa, ax, ax_pa_bn, ax_bn]:
            a.set_yticks(()); a.set_xticks(())
        ax_pa_bn.set_xticks(p_range); ax_bn.set_xticks(the_range)
        axs[0, 0].set_ylabel('PreAct'); axs[1, 0].set_ylabel('BN PreAct'); axs[2, 0].set_ylabel('Act'); axs[3, 0].set_ylabel('BN Act')
    plt.pause(0.01)


if __name__ == "__main__":
    f, axs = plt.subplots(4, N_HIDDEN + 1, figsize=(10, 5))
    plt.ion()   # interactive plotting
    plt.show()

    # training
    losses = [[], []]   # record the loss of the two networks

    for epoch in range(EPOCH):
        print('Epoch: ', epoch)
        layer_inputs, pre_acts = [], []
        for net, l in zip(nets, losses):
            net.eval()              # set eval mode to fix moving_mean and moving_var
            pred, layer_input, pre_act = net(test_x)
            l.append(loss_func(pred, test_y).data.item())
            layer_inputs.append(layer_input)
            pre_acts.append(pre_act)
            net.train()             # free moving_mean and moving_var
        plot_histogram(*layer_inputs, *pre_acts)    # plot histograms

        for step, (b_x, b_y) in enumerate(train_loader):
            for net, opt in zip(nets, opts):    # train each network
                pred, _, _ = net(b_x)
                loss = loss_func(pred, b_y)
                opt.zero_grad()
                loss.backward()
                opt.step()          # this also learns the parameters inside Batch Normalization

    plt.ioff()

    # plot training loss
    plt.figure(2)
    plt.plot(losses[0], c='#FF9359', lw=3, label='Original')
    plt.plot(losses[1], c='#74BCFF', lw=3, label='Batch Normalization')
    plt.xlabel('step')
    plt.ylabel('test loss')
    plt.ylim((0, 2000))
    plt.legend(loc='best')

    # evaluation
    # set the nets to eval mode to freeze the parameters in the batch normalization layers
    [net.eval() for net in nets]    # set eval mode to fix moving_mean and moving_var
    preds = [net(test_x)[0] for net in nets]
    plt.figure(3)
    plt.plot(test_x.data.numpy(), preds[0].data.numpy(), c='#FF9359', lw=4, label='Original')
    plt.plot(test_x.data.numpy(), preds[1].data.numpy(), c='#74BCFF', lw=4, label='Batch Normalization')
    plt.scatter(test_x.data.numpy(), test_y.data.numpy(), c='r', s=50, alpha=0.2, label='test')
    plt.legend(loc='best')
    plt.show()
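The net.eval() calls above freeze the running statistics (moving_mean and moving_var) that each BatchNorm layer accumulates during training. A minimal sketch of that behaviour for a single nn.BatchNorm1d layer, with an illustrative batch of data:

import torch

bn = torch.nn.BatchNorm1d(1, momentum=0.5)
batch = torch.randn(64, 1) * 3 + 7                 # a batch with mean around 7 and std around 3

bn.train()
out = bn(batch)                                    # normalized with the statistics of this batch
print(out.mean().item(), out.std().item())         # roughly 0 and 1
print(bn.running_mean, bn.running_var)             # moving averages pulled toward the batch statistics

bn.eval()
out = bn(batch)                                    # normalized with the stored running_mean / running_var instead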