import torch
from torch import nn
from torchsummary import summary
# Convolution block
def conv_block(input_channels, num_channels):
    """Build one BatchNorm -> ReLU -> 3x3 convolution unit.

    The convolution uses ``kernel_size=3`` with ``padding=1``, so the spatial
    size of the input is preserved and only the channel count changes.

    Args:
        input_channels: Number of channels of the incoming feature map.
        num_channels: Number of channels produced by the convolution.

    Returns:
        An ``nn.Sequential`` holding the three layers in order.
    """
    return nn.Sequential(
        nn.BatchNorm2d(input_channels),
        nn.ReLU(),
        nn.Conv2d(input_channels, num_channels, kernel_size=3, padding=1),
    )
# Transition block
def transition_block(inputs_channels, num_channels):
    """Build a transition layer: BatchNorm -> ReLU -> 1x1 conv -> 2x2 avg-pool.

    The 1x1 convolution maps ``inputs_channels`` to ``num_channels`` and the
    average pooling (``kernel_size=2, stride=2``) halves height and width.

    Args:
        inputs_channels: Number of channels of the incoming feature map.
        num_channels: Number of channels produced by the 1x1 convolution.

    Returns:
        An ``nn.Sequential`` holding the four layers in order.
    """
    return nn.Sequential(
        nn.BatchNorm2d(inputs_channels),
        nn.ReLU(),
        nn.Conv2d(inputs_channels, num_channels, kernel_size=1),
        nn.AvgPool2d(kernel_size=2, stride=2),
    )
# DenseNet block
class DenseBlock(nn.Module):
    """A stack of conv blocks whose outputs are concatenated onto their input.

    Every conv block receives the concatenation (along the channel axis) of
    the block input and all previous conv-block outputs, and contributes
    ``num_channels`` new channels. The output therefore has
    ``input_channels + num_convs * num_channels`` channels.

    Args:
        num_convs: Number of conv blocks in this dense block.
        input_channels: Number of channels of the tensor fed to the block.
        num_channels: Channels added by each conv block.
    """

    def __init__(self, num_convs, input_channels, num_channels):
        super().__init__()
        # The i-th conv block sees the original input plus i earlier outputs.
        layers = [
            conv_block(num_channels * i + input_channels, num_channels)
            for i in range(num_convs)
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, X):
        """Run every conv block, concatenating each output onto ``X`` (dim=1)."""
        for blk in self.net:
            Y = blk(X)
            X = torch.cat((X, Y), dim=1)
        return X
# Stem: 7x7 stride-2 convolution on a 1-channel input, then BatchNorm, ReLU
# and a 3x3 stride-2 max-pool.
b1 = nn.Sequential(
    nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
    nn.BatchNorm2d(64),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
)

# num_channels tracks the running channel count; growth_rate is the number of
# channels each conv block inside a dense block adds.
num_channels, growth_rate = 64, 32
num_convs_in_dense_block = [4, 4, 4, 4]

blks = []
for i, num_convs in enumerate(num_convs_in_dense_block):
    blks.append(DenseBlock(num_convs, num_channels, growth_rate))
    # Output channel count of the dense block that was just appended.
    num_channels += num_convs * growth_rate
    # Insert a transition layer between dense blocks (not after the last one)
    # that halves the number of channels.
    if i != len(num_convs_in_dense_block) - 1:
        blks.append(transition_block(num_channels, num_channels // 2))
        num_channels = num_channels // 2

# Full network: stem, dense/transition stages, then a 10-way classifier head.
model = nn.Sequential(
    b1,
    *blks,
    nn.BatchNorm2d(num_channels),
    nn.ReLU(),
    nn.AdaptiveAvgPool2d((1, 1)),
    nn.Flatten(),
    nn.Linear(num_channels, 10),
)
device = torch.device("cuda" if torch.cuda.is_available() else 'cpu')
model.to(device)
# Print a per-layer summary for a 1x224x224 input with batch size 64.
summary(model, input_size=(1, 224, 224), batch_size=64)
# 17.2 DenseNet 網絡實現 Fashion-MNIST 分類
#################################################################################################################DenseNet################################################################################################################import torch
import torchvision
from torch import nn
import matplotlib.pyplot as plt
from torchvision.transforms import transforms
from torch.utils.data import DataLoader
from tqdm import tqdm
from sklearn.metrics import accuracy_score
from torch.nn import functional as F
# Font family used by every figure drawn below.
plt.rcParams['font.family'] = ['Times New Roman']


class Reshape(torch.nn.Module):
    """Module that views its input as a batch of 1x28x28 images."""

    def forward(self, x):
        """Return ``x`` viewed as ``[batch, 1, 28, 28]``; batch size is inferred."""
        return x.view(-1, 1, 28, 28)


def plot_metrics(train_loss_list, train_acc_list, test_acc_list, title='Training Curve'):
    """Plot per-epoch training loss, training accuracy and test accuracy.

    Args:
        train_loss_list: Average training loss of each epoch.
        train_acc_list: Training accuracy of each epoch.
        test_acc_list: Test accuracy of each epoch.
        title: Title placed above the figure.

    The x-axis runs from 1 to ``len(train_loss_list)``. The figure is shown
    with ``plt.show()``; nothing is returned.
    """
    epochs = range(1, len(train_loss_list) + 1)
    plt.figure(figsize=(4, 3))
    plt.plot(epochs, train_loss_list, label='Train Loss')
    plt.plot(epochs, train_acc_list, label='Train Acc', linestyle='--')
    plt.plot(epochs, test_acc_list, label='Test Acc', linestyle='--')
    plt.xlabel('Epoch')
    plt.ylabel('Value')
    plt.title(title)
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()


def train_model(model, train_data, test_data, num_epochs):
    """Train ``model`` and report loss/accuracy after every epoch.

    Relies on the module-level names ``device``, ``CEloss`` and ``optimizer``,
    which must be defined before this function is called.

    Args:
        model: Network to train; called as ``model(X)``.
        train_data: Iterable yielding ``(X, y)`` training batches.
        test_data: Iterable yielding ``(X, y)`` evaluation batches.
        num_epochs: Number of passes over ``train_data``.

    Returns:
        The same ``model`` object. It is left in evaluation mode because the
        last step of every epoch is the evaluation pass.
    """
    train_loss_list = []
    train_acc_list = []
    test_acc_list = []
    for epoch in range(num_epochs):
        # The network contains BatchNorm layers, so it must be in training
        # mode here and in evaluation mode during the test pass below.
        model.train()
        total_loss = 0
        total_acc_sample = 0
        total_samples = 0
        loop = tqdm(train_data, desc=f"EPOCHS[{epoch+1}/{num_epochs}]")
        for X, y in loop:
            X = X.to(device)
            y = y.to(device)
            y_hat = model(X)
            loss = CEloss(y_hat, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Accumulate the loss weighted by batch size so the epoch average
            # is per-sample even when the last batch is smaller.
            total_loss += loss.item() * X.shape[0]
            y_pred = y_hat.argmax(dim=1).detach().cpu().numpy()
            y_true = y.detach().cpu().numpy()
            # accuracy * batch size == number of correct predictions.
            total_acc_sample += accuracy_score(y_true, y_pred) * X.shape[0]
            # Track how many samples were seen.
            total_samples += X.shape[0]

        test_acc_samples = 0
        test_samples = 0
        # Evaluate with BatchNorm running statistics and without building an
        # autograd graph; otherwise test batches would update the BatchNorm
        # statistics and waste memory on gradients.
        model.eval()
        with torch.no_grad():
            for X, y in test_data:
                X = X.to(device)
                y = y.to(device)
                y_hat = model(X)
                y_pred = y_hat.argmax(dim=1).detach().cpu().numpy()
                y_true = y.detach().cpu().numpy()
                test_acc_samples += accuracy_score(y_true, y_pred) * X.shape[0]
                # Track how many samples were seen.
                test_samples += X.shape[0]

        avg_train_loss = total_loss / total_samples
        avg_train_acc = total_acc_sample / total_samples
        avg_test_acc = test_acc_samples / test_samples
        train_loss_list.append(avg_train_loss)
        train_acc_list.append(avg_train_acc)
        test_acc_list.append(avg_test_acc)
        print(f"Epoch {epoch+1}: Loss: {avg_train_loss:.4f},Trian Accuracy: {avg_train_acc:.4f},test Accuracy: {avg_test_acc:.4f}")
    plot_metrics(train_loss_list, train_acc_list, test_acc_list)
    return model
def init_weights(m):
    """Apply Xavier-uniform initialisation to Linear and Conv2d weights.

    Intended for ``model.apply(init_weights)``. Modules of any other type are
    left untouched, and biases are not modified.

    Args:
        m: A module visited by ``nn.Module.apply``.
    """
    if isinstance(m, (nn.Linear, nn.Conv2d)):
        nn.init.xavier_uniform_(m.weight)


# ----------------------------- DenseNet -----------------------------
# Convolution block
def conv_block(input_channels, num_channels):
    """Build one BatchNorm -> ReLU -> 3x3 convolution unit.

    The convolution uses ``kernel_size=3`` with ``padding=1``, so the spatial
    size of the input is preserved and only the channel count changes.

    Args:
        input_channels: Number of channels of the incoming feature map.
        num_channels: Number of channels produced by the convolution.

    Returns:
        An ``nn.Sequential`` holding the three layers in order.
    """
    return nn.Sequential(
        nn.BatchNorm2d(input_channels),
        nn.ReLU(),
        nn.Conv2d(input_channels, num_channels, kernel_size=3, padding=1),
    )
# Transition block
def transition_block(inputs_channels, num_channels):
    """Build a transition layer: BatchNorm -> ReLU -> 1x1 conv -> 2x2 avg-pool.

    The 1x1 convolution maps ``inputs_channels`` to ``num_channels`` and the
    average pooling (``kernel_size=2, stride=2``) halves height and width.

    Args:
        inputs_channels: Number of channels of the incoming feature map.
        num_channels: Number of channels produced by the 1x1 convolution.

    Returns:
        An ``nn.Sequential`` holding the four layers in order.
    """
    return nn.Sequential(
        nn.BatchNorm2d(inputs_channels),
        nn.ReLU(),
        nn.Conv2d(inputs_channels, num_channels, kernel_size=1),
        nn.AvgPool2d(kernel_size=2, stride=2),
    )
# DenseNet block
class DenseBlock(nn.Module):
    """A stack of conv blocks whose outputs are concatenated onto their input.

    Every conv block receives the concatenation (along the channel axis) of
    the block input and all previous conv-block outputs, and contributes
    ``num_channels`` new channels. The output therefore has
    ``input_channels + num_convs * num_channels`` channels.

    Args:
        num_convs: Number of conv blocks in this dense block.
        input_channels: Number of channels of the tensor fed to the block.
        num_channels: Channels added by each conv block.
    """

    def __init__(self, num_convs, input_channels, num_channels):
        super().__init__()
        # The i-th conv block sees the original input plus i earlier outputs.
        layers = [
            conv_block(num_channels * i + input_channels, num_channels)
            for i in range(num_convs)
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, X):
        """Run every conv block, concatenating each output onto ``X`` (dim=1)."""
        for blk in self.net:
            Y = blk(X)
            X = torch.cat((X, Y), dim=1)
        return X
# Stem: 7x7 stride-2 convolution on a 1-channel input, then BatchNorm, ReLU
# and a 3x3 stride-2 max-pool.
b1 = nn.Sequential(
    nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
    nn.BatchNorm2d(64),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
)

# num_channels tracks the running channel count; growth_rate is the number of
# channels each conv block inside a dense block adds.
num_channels, growth_rate = 64, 32
num_convs_in_dense_block = [4, 4, 4, 4]

blks = []
for i, num_convs in enumerate(num_convs_in_dense_block):
    blks.append(DenseBlock(num_convs, num_channels, growth_rate))
    # Output channel count of the dense block that was just appended.
    num_channels += num_convs * growth_rate
    # Insert a transition layer between dense blocks (not after the last one)
    # that halves the number of channels.
    if i != len(num_convs_in_dense_block) - 1:
        blks.append(transition_block(num_channels, num_channels // 2))
        num_channels = num_channels // 2
# Preprocessing applied to every image: resize to 96, convert to a tensor,
# then normalise with mean 0.5 and std 0.5.
# NOTE: this rebinds the name `transforms`; after this line it refers to the
# composed pipeline, not to the imported torchvision.transforms module.
transforms = transforms.Compose(
    [
        transforms.Resize(96),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

# Fashion-MNIST train / test splits, downloaded into ./data when missing.
train_img = torchvision.datasets.FashionMNIST(
    root="./data",
    train=True,
    download=True,
    transform=transforms,
)
test_img = torchvision.datasets.FashionMNIST(
    root="./data",
    train=False,
    download=True,
    transform=transforms,
)

# Batches of 128 loaded by 4 worker processes; only the training set is shuffled.
train_data = DataLoader(train_img, batch_size=128, shuffle=True, num_workers=4)
test_data = DataLoader(test_img, batch_size=128, shuffle=False, num_workers=4)
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else 'cpu')

# Assemble the classifier: stem, dense/transition stages, then BatchNorm, ReLU,
# global average pooling and a 10-way linear layer.
model = nn.Sequential(
    b1,
    *blks,
    nn.BatchNorm2d(num_channels),
    nn.ReLU(),
    nn.AdaptiveAvgPool2d((1, 1)),
    nn.Flatten(),
    nn.Linear(num_channels, 10),
)
model.to(device)
# Re-initialise the Linear / Conv2d weights after the model is on its device.
model.apply(init_weights)

# Loss and optimiser are module-level names read by train_model.
CEloss = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

model = train_model(model, train_data, test_data, num_epochs=10)
Cursor:代碼編寫的智能伙伴?Cursor 是 Anysphere 公司推出的一款 AI 編程工具,它基于微軟開源代碼編輯器 VS Code 開發,將 AI 技術深度整合到開發人員的工作流程中。Cursor 的功能十分強大,不僅能夠自動用純英文編寫代碼…
發表:Findings of EMNLP 2024
機構:Shanghai Jiao Tong University
鏈接:LaCo: Large Language Model Pruning via Layer Collapse - ACL Anthology
代碼:https://github.com/yangyifei729/LaCo
Abstract
基于 Transformer 的大語…
系列文章
序號 | 文章名稱
1 | Spring AI 項目實戰(一):Spring AI 核心模塊入門
2 | Spring AI 項目實戰(二):Spring Boot + AI + DeepSeek 深度實戰(附完整源碼)
3 | Spring AI 項目實戰(三):Spring Boot + AI + DeepSeek 打造智能客服系統(附完整源碼)
4 | …