BERT 情感分析
一、 數據集加載與模型訓練
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from datasets import load_dataset
import torch
import numpy as np
from sklearn.metrics import accuracy_score
# Local directory that receives a copy of the pretrained weights and vocabulary.
mode_name_or_path = '/root/autodl-tmp/bert-base-uncased'

# 1. Load the SST-2 dataset (GLUE benchmark).
dataset = load_dataset("glue", "sst2")

# Fetch the pretrained tokenizer and classification model; the model is then
# moved to the GPU.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = BertForSequenceClassification.from_pretrained("bert-base-uncased")
model = model.to('cuda')

# Keep a local copy of both artifacts under the same directory.
model.save_pretrained(mode_name_or_path)
tokenizer.save_pretrained(mode_name_or_path)
# 2. Data preprocessing (tokenization)
def preprocess(example):
    """Tokenize the ``sentence`` field of a batch of examples.

    Sentences are truncated to at most 128 tokens and padded up to 128
    tokens, so every encoded row has the same length.

    Args:
        example: Mapping with a ``"sentence"`` entry (a batch of sentences,
            because ``map`` is called with ``batched=True``).

    Returns:
        The tokenizer output for the batch.
    """
    return tokenizer(example["sentence"], truncation=True, padding="max_length", max_length=128)


encoded_dataset = dataset.map(preprocess, batched=True)
# 3. Rename the target column to "labels" and expose only the listed columns
#    as torch tensors.
encoded_dataset = encoded_dataset.rename_column("label", "labels")
encoded_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

# 4. Training arguments
# Each keyword sits on its own line so that the disabled option below does not
# comment out the arguments that follow it.
training_args = TrainingArguments(
    output_dir="./output",
    # evaluation_strategy="epoch",
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    num_train_epochs=1,
    logging_dir="./logs",
)

# 5. Define the metric
def compute_metrics(p):
    """Return the accuracy of an evaluation result.

    Args:
        p: Evaluation output exposing ``predictions`` (per-class scores; the
            predicted class is the arg-max over axis 1) and ``label_ids``
            (the reference labels).

    Returns:
        dict: ``{"accuracy": value}`` as computed by ``accuracy_score``.
    """
    preds = np.argmax(p.predictions, axis=1)
    return {"accuracy": accuracy_score(p.label_ids, preds)}


# Print a summary of the encoded training split.
print(encoded_dataset["train"])
# 6. Build the trainer, fine-tune, then evaluate on the validation split.
# NOTE(review): the tokenizer is not handed to the Trainer, so the checkpoints
# under "./output" may not contain tokenizer files, yet later sections load
# the tokenizer from "./output/checkpoint-2105/" -- confirm those files exist.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=encoded_dataset["train"],
    eval_dataset=encoded_dataset["validation"],
    compute_metrics=compute_metrics,
)
trainer.train()
trainer.evaluate()
二、HUGGING FACE 量化
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
from analyze import *# 1. 加載訓練后BERT模型和tokenizer
# Checkpoint directory of the trained BERT model.
model_name = "./output/checkpoint-2105/"
tokenizer = AutoTokenizer.from_pretrained(model_name)
original_model = AutoModelForSequenceClassification.from_pretrained(model_name).to('cuda')

# 2. Load the same checkpoint a second time with 8-bit quantization.
# Each keyword is on its own line so the inline comments cannot swallow the
# argument that follows them.
# NOTE(review): recent transformers releases prefer
# `quantization_config=BitsAndBytesConfig(load_in_8bit=True)` over the bare
# `load_in_8bit` keyword -- confirm against the installed version.
quantized_model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    device_map="auto",  # place the weights on the available device(s) automatically
    load_in_8bit=True,  # enable 8-bit quantization
)
- 比較模型大小
模型大小大幅降低,約縮小為原來的 1/3(417.655MB → 127.269MB,約 3.3 倍)。
def print_model_size(model, model_name):
    """Print the combined size of a model's parameters and buffers in MB.

    Only tensors returned by ``model.parameters()`` and ``model.buffers()``
    are counted; weights a module stores in any other way do not appear in
    the total.

    Args:
        model: Object exposing ``parameters()`` and ``buffers()``.
        model_name: Label printed in front of the size.
    """
    param_size = sum(p.nelement() * p.element_size() for p in model.parameters())
    buffer_size = sum(b.nelement() * b.element_size() for b in model.buffers())
    size_all_mb = (param_size + buffer_size) / 1024**2
    print(f"{model_name} size: {size_all_mb:.3f}MB")


print_model_size(original_model, "Original BERT")
print_model_size(quantized_model, "Quantized 8-bit BERT")
# Reported output:
# Original BERT size: 417.655MB
# Quantized 8-bit BERT size: 127.269MB
- 模型精度比較
在大小降低的同時,驗證集上的精度也大幅下降(0.9300 → 0.5482)。
def evaluate(model, dataset, labels, batch_size=32):
    """Compute the classification accuracy of ``model`` over ``dataset``.

    The dataset is read in consecutive slices of ``batch_size`` rows. Each
    slice must be a mapping holding ``"input_ids"`` and ``"attention_mask"``
    tensors, which are moved to ``model.device`` before the forward pass.
    The predicted class is the arg-max of ``outputs.logits`` along dim 1.

    Args:
        model: Callable model exposing ``eval()`` and ``device`` whose output
            has a ``logits`` attribute.
        dataset: Sized, sliceable collection of encoded examples.
        labels: Reference class ids, one per dataset row (tensor or sequence).
        batch_size: Rows per forward pass. Defaults to 32, the value that was
            previously hard-coded.

    Returns:
        float: Fraction of rows whose predicted class equals its label.

    Raises:
        ValueError: If ``labels`` is empty or does not have one entry per
            dataset row.
    """
    # Plain Python ints compare cheaply and uniformly, whether the caller
    # passed a tensor or a list.
    targets = labels.tolist() if hasattr(labels, "tolist") else list(labels)

    # Fail fast: zip() below would silently truncate on a length mismatch and
    # an empty label set would end in a division by zero.
    if not targets:
        raise ValueError("labels must not be empty")
    if len(targets) != len(dataset):
        raise ValueError(
            f"labels has {len(targets)} entries but dataset has {len(dataset)} rows"
        )

    model.eval()
    preds = []
    with torch.no_grad():
        for start in range(0, len(dataset), batch_size):
            batch = dataset[start:start + batch_size]
            input_ids = batch["input_ids"].to(model.device)
            attention_mask = batch["attention_mask"].to(model.device)
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            preds.extend(torch.argmax(outputs.logits, dim=1).cpu().tolist())

    correct = sum(int(p == t) for p, t in zip(preds, targets))
    return correct / len(targets)
def preprocess(example):
    """Tokenize the ``sentence`` field of a batch of examples.

    Sentences are truncated to at most 128 tokens and padded up to 128
    tokens, so every encoded row has the same length.

    Args:
        example: Mapping with a ``"sentence"`` entry.

    Returns:
        The tokenizer output for the batch.
    """
    return tokenizer(example["sentence"], truncation=True, padding="max_length", max_length=128)


from datasets import load_dataset
# Encode the SST-2 validation split; only the model inputs are exposed as
# tensors, the labels are kept separately.
dataset = load_dataset("glue", "sst2")
val_dataset = dataset["validation"]
encoded_val_dataset = val_dataset.map(preprocess, batched=True)
encoded_val_dataset.set_format(type="torch", columns=["input_ids", "attention_mask"])
labels = torch.tensor(val_dataset["label"])

# Score both models on the same encoded data and labels.
acc_fp32 = evaluate(original_model, encoded_val_dataset, labels)
acc_int8 = evaluate(quantized_model, encoded_val_dataset, labels)
print(f"Original FP32 model accuracy: {acc_fp32:.4f}")
print(f"Quantized INT8 model accuracy: {acc_int8:.4f}")
# Reported output:
# Original FP32 model accuracy: 0.9300
# Quantized INT8 model accuracy: 0.5482
- 量化分析
👉 這種量化方式雖然簡單,但存在一個明顯的問題:這種方式是 HuggingFace 基於 bitsandbytes 庫實現的輕量量化方式,背後用的是 bitsandbytes 的 8-bit optimizers:
- 權重是以 FP16 或 INT8 存儲,但不是 PyTorch 的量化張量(QTensor)
- 目的是節省顯存和內存
🎯 因此,該方法無法通過調用 tensor.q_scale()、tensor.q_zero_point() 進行逐層分析。
三、PYTORCH Eager Mode 量化
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer# 1. 加載原始模型
# Checkpoint directory of the trained model; loaded in eval mode.
model_name = "./output/checkpoint-2105/"
model = AutoModelForSequenceClassification.from_pretrained(model_name).eval()
tokenizer = AutoTokenizer.from_pretrained(model_name)

# 2. Move the model to the CPU (eager-mode quantization is recommended on CPU).
model.to('cpu')

# 3. Apply dynamic quantization.
# Each argument is on its own line so the inline comments cannot swallow the
# argument that follows them.
quantized_model = torch.quantization.quantize_dynamic(
    model,
    {torch.nn.Linear},  # module types to quantize
    dtype=torch.qint8,  # quantized dtype
)
- 量化後大小比較:結果比 HuggingFace 量化方式大一點
# 大小比較
# Original BERT size: 417.655MB
# Quantized 8-bit BERT size: 127.269MB
# 精度比較
# Original FP32 model accuracy: 0.9300
# Quantized INT8 model accuracy: 0.5482(不變)
四、PYTORCH EXPORT 量化 (存在bug)
目前這種量化方式還存在 bug,而且我還找不到原因,希望有大哥幫忙看一下。主要的問題是:模型可以成功量化,但量化後的模型在推理時會報錯,而且量化結果的大小也很奇怪:Original BERT size: 417.655MB;Quantized 8-bit BERT size: 0.001MB。這兩個現象很可能是同一個原因:ep.graph_module 把所有權重都提升成了 forward 的輸入參數,模組本身不再持有參數(可改用 ep.module() 驗證)。
import torch
from torch.export import export
from torch.ao.quantization.quantize_pt2e import prepare_pt2e, convert_pt2e
from torch.ao.quantization.quantizer.xnnpack_quantizer import get_symmetric_quantization_config
from torch.ao.quantization.quantizer.x86_inductor_quantizer import X86InductorQuantizer
from transformers import AutoModelForSequenceClassification, AutoTokenizer
model_name = "./output/checkpoint-2105/"

# 1. Load the trained model in eval mode.
# The model stays on the CPU: the quantizer used below targets the x86 CPU
# backend, so nothing in this flow needs a GPU.
model = AutoModelForSequenceClassification.from_pretrained(model_name).eval()
tokenizer = AutoTokenizer.from_pretrained(model_name)

# 2. Build a sample input. `dynamic_shapes=None` below means the export is
#    specialized to exactly these shapes (batch of 1, 128 tokens).
example = tokenizer("This movie is great!", return_tensors="pt", padding="max_length", max_length=128)
example = dict(example.items())
example_inputs = (example["input_ids"], example["attention_mask"])

# 3. Export the model.
# NOTE(review): on PyTorch versions that ship `torch.export.export_for_training`,
# the PT2E quantization tutorial uses that entry point instead of `export` --
# confirm against the installed version.
ep = export(model, args=example_inputs, dynamic_shapes=None)

# FIX: use `ep.module()`, not `ep.graph_module`.
# `ep.graph_module` takes every parameter and buffer as an extra positional
# input, which is what produced
#   forward() missing 203 required positional arguments:
#   'p_bert_embeddings_position_embeddings_weight', ...
# at inference time, and why the "quantized" module reported a size of
# 0.001MB (it owned no weights). `ep.module()` returns a module that owns its
# weights and keeps the original (input_ids, attention_mask) call signature.
gm = ep.module()

# 4. Configure the quantizer.
# NOTE(review): the X86 Inductor tutorial pairs X86InductorQuantizer with
# `get_default_x86_inductor_quantization_config()`; the XNNPACK symmetric
# config from the original script is kept here -- confirm it is intended.
quantizer = X86InductorQuantizer()
quantizer.set_global(get_symmetric_quantization_config(is_per_channel=True))

# 5. Insert observers, calibrate, then convert.
prepared = prepare_pt2e(gm, quantizer)

# The observers must see data before conversion so they can derive
# scales/zero-points. A single sample is the bare minimum; feed a
# representative set of validation batches for meaningful accuracy.
with torch.no_grad():
    prepared(*example_inputs)

quantity_model = convert_pt2e(prepared)
# The converted model is called like the original one:
#   quantity_model(*example_inputs)