Fine-tuning the bert-base-chinese pretrained model on a binary classification task
import pandas as pd
from transformers import BertTokenizerFast, AutoModelForSequenceClassification, Trainer, TrainingArguments
import torch

model_name = "./bert-base-chinese"
path = "./abuse_22.csv"
df = pd.read_csv(path, encoding="utf-8")
# Use the first 1000 rows: "content" holds the text, "punish_result" the 0/1 label
texts = df["content"][:1000].tolist()
labels = df["punish_result"][:1000].tolist()
texts = [str(x) for x in texts]  # coerce every cell to str (guards against NaN / numeric rows)

class Dataset(torch.utils.data.Dataset):
    # Wraps the tokenizer output and the label list as a PyTorch dataset.
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
tokenizer = BertTokenizerFast.from_pretrained(model_name)

# Reference: https://blog.csdn.net/weixin_42924890/article/details/139269528
train_encodings = tokenizer(texts, truncation=True, padding=True, max_length=512)
train_dataset = Dataset(train_encodings, labels)

args = TrainingArguments(
    output_dir='./output_dir',
    # The original evaluation_strategy='epoch' fails here, because no
    # eval_dataset is passed to the Trainer below; disable evaluation instead.
    evaluation_strategy='no',
    no_cuda=True,
    num_train_epochs=2,
    learning_rate=1e-4,  # note: BERT fine-tuning more commonly uses 2e-5 to 5e-5
    weight_decay=1e-2,
    per_device_eval_batch_size=32,
    per_device_train_batch_size=32,
)
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
)

# Start training
trainer.train()
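
The original arguments asked for per-epoch evaluation but never built a held-out set, which is why evaluation was disabled above. Below is a minimal sketch of how that intent could be restored; the 80/20 split ratio and the accuracy metric are illustrative assumptions, not part of the original script.

import numpy as np

# Build an 80/20 train/eval split from the same texts/labels used above
split = int(len(texts) * 0.8)
train_encodings = tokenizer(texts[:split], truncation=True, padding=True, max_length=512)
eval_encodings = tokenizer(texts[split:], truncation=True, padding=True, max_length=512)
train_dataset = Dataset(train_encodings, labels[:split])
eval_dataset = Dataset(eval_encodings, labels[split:])

def compute_metrics(eval_pred):
    # eval_pred carries the model logits and the gold labels
    logits, label_ids = eval_pred.predictions, eval_pred.label_ids
    preds = np.argmax(logits, axis=-1)
    return {"accuracy": float((preds == label_ids).mean())}

trainer = Trainer(
    model=model,
    args=TrainingArguments(
        output_dir='./output_dir',
        evaluation_strategy='epoch',  # now valid: an eval_dataset is supplied
        no_cuda=True,
        num_train_epochs=2,
        learning_rate=1e-4,
        weight_decay=1e-2,
        per_device_eval_batch_size=32,
        per_device_train_batch_size=32,
    ),
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
)
trainer.train()

After training, the fine-tuned model can be used directly for prediction; the input sentence here is a placeholder:

inputs = tokenizer("這是一段待分類的文本", truncation=True, max_length=512, return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits
print(logits.argmax(dim=-1).item())  # 0 or 1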