I have been trying to fine-tune a BERT model on one of the RAFT datasets. Training the original model went fine, but when I tried to add an adapter (with OpenDelta) and fine-tune, training went wrong.
#!/usr/bin/env python
# coding: utf-8
# In[1]:
import datasets
datasets.logging.set_verbosity_error()
# In[2]:
from datasets import get_dataset_config_names
RAFT_TASKS = get_dataset_config_names("ought/raft")
RAFT_TASKS
# In[3]:
from datasets import load_dataset
TASK = "ade_corpus_v2"
raft_dataset = load_dataset("ought/raft", name=TASK)
raft_dataset
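# Each RAFT task ships a small labeled "train" split (50 examples per task) and a large
# unlabeled "test" split, which is why the labeled train split is re-split into
# train/validation below.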
# In[5]:
from transformers import AutoTokenizer, Seq2SeqTrainingArguments, TrainerCallback
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
from sklearn.model_selection import train_test_split
X = raft_dataset["train"]['Sentence']
y = raft_dataset["train"]['Label']
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2)
X_train_tokenized = tokenizer(X_train, padding=True, truncation=True, max_length=512)
X_val_tokenized = tokenizer(X_val, padding=True, truncation=True, max_length=512)
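# The tokenizer returns a BatchEncoding (input_ids, token_type_ids, attention_mask);
# the Dataset class below indexes into these lists one example at a time.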
# In[19]:
import torch
class Dataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels=None):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        if self.labels:
            # RAFT class labels start at 1 (0 is reserved for "Unlabeled"), so shift to 0-based
            item["labels"] = torch.tensor(self.labels[idx] - 1)
        return item

    def __len__(self):
        return len(self.encodings["input_ids"])
train_dataset = Dataset(X_train_tokenized, y_train)
val_dataset = Dataset(X_val_tokenized, y_val)
# In[20]:
# train_dataset[0]
# In[ ]:
from transformers import TrainingArguments, Trainer
from transformers import AutoModelForSequenceClassification, EarlyStoppingCallback
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)
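# Loading bert-base-uncased into a sequence-classification head leaves the classifier layer
# randomly initialized (transformers prints a warning about this); that is expected before
# fine-tuning.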
# In[22]:
from opendelta import Visualization
Visualization(model).structure_graph();
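# The structure graph printed above is typically used to pick module names for
# modified_modules; 'attention' below should match the BertAttention submodules shown there.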
# In[26]: # ***Change here***
from opendelta import LowRankAdapterModel
delta_model1 = LowRankAdapterModel(backbone_model=model, modified_modules=['attention'])
# delta_model1.freeze_module(set_state_dict = True)
delta_model1.log(delta_ratio=True, trainable_ratio=True, visualization=True)
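# Sketch (my assumption, not from the original notebook): the usual OpenDelta workflow seems
# to be to freeze the backbone and then hand the *backbone* model, not the DeltaBase wrapper,
# to the Trainer, roughly:
#
#   delta_model1.freeze_module(exclude=["deltas", "classifier"], set_state_dict=True)
#   trainer = Trainer(model=model, ...)  # rather than model=delta_model1
#
# As far as I can tell, the wrapper object is not a drop-in replacement for the backbone's
# forward().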
# In[29]:
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def compute_metrics(p):
    pred, labels = p
    pred = np.argmax(pred, axis=1)
    accuracy = accuracy_score(y_true=labels, y_pred=pred)
    recall = recall_score(y_true=labels, y_pred=pred)
    precision = precision_score(y_true=labels, y_pred=pred)
    f1 = f1_score(y_true=labels, y_pred=pred)
    return {"accuracy": accuracy, "precision": precision, "recall": recall, "f1": f1}
# Define Trainer
args = TrainingArguments(
    output_dir="output",
    evaluation_strategy="steps",
    eval_steps=500,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    seed=0,
    load_best_model_at_end=True,
)
trainer = Trainer(
    model=delta_model1,
    # model=model,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],
)
# Train pre-trained model
trainer.train()
# TrainOutput(global_step=15, training_loss=0.5652575810750325, metrics={'train_runtime': 11.1754, 'train_samples_per_second': 10.738, 'train_steps_per_second': 1.342, 'total_flos': 4563332366400.0, 'train_loss': 0.5652575810750325, 'epoch': 3.0})