Sorry for not providing the code earlier; I had not implemented it at the time.
My implementation is:
```python
from transformers import AutoModelForSequenceClassification
from peft import PeftModelForSequenceClassification, get_peft_config

PRE_TRAINED_MODEL_NAME = "google/muril-base-cased"

# Prefix-tuning config matching MuRIL-base (12 layers, 12 heads, hidden size 768)
peft_config = get_peft_config({
    "peft_type": "PREFIX_TUNING",
    "task_type": "SEQ_CLS",
    "inference_mode": False,
    "num_virtual_tokens": 20,
    "token_dim": 768,
    "num_transformer_submodules": 1,
    "num_attention_heads": 12,
    "num_layers": 12,
    "encoder_hidden_size": 768,
    "prefix_projection": False,
})

# class_names and device are defined earlier in my notebook
model = AutoModelForSequenceClassification.from_pretrained(
    PRE_TRAINED_MODEL_NAME, num_labels=len(class_names)
)
peft_model = PeftModelForSequenceClassification(model, peft_config)
peft_model = peft_model.to(device)
```
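As a quick sanity check on the wrapper (a minimal sketch, assuming the standard peft API), one can verify that only the prefix parameters and the new classification head are trainable:

```python
# With prefix tuning, the MuRIL backbone should stay frozen; only the
# prefix encoder and the classification head should require gradients.
peft_model.print_trainable_parameters()

# Equivalent manual count:
trainable = sum(p.numel() for p in peft_model.parameters() if p.requires_grad)
total = sum(p.numel() for p in peft_model.parameters())
print(f"trainable: {trainable} / {total} ({100 * trainable / total:.4f}%)")
```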
My training function is:
```python
import numpy as np
import torch
import torch.nn.functional as F
from tqdm.auto import tqdm

def train_epoch(model, data_loader, loss_fn, optimizer, device, scheduler, n_examples):
    model = model.train()
    losses = []
    correct_predictions = 0
    progress_bar = tqdm(range(num_training_steps))
    for d in data_loader:
        input_ids = d["input_ids"].to(device)
        attention_mask = d["attention_mask"].to(device)
        targets = d["targets"].to(device)
        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask
        )
        outputs = F.softmax(outputs.logits, dim=-1)
        _, preds = torch.max(outputs, dim=1)
        loss = loss_fn(outputs, targets)  # .unsqueeze(1))
        correct_predictions += torch.sum(preds == targets)
        losses.append(loss.cpu().detach())
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()
        progress_bar.update(EPOCHS)
    return correct_predictions.double() / n_examples, np.mean(losses)
```
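For context, the validation numbers in the log come from an evaluation loop I have not shown here; a rough sketch of its shape (eval_model is a placeholder name, and details may differ from my actual code):

```python
def eval_model(model, data_loader, loss_fn, device, n_examples):
    # Mirrors train_epoch (same imports), but with no gradient updates.
    model = model.eval()
    losses = []
    correct_predictions = 0
    with torch.no_grad():
        for d in data_loader:
            input_ids = d["input_ids"].to(device)
            attention_mask = d["attention_mask"].to(device)
            targets = d["targets"].to(device)
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            probs = F.softmax(outputs.logits, dim=-1)
            _, preds = torch.max(probs, dim=1)
            loss = loss_fn(probs, targets)  # same call pattern as in training
            correct_predictions += torch.sum(preds == targets)
            losses.append(loss.cpu().detach())
    return correct_predictions.double() / n_examples, np.mean(losses)
```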
My hyperparameter setup is:
```python
from transformers import AdamW, get_linear_schedule_with_warmup

patience = 10
optimizer = AdamW(model.parameters(), lr=2e-5, correct_bias=False)
num_training_steps = EPOCHS * len(train_data_loader)
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=num_training_steps
)
loss_fn = torch.nn.CrossEntropyLoss(weight=class_weights).to(device)
```
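For completeness, the outer loop that produces the log below keeps the model with the best validation loss and early-stops after `patience` epochs without improvement; a minimal sketch (names such as df_train, df_val, val_data_loader and the checkpoint path are assumptions, not my exact code):

```python
best_val_loss = float("inf")
epochs_no_improve = 0
for epoch in range(EPOCHS):
    print(f"Epoch {epoch + 1}/{EPOCHS}")
    print("-" * 30)
    train_acc, train_loss = train_epoch(
        peft_model, train_data_loader, loss_fn, optimizer, device,
        scheduler, len(df_train)
    )
    print(f"Train loss {train_loss} accuracy {train_acc}")
    val_acc, val_loss = eval_model(
        peft_model, val_data_loader, loss_fn, device, len(df_val)
    )
    print(f"Val loss {val_loss} accuracy {val_acc}")
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        epochs_no_improve = 0
        torch.save(peft_model.state_dict(), "best_model_state.bin")  # assumed path
        print("*" * 10)
        print(f"Best model found in Epoch {epoch + 1} with val_loss {val_loss}")
        print("*" * 10)
    else:
        epochs_no_improve += 1
        if epochs_no_improve >= patience:
            break  # early stopping
```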
Why is the training stuck at 0.632 validation accuracy? Am I doing something wrong? Here is the training log for the first few epochs:
```
Epoch 1/100
------------------------------
100%|██████████| 36600/36600 [01:28<00:00, 413.98it/s]
Train loss 0.6931820511817932 accuracy 0.5681176772427948
Val loss 0.6930877877318341 accuracy 0.6320109439124487
**********
Best model found in Epoch 1 with val_loss 0.6930877877318341
**********
Epoch 2/100
------------------------------
100%|██████████| 36600/36600 [01:29<00:00, 407.90it/s]
Train loss 0.6931309700012207 accuracy 0.6001881467544685
Val loss 0.6930594962576161 accuracy 0.6320109439124487
**********
Best model found in Epoch 2 with val_loss 0.6930594962576161
**********
Epoch 3/100
------------------------------
100%|██████████| 36600/36600 [01:29<00:00, 408.18it/s]
Train loss 0.6931337714195251 accuracy 0.6069443256649277
Val loss 0.6930221902287524 accuracy 0.6320109439124487
**********
Best model found in Epoch 3 with val_loss 0.6930221902287524
**********
Epoch 4/100
------------------------------
 72%|████████  | 26400/36600 [01:04<00:24, 408.29it/s]
Train loss 0.6931185126304626 accuracy 0.6079705806893013
Val loss 0.6930161818214085 accuracy 0.6320109439124487
**********
Best model found in Epoch 4 with val_loss 0.6930161818214085
**********
Epoch 5/100
------------------------------
100%|██████████| 36600/36600 [01:29<00:00, 408.66it/s]
Train loss 0.6931254863739014 accuracy 0.6085692294535192
Val loss 0.6930019298325414 accuracy 0.6320109439124487
**********
Best model found in Epoch 5 with val_loss 0.6930019298325414
**********
```