Hello,
I have followed the Transformers tutorial by @nielsr to fine-tune TrOCR from a pre-trained model. Link: Transformers-Tutorials/Fine_tune_TrOCR_on_IAM_Handwriting_Database_using_native_PyTorch.ipynb at master · NielsRogge/Transformers-Tutorials · GitHub
I managed to get it working, but the only evaluation metric I get is the loss. Is it possible to get more metrics, like accuracy, F1, mAP, recall, etc.? How do I do that?
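From what I understand, classification metrics like F1 or mAP don't map directly onto text generation, so word error rate (WER) and exact-match accuracy might be the closest equivalents. This is a rough sketch of what I had in mind, reusing processor and cer_metric from my code below; wer_metric, compute_metrics, and the exact-match accuracy are my own names and ideas, not from the tutorial:

wer_metric = evaluate.load("wer")

def compute_metrics(pred_ids, label_ids):
    pred_str = processor.batch_decode(pred_ids, skip_special_tokens=True)
    label_ids[label_ids == -100] = processor.tokenizer.pad_token_id
    label_str = processor.batch_decode(label_ids, skip_special_tokens=True)
    return {
        "cer": cer_metric.compute(predictions=pred_str, references=label_str),
        "wer": wer_metric.compute(predictions=pred_str, references=label_str),
        # exact match: fraction of sequences predicted perfectly
        "accuracy": sum(p == l for p, l in zip(pred_str, label_str)) / len(label_str),
    }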
Is it also possible to save these metrics from training/evaluation into e.g. an xlsx file?
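What I was thinking of is collecting the per-epoch numbers in a list and writing them out with pandas at the end (again a sketch of my own, not from the tutorial; pandas.DataFrame.to_excel needs the openpyxl package installed):

import pandas as pd

history = []  # inside the training loop, append one dict per epoch, e.g.:
# history.append({"epoch": epoch,
#                 "train_loss": train_loss / len(train_dataloader),
#                 "valid_cer": valid_cer / len(eval_dataloader)})

# after training:
pd.DataFrame(history).to_excel("metrics.xlsx", index=False)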
This is my code:
import torch
import torch.optim as optim
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
import evaluate  # replaces the deprecated datasets.load_metric

from dataframe import create_dataframe, split_dataframe
from dataset import IAMDataset
####################
# Prepare the data
####################
TrOCR_box_path = '/imagesPath'
df = create_dataframe()
train_df, test_df = split_dataframe(df)
# initialize the processor and model
processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
# create the dataset for training and evaluation
train_dataset = IAMDataset(root_dir=TrOCR_box_path, df=train_df, processor=processor)
eval_dataset = IAMDataset(root_dir=TrOCR_box_path, df=test_df, processor=processor)
# create the dataloaders
train_dataloader = DataLoader(train_dataset, batch_size=4, shuffle=True)
eval_dataloader = DataLoader(eval_dataset, batch_size=4)
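# each dataset item is a dict with "pixel_values" and "labels" (token ids,
# with padding replaced by -100), as in the tutorial's IAMDataset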
####################
# Load the model
####################
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-stage1")
model.to(device)
# set special tokens used for creating the decoder_input_ids from the labels
model.config.decoder_start_token_id = processor.tokenizer.cls_token_id
model.config.pad_token_id = processor.tokenizer.pad_token_id
# make sure vocab size is set correctly
model.config.vocab_size = model.config.decoder.vocab_size
# set beam search parameters
model.config.eos_token_id = processor.tokenizer.sep_token_id
model.config.max_length = 64
model.config.early_stopping = True
model.config.no_repeat_ngram_size = 3
model.config.length_penalty = 2.0
model.config.num_beams = 4
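# note: these settings only affect model.generate() during evaluation below;
# they are not used in the training forward pass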
####################
# Train the model
# We will evaluate the model on the Character Error Rate (CER), which is
# available through the Hugging Face evaluate library.
####################
cer_metric = evaluate.load("cer")
def compute_cer(pred_ids, label_ids):
    pred_str = processor.batch_decode(pred_ids, skip_special_tokens=True)
    # replace -100 (the ignore index for the loss) with the pad token id before decoding
    label_ids[label_ids == -100] = processor.tokenizer.pad_token_id
    label_str = processor.batch_decode(label_ids, skip_special_tokens=True)
    cer = cer_metric.compute(predictions=pred_str, references=label_str)
    return cer
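# quick sanity check of the metric on toy strings (my own example):
# cer_metric.compute(predictions=["hallo"], references=["hello"])  # -> 0.2 (1 edit / 5 chars)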
optimizer = optim.AdamW(model.parameters(), lr=5e-5)
for epoch in range(2):  # loop over the dataset multiple times
    # train
    model.train()
    train_loss = 0.0
    for batch in tqdm(train_dataloader):
        # move the batch to the device
        for k, v in batch.items():
            batch[k] = v.to(device)
        # forward + backward + optimize
        outputs = model(**batch)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        train_loss += loss.item()
    print(f"Loss after epoch {epoch}:", train_loss / len(train_dataloader))

    # evaluate
    model.eval()
    valid_cer = 0.0
    with torch.no_grad():
        for batch in tqdm(eval_dataloader):
            # run batched beam-search generation
            outputs = model.generate(batch["pixel_values"].to(device))
            # compute the CER for this batch
            cer = compute_cer(pred_ids=outputs, label_ids=batch["labels"])
            valid_cer += cer
    print("Validation CER:", valid_cer / len(eval_dataloader))
model.save_pretrained("my-model")