Hi guys,
I am trying to fine-tune the T5-small model on several customized datasets for translation tasks.
I have 10 customized datasets; each training set has 200-500 sentences (about 14,000 tokens), and each test set has 400-1,000 sentences (about 28,000 tokens).
First I evaluate the BLEU score of the pre-trained T5-small model on each test set; call these 10 scores score-0.
Then I fine-tune on each training set and evaluate again; call these 10 scores score-1.
The weird thing is that score-1 < score-0 in most of the 10 cases.
I have tried different learning rates, but that doesn't help.
I have also read the T5 Finetuning Tips thread and tried its suggestions, but that doesn't help either.
Here is my code:
import torch
from datasets import load_dataset, load_metric
from datetime import datetime
from torch.utils.data import DataLoader
from transformers import T5ForConditionalGeneration, T5Tokenizer

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def get_data(split, batchsz, domain=None):
    # Map each split to its JSON file and wrap it in a shuffled DataLoader
    if split == 'train_teacher':
        path = './data/aggs/all.json'
    elif split == 'FT_target':
        path = './data/test_support/{}.json'.format(domain)
    elif split == 'evaluation':
        path = './data/test_query/{}.json'.format(domain)
    else:
        raise ValueError('unknown split: {}'.format(split))
    temp_data = load_dataset('json', data_files=path)['train']
    return DataLoader(temp_data, shuffle=True, batch_size=batchsz)
def compute_bleu(metric_name, y_pred, y_true):
    # 'bleu' expects pre-tokenized text; 'sacrebleu' works on raw strings
    metric = load_metric(metric_name)
    metric.add_batch(predictions=y_pred, references=y_true)
    report = metric.compute()
    if metric_name == 'bleu':
        return report['bleu'] * 100
    if metric_name == 'sacrebleu':
        return report['score']
def encode_sentences(is_src, batch):
    # Tokenize source or target sentences; the task prefix is added to the
    # source side only. Note: T5 was pre-trained with the lowercase prefix
    # "translate English to German: ", so the zero-shot baseline is
    # sensitive to its exact wording.
    prefix = "translate English to German: "
    encoded_sentences = tokenizer(
        [prefix + line for line in batch[args().src_language]] if is_src
        else [line for line in batch[args().tgt_language]],
        max_length=200,
        padding=True,
        truncation=True,
        return_tensors='pt',
        add_special_tokens=True).input_ids.to(device)
    return encoded_sentences
def evaluation(epoch):
    model.eval()
    y_true = []
    y_pred = []
    eva_loader = get_data('evaluation', args().batchsz, domain)
    for i, batch in enumerate(eva_loader):
        # Tokenize the source sentences, then translate and decode them
        encoded_src = encode_sentences(is_src=True, batch=batch)
        outputs = model.generate(encoded_src, max_length=200)
        batch_pred = tokenizer.batch_decode(outputs, skip_special_tokens=True)
        # Collect the reference and predicted sentences
        for sentence in batch[args().tgt_language]:
            if args().metric == 'bleu':
                sentence = tokenizer.tokenize(sentence)
            y_true.append([sentence])
        for sentence in batch_pred:
            if args().metric == 'bleu':
                sentence = tokenizer.tokenize(sentence)
            y_pred.append(sentence)
    bleu = compute_bleu(args().metric, y_pred, y_true)
    now = datetime.now()
    print('Time: {}:{}, Domain: {}, {} score: {:.2f}'.format(
        now.hour, now.minute, domain, args().metric, bleu))
def fine_tune_target():
    ft_loader = get_data(split='FT_target', batchsz=args().batchsz, domain=domain)
    optimizer = get_optimizer(model, args().optimizer_name, args().tgt_ft_lr)
    for epoch in range(args().tgt_ft_epochs):
        # evaluation() switches the model to eval mode, so switch back
        # to train mode at the start of every epoch
        model.train()
        loss = 0
        for i, batch in enumerate(ft_loader):
            # Tokenize the source and target sentences
            encoded_src = encode_sentences(is_src=True, batch=batch)
            encoded_tgt = encode_sentences(is_src=False, batch=batch)
            # Replace label padding with -100 so it is ignored by the loss
            encoded_tgt[encoded_tgt == tokenizer.pad_token_id] = -100
            # The forward pass computes cross-entropy against the labels
            prediction = model(input_ids=encoded_src, labels=encoded_tgt)
            train_loss = prediction.loss
            loss += train_loss.item()
            # Update model parameters
            optimizer.zero_grad()
            train_loss.backward()
            optimizer.step()
        loss = loss / len(ft_loader)
        now = datetime.now()
        print('Time: {}:{}, Epoch: {}, Fine Tune Loss: {:.4f}'.format(
            now.hour, now.minute, epoch + 1, loss))
        evaluation(epoch + 1)
    now = datetime.now()
    print('Time: {}:{}, {} Done'.format(now.hour, now.minute, domain))
model = T5ForConditionalGeneration.from_pretrained("t5-small").to(device)
tokenizer = T5Tokenizer.from_pretrained("t5-small")
domain = 'covid'
fine_tune_target()
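get_optimizer isn't shown above; roughly, it does this (a minimal sketch, assuming transformers' Adafactor with a fixed, non-relative learning rate and an AdamW fallback):

from torch.optim import AdamW
from transformers import Adafactor

def get_optimizer(model, optimizer_name, lr):
    # With relative_step=False, Adafactor uses the externally supplied
    # learning rate instead of its own internal schedule
    if optimizer_name == 'adafactor':
        return Adafactor(model.parameters(), lr=lr,
                         scale_parameter=False, relative_step=False,
                         warmup_init=False)
    # Assumption: fall back to AdamW for any other optimizer name
    return AdamW(model.parameters(), lr=lr)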
Hyperparameters:
optimizer: adafactor
learning rate: 5e-4
fine-tune epochs: 20
batch size: 16
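For completeness, args() is a small cached argparse helper; here is a hypothetical reconstruction with defaults matching the hyperparameters above (the attribute names follow the calls in the code; the language defaults are assumptions):

import argparse
from functools import lru_cache

@lru_cache(maxsize=None)
def args():
    # Hypothetical reconstruction: parsed once and cached, since the
    # code above calls args() repeatedly
    parser = argparse.ArgumentParser()
    parser.add_argument('--src_language', default='en')
    parser.add_argument('--tgt_language', default='de')
    parser.add_argument('--metric', default='sacrebleu')
    parser.add_argument('--optimizer_name', default='adafactor')
    parser.add_argument('--tgt_ft_lr', type=float, default=5e-4)
    parser.add_argument('--tgt_ft_epochs', type=int, default=20)
    parser.add_argument('--batchsz', type=int, default=16)
    return parser.parse_args()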