I am using t5-small to train a model for bidirectional English–German translation, but I have run into some problems.
I am training on the WMT14 dataset, but the training loss declines far too slowly, and I don't know why.
import random


def preprocess_data(examples):
    """Build T5 task-prefixed source/target pairs with a random direction.

    For each translation pair, a coin flip decides whether the example is
    emitted as English->German or German->English, so a single model learns
    both directions.

    Args:
        examples: batch dict with a 'translation' list of
            {'en': str, 'de': str} pairs (WMT14 format).

    Returns:
        dict with parallel 'input_text' and 'target_text' lists.
    """
    inputs, targets = [], []
    for pair in examples['translation']:
        english, german = pair['en'], pair['de']
        # One random draw per example decides the translation direction.
        if random.random() < 0.5:
            inputs.append(f"translate English to German: {english}")
            targets.append(german)
        else:
            inputs.append(f"translate German to English: {german}")
            targets.append(english)
    return {'input_text': inputs, 'target_text': targets}
def tokenize_data(examples):
    """Tokenize source/target text for seq2seq training.

    Pads/truncates both sides to 128 tokens and — crucially — masks padding
    positions in the labels with -100 so cross-entropy ignores them. Without
    this masking, with padding='max_length' most label tokens are pad tokens,
    the loss is dominated by trivially-predictable padding, and it barely
    declines during training.

    Args:
        examples: batch dict with 'input_text' and 'target_text' string lists
            (as produced by preprocess_data).

    Returns:
        dict of tokenizer outputs plus a 'labels' list with pad ids -> -100.

    NOTE(review): relies on a module-global `tokenizer`. Returns Python lists
    (no return_tensors='pt'); datasets.map converts them, and the Trainer's
    collator builds tensors — confirm no caller needs raw tensors here.
    """
    model_inputs = tokenizer(
        examples['input_text'],
        max_length=128,
        truncation=True,
        padding='max_length',
    )
    # `text_target=` is the modern replacement for the deprecated
    # `with tokenizer.as_target_tokenizer():` context manager.
    labels = tokenizer(
        text_target=examples['target_text'],
        max_length=128,
        truncation=True,
        padding='max_length',
    )
    # Replace pad-token ids with -100 so padded positions are excluded
    # from the loss (-100 is ignore_index for HF seq2seq models).
    model_inputs['labels'] = [
        [(tok if tok != tokenizer.pad_token_id else -100) for tok in seq]
        for seq in labels['input_ids']
    ]
    return model_inputs
from transformers import AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer

# NOTE(review): the original paste used curly quotes (ā...ā), which are a
# SyntaxError in Python — replaced with straight ASCII quotes throughout.
# `torch`, `model`, `tokenizer`, `train_dataset`, and `test_dataset` must be
# defined earlier in the full script (not visible in this snippet).

# Move the model to GPU when available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
model.to(device)

training_args = Seq2SeqTrainingArguments(
    output_dir="./results2",
    num_train_epochs=20,
    per_device_train_batch_size=128,
    per_device_eval_batch_size=128,
    save_steps=2000,
    save_total_limit=10,
    eval_strategy="steps",
    eval_steps=200,
    warmup_steps=200,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    # Generate (rather than teacher-force) during evaluation.
    predict_with_generate=True,
)

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)