Hello.
I'm trying to train RoBERTa from scratch. My code and the printed log are below. From the log I can see the MLM loss, but I couldn't find an option to report MLM accuracy. Is there anything I can do to check MLM accuracy during training? (A rough sketch of the kind of metric I have in mind is at the end of the post.)
from transformers import RobertaConfig
config = RobertaConfig(
    num_hidden_layers=4,
    hidden_size=512,
    hidden_dropout_prob=0.1,
    num_attention_heads=8,
    attention_probs_dropout_prob=0.1,
    intermediate_size=2048,
    vocab_size=34492,
    type_vocab_size=1,
    initializer_range=0.02,
    max_position_embeddings=512,
    position_embedding_type="absolute",
)
from transformers import RobertaTokenizerFast
tokenizer = RobertaTokenizerFast.from_pretrained("tokenizer", max_len=512)
from transformers import RobertaForMaskedLM
model = RobertaForMaskedLM(config=config)
from transformers import LineByLineTextDataset
train_dataset = LineByLineTextDataset(
    tokenizer=tokenizer,
    file_path="train.txt",
    block_size=tokenizer.max_len_single_sentence,
)
from transformers import DataCollatorForLanguageModeling
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer, mlm=True, mlm_probability=0.15
)
from transformers import Trainer, TrainingArguments
num_train_epochs = 4
max_steps = num_train_epochs * len(train_dataset)
warmup_steps = int(max_steps*0.05)
training_args = TrainingArguments(
    output_dir="output",
    overwrite_output_dir=True,
    do_train=True,
    max_steps=max_steps,
    warmup_steps=warmup_steps,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=100,
    learning_rate=5e-5,
    weight_decay=0,
    max_grad_norm=1,
    adam_beta1=0.9,
    adam_beta2=0.98,
    adam_epsilon=1e-6,
    # disable_tqdm=True
    logging_dir="log",
    logging_first_step=True,
)
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_dataset,
)
trainer.train()
0: {'loss': 10.588345527648926, 'learning_rate': 1.4910684996868758e-09, 'epoch': 0.0011918951132300357}
0: {'loss': 10.444767531507718, 'learning_rate': 7.455342498434379e-07, 'epoch': 0.5959475566150179}
0: {'loss': 9.9342578125, 'learning_rate': 1.4910684996868757e-06, 'epoch': 1.1918951132300357}
0: {'loss': 9.384439453125, 'learning_rate': 2.236602749530314e-06, 'epoch': 1.7878426698450536}
0: {'loss': 8.790998046875, 'learning_rate': 2.9821369993737514e-06, 'epoch': 2.3837902264600714}
0: {'loss': 8.097921875, 'learning_rate': 3.727671249217189e-06, 'epoch': 2.9797377830750893}
0: {'loss': 7.4109140625, 'learning_rate': 4.473205499060628e-06, 'epoch': 3.575685339690107}
0: {'loss': 6.89530859375, 'learning_rate': 5.218739748904065e-06, 'epoch': 4.171632896305125}
0: {'loss': 6.57353515625, 'learning_rate': 5.964273998747503e-06, 'epoch': 4.767580452920143}
0: {'loss': 6.354984375, 'learning_rate': 6.70980824859094e-06, 'epoch': 5.363528009535161}
0: {'loss': 6.194296875, 'learning_rate': 7.455342498434378e-06, 'epoch': 5.959475566150179}
0: {'loss': 6.028484375, 'learning_rate': 8.200876748277817e-06, 'epoch': 6.5554231227651965}
...
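
What I'm hoping for is something along the lines of the sketch below: a compute_metrics hook that reports accuracy on just the masked positions. This is only a rough idea, not tested. It assumes I add a held-out eval_dataset (which I don't have in the code above), turn on evaluation in TrainingArguments (e.g. evaluation_strategy), and that my transformers version's Trainer accepts compute_metrics / preprocess_logits_for_metrics for a masked-LM model, where the collator sets non-masked positions to -100. Is something like this the right way to get MLM accuracy, or is there a built-in option I missed?

def preprocess_logits_for_metrics(logits, labels):
    # keep only the predicted token ids so the full vocab logits
    # are not accumulated in memory during evaluation
    return logits.argmax(dim=-1)

def compute_metrics(eval_pred):
    preds, labels = eval_pred
    mask = labels != -100  # only the ~15% masked positions carry labels
    correct = (preds[mask] == labels[mask]).sum()
    return {"mlm_accuracy": float(correct) / float(mask.sum())}

trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,  # hypothetical held-out split, not in my code above
    compute_metrics=compute_metrics,
    preprocess_logits_for_metrics=preprocess_logits_for_metrics,
)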