Model description
I add a simple custom pytorch-crf layer on top of a TokenClassification model for NER. A CRF head scores whole tag sequences instead of classifying each token independently, which should make the predictions more robust.
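For reference, the pytorch-crf layer works like this on its own (a minimal sketch; the tag count, shapes, and tensors are illustrative):

import torch
from torchcrf import CRF

num_tags = 5                                # illustrative tag-set size
crf = CRF(num_tags, batch_first=True)

emissions = torch.randn(2, 8, num_tags)     # (batch, seq_len, num_tags) scores
tags = torch.randint(0, num_tags, (2, 8))   # gold tag ids
mask = torch.ones(2, 8, dtype=torch.uint8)  # 1 = real token, 0 = padding

loss = -crf(emissions, tags, mask=mask, reduction='mean')  # negative log-likelihood
best_paths = crf.decode(emissions, mask=mask)              # Viterbi decoding, one tag list per sequence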
When I train the model, I get the error shown under ERROR below.
Code
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchcrf import CRF
from transformers import (AutoConfig, AutoModel, BertForTokenClassification,
                          BertTokenizer, PretrainedConfig, PreTrainedModel,
                          Trainer, TrainingArguments)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
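The id2label and label2id maps are used below but never defined in the post; presumably something along these lines (hypothetical tag set):

# Hypothetical BIO tag set; the real label maps are not shown
label_list = ["O", "B-PER", "I-PER", "B-ORG", "I-ORG"]
id2label = {i: tag for i, tag in enumerate(label_list)}
label2id = {tag: i for i, tag in enumerate(label_list)}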
model_checkpoint = "spanbert"
tokenizer = BertTokenizer.from_pretrained(model_checkpoint, add_prefix_space=True)
bert_model = BertForTokenClassification.from_pretrained(
    model_checkpoint, id2label=id2label, label2id=label2id)
bert_model.config.output_hidden_states = True  # expose hidden layers for the custom head
class BertClassifierConfig(PretrainedConfig):
    model_type = "BertForTokenClassification"

    def __init__(self, id2label, label2id, num_labels, **kwargs):
        self.num_labels = num_labels
        self.id2label = id2label
        self.label2id = label2id
        self.output_hidden_states = True
        super().__init__(**kwargs)
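A side note on this config class: it cannot be constructed without arguments, and in recent transformers versions save_pretrained() does construct a bare instance internally (to_diff_dict() calls self.__class__() to diff against defaults), which matches the TypeError reported below:

# Quick check: constructing the config with no arguments raises the
# same TypeError as the training run
try:
    BertClassifierConfig()
except TypeError as e:
    print(e)  # __init__() missing 3 required positional arguments: ...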
Model
class BertForTokenClassification(PreTrainedModel):
    # note: this shadows the transformers BertForTokenClassification imported above
    config_class = BertClassifierConfig

    def __init__(self, config, bert_model, num_labels):
        super(BertForTokenClassification, self).__init__(config)
        self.bert = bert_model
        self.dropout = nn.Dropout(0.25)
        self.classifier = nn.Linear(768, num_labels)  # 768 = BERT-base hidden size
        self.crf = CRF(num_labels, batch_first=True)
    def forward(self, input_ids, attention_mask, labels=None, token_type_ids=None):
        outputs = self.bert(input_ids, attention_mask=attention_mask)
        # outputs[1] is the hidden_states tuple (no labels are passed to the
        # inner model, so the loss slot is skipped and logits come first)
        sequence_output = torch.stack(
            (outputs[1][-1], outputs[1][-2], outputs[1][-3], outputs[1][-4])
        ).mean(dim=0)  # average the last four hidden layers
        sequence_output = self.dropout(sequence_output)
        emission = self.classifier(sequence_output)  # (batch, seq_len, num_labels), e.g. [32, 256, 17]
        mask = attention_mask.type(torch.uint8)
        if labels is not None:
            # make sure labels match (batch, seq_len)
            labels = labels.reshape(attention_mask.size(0), attention_mask.size(1))
            loss = -self.crf(F.log_softmax(emission, dim=2), labels,
                             mask=mask, reduction='mean')
            prediction = self.crf.decode(emission, mask=mask)
            return [loss, prediction]
        else:
            prediction = self.crf.decode(emission, mask=mask)
            return prediction
Training setup
configuration = BertClassifierConfig(id2label, label2id, num_labels=len(label2id))
model = BertForTokenClassification(configuration, bert_model, num_labels=len(label2id))
model.to(device)
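Before wiring up the Trainer, a quick decode-only smoke test can confirm the forward pass works (a sketch; the sample sentence and max_length are arbitrary):

# Hypothetical sanity check: one forward pass without labels
enc = tokenizer("Jim works at Acme", return_tensors="pt",
                padding="max_length", truncation=True, max_length=16)
with torch.no_grad():
    paths = model(enc["input_ids"].to(device), enc["attention_mask"].to(device))
print(paths)  # one Viterbi-decoded tag sequence per input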
args = TrainingArguments(
    "test0000",
    # evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    num_train_epochs=2,
    weight_decay=0.01,
    per_device_train_batch_size=2,
    # per_device_eval_batch_size=32,
    fp16=True,
    # bf16=True  # Ampere GPUs
)
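train_data is not shown in the post; for the Trainer call below to batch correctly it presumably looks something like this (toy sentences with all-"O" labels and fixed-length padding; note that pytorch-crf gathers emissions at every position, so padded positions need a valid tag id rather than -100):

# Hypothetical stand-in for the real training set
texts = ["Jim works at Acme", "Acme hired Jim"]
max_len = 32
train_data = []
for text in texts:
    enc = tokenizer(text, padding="max_length", truncation=True, max_length=max_len)
    enc["labels"] = [0] * max_len  # 0 = "O" in the hypothetical tag set above
    train_data.append(enc)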
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_data,
    # eval_dataset=train_data,
    # data_collator=data_collator,
    # compute_metrics=compute_metrics,
    tokenizer=tokenizer,
)
Training and saving
trainer.train()
trainer.save_model("modeltest")

# register the custom classes so AutoConfig/AutoModel can resolve them on load
AutoConfig.register("BertForTokenClassification", BertClassifierConfig)
AutoModel.register(BertClassifierConfig, BertForTokenClassification)
ERROR
***** Running training *****
Num examples = 4
Num Epochs = 2
Instantaneous batch size per device = 2
Total train batch size (w. parallel, distributed & accumulation) = 2
Gradient Accumulation steps = 1
Total optimization steps = 4
TypeError: __init__() missing 3 required positional arguments: 'id2label', 'label2id', and 'num_labels'
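For reference, PretrainedConfig subclasses are conventionally written so that every __init__ argument has a default and the class can be instantiated bare, which is what save_pretrained() needs; a sketch of that pattern (untested against this exact setup, and the custom model's __init__ has a similar issue for from_pretrained(), which passes only the config):

# Conventional pattern: all arguments defaulted, label maps forwarded
# to the base class, which knows how to store them
class BertClassifierConfig(PretrainedConfig):
    model_type = "BertForTokenClassification"

    def __init__(self, id2label=None, label2id=None, num_labels=2, **kwargs):
        self.output_hidden_states = True
        super().__init__(id2label=id2label, label2id=label2id,
                         num_labels=num_labels, **kwargs)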