I get the following error when fine-tuning FlauBERT (a French BERT model) for classification. Why does my forward() function not accept the token inputs it is given?
import torch
from transformers import FlaubertModel, FlaubertTokenizer, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split

PRE_TRAINED_MODEL_NAME = '/Bert/sm'
tokenizer = FlaubertTokenizer.from_pretrained(PRE_TRAINED_MODEL_NAME)
PRE_TRAINED_MODEL = FlaubertModel.from_pretrained(PRE_TRAINED_MODEL_NAME)
class FlauBertForSequenceClassification(FlaubertModel):
    """
    FlauBERT model with a sequence-classification head.
    """
    def __init__(self, config, num_labels, freeze_encoder=False):
        """
        @param config: the FlaubertConfig of the pretrained model
        @param num_labels (int): the number of target classes
        @param freeze_encoder (bool): set `False` to fine-tune the FlauBERT encoder
        """
        # instantiate the parent class FlaubertModel
        super().__init__(config)
        # number of target classes
        self.num_labels = num_labels
        # instantiate and load a pretrained FlaubertModel as the encoder
        self.encoder = FlaubertModel.from_pretrained(PRE_TRAINED_MODEL_NAME)
        # freeze the encoder parameters if required (Q1)
        if freeze_encoder:
            for param in self.encoder.parameters():
                param.requires_grad = False
        # the classifier: a feed-forward head attached to the encoder's output
        self.classifier = torch.nn.Sequential(
            torch.nn.Linear(in_features=config.emb_dim, out_features=512),
            torch.nn.Tanh(),  # or torch.nn.ReLU()
            torch.nn.Dropout(p=0.1),
            torch.nn.Linear(in_features=512, out_features=self.num_labels, bias=True),
        )
        # dropout applied to the classifier's input
        self.dropout = torch.nn.Dropout(p=0.1)
    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
    ):
        # encode a batch of sequences
        encoder_output = self.encoder(
            input_ids=input_ids,
            attention_mask=attention_mask,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
        )
        # extract the hidden states and pool the first token's representation
        hidden_state = encoder_output[0]    # (bs, seq_len, dim)
        pooled_output = hidden_state[:, 0]  # (bs, dim)
        # apply dropout
        pooled_output = self.dropout(pooled_output)  # (bs, dim)
        # feed into the classifier
        logits = self.classifier(pooled_output)  # (bs, num_labels)
        outputs = (logits,) + encoder_output[1:]
        if labels is not None:
            # multi-label:
            # loss_fct = torch.nn.BCEWithLogitsLoss()
            # loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1, self.num_labels))
            # binary classification:
            # loss_fct = torch.nn.CrossEntropyLoss()
            # loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
            # multi-class classification:
            loss_fct = torch.nn.CrossEntropyLoss()
            loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
            # prepend the loss to the outputs tuple
            outputs = (loss,) + outputs
        return outputs  # (loss), logits, (hidden_states), (attentions)
# instantiate the model
model = FlauBertForSequenceClassification(
    config=PRE_TRAINED_MODEL.config, num_labels=3, freeze_encoder=False
)
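As a quick sanity check (a minimal sketch, assuming the tokenizer and model defined above; the test sentence is made up), the wrapper does work when called with only the keyword arguments its forward() declares:

# hypothetical smoke test: call the model with explicitly named arguments only
enc = tokenizer(["une phrase de test"], return_tensors="pt", padding=True, truncation=True)
out = model(input_ids=enc["input_ids"], attention_mask=enc["attention_mask"])
print(out[0].shape)  # logits: torch.Size([1, 3])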
# create a torch dataset that wraps the tokenizer encodings
class Dataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels=None):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        if self.labels:
            item["labels"] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.encodings["input_ids"])
# load the raw texts and labels (input_file is defined elsewhere)
var, l = input_file(path1)
X = list(var)
y = list(l)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2)
X_train_tokenized = tokenizer(X_train, padding="max_length", truncation=True, max_length=512)
print(X_train_tokenized)
X_val_tokenized = tokenizer(X_val, padding="max_length", truncation=True, max_length=512)
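Note that the tokenizer returns more than just input_ids, and every key it produces ends up in the Dataset items, which the Trainer later unpacks into model(**inputs). A minimal way to inspect them (a sketch, using the encodings built above):

# list the fields the tokenizer actually produced
print(X_train_tokenized.keys())  # typically 'input_ids', 'token_type_ids', 'attention_mask'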
train_dataset = Dataset(X_train_tokenized, y_train)
val_dataset = Dataset(X_val_tokenized, y_val)
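The compute_metrics callable handed to the Trainer below is not shown in this snippet; a minimal sketch of what it could look like (an assumption on my side, computing plain accuracy, with the name matching the call site) is:

import numpy as np

def compute_metrics(eval_pred):
    # the Trainer supplies (logits, gold labels) for the evaluation set
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": float((predictions == labels).mean())}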
training_args = TrainingArguments(
    output_dir='/ghf/sm',
    logging_dir='/ogs/sm',
    do_train=True,
    do_eval=False,
    evaluation_strategy="steps",
    logging_first_step=True,
    logging_steps=10,
    num_train_epochs=2.0,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    learning_rate=5e-5,
    weight_decay=0.01,
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
)
print("Train")
trainer.train()
Stack trace:
***** Running training *****
Num examples = 536
Num Epochs = 2
Instantaneous batch size per device = 8
Total train batch size (w. parallel, distributed & accumulation) = 32
Gradient Accumulation steps = 1
Total optimization steps = 34
0%| | 0/34 [00:00<?, ?it/s]Traceback (most recent call last):
File "//finetuning.py", line 371, in <module>
trainer.train()
File "/ython3.9/site-packages/transformers/trainer.py", line 1269, in train
tr_loss += self.training_step(model, inputs)
File "/lw/.conda/envs/bert/lib/python3.9/site-packages/transformers/trainer.py", line 1754, in training_step
loss = self.compute_loss(model, inputs)
File "/luw/.conda/envs/bert/lib/python3.9/site-packages/transformers/trainer.py", line 1786, in compute_loss
outputs = model(**inputs)
File "uw/.conda/envs/bert/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "uw/.conda/envs/bert/lib/python3.9/site-packages/torch/nn/parallel/data_parallel.py", line 168, in forward
outputs = self.parallel_apply(replicas, inputs, kwargs)
File "/uw/.conda/envs/bert/lib/python3.9/site-packages/torch/nn/parallel/data_parallel.py", line 178, in parallel_apply
return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)])
File "/uw/.conda/envs/bert/lib/python3.9/site-packages/torch/nn/parallel/parallel_apply.py", line 86, in parallel_apply
output.reraise()
File "uw/.conda/envs/bert/lib/python3.9/site-packages/torch/_utils.py", line 425, in reraise
raise self.exc_type(msg)
TypeError: Caught TypeError in replica 0 on device 0.
Original Traceback (most recent call last):
File "/uw/.conda/envs/bert/lib/python3.9/site-packages/torch/nn/parallel/parallel_apply.py", line 61, in _worker
output = module(*input, **kwargs)
File "/uw/.conda/envs/bert/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
TypeError: forward() got an unexpected keyword argument 'token_type_ids'