I trained a Longformer model, but when I try to test it, I get the error below.
My model:
from transformers import LongformerForSequenceClassification, LongformerTokenizerFast

model = LongformerForSequenceClassification.from_pretrained('yikuan8/Clinical-Longformer', num_labels=len(cols))
ltokenizer = LongformerTokenizerFast.from_pretrained('yikuan8/Clinical-Longformer')
This is how I perform predictions:
# Put model in evaluation mode
device = "cuda"
model = trainer.model.to(device)  # moving model to cuda
model.eval()

# Tracking variables
pred_outs, true_labels = [], []
#i=0

# Predict
for batch in validation_dataset:
    # Unpack the inputs from our dataloader
    #batch = tuple(batch[t].to(device,) for t in batch)
    #b_input_ids, b_attn_mask,
    b_labels = batch['labels']

    with torch.no_grad():
        # Forward pass, calculate logit predictions
        pred_out = trainer.model(input_ids=batch['input_ids'],
                                 attention_mask=batch['attention_mask'])  #, labels=batch['labels'])
        pred_out = torch.sigmoid(pred_out)

    # Move predicted output and labels to CPU
    pred_out = pred_out.detach().cpu().numpy()
    label_ids = b_labels.to('cpu').numpy()
    #i+=1

    # Store predictions and true labels
    #print(i)
    #print(outputs)
    #print(logits)
    #print(label_ids)
    pred_outs.append(pred_out)
    true_labels.append(label_ids)
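For context, once the loop runs I plan to aggregate the collected outputs and score them per label, roughly like this (just a sketch of what I intend, assuming the per-batch arrays can be stacked into (num_examples, num_labels) matrices; I never reach this point because of the error):

import numpy as np
from sklearn.metrics import roc_auc_score

# stack per-batch predictions/labels into (num_examples, num_labels) arrays
all_preds = np.vstack(pred_outs)
all_labels = np.vstack(true_labels)

# per-label ROC AUC, mirroring the compute_metrics used during training
test_auc = {d: roc_auc_score(all_labels[:, i], all_preds[:, i]) for i, d in enumerate(cols)}
print(test_auc)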
My error:
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
Cell In[127], line 18
14 b_labels = batch['labels']
16 with torch.no_grad():
17 # Forward pass, calculate logit predictions
---> 18 pred_out = trainer.model(input_ids=batch['input_ids'],
19 attention_mask=batch['attention_mask'])#,labels=batch['labels'])
20 pred_out = torch.sigmoid(pred_out)
21 # Move predicted output and labels to CPU
File /opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
1496 # If we don't have any hooks, we want to skip the rest of the logic in
1497 # this function, and just call forward.
1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []
File /opt/conda/lib/python3.10/site-packages/accelerate/utils/operations.py:632, in convert_outputs_to_fp32.<locals>.forward(*args, **kwargs)
631 def forward(*args, **kwargs):
--> 632 return model_forward(*args, **kwargs)
File /opt/conda/lib/python3.10/site-packages/accelerate/utils/operations.py:620, in ConvertOutputsToFp32.__call__(self, *args, **kwargs)
619 def __call__(self, *args, **kwargs):
--> 620 return convert_to_fp32(self.model_forward(*args, **kwargs))
File /opt/conda/lib/python3.10/site-packages/torch/amp/autocast_mode.py:14, in autocast_decorator.<locals>.decorate_autocast(*args, **kwargs)
11 @functools.wraps(func)
12 def decorate_autocast(*args, **kwargs):
13 with autocast_instance:
---> 14 return func(*args, **kwargs)
File /opt/conda/lib/python3.10/site-packages/accelerate/utils/operations.py:632, in convert_outputs_to_fp32.<locals>.forward(*args, **kwargs)
631 def forward(*args, **kwargs):
--> 632 return model_forward(*args, **kwargs)
File /opt/conda/lib/python3.10/site-packages/accelerate/utils/operations.py:620, in ConvertOutputsToFp32.__call__(self, *args, **kwargs)
619 def __call__(self, *args, **kwargs):
--> 620 return convert_to_fp32(self.model_forward(*args, **kwargs))
File /opt/conda/lib/python3.10/site-packages/torch/amp/autocast_mode.py:14, in autocast_decorator.<locals>.decorate_autocast(*args, **kwargs)
11 @functools.wraps(func)
12 def decorate_autocast(*args, **kwargs):
13 with autocast_instance:
---> 14 return func(*args, **kwargs)
[... skipping similar frames: ConvertOutputsToFp32.__call__ at line 620 (10 times), autocast_decorator.<locals>.decorate_autocast at line 14 (10 times), convert_outputs_to_fp32.<locals>.forward at line 632 (10 times)]
File /opt/conda/lib/python3.10/site-packages/accelerate/utils/operations.py:632, in convert_outputs_to_fp32.<locals>.forward(*args, **kwargs)
631 def forward(*args, **kwargs):
--> 632 return model_forward(*args, **kwargs)
File /opt/conda/lib/python3.10/site-packages/accelerate/utils/operations.py:620, in ConvertOutputsToFp32.__call__(self, *args, **kwargs)
619 def __call__(self, *args, **kwargs):
--> 620 return convert_to_fp32(self.model_forward(*args, **kwargs))
File /opt/conda/lib/python3.10/site-packages/torch/amp/autocast_mode.py:14, in autocast_decorator.<locals>.decorate_autocast(*args, **kwargs)
11 @functools.wraps(func)
12 def decorate_autocast(*args, **kwargs):
13 with autocast_instance:
---> 14 return func(*args, **kwargs)
File /opt/conda/lib/python3.10/site-packages/transformers/models/longformer/modeling_longformer.py:1932, in LongformerForSequenceClassification.forward(self, input_ids, attention_mask, global_attention_mask, head_mask, token_type_ids, position_ids, inputs_embeds, labels, output_attentions, output_hidden_states, return_dict)
1930 global_attention_mask = torch.zeros_like(input_ids)
1931 # global attention on cls token
-> 1932 global_attention_mask[:, 0] = 1
1934 outputs = self.longformer(
1935 input_ids,
1936 attention_mask=attention_mask,
(...)
1944 return_dict=return_dict,
1945 )
1946 sequence_output = outputs[0]
IndexError: too many indices for tensor of dimension 1
And these are the other parts of my code:
test_encodings = ltokenizer(ds_test.text.tolist(),
                            add_special_tokens=True,
                            max_length=1024,
                            padding='max_length',
                            return_token_type_ids=False,
                            return_attention_mask=True,
                            truncation=True)
class ReadDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)
train_dataset = ReadDataset(train_encodings, ds_train[cols].values)
validation_dataset = ReadDataset(validation_encodings, ds_val[cols].values)
test_dataset = ReadDataset(test_encodings, ds_test[cols].values)
class MultilabelTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.get("labels")
        outputs = model(input_ids=inputs['input_ids'],
                        attention_mask=inputs['attention_mask'])
        logits = outputs.get('logits')
        loss_fct = nn.BCEWithLogitsLoss()
        loss = loss_fct(logits.view(-1, self.model.config.num_labels).float(),
                        labels.float().view(-1, self.model.config.num_labels))
        return (loss, outputs) if return_outputs else loss
def sigmoid(x):
    return np.exp(-np.logaddexp(0, -x))

def compute_metrics(eval_preds):
    logits, labels = eval_preds
    res = {}
    for i, d in enumerate(cols):
        res[d] = roc_auc_score(labels[:, i], sigmoid(logits[:, i]))
    return res
training_args = TrainingArguments(
    output_dir='result',            # output directory
    num_train_epochs=6,             # total number of training epochs
    per_device_train_batch_size=4,  # batch size per device during training
    per_device_eval_batch_size=8,   # batch size for evaluation
    evaluation_strategy='epoch',
    learning_rate=2e-5,
    fp16=True,
    fp16_backend="amp"
)
trainer = MultilabelTrainer(
    model=model,                   # the instantiated 🤗 Transformers model to be trained
    args=training_args,            # training arguments, defined above
    train_dataset=train_dataset,   # training dataset
    eval_dataset=validation_dataset,
    compute_metrics=compute_metrics,
)

trainer.train()
Thanks