Running the following code directly works. The grad function is set:
import torch
from transformers import BertForSequenceClassification

model = BertForSequenceClassification.from_pretrained(BERT_MODEL_NAME, return_dict=True, num_labels=1)
model(torch.ones(2, 34).long(), labels=torch.ones(2, 1))
I get:
SequenceClassifierOutput(loss=tensor(1.5051, grad_fn=<MseLossBackward0>), logits=tensor([[-0.2268],
[-0.2268]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)
But when I run the same call from my LightningModule, I get the RuntimeError mentioned above:
import pytorch_lightning as pl
from torch.optim import AdamW
from transformers import BertForSequenceClassification, get_linear_schedule_with_warmup

class SarcasmTagger(pl.LightningModule):

    def __init__(
        self,
        model_name: str,
        n_classes: int,
        n_training_steps=None,
        n_warmup_steps=None
    ):
        super().__init__()
        self.save_hyperparameters()
        self.bert = BertForSequenceClassification.from_pretrained(model_name, return_dict=True, num_labels=n_classes)
        self.n_training_steps = n_training_steps
        self.n_warmup_steps = n_warmup_steps

    def forward(self, input_ids, attention_mask, labels):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        return outputs

    def shared_step(self, batch, batch_idx):
        input_ids = batch["input_ids"]
        attention_mask = batch["attention_mask"]
        labels = batch["label"]
        outputs = self(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        return outputs, loss, labels

    def training_step(self, batch, batch_idx):
        outputs, loss, labels = self.shared_step(batch, batch_idx)
        self.log("train_loss", loss, prog_bar=True, logger=True)
        return {"loss": loss, "predictions": outputs, "labels": labels}

    def validation_step(self, batch, batch_idx):
        outputs, loss, label = self.shared_step(batch, batch_idx)
        self.log("val_loss", loss, prog_bar=True, logger=True)
        return loss

    def test_step(self, batch, batch_idx):
        outputs, loss, label = self.shared_step(batch, batch_idx)
        self.log("test_loss", loss, prog_bar=True, logger=True)
        return loss

    def configure_optimizers(self):
        optimizer = AdamW(self.parameters(), lr=2e-5)
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=self.n_warmup_steps,
            num_training_steps=self.n_training_steps
        )
        return dict(
            optimizer=optimizer,
            lr_scheduler=dict(
                scheduler=scheduler,
                interval='step'
            )
        )
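For context, I drive the module with a plain Lightning Trainer roughly like this; the dataloaders, step counts and Trainer flags below are simplified placeholders, not my exact setup:

    # train_dataloader, val_dataloader, total_steps and warmup_steps are placeholders
    model = SarcasmTagger(
        model_name=BERT_MODEL_NAME,
        n_classes=1,
        n_training_steps=total_steps,
        n_warmup_steps=warmup_steps,
    )
    trainer = pl.Trainer(max_epochs=3, accelerator="gpu", devices=1)
    trainer.fit(model, train_dataloader, val_dataloader)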
Inside the LightningModule, however, the output comes back like this:
SequenceClassifierOutput(loss=tensor(0.6889, device='cuda:0'), logits=tensor([[-0.1969],
[-0.5344],
[-0.2181],
[-0.2516],
[-0.3895],
[-0.4390],
[-0.4549],
[-0.3304],
[-0.4036],
[-0.3530],
[-0.3621],
[-0.3212]], device='cuda:0'), hidden_states=None, attentions=None)
No grad function is set on the loss. What could be the source of the problem?
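To illustrate what I mean by "no grad function set", these are the checks I would drop into training_step to narrow it down; this is a minimal diagnostic sketch, not part of my module:

    def training_step(self, batch, batch_idx):
        outputs, loss, labels = self.shared_step(batch, batch_idx)
        # Is autograd enabled at all in this context?
        print(torch.is_grad_enabled())
        # Are the BERT weights trainable, or frozen?
        print(next(self.bert.parameters()).requires_grad)
        # The loss should require grad and carry a grad_fn, as in the direct call above
        print(loss.requires_grad, loss.grad_fn)
        self.log("train_loss", loss, prog_bar=True, logger=True)
        return {"loss": loss, "predictions": outputs, "labels": labels}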