I retried with BertForSequenceClassification and got much the same message as the one I posted initially:
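(For reference, the snippet below assumes the usual imports and a few hyperparameters that aren't shown here; roughly something like the following, where the concrete values are placeholders:)

import torch
from torch.utils.data import TensorDataset, RandomSampler, SequentialSampler, DataLoader
from transformers import (BertTokenizer, BertForSequenceClassification,
                          AdamW, get_linear_schedule_with_warmup)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
max_len = 128                      # placeholder value
batch_size = 32                    # placeholder value
num_epochs = 4                     # placeholder value
save_path = 'bert_checkpoint.pt'   # placeholder path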
#BertClass
class BertClassification(torch.nn.Module):
    def __init__(self, num_labels=1):
        super(BertClassification, self).__init__()
        self.num_labels = num_labels
        self.bert = BertForSequenceClassification.from_pretrained(
            'bert-base-uncased',
            num_labels=self.num_labels,
            output_attentions=False,
            output_hidden_states=False)

    def forward(self, input_ids, token_type_ids=None, attention_mask=None, labels=None):
        # With older transformers releases (or return_dict=False) the underlying model
        # returns a plain tuple: (loss, logits) when labels are given, (logits,) otherwise.
        if labels is None:
            logits = self.bert(input_ids=input_ids, attention_mask=attention_mask,
                               token_type_ids=token_type_ids, labels=None)
            return logits
        else:
            loss, logits = self.bert(input_ids=input_ids, attention_mask=attention_mask,
                                     token_type_ids=token_type_ids, labels=labels)
            return loss, logits
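(As a quick sanity check of the wrapper, a forward pass on a dummy batch looks like this; illustration only, assuming a transformers version where the model returns a (loss, logits) tuple:)

# Illustrative only: dummy batch of 2 sequences of length 8
dummy_ids = torch.randint(0, 30522, (2, 8))    # random token ids from the bert-base-uncased vocab
dummy_mask = torch.ones_like(dummy_ids)
dummy_labels = torch.tensor([0, 1])
m = BertClassification(num_labels=2)
loss, logits = m(input_ids=dummy_ids, attention_mask=dummy_mask, labels=dummy_labels)
print(loss.item(), logits.shape)               # scalar loss and logits of shape (2, 2)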
#Create Data to process for Bert
train_sentences = X_train['text'].values
train_sentences = [sentence for sentence in train_sentences]
train_labels = Y_train['label'].values

#Tokenize the texts
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)
input_ids = []
attention_masks = []
for sent in train_sentences:
    encoded_sent = tokenizer.encode_plus(
        text=sent,
        add_special_tokens=True,
        max_length=max_len,
        padding='max_length',
        return_attention_mask=True,
        truncation=True
    )
    #Add the outputs to the lists
    input_ids.append(encoded_sent.get('input_ids'))
    attention_masks.append(encoded_sent.get('attention_mask'))
#Convert lists to tensors
train_inputs = torch.tensor(input_ids)
train_masks = torch.tensor(attention_masks)
train_labels = torch.tensor(train_labels, dtype=torch.long, device=device)
#Create Iterators for Train and Valid
train_data = TensorDataset(train_inputs, train_masks, train_labels)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)
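(The validation_dataloader passed to train() below is built the same way from the validation split; something along these lines, assuming valid_inputs, valid_masks and valid_labels were produced exactly like the training tensors above:)

valid_data = TensorDataset(valid_inputs, valid_masks, valid_labels)
valid_sampler = SequentialSampler(valid_data)   # no shuffling needed for evaluation
validation_dataloader = DataLoader(valid_data, sampler=valid_sampler, batch_size=batch_size)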
model = BertClassification(num_labels=2)
optimizer = AdamW(model.parameters(), lr = 1e-5, eps = 1e-8)
total_steps = len(train_dataloader) * num_epochs
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps = 0, num_training_steps = total_steps)
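(The train helper called below isn't shown; as a rough sketch of what it does, assuming the (loss, logits) tuple interface of the wrapper above:)

def train(model, num_epochs, optimizer, scheduler, train_dataloader, valid_dataloader):
    # Rough sketch only, not the original helper: standard fine-tuning loop with
    # a validation pass after each epoch.
    model.to(device)
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0.0
        for batch in train_dataloader:
            b_input_ids, b_masks, b_labels = (t.to(device) for t in batch)
            optimizer.zero_grad()
            loss, _ = model(input_ids=b_input_ids, attention_mask=b_masks, labels=b_labels)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            scheduler.step()
            total_loss += loss.item()
        model.eval()
        correct, seen = 0, 0
        with torch.no_grad():
            for batch in valid_dataloader:
                b_input_ids, b_masks, b_labels = (t.to(device) for t in batch)
                _, logits = model(input_ids=b_input_ids, attention_mask=b_masks, labels=b_labels)
                correct += (logits.argmax(dim=1) == b_labels).sum().item()
                seen += b_labels.size(0)
        print(f'epoch {epoch + 1}: train loss {total_loss / len(train_dataloader):.4f}, '
              f'valid accuracy {correct / seen:.4f}')
    return model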
#Train Model
model = train(model=model, num_epochs=num_epochs, optimizer=optimizer, scheduler=scheduler,
              train_dataloader=train_dataloader, valid_dataloader=validation_dataloader)

#Save Model
model_save = model.module if hasattr(model, 'module') else model
checkpoint = {'epochs': num_epochs, 'state_dict': model_save.state_dict()}
torch.save(checkpoint, save_path)

#Load Model for inference:
model = load_model(save_path)
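(load_model isn't shown either; a minimal sketch, assuming the checkpoint format from the save step above:)

def load_model(path):
    # Minimal sketch (assumption): rebuild the wrapper and restore the fine-tuned weights
    checkpoint = torch.load(path, map_location=device)
    model = BertClassification(num_labels=2)   # note: this calls from_pretrained again, which prints the same initialization message
    model.load_state_dict(checkpoint['state_dict'])
    model.to(device)
    model.eval()
    return model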
Colab message:
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.