Hello, I followed the Hugging Face website to fine-tune FlauBERT for a classification task. What I would like to know is how to get probabilities for the classification, something like [0.75, 0.85, 0.25], because I have 3 classes. So far, when printing the results I get the output below, but it seems to correspond to the logits and not the probabilities? Furthermore, it contains negative numbers, and I thought probabilities were positive numbers in [0, 1].

```
PredictionOutput(predictions=array([[ 0.53947556, 0.42591393, -0.8021714 ],
[ 1.6963196 , -3.3902004 , 1.8755357 ],
[ 1.9264233 , -0.35482746, -2.339029 ],
...,
[ 2.8833866 , -1.1608589 , -1.2109699 ],
[ 1.1803235 , -1.4036949 , 0.48559391],
[ 1.9253297 , -1.0417538 , -1.2987505 ]], dtype=float32), label_ids=array([0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 2, 2, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0,
0, 0, 0, 0, 1, 0, 2, 0, 0, 2, 0, 0, 1, 0, 1, 2, 2, 2, 1, 2, 0, 0,
0, 2, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 2, 1, 1, 0, 0, 0, 0, 1, 0, 1,
1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 2, 0, 2, 1, 2, 0, 1,
0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 2, 1, 0, 0, 0, 0, 1, 0, 0, 1,
1, 0, 2, 0, 0, 0, 0, 0, 1, 2, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0,
0, 1, 2, 1, 1, 2, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1,
1, 1, 0, 2, 0, 1, 1, 1, 1, 0, 0, 0, 2, 2, 0, 0, 1, 1, 2, 1, 1, 0,
0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 2, 0,
2, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 2, 0, 0, 1, 0, 0, 2, 0,
2, 2, 0, 0, 2, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1,
0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1,
0, 0, 0, 1, 1, 0, 1, 2, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1,
0, 0, 1, 1, 0, 0, 0, 1, 2, 0, 0, 2, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0,
0, 0, 0, 0, 2, 2, 1, 1, 2, 0, 2, 1, 1, 1, 0, 2, 0, 0, 0, 2, 2, 0,
1, 1, 1, 1, 1, 0, 0, 1, 2, 0, 0, 0, 1, 0, 1, 1, 2, 0, 0, 0, 0, 0,
1, 0, 0, 0, 0, 0, 0, 2, 2, 2, 0, 1, 2, 0, 1, 0, 1, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 2, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1,
0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 2, 1, 1, 0,
1, 0, 0, 1, 0, 1, 2, 2, 0, 1, 1, 0, 2, 1, 0, 0, 0, 1, 1, 1, 1, 1,
1, 1, 1, 1, 2, 0, 0, 2, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 2, 0, 0, 0,
1, 0, 2, 2, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1,
0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 2, 0, 0,
0, 0, 1, 2, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 2, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0,
1, 0, 0, 2, 0, 1, 0, 1, 0, 0, 2, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1,
1, 2, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0,
1, 0, 1, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 1, 0, 1, 2, 0, 0, 0, 0,
0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 0, 0, 1, 0, 2, 0, 0,
1, 2, 0, 1, 0, 0, 1, 1, 1, 0, 1, 2, 1, 0, 0, 0, 0, 1, 1, 0, 0, 2,
0, 1, 0, 1, 2, 0, 0, 1, 0, 0]), metrics={'test_loss': 1.164217233657837, 'test_accuracy': 0.565028901734104, 'test_f1_mi': 0.565028901734104, 'test_f1_ma': 0.42953547487160565, 'test_runtime': 1.4322, 'test_samples_per_second': 483.16, 'test_steps_per_second': 7.68})
```
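If these are indeed the raw logits, I suppose I can turn them into probabilities myself by applying a softmax over the last axis, something like this (just a sketch, assuming `test_results` is the `PredictionOutput` printed above)?

```
from scipy.special import softmax

# test_results.predictions has shape (num_examples, num_labels) and holds raw logits
probs = softmax(test_results.predictions, axis=-1)
print(probs[:3])  # each row is now positive and sums to 1
```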
The code for getting these results is adapted from the notebook for fine-tuning on a classification task :slight_smile:
```
import os

import torch
from transformers import FlaubertModel, Trainer, TrainingArguments

PRE_TRAINED_MODEL_NAME = '/gpfswork/rech/kpf/umg16uw/expe_5/model/sm'


class FlauBertForSequenceClassification(FlaubertModel):
    """
    FlauBERT model for classification tasks.
    """

    def __init__(self, config, num_labels, freeze_encoder=False):
        """
        @param config: the FlauBERT model configuration
        @param num_labels (int): number of target classes
        @param freeze_encoder (bool): set to `False` to fine-tune the FlauBERT encoder
        """
        # instantiate the parent class FlaubertModel
        super().__init__(config)
        # number of target classes
        self.num_labels = num_labels
        # instantiate and load a pretrained FlaubertModel as the encoder
        self.encoder = FlaubertModel.from_pretrained(PRE_TRAINED_MODEL_NAME)
        # freeze the encoder parameters if required (Q1)
        if freeze_encoder:
            for param in self.encoder.parameters():
                param.requires_grad = False
        # the classifier: a feed-forward head attached on top of the encoder
        self.classifier = torch.nn.Sequential(
            torch.nn.Linear(in_features=config.emb_dim, out_features=512),
            torch.nn.Tanh(),  # or nn.ReLU()
            torch.nn.Dropout(p=0.1),
            torch.nn.Linear(in_features=512, out_features=self.num_labels, bias=True),
        )
        # dropout applied to the classifier's input
        self.dropout = torch.nn.Dropout(p=0.1)

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
    ):
        # encode a batch of sequences
        encoder_output = self.encoder(
            input_ids=input_ids,
            attention_mask=attention_mask,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
        )
        # extract the hidden representations from the encoder output
        hidden_state = encoder_output[0]  # (bs, seq_len, dim)
        pooled_output = hidden_state[:, 0]  # (bs, dim)
        # apply dropout
        pooled_output = self.dropout(pooled_output)  # (bs, dim)
        # feed into the classifier
        logits = self.classifier(pooled_output)  # (bs, num_labels)
        outputs = (logits,) + encoder_output[1:]
        if labels is not None:
            # multi-class classification
            loss_fct = torch.nn.CrossEntropyLoss()
            loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
            outputs = (loss,) + outputs
        return outputs  # (loss), logits, (hidden_states), (attentions)
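
# NOTE: forward() returns the raw classifier logits; no softmax is applied here,
# since torch.nn.CrossEntropyLoss already combines LogSoftmax and NLLLoss internally.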

model = FlauBertForSequenceClassification(
    # `model.config` comes from the base Flaubert model loaded earlier (not shown here)
    config=model.config, num_labels=3, freeze_encoder=False
)

training_args = TrainingArguments(
    output_dir='/gpfswork/rech/kpf/umg16uw/results_hf/sm',
    logging_dir='/gpfswork/rech/kpf/umg16uw/logs/sm',
    do_train=True,
    do_eval=True,
    evaluation_strategy="steps",
    logging_first_step=True,
    logging_steps=10,
    num_train_epochs=3.0,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    learning_rate=2e-5,
    weight_decay=0.01,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=process_and_tokenize_file(X_train, y_train),
    eval_dataset=process_and_tokenize_file(X_val, y_val),
    compute_metrics=compute_metrics,
)

# Train the pre-trained model
print("Start training...\n")
train_results = trainer.train()
val_results = trainer.evaluate()

# Run prediction on every file in the test directory
for root, subdirs, files in os.walk(test_dir):
    # print(root, "...")
    # print(files, "...")
    for f in files:
        path_file = os.path.join(root, f)
        input, input_label = input_file(path_file)
        test_dataset = process_and_tokenize_file(input, input_label)
        test_results = trainer.predict(test_dataset)
        print(test_results)  # gives the results shown above
```
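To get the predicted class per example I was planning to simply take the argmax, which should give the same answer whether it is applied to the logits or to the softmax probabilities, since softmax is monotonic. Roughly like this (a sketch, assuming `test_results` is the `PredictionOutput` above):

```
import numpy as np

pred_labels = np.argmax(test_results.predictions, axis=-1)  # predicted class ids
# should match test_accuracy above if compute_metrics also uses argmax
accuracy = (pred_labels == test_results.label_ids).mean()
```

Is applying a softmax after `trainer.predict()` the correct way to get the class probabilities, or is there a way to make the `Trainer` return them directly?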