Fine-Tuning Self-Supervised Models for Audio-Based Bird Detection
I am trying to add a linear classifier on top of wav2vec2. I want to fine-tune the wav2vec2 model for multi-class classification as shown below.
I followed the tutorial from Hugging Face, but I am still getting the following error:
ValueError: The model did not return a loss from the inputs, only the following keys: logits. For reference, the inputs it received are input_values.
"""
import torch
import numpy as np
import pandas as pd
import librosa
from sklearn.metrics import confusion_matrix
from sklearn.metrics import confusion_matrix, recall_score, accuracy_score
from transformers import pipeline, AutoFeatureExtractor, Wav2Vec2FeatureExtractor, Wav2Vec2Processor, AutoModelForAudioClassification, TrainingArguments, Trainer
from datasets import load_dataset, Dataset, load_metric, Audio, concatenate_datasets
"""
> FineTuning Wav2Vec2 for bird classification
class FineTuneSSLModels():
    def __init__(self):
        self.categories = {'Pigeon': 0, 'Sparrow': 1, 'Crow': 2, 'Eagle': 3,
                           'Hawk': 4, 'Parrot': 5, 'Dove': 6, 'Peacock': 7}
        self.freeze_encoder = False
        self.transformer = False
        self.pre_trained_model = "facebook/wav2vec2-base-960h"  # w2v2 base, 960 h LibriSpeech
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.f1_metric = load_metric("f1")
        self.recall_metric = load_metric("recall")
        self.target_sr = 16000
        # Read the CSV files
        self.path_to_train = 'train.csv'
        self.path_to_val = 'val.csv'
        self.path_to_test = 'test.csv'
        self.df_train = pd.read_csv(self.path_to_train, encoding='utf-8')
        self.df_val = pd.read_csv(self.path_to_val, encoding='utf-8')
        self.df_test = pd.read_csv(self.path_to_test, encoding='utf-8')
        print("self train", self.df_train)
        self.labels = sorted(self.df_train.Label.unique())
        self.label_dict = {label: i for i, label in enumerate(self.labels)}
        print(self.label_dict)
        self.df_train = self.df_train.replace({"Label": self.label_dict})
        self.df_val = self.df_val.replace({"Label": self.label_dict})
        self.df_test = self.df_test.replace({"Label": self.label_dict})
        # Build the full file paths
        self.df_train['FilePath'] = 'full_path/train/' + self.df_train['FileName']
        self.df_val['FilePath'] = 'full_path/eval/' + self.df_val['FileName']
        self.df_test['FilePath'] = 'full_path/test/' + self.df_test['FileName']
        # Create the datasets
        self.train_dataset = Dataset.from_pandas(self.df_train)
        self.val_dataset = Dataset.from_pandas(self.df_val)
        self.test_dataset = Dataset.from_pandas(self.df_test)
        self.train_dataset = self.train_dataset.map(self.preprocess_data)
        self.val_dataset = self.val_dataset.map(self.preprocess_data)
        self.test_dataset = self.test_dataset.map(self.preprocess_data)
        self.feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(self.pre_trained_model, cache_dir="SSLFineTune/")
        self.processor = Wav2Vec2Processor.from_pretrained(self.pre_trained_model, cache_dir="SSLFineTune/")
        self.train_dataset = self.train_dataset.map(self.prepare_datasets, remove_columns=['FilePath', 'SpeakerLabel', 'FileName'],
                                                    batched=True, batch_size=1)
        self.val_dataset = self.val_dataset.map(self.prepare_datasets, remove_columns=['FilePath', 'SpeakerLabel', 'FileName'],
                                                batched=True, batch_size=1)
        self.test_dataset = self.test_dataset.map(self.prepare_datasets, remove_columns=['FilePath', 'SpeakerLabel', 'FileName'],
                                                  batched=True, batch_size=1)
        self.model = AutoModelForAudioClassification.from_pretrained(
            self.pre_trained_model,
            trust_remote_code=True,
            cache_dir="SSLFineTune/",
            num_labels=len(self.df_train['Label'].unique()),
        )
        # Classification head for 8 classes
        self.model.classifier = torch.nn.Linear(in_features=256, out_features=8, bias=True)
        self.freeze_feature_extractor = False
        self.freeze_transformer = False
        if self.freeze_feature_extractor:
            self.model.freeze_feature_extractor()
        if self.freeze_transformer:
            self.model.freeze_transformer()
        self.args = TrainingArguments(
            "SSLFineTune/",
            overwrite_output_dir=True,
            evaluation_strategy="epoch",
            save_strategy="epoch",
            learning_rate=3e-5,
            per_device_train_batch_size=8,
            gradient_accumulation_steps=1,
            per_device_eval_batch_size=1,
            num_train_epochs=10,
            warmup_ratio=0.1,
            logging_steps=10,
            load_best_model_at_end=True,
            metric_for_best_model="recall",  # must match a key returned by compute_metrics
            push_to_hub=False,
            gradient_checkpointing=True,
            save_total_limit=5,
        )
        self.trainer = Trainer(
            self.model,
            self.args,
            train_dataset=self.train_dataset,
            eval_dataset=self.val_dataset,
            tokenizer=self.feature_extractor,
            compute_metrics=self.compute_metrics,
        )
        self.trainer.train()
        self.predictions = self.trainer.predict(self.val_dataset)
        print(self.compute_metrics(self.predictions))
    def compute_metrics(self, eval_pred):
        """Computes macro recall on a batch of predictions."""
        print(eval_pred)
        predictions = np.argmax(eval_pred.predictions, axis=1)
        recall = self.recall_metric.compute(predictions=predictions, references=eval_pred.label_ids, average="macro")
        # f1 = self.f1_metric.compute(predictions=predictions, references=eval_pred.label_ids, average="macro")
        # return {"f1": f1, "recall": recall}
        return recall

    def preprocess_data(self, audio_example):
        """Adds the audio samples and sampling rate to the example as extra columns."""
        audio_example['audio'], audio_example['sampling_rate'] = librosa.load(audio_example["FilePath"], sr=16000)
        return audio_example

    def prepare_datasets(self, audio_example):
        audio_data = audio_example['audio']
        enc_embeds = self.feature_extractor(audio_data, sampling_rate=self.feature_extractor.sampling_rate)
        return enc_embeds

if __name__ == "__main__":
    initiate = FineTuneSSLModels()
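For context on the error itself, the audio-classification head in transformers only computes a loss when the batch contains a labels entry; with input_values alone it returns just logits. A minimal sketch, separate from the class above, using the same checkpoint and a toy waveform:

import torch
from transformers import AutoModelForAudioClassification

model = AutoModelForAudioClassification.from_pretrained("facebook/wav2vec2-base-960h", num_labels=8)
wave = torch.randn(1, 16000)  # one second of random "audio" at 16 kHz

out = model(input_values=wave)                                    # no labels -> only logits
out_with_loss = model(input_values=wave, labels=torch.tensor([3]))
print(out.keys())          # only 'logits', as in the error above
print(out_with_loss.loss)  # a scalar loss tensor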
Edit: fixed for me by changing BertModel to BertForSequenceClassification.
I'm getting the same error, also following the tutorial in the course, chapter 3: Fine-tuning a model with the Trainer API - Hugging Face Course.
My code:
import evaluate
import numpy as np
from datasets import load_dataset
from transformers import (
    BertModel,
    BertTokenizerFast,
    TrainingArguments,
    Trainer,
    EvalPrediction,
)

checkpoint = "bert-base-uncased"
model = BertModel.from_pretrained(checkpoint)
tokenizer = BertTokenizerFast.from_pretrained(checkpoint)

raw_ds = load_dataset("glue", "mrpc")
metric = evaluate.load("glue", "mrpc")

dataset = raw_ds.map(
    lambda x: tokenizer(x["sentence1"], x["sentence2"], truncation=True),
    batched=True,
)
dataset = dataset.remove_columns(["sentence1", "sentence2", "idx"])
dataset = dataset.rename_column("label", "labels")
dataset = dataset.with_format("torch")

trainer_args = TrainingArguments("test-trainer", evaluation_strategy="epoch")

def compute_metrics(eval_preds: EvalPrediction):
    x, y = eval_preds
    preds = np.argmax(x, -1)
    return metric.compute(predictions=preds, references=y)

trainer = Trainer(
    model=model,
    args=trainer_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)
trainer.train()
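Per the edit above, the fix is the head class: BertModel returns only hidden states and never a loss, while BertForSequenceClassification adds a classification head and computes the loss itself whenever the batch includes labels. A minimal sketch of the one-line change, assuming the MRPC setup from the code above:

from transformers import BertForSequenceClassification

checkpoint = "bert-base-uncased"
# Unlike BertModel, this head returns a loss when "labels" is in the batch
# (MRPC is a 2-class task)
model = BertForSequenceClassification.from_pretrained(checkpoint, num_labels=2)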
I get a similar error when using
# Set DistilBERT tokenizer
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
# Define DistilBERT as our base model:
model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=9)
# Use data_collator to convert our samples to PyTorch tensors and concatenate them with the correct amount of padding
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
Specifically, when using
# Define a new Trainer with all the objects we constructed so far
repo_name = "sentiment-model-amazon-reviews-distilbert"
training_args = TrainingArguments(
    output_dir=repo_name,
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=1,
    weight_decay=0.01,
    save_strategy="epoch",
    push_to_hub=True,
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_cal,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

# Train and push to hub
trainer.train()
returns
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-50-f0acdc25090a> in <module>
24
25 # Train and push to hub
---> 26 trainer.train()
3 frames
/usr/local/lib/python3.8/dist-packages/transformers/trainer.py in train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
1541 self._inner_training_loop, self._train_batch_size, args.auto_find_batch_size
1542 )
-> 1543 return inner_training_loop(
1544 args=args,
1545 resume_from_checkpoint=resume_from_checkpoint,
/usr/local/lib/python3.8/dist-packages/transformers/trainer.py in _inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
1789 tr_loss_step = self.training_step(model, inputs)
1790 else:
-> 1791 tr_loss_step = self.training_step(model, inputs)
1792
1793 if (
/usr/local/lib/python3.8/dist-packages/transformers/trainer.py in training_step(self, model, inputs)
2537
2538 with self.compute_loss_context_manager():
-> 2539 loss = self.compute_loss(model, inputs)
2540
2541 if self.args.n_gpu > 1:
/usr/local/lib/python3.8/dist-packages/transformers/trainer.py in compute_loss(self, model, inputs, return_outputs)
2582 else:
2583 if isinstance(outputs, dict) and "loss" not in outputs:
-> 2584 raise ValueError(
2585 "The model did not return a loss from the inputs, only the following keys: "
2586 f"{','.join(outputs.keys())}. For reference, the inputs it received are {','.join(inputs.keys())}."
ValueError: The model did not return a loss from the inputs, only the following keys: logits. For reference, the inputs it received are input_ids,attention_mask.
Rename the columns to `text` and `labels` for text classification using the distilbert-base-uncased model. This needs to be checked for other domains and models too.
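A minimal sketch of that rename, with hypothetical original column names ('sentence' and 'sentiment'); once the columns are called `text` and `labels`, the Trainer passes labels through and the model can return a loss:

import pandas as pd
from datasets import Dataset

# hypothetical toy data; the renames are the point here
ds = Dataset.from_pandas(pd.DataFrame({"sentence": ["a bird", "a plane"], "sentiment": [0, 1]}))
ds = ds.rename_column("sentence", "text")
ds = ds.rename_column("sentiment", "labels")
print(ds.column_names)  # ['text', 'labels']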
I am trying to fine-tune mT5 for a reading-comprehension task and I get the same error. Here is my code:
tokenizer = MT5Tokenizer.from_pretrained(model_dir)

def model_init1():
    return MT5ForQuestionAnswering.from_pretrained(model_dir)

trainer = Seq2SeqTrainer(
    model_init=model_init1,
    args=args,
    train_dataset=training_set,
    eval_dataset=validation_set,
    data_collator=data_collator,
    tokenizer=tokenizer,
)

# Training the model
trainer.train()
The error:
ValueError: The model did not return a loss from the inputs, only the following keys: start_logits,end_logits,past_key_values,encoder_last_hidden_state. For reference, the inputs it received are input_ids,attention_mask.
Could anybody solve this issue?
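A guess rather than a confirmed fix: extractive question-answering heads such as MT5ForQuestionAnswering only compute a loss when each batch also carries start_positions and end_positions, and the inputs listed in the error above contain neither. A sketch of what a single training feature would need to look like (the token indices here are made up):

# Each feature must include the answer span as token indices; the QA head
# computes its loss from these two keys.
feature = {
    "input_ids": [...],       # tokenized question + context (elided)
    "attention_mask": [...],  # (elided)
    "start_positions": 17,    # hypothetical token index where the answer starts
    "end_positions": 23,      # hypothetical token index where the answer ends
}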
How do you know which columns to use? I am using a different model but getting the same error. This is my error statement and code:
ValueError: The model did not return a loss from the inputs, only the following keys: logits,past_key_values. For reference, the inputs it received are input_ids,attention_mask.
Code:
model_checkpoint = "EleutherAI/pythia-70m-deduped"
tokenizer = tr.AutoTokenizer.from_pretrained(
    model_checkpoint,
    revision="step3000",
    cache_dir=DA.paths.datasets,
)
model = tr.AutoModelForCausalLM.from_pretrained(
    model_checkpoint, cache_dir=DA.paths.datasets)
data_collator = tr.DataCollatorWithPadding(tokenizer=tokenizer)
trainer = tr.Trainer(
    model,
    training_args,
    train_dataset=split_dataset["train"],
    eval_dataset=split_dataset["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
)
trainer.train()
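One possibility to check, offered as an assumption rather than a confirmed fix: DataCollatorWithPadding pads input_ids and attention_mask but never creates a labels key, so a causal-LM head has nothing to compute a loss against. DataCollatorForLanguageModeling with mlm=False copies input_ids into labels. A minimal sketch:

import transformers as tr

tokenizer = tr.AutoTokenizer.from_pretrained("EleutherAI/pythia-70m-deduped")
tokenizer.pad_token = tokenizer.eos_token  # this tokenizer defines no pad token by default

# mlm=False makes the collator set labels = input_ids (the model shifts them
# internally), which is what a causal LM needs in order to return a loss
data_collator = tr.DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)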
Has anyone definitively solved this error?
I am running into the same problem:
from transformers import (BertForQuestionAnswering, BertForTokenClassification,
                          BertForSequenceClassification, pipeline, BertTokenizer,
                          Trainer, TrainingArguments, DistilBertForSequenceClassification,
                          DistilBertTokenizerFast, DataCollatorWithPadding)
from datasets import load_metric, load_dataset, DatasetDict, Dataset
from sklearn.model_selection import train_test_split
import torch
import pandas as pd
import numpy as np

# Load the DistilBERT tokenizer
tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased")
# Explanation:
# ...<keyword>... represents a body of text containing the keyword that is
# associated with the values in that text; these values represent the label we
# want to 'predict'
data = {
    'key': ['1', '2', '3', '4', '5'],
    'text': ['...fraction... 30 35', '...fraction... 25 30', '...fraction... 20 30', '...fraction... 30 40', '...fraction... 40 50'],
    'val1': ['30', '25', '20', '30', '40'],
    'val2': ['35', '30', '30', '40', '50'],
}
pdf = pd.DataFrame(data)
features = pdf.drop(columns=["val1", "val2"])
labels = pdf[["val1", "val2"]]
train_df, test_df, train_labels, test_labels = train_test_split(features, labels, test_size=0.2, random_state=42)
features_dataset = Dataset.from_pandas(features)
train_dataset = Dataset.from_pandas(train_df)
test_dataset = Dataset.from_pandas(test_df)
train_test = DatasetDict({"train": train_dataset,"test": test_dataset})
from transformers import AutoModelForSequenceClassification, AutoTokenizer

model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased")
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")

def tokenize_function(dataset):
    return tokenizer(dataset["text"], padding="max_length", truncation=True)

tokenized_datasets = features_dataset.map(tokenize_function, batched=True)
import evaluate
from transformers import TrainingArguments, Trainer

output_path = "/test_trainer"
training_args = TrainingArguments(output_dir=output_path, evaluation_strategy="epoch")
metric = evaluate.load('accuracy')

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
)
trainer.evaluate()
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: __index_level_0__. If __index_level_0__ are not expected by `BertForSequenceClassification.forward`, you can safely ignore this message.
***** Running Evaluation *****
Num examples = 1
Batch size = 8
Out[27]: {'eval_runtime': 0.6222,
'eval_samples_per_second': 1.607,
'eval_steps_per_second': 1.607}
trainer.train()
The following columns in the training set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: __index_level_0__. If __index_level_0__ are not expected by `BertForSequenceClassification.forward`, you can safely ignore this message.
/databricks/python/lib/python3.9/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning
warnings.warn(
***** Running training *****
Num examples = 4
Num Epochs = 3
Instantaneous batch size per device = 8
Total train batch size (w. parallel, distributed & accumulation) = 8
Gradient Accumulation steps = 1
Total optimization steps = 3
**ValueError: The model did not return a loss from the inputs, only the following keys: logits. For reference, the inputs it received are input_ids,attention_mask.**
In my case the perfect solution was to have a column named `label`. My dataset had columns named `target`, `label_name`, and so on; it seems the column names caused the issue. I did
toxic_encoded = toxic_encoded.rename_column("target", "label")
and it started training.
I had the same issue. I removed the `data_collator` argument and it worked. I think SFTTrainer internally handles tokenizing and padding the formatted prompt.
trainer = SFTTrainer(
model=model,
train_dataset=dataset["train"],
eval_dataset=dataset["test"],
peft_config=peft_config,
max_seq_length=max_seq_length,
tokenizer=tokenizer,
formatting_func=formatting_prompts_func,
args=training_arguments
)
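For completeness, formatting_prompts_func is not shown above; a hypothetical version, assuming a dataset with instruction and response columns (both names are illustrative):

def formatting_prompts_func(examples):
    # Receives a batch and returns one formatted string per example;
    # SFTTrainer tokenizes and pads these strings itself.
    texts = []
    for instruction, response in zip(examples["instruction"], examples["response"]):
        texts.append(f"### Instruction:\n{instruction}\n\n### Response:\n{response}")
    return texts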
Any update on this?
Keeping the columns named `text` and `label` solved the issue.
This worked for me, too.
Can you explain what you mean by keeping the columns `text` and `label`? The SFTTrainer docs on HF don't show any `label` column. Do you add a new column? What are the values for it?
This worked for me, thank you!
Oh, I trained a BERT model, and while investigating I found that the model expects the training input as `text` and the prediction targets as `label`. So I renamed the input dataset columns to `text` and `label`.
Hi, after trying the solutions mentioned here and what the Hugging Face documentation indicates, I FINALLY FIXED THE ERROR (at least for my case). The error in question was: ValueError: The model did not return a loss from the inputs, only the following keys: last_hidden_state,past_key_values. For reference, the inputs it received are input_ids,attention_mask.
The indications and steps taken into account are (a condensed sketch follows below):
- Rename the labels column to `label`.
- Cast this column to the ClassLabel type.
- When defining the tokenizer function, pass the `text` and `text_target` arguments (without them the `labels` sequence is not created in the tokenized dataset) and set padding='max_length'.
- Tokenize the dataset after the above, then immediately delete the columns corresponding to the original dataset.
- In the TrainingArguments, set remove_unused_columns=False (the default is True, and I understand that is why `labels` was not among the inputs the model received).
With the above resolved, training started. For better reference, I attach the link to the notebook (the comments are in Spanish): Google Colab
I hope this helps those who have the same problem.
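A condensed sketch of steps 3-5 for the seq2seq case, with a hypothetical checkpoint and column names (question/answer); steps 1-2 (the rename and ClassLabel cast) apply analogously to the label column of a classification dataset:

from datasets import Dataset
from transformers import AutoTokenizer, TrainingArguments

tokenizer = AutoTokenizer.from_pretrained("t5-small")  # hypothetical checkpoint
ds = Dataset.from_dict({"question": ["What is this?"], "answer": ["A test."]})

def tokenize_fn(batch):
    # text_target makes the tokenizer emit a "labels" field alongside
    # input_ids/attention_mask; without it there is no loss target
    return tokenizer(text=batch["question"], text_target=batch["answer"],
                     padding="max_length", truncation=True)

ds = ds.map(tokenize_fn, batched=True, remove_columns=ds.column_names)

# step 5: stop the Trainer from dropping columns the model's forward() doesn't list
args = TrainingArguments(output_dir="out", remove_unused_columns=False)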
P.S.: In the tokenizer function I had first set padding=True (in addition to truncation=True), which gave me the error "ValueError: Unable to create tensor, you should probably activate truncation and/or padding with 'padding=True' 'truncation=True' to have batched tensors with the same length. Perhaps your features (`labels` in this case) have excessive nesting (inputs type `list` where type `int` is expected)", so I changed it to padding='max_length'.
Thank you so much! That works for me!