Fine-Tuning of Self-Supervised Models for Audio-Based Bird Detection
I am trying to add a linear classifier on top of wav2vec2. I want to fine-tune the wav2vec2 model for multi-class classification as shown below.
I followed the tutorial from HuggingFace, but I am still getting the following error:
ValueError: The model did not return a loss from the inputs, only the following keys: logits. For reference, the inputs it received are input_values.
"""
import torch
import numpy as np
import pandas as pd
import librosa
from sklearn.metrics import confusion_matrix
from sklearn.metrics import confusion_matrix, recall_score, accuracy_score
from transformers import pipeline, AutoFeatureExtractor, Wav2Vec2FeatureExtractor, Wav2Vec2Processor, AutoModelForAudioClassification, TrainingArguments, Trainer
from datasets import load_dataset, Dataset, load_metric, Audio, concatenate_datasets
"""
> Fine-tuning Wav2Vec2 for bird classification
class FineTuneSSLModels():
    def __init__(self):
        self.categories = {'Pigeon': 0, 'Sparrow': 1, 'Crow': 2, 'Eagle': 3, 'Hawk': 4, 'Parrot': 5, 'Dove': 6, 'Peacock': 7}
        self.freeze_encoder = False
        self.transformer = False
        self.pre_trained_model = "facebook/wav2vec2-base-960h"  # w2v2 base, 960 hours LibriSpeech
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.f1_metric = load_metric("f1")
        self.recall_metric = load_metric("recall")
        self.target_sr = 16000

        # Read CSV files
        self.path_to_train = 'train.csv'
        self.path_to_val = 'val.csv'
        self.path_to_test = 'test.csv'
        self.df_train = pd.read_csv(self.path_to_train, encoding='utf-8')
        self.df_val = pd.read_csv(self.path_to_val, encoding='utf-8')
        self.df_test = pd.read_csv(self.path_to_test, encoding='utf-8')
        print("self train", self.df_train)

        # Map label names to integer ids
        self.labels = sorted(self.df_train.Label.unique())
        self.label_dict = {label: i for i, label in enumerate(self.labels)}
        print(self.label_dict)
        self.df_train = self.df_train.replace({"Label": self.label_dict})
        self.df_val = self.df_val.replace({"Label": self.label_dict})
        self.df_test = self.df_test.replace({"Label": self.label_dict})

        # Get full path
        self.df_train['FilePath'] = 'full_path/train/' + self.df_train['FileName']
        self.df_val['FilePath'] = 'full_path/eval/' + self.df_val['FileName']
        self.df_test['FilePath'] = 'full_path/test/' + self.df_test['FileName']

        # Create datasets
        self.train_dataset = Dataset.from_pandas(self.df_train)
        self.val_dataset = Dataset.from_pandas(self.df_val)
        self.test_dataset = Dataset.from_pandas(self.df_test)
        self.train_dataset = self.train_dataset.map(self.preprocess_data)
        self.val_dataset = self.val_dataset.map(self.preprocess_data)
        self.test_dataset = self.test_dataset.map(self.preprocess_data)

        self.feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(self.pre_trained_model, cache_dir="SSLFineTune/")
        self.processor = Wav2Vec2Processor.from_pretrained(self.pre_trained_model, cache_dir="SSLFineTune/")
        self.train_dataset = self.train_dataset.map(self.prepare_datasets, remove_columns=['FilePath', 'SpeakerLabel', 'FileName'], batched=True, batch_size=1)
        self.val_dataset = self.val_dataset.map(self.prepare_datasets, remove_columns=['FilePath', 'SpeakerLabel', 'FileName'], batched=True, batch_size=1)
        self.test_dataset = self.test_dataset.map(self.prepare_datasets, remove_columns=['FilePath', 'SpeakerLabel', 'FileName'], batched=True, batch_size=1)

        self.model = AutoModelForAudioClassification.from_pretrained(
            self.pre_trained_model,
            trust_remote_code=True,
            cache_dir="SSLFineTune/",
            num_labels=len(self.df_train['Label'].unique()),
        )
        # Classification head for 8 classes
        self.model.classifier = torch.nn.Linear(in_features=256, out_features=8, bias=True)

        self.freeze_feature_extractor = False
        self.freeze_transformer = False
        if self.freeze_feature_extractor:
            self.model.freeze_feature_extractor()
        if self.freeze_transformer:
            self.model.freeze_transformer()

        self.args = TrainingArguments(
            "SSLFineTune/",
            overwrite_output_dir=True,
            evaluation_strategy="epoch",
            save_strategy="epoch",
            learning_rate=3e-5,
            per_device_train_batch_size=8,
            gradient_accumulation_steps=1,
            per_device_eval_batch_size=1,
            num_train_epochs=10,
            warmup_ratio=0.1,
            logging_steps=10,
            load_best_model_at_end=True,
            metric_for_best_model="recall",  # must match a key returned by compute_metrics
            push_to_hub=False,
            gradient_checkpointing=True,
            save_total_limit=5
        )
        self.trainer = Trainer(
            self.model,
            self.args,
            train_dataset=self.train_dataset,
            eval_dataset=self.val_dataset,
            tokenizer=self.feature_extractor,
            compute_metrics=self.compute_metrics)
        self.trainer.train()
        self.predictions = self.trainer.predict(self.val_dataset)
        print(self.compute_metrics(self.predictions))

    def compute_metrics(self, eval_pred):
        """Computes macro recall on a batch of predictions"""
        print(eval_pred)
        predictions = np.argmax(eval_pred.predictions, axis=1)
        recall = self.recall_metric.compute(predictions=predictions, references=eval_pred.label_ids, average="macro")
        # f1 = f1_metric.compute(predictions=predictions, references=eval_pred.label_ids, average="macro")
        # return {"f1": f1, "spearmanr": spearmanr}
        return recall

    def preprocess_data(self, audio_example):
        """Adds audio samples and sampling rate as extra columns"""
        audio_example['audio'], audio_example['sampling_rate'] = librosa.load(audio_example["FilePath"], sr=16000)
        # print("Single Audio", np.array(audio_example['audio']).shape, audio_example)
        # exit(0)
        return audio_example

    def prepare_datasets(self, audio_example):
        audio_data = audio_example['audio']
        enc_embeds = self.feature_extractor(audio_data, sampling_rate=self.feature_extractor.sampling_rate)
        return enc_embeds


if __name__ == "__main__":
    initiate = FineTuneSSLModels()
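For what it is worth, the ValueError here says the batches reaching the model contain only input_values, so there is no target the classification head could compute a loss from. A minimal sketch of one possible fix, not the original code: carry the integer ids created by label_dict above into a "labels" field when extracting features (renaming the Label column to "labels" with rename_column should work as well).

    def prepare_datasets(self, audio_example):
        # Sketch, not the original method: also emit the integer class ids
        # under "labels" so the Trainer passes them to the model's forward()
        # and gets a loss back alongside the logits.
        enc_embeds = self.feature_extractor(
            audio_example['audio'],
            sampling_rate=self.feature_extractor.sampling_rate,
        )
        enc_embeds['labels'] = audio_example['Label']
        return enc_embeds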
Edit: fixed for me by changing BertModel to BertForSequenceClassification.
I'm getting the same error, also following the tutorials in the course, chapter 3: Fine-tuning a model with the Trainer API - Hugging Face Course.
My code:
import evaluate
import numpy as np
from datasets import load_dataset
from transformers import (
    BertModel,
    BertTokenizerFast,
    TrainingArguments,
    Trainer,
    EvalPrediction,
)

checkpoint = "bert-base-uncased"
model = BertModel.from_pretrained(checkpoint)
tokenizer = BertTokenizerFast.from_pretrained(checkpoint)

raw_ds = load_dataset("glue", "mrpc")
metric = evaluate.load("glue", "mrpc")

dataset = raw_ds.map(
    lambda x: tokenizer(x["sentence1"], x["sentence2"], truncation=True),
    batched=True,
)
dataset = dataset.remove_columns(["sentence1", "sentence2", "idx"])
dataset = dataset.rename_column("label", "labels")
dataset = dataset.with_format("torch")

trainer_args = TrainingArguments("test-trainer", evaluation_strategy="epoch")

def compute_metrics(eval_preds: EvalPrediction):
    x, y = eval_preds
    preds = np.argmax(x, -1)
    return metric.compute(predictions=preds, references=y)

trainer = Trainer(
    model=model,
    args=trainer_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)
trainer.train()
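For reference, a minimal sketch of the change mentioned in the edit above: BertModel only returns hidden states and never a loss, while BertForSequenceClassification adds a classification head and computes a cross-entropy loss whenever a "labels" column is present (MRPC has two classes).

from transformers import BertForSequenceClassification

# Swap the bare encoder for a model with a sequence-classification head;
# with the "labels" column kept above, forward() now returns a loss.
model = BertForSequenceClassification.from_pretrained(checkpoint, num_labels=2)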
I get a similar error when using
# Set DistilBERT tokenizer
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
# Define DistilBERT as our base model:
model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=9)
# Use data_collator to convert our samples to PyTorch tensors and concatenate them with the correct amount of padding
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
Specifically, when using
# Define a new Trainer with all the objects we constructed so far
repo_name = "sentiment-model-amazon-reviews-distilbert"

training_args = TrainingArguments(
    output_dir=repo_name,
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=1,
    weight_decay=0.01,
    save_strategy="epoch",
    push_to_hub=True
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_cal,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics
)

# Train and push to hub
trainer.train()
returns
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-50-f0acdc25090a> in <module>
24
25 # Train and push to hub
---> 26 trainer.train()
/usr/local/lib/python3.8/dist-packages/transformers/trainer.py in train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
1541 self._inner_training_loop, self._train_batch_size, args.auto_find_batch_size
1542 )
-> 1543 return inner_training_loop(
1544 args=args,
1545 resume_from_checkpoint=resume_from_checkpoint,
/usr/local/lib/python3.8/dist-packages/transformers/trainer.py in _inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
1789 tr_loss_step = self.training_step(model, inputs)
1790 else:
-> 1791 tr_loss_step = self.training_step(model, inputs)
1792
1793 if (
/usr/local/lib/python3.8/dist-packages/transformers/trainer.py in training_step(self, model, inputs)
2537
2538 with self.compute_loss_context_manager():
-> 2539 loss = self.compute_loss(model, inputs)
2540
2541 if self.args.n_gpu > 1:
/usr/local/lib/python3.8/dist-packages/transformers/trainer.py in compute_loss(self, model, inputs, return_outputs)
2582 else:
2583 if isinstance(outputs, dict) and "loss" not in outputs:
-> 2584 raise ValueError(
2585 "The model did not return a loss from the inputs, only the following keys: "
2586 f"{','.join(outputs.keys())}. For reference, the inputs it received are {','.join(inputs.keys())}."
ValueError: The model did not return a loss from the inputs, only the following keys: logits. For reference, the inputs it received are input_ids,attention_mask.
Rename the columns to text and labels for text classification with the distilbert-base-uncased model. It needs to be checked for other domains and models too.
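A minimal sketch of that rename with the datasets library, reusing the tokenizer defined above; the column names "sentiment" and the toy rows are hypothetical, the point is only that the target column must be called "labels" (or "label") so the Trainer forwards it to the model and a loss can be computed.

from datasets import Dataset

# Toy example with hypothetical column names.
ds = Dataset.from_dict({"text": ["good", "bad"], "sentiment": [1, 0]})
ds = ds.rename_column("sentiment", "labels")
ds = ds.map(lambda x: tokenizer(x["text"], truncation=True), batched=True)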
I am trying to fine-tune mT5 for a reading comprehension task and I get the same error. Here is my code:
from transformers import MT5Tokenizer, MT5ForQuestionAnswering, Seq2SeqTrainer

tokenizer = MT5Tokenizer.from_pretrained(model_dir)

def model_init1():
    return MT5ForQuestionAnswering.from_pretrained(model_dir)

trainer = Seq2SeqTrainer(
    model_init=model_init1,
    args=args,
    train_dataset=training_set,
    eval_dataset=validation_set,
    data_collator=data_collator,
    tokenizer=tokenizer,
)

# Training the model
trainer.train()
The error:
ValueError: The model did not return a loss from the inputs, only the following keys: start_logits,end_logits,past_key_values,encoder_last_hidden_state. For reference, the inputs it received are input_ids,attention_mask.
Could anybody solve the issue?
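Possibly the same root cause as above: extractive QA heads such as MT5ForQuestionAnswering only compute a loss when start_positions and end_positions are in the batch, and the error shows that only input_ids and attention_mask reached the model. A quick sanity check, a sketch using the training_set variable from the post above:

# QA models need token-level answer spans to return a loss;
# verify the processed dataset actually contains them.
print(training_set.column_names)
assert {"start_positions", "end_positions"}.issubset(training_set.column_names), \
    "add answer span positions during preprocessing so the model can return a loss"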
How do you know which columns to use? I am using a different model but getting the same error. This is my error message and code:
ValueError: The model did not return a loss from the inputs, only the following keys: logits,past_key_values. For reference, the inputs it received are input_ids,attention_mask.
CODE
model_checkpoint = "EleutherAI/pythia-70m-deduped"
tokenizer = tr.AutoTokenizer.from_pretrained(
model_checkpoint,
revision="step3000",
cache_dir=DA.paths.datasets,
)
model = tr.AutoModelForCausalLM.from_pretrained (
model_checkpoint,cache_dir=DA.paths.datasets)
data_collator = tr.DataCollatorWithPadding(tokenizer=tokenizer)
trainer = tr.Trainer(
model,
training_args,
train_dataset=split_dataset["train"],
eval_dataset=split_dataset["test"],
tokenizer=tokenizer,
data_collator=data_collator,
)
trainer.train()
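One thing that stands out in this snippet: DataCollatorWithPadding only pads input_ids and attention_mask and never creates labels, so a causal LM returns logits but no loss. A sketch of one common fix (an assumption on my part, not necessarily the intended solution of whatever course this is from) is to use DataCollatorForLanguageModeling with mlm=False, which builds a "labels" field from input_ids.

# Sketch: this collator copies the padded input_ids into "labels" (with the
# padding masked out), so AutoModelForCausalLM can return a loss to the Trainer.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
data_collator = tr.DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)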
Has anyone definitively solved this error?
I am running into the same problem:
from transformers import BertForQuestionAnswering, BertForTokenClassification, BertForSequenceClassification, pipeline, BertTokenizer, Trainer, TrainingArguments, DistilBertForSequenceClassification, DistilBertTokenizerFast, DataCollatorWithPadding
from datasets import load_metric, load_dataset, DatasetDict, Dataset
from sklearn.model_selection import train_test_split
import torch
import pandas as pd
import numpy as np
# load distillbert tokenizer
tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased")
# Explanation:
# ...<keyword>... represents a body of text which includes the keyword which is
# associated with the values in that text; these values represent the label we
# want to 'predict'
data = {
    'key': ['1', '2', '3', '4', '5'],
    'text': ['...fraction... 30 35', '...fraction... 25 30', '...fraction... 20 30', '...fraction... 30 40', '...fraction... 40 50'],
    'val1': ['30', '25', '20', '30', '40'],
    'val2': ['35', '30', '30', '40', '50']
}
pdf = pd.DataFrame(data)
features = pdf.drop(columns=["val1", "val2"])
labels = pdf[["val1", "val2"]]
train_df, test_df, train_labels, test_labels = train_test_split(features, labels, test_size=0.2, random_state=42)
features_dataset = Dataset.from_pandas(features)
train_dataset = Dataset.from_pandas(train_df)
test_dataset = Dataset.from_pandas(test_df)
train_test = DatasetDict({"train": train_dataset,"test": test_dataset})
from transformers import AutoModelForSequenceClassification
model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased")
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
def tokenize_function(dataset):
    return tokenizer(dataset["text"], padding="max_length", truncation=True)
tokenized_datasets = features_dataset.map(tokenize_function, batched=True)
from transformers import AutoModelForSequenceClassification
model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased")
from transformers import TrainingArguments
output_path = "/test_trainer"
training_args = TrainingArguments(output_dir=output_path)
import numpy as np
import evaluate
metric = evaluate.load('accuracy')
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)
from transformers import TrainingArguments, Trainer
training_args = TrainingArguments(output_dir=output_path, evaluation_strategy="epoch")
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics
)
trainer.evaluate()
The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: __index_level_0__. If __index_level_0__ are not expected by `BertForSequenceClassification.forward`, you can safely ignore this message.
***** Running Evaluation *****
Num examples = 1
Batch size = 8
Out[27]: {'eval_runtime': 0.6222,
'eval_samples_per_second': 1.607,
'eval_steps_per_second': 1.607}
trainer.train()
The following columns in the training set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: __index_level_0__. If __index_level_0__ are not expected by `BertForSequenceClassification.forward`, you can safely ignore this message.
/databricks/python/lib/python3.9/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning
warnings.warn(
***** Running training *****
Num examples = 4
Num Epochs = 3
Instantaneous batch size per device = 8
Total train batch size (w. parallel, distributed & accumulation) = 8
Gradient Accumulation steps = 1
Total optimization steps = 3
**ValueError: The model did not return a loss from the inputs, only the following keys: logits. For reference, the inputs it received are input_ids,attention_mask.**
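The cause looks the same as in the earlier posts: the datasets handed to the Trainer here never contain a labels column (features only keeps key and text, and val1/val2 are dropped), so BertForSequenceClassification can only return logits. A minimal sketch of one way to fix it, assuming (my assumption, not stated in the post) that val1 is the target and can be treated as a set of discrete classes:

# Sketch: attach an integer "labels" column before tokenizing so the model
# receives targets and returns a loss. label_ids maps each distinct val1
# string to a contiguous class id.
label_ids = {v: i for i, v in enumerate(sorted(pdf["val1"].unique()))}
pdf["labels"] = pdf["val1"].map(label_ids)
dataset = Dataset.from_pandas(pdf[["text", "labels"]])
tokenized = dataset.map(tokenize_function, batched=True)
model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased", num_labels=len(label_ids))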