Hi @darkcurrent,
Thanks! If you want to use the script, you'll need to change a few parts, but I'll guide you through the notebook. If it helps you, please contribute to improving the code for general use (repo).
Suppose you have a schema like this, where emotion is a list of floats/integers:
Dataset({
    features: ['path', 'emotion'],
    num_rows: xxx
})

Item({
    path: "/to/path/idk.wav",
    emotion: [1.1, 2.1, 2.4, ..., 1.5]
})
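If your data isn't in this shape yet, here is a minimal sketch of how such a dataset could be built from a CSV with one column per emotion dimension. The file name and the dimension columns ("valence", "arousal", "dominance") are just placeholders for whatever your data uses.

import pandas as pd
from datasets import Dataset

# Hypothetical CSV with a "path" column plus one column per emotion dimension.
df = pd.read_csv("emotions.csv")  # placeholder file name
df["emotion"] = df[["valence", "arousal", "dominance"]].values.tolist()  # assumed dimension names
train_dataset = Dataset.from_pandas(df[["path", "emotion"]])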
First of all, we need to change the label information:
label_list = train_dataset.unique(output_column)
label_list.sort() # Let's sort it for determinism
num_labels = len(label_list)
print(f"A classification problem with {num_labels} classes: {label_list}")
To this:
num_labels = len(train_dataset[0][output_column])
label_list = list(range(num_labels))
print(f"A regression problem with {num_labels} items: {label_list}")
is_regression = True
Then, add the problem type to the config and adjust the preprocessing step and the label type in the collator fn.
# config
config = AutoConfig.from_pretrained(
    model_name_or_path,
    num_labels=num_labels,
    label2id={label: i for i, label in enumerate(label_list)},
    id2label={i: label for i, label in enumerate(label_list)},
    finetuning_task="wav2vec2_clf",
    problem_type="regression",
)
setattr(config, 'pooling_mode', pooling_mode)
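Setting problem_type="regression" only helps if the model's forward switches its loss accordingly. The notebook's custom model does this internally; as a rough, self-contained sketch (the helper name compute_loss and its arguments are mine, not the notebook's), the switch looks like:

import torch
import torch.nn as nn

def compute_loss(logits: torch.Tensor, labels: torch.Tensor, problem_type: str, num_labels: int):
    # Regression: mean squared error over all target dimensions.
    # Classification: cross-entropy over class indices.
    if problem_type == "regression":
        return nn.MSELoss()(logits.view(-1, num_labels), labels.view(-1, num_labels))
    return nn.CrossEntropyLoss()(logits.view(-1, num_labels), labels.view(-1))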
# preprocess
def speech_file_to_array_fn(path):
    speech_array, sampling_rate = torchaudio.load(path)
    resampler = torchaudio.transforms.Resample(sampling_rate, target_sampling_rate)
    speech = resampler(speech_array).squeeze().numpy()
    return speech

def preprocess_function(examples):
    speech_list = [speech_file_to_array_fn(path) for path in examples[input_column]]
    target_list = [label for label in examples[output_column]]  # do any preprocessing on your float/integer targets here
    result = processor(speech_list, sampling_rate=target_sampling_rate)
    result["labels"] = list(target_list)
    return result
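The preprocessing is then applied with datasets' map, as in the original notebook; the batch size and process count below are just placeholder values:

train_dataset = train_dataset.map(
    preprocess_function,
    batch_size=100,  # placeholder value
    batched=True,
    num_proc=4,      # placeholder value
)
eval_dataset = eval_dataset.map(
    preprocess_function,
    batch_size=100,
    batched=True,
    num_proc=4,
)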
# collator
from dataclasses import dataclass
from typing import Dict, List, Optional, Union

import torch
import transformers
from transformers import Wav2Vec2Processor


@dataclass
class DataCollatorCTCWithPadding:
    processor: Wav2Vec2Processor
    padding: Union[bool, str] = True
    max_length: Optional[int] = None
    max_length_labels: Optional[int] = None
    pad_to_multiple_of: Optional[int] = None
    pad_to_multiple_of_labels: Optional[int] = None

    def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
        input_features = [{"input_values": feature["input_values"]} for feature in features]
        label_features = [feature["labels"] for feature in features]

        # d_type = torch.long if isinstance(label_features[0], int) else torch.float
        d_type = torch.float  # regression targets are plain floats
        batch = self.processor.pad(
            input_features,
            padding=self.padding,
            max_length=self.max_length,
            pad_to_multiple_of=self.pad_to_multiple_of,
            return_tensors="pt",
        )
        batch["labels"] = torch.tensor(label_features, dtype=d_type)
        return batch
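The evaluation metric also has to change from accuracy to a regression metric. A minimal sketch using mean squared error (the exact metric you use may differ) that reuses the is_regression flag from above:

import numpy as np
from transformers import EvalPrediction

def compute_metrics(p: EvalPrediction):
    preds = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions
    if is_regression:
        # Compare the raw predictions to the float targets directly.
        return {"mse": ((preds - p.label_ids) ** 2).mean().item()}
    preds = np.argmax(preds, axis=1)
    return {"accuracy": (preds == p.label_ids).astype(np.float32).mean().item()}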
If I continued the whole process here, it would be a very long reply, so I'll attach the modified notebook for better intuition.
Google Colab Notebook: Regression Example