My goal is to classify input text by intent using a BERT classifier. For example, here is the content of my CSV file:
text,intent
"I need to transcribe my video","transcribe video"
"Can you convert this to text?","transcribe video"
"transcription","transcribe video"
"transcribe","transcribe video"
"extract the whole text from the video","transcribe video"
"Translate this video","translate video"
"translate","translate video"
"turn the speech of this video into French","translate video"
"Translate this video to French","translate video"
"extract the audio","extract audio"
"extract audio","extract audio"
"I need the audio from this video","extract audio"
"get me the audio from this video","extract audio"
"audio from this video","extract audio"
"audio","extract audio"
"I need the .wav file","extract audio"
"convert to audio","extract audio"
"to mp3","extract audio"
".mp3","extract audio"
"to .wav","extract audio"
".wav","extract audio"
"I need to get the subtitles from this video","get subtitles"
"extract subtitles from this video","get subtitles"
"get me the captions","get subtitles"
"get me the subtitles in an srt file","get subtitles"
".srt","get subtitles"
"I need the captions in vtt format","get subtitles"
".vtt","get subtitles"
"subtitles","get subtitles"
"captions","get subtitles"
"How can I get subtitles from this?","get subtitles"
"Blur the face in this video","blur face"
"Upscale my video to 4K","upscale video"
"Make a gif from this clip","make gif"
"Create a clip from this video","create clip"
Here is my training code:
from datasets import Dataset
import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, DataCollatorWithPadding, BertForSequenceClassification, Trainer, TrainingArguments
# Load the dataset from CSV
df = pd.read_csv("intent.csv")
# Split the dataset into training and validation sets
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)  # fixed seed so the split is reproducible
# Convert dataframes to Hugging Face datasets
train_dataset = Dataset.from_pandas(train_df)
val_dataset = Dataset.from_pandas(val_df)
# Ensure the intent column is correctly mapped to numeric labels
intent_labels = df['intent'].unique()
label2id = {label: idx for idx, label in enumerate(intent_labels)}
id2label = {idx: label for idx, label in enumerate(intent_labels)}
train_dataset = train_dataset.map(lambda x: {'label': label2id[x['intent']]})
val_dataset = val_dataset.map(lambda x: {'label': label2id[x['intent']]})
# Load the tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# Tokenize the dataset
def tokenize_function(examples):
    # Truncate long inputs; leave padding to the data collator below,
    # which pads each batch dynamically instead of padding everything to 512.
    return tokenizer(examples['text'], truncation=True, max_length=512)
train_dataset = train_dataset.map(tokenize_function, batched=True)
val_dataset = val_dataset.map(tokenize_function, batched=True)
# Data collator to handle dynamic padding
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
# Load the model and configure for sequence classification
model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=len(intent_labels),
    id2label=id2label,
    label2id=label2id,
)
# Training arguments
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
)
# Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
)
# Train the model
trainer.train()
# Evaluate the model
trainer.evaluate()
# Save the model
model.save_pretrained("./fine-tuned-model")
tokenizer.save_pretrained("./fine-tuned-model")
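It can also help to log validation accuracy during training. Here is a minimal sketch of a compute_metrics function (assuming scikit-learn is available) that could be passed to the Trainer above:
import numpy as np
from sklearn.metrics import accuracy_score

def compute_metrics(eval_pred):
    # The Trainer supplies (logits, labels); take the argmax over the logits.
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {"accuracy": accuracy_score(labels, predictions)}
Passing compute_metrics=compute_metrics to the Trainer constructor logs validation accuracy at every evaluation.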
And here is the testing code:
from transformers import pipeline, BertTokenizer, BertForSequenceClassification
# Load the fine-tuned model and tokenizer
model = BertForSequenceClassification.from_pretrained('./fine-tuned-model')
tokenizer = BertTokenizer.from_pretrained('./fine-tuned-model')
classifier = pipeline('text-classification', model=model, tokenizer=tokenizer)
# Test the NLU system with new inputs
user_input = "make a gif"
result = classifier(user_input)
print(result)
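To see how the probability mass is spread across all intents (which helps diagnose the low scores below), the pipeline can return every label's score; a small sketch using the same classifier:
# Return scores for all labels instead of only the top one.
all_scores = classifier(user_input, top_k=None)
for entry in all_scores:
    print(f"{entry['label']}: {entry['score']:.4f}")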
For example, if I say "make a gif" as my input, I get the following:
[{'label': 'upscale video', 'score': 0.1773073822259903}]
Instead, I should have gotten "make gif", something like:
[{'label': 'make gif', 'score': 0.934345434534535}]
Or if I enter "transcribe", I get the following:
[{'label': 'get subtitles', 'score': 0.17158956825733185}]
Instead, I should have gotten "transcribe video", something like:
[{'label': 'transcribe video', 'score': 0.954646456457744535}]
The model is very inconsistent. I am wondering whether I am doing something wrong. I would appreciate any help.