Hi,
I got an error while fine-tuning a DistilBERT model. A screenshot of the error is attached.
The code (everything except the data preprocessing) is:
import pandas as pd
import numpy as np
import seaborn as sns
import transformers
from transformers import AutoTokenizer,TFBertModel,TFDistilBertModel, DistilBertConfig
# Checkpoint identifiers passed to the `from_pretrained` calls below.
_BERT_CKPT = 'bert-base-uncased'
_DISTILBERT_CKPT = 'distilbert-base-uncased'

# NOTE(review): `tokenizer` is rebound to a DistilBERT tokenizer further down,
# and neither `d_bert` nor `bert` is referenced in the visible part of this
# script -- confirm these loads are still needed.
tokenizer = AutoTokenizer.from_pretrained(_BERT_CKPT)
d_bert = TFDistilBertModel.from_pretrained(_DISTILBERT_CKPT)
# In[72]:
bert = TFBertModel.from_pretrained(_BERT_CKPT)
# In[73]:
# Every character that is not an ASCII letter or digit is replaced by a space.
_NON_ALNUM = "[^0-9a-zA-Z]"
df_train = X_train.replace(_NON_ALNUM, " ", regex=True)
df_test = X_test.replace(_NON_ALNUM, " ", regex=True)

# The tokenizer and tf.data calls below are fed plain Python lists:
# the 'Message' column as texts, Y_train / Y_test as labels.
X_train_list, X_test_list = (
    list(frame['Message']) for frame in (df_train, df_test)
)
Y_train_list, Y_test_list = list(Y_train), list(Y_test)
# In[91]:
# print(X_test_list)
# In[75]:
from transformers import DistilBertTokenizerFast

# Rebinds `tokenizer` to the tokenizer of the checkpoint that is fine-tuned below.
tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')
# In[76]:
# Train and test texts are encoded separately with the same options
# (truncation and padding both enabled).
train_encodings, test_encodings = (
    tokenizer(texts, truncation=True, padding=True)
    for texts in (X_train_list, X_test_list)
)
# In[92]:
# train_encodings
# In[78]:
import tensorflow as tf


def _as_tf_dataset(encodings, labels):
    """Slice tokenizer output and labels into a ``tf.data.Dataset`` of (features, label) pairs."""
    features = dict(encodings)
    return tf.data.Dataset.from_tensor_slices((features, labels))


train_dataset_sl = _as_tf_dataset(train_encodings, Y_train_list)
test_dataset_sl = _as_tf_dataset(test_encodings, Y_test_list)
# In[79]:
print(train_dataset_sl)
# In[86]:
from transformers import TFDistilBertForSequenceClassification, TFTrainer, TFTrainingArguments

# Settings for the fine-tuning run, gathered in one mapping and then
# unpacked into TFTrainingArguments as keyword arguments.
_training_kwargs = {
    'output_dir': './results',
    'num_train_epochs': 2,
    'per_device_train_batch_size': 8,
    'per_device_eval_batch_size': 16,
    'warmup_steps': 500,
    'weight_decay': 0.01,
    'logging_dir': './logs',
    'logging_steps': 10,
}
training_args = TFTrainingArguments(**_training_kwargs)
# In[87]:
# The model is created inside the strategy scope taken from `training_args`.
# The `with` body must be indented: as originally pasted it was not, which
# raises an IndentationError before anything runs.
with training_args.strategy.scope():
    model = TFDistilBertForSequenceClassification.from_pretrained(
        "distilbert-base-uncased",
        num_labels=6,  # the dataset has 6 classes, labelled 0 to 5
    )

# The trainer is built outside the scope; it trains on the training dataset
# and is given the test dataset for evaluation.
# NOTE(review): TFTrainer is reported as deprecated in newer transformers
# releases in favour of Keras `model.compile` / `model.fit` -- confirm against
# the installed version.
trainer = TFTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset_sl,
    eval_dataset=test_dataset_sl,
)
trainer.train()
Number of labels in the dataset = 6 (0 to 5).
Can anybody help me resolve this issue?
Thanks in advance.