Error in Model.prepare_tf_dataset()

## Data used: the "lotusacharya/nepalinewsdataset" dataset from Kaggle.
##Code:
%%time

def preprocess_function(rows):
    """Tokenize the ``text`` field of ``rows`` with the notebook's ``tokenizer``.

    Intended as the callback for ``data.map(..., batched=True)``, so ``rows``
    is a batch and ``rows["text"]`` is a list of strings.

    Args:
        rows: Mapping with a ``"text"`` entry holding the raw text to tokenize.

    Returns:
        Whatever ``tokenizer`` returns for the batch: sequences are truncated
        to at most 128 tokens and padding is enabled (``padding=True``).
    """
    return tokenizer(rows["text"], truncation=True, max_length=128, padding=True)

print("Tokenizing the data")
# Tokenize every split in batches using two worker processes. All columns
# listed for the train split are removed, so only the tokenizer outputs remain.
tokenized_inputs = data.map(
    preprocess_function,
    batched=True,
    num_proc=2,
    remove_columns=data["train"].column_names,
)

# Re-attach the labels that were removed together with the other columns.
# NOTE: the "label" column is copied verbatim. As the author notes at the end
# of this post, it still holds raw text labels here; that is what triggers the
# "Unrecognized array dtype object" RuntimeError in prepare_tf_dataset().
# Encode the labels as integers before adding them to avoid the error.
tokenized_inputs["train"] = tokenized_inputs["train"].add_column(
    name="labels", column=data["train"]["label"]
)
tokenized_inputs["test"] = tokenized_inputs["test"].add_column(
    name="labels", column=data["test"]["label"]
)

# Bare expression: displays the resulting dataset in the notebook cell output.
tokenized_inputs

from transformers import DataCollatorWithPadding

print("Initializing Data Collator")
# Collator that pads each batch and returns TensorFlow tensors.
# NOTE(review): no ``padding`` argument is passed, so max_length=256 is
# presumably what produces the "max_length is ignored when padding=True"
# UserWarning shown in the output -- confirm, and use padding="max_length"
# if fixed-length batches are actually wanted.
data_collator = DataCollatorWithPadding(
    tokenizer=tokenizer,
    max_length=256,
    return_tensors="tf",
)

print("Preparing Training and Testing sets to TRAIN the MODEL")
# Training set: shuffled, batches of 16, collated with the padding collator.
tf_train_set = model.prepare_tf_dataset(
    tokenized_inputs["train"],
    shuffle=True,
    batch_size=16,
    collate_fn=data_collator,
)

# Test set: same batching and collation, but kept in its original order.
tf_test_set = model.prepare_tf_dataset(
    tokenized_inputs["test"],
    shuffle=False,
    batch_size=16,
    collate_fn=data_collator,
)

##Error:
Preparing Training and Testing sets to TRAIN the MODEL

/usr/local/lib/python3.10/dist-packages/transformers/tokenization_utils_base.py:2395: UserWarning: max_length is ignored when padding=True and there is no truncation strategy. To pad to max length, use padding='max_length'.
warnings.warn(


RuntimeError Traceback (most recent call last)

in <cell line: 2>()
1 print("Preparing Training and Testing sets to TRAIN the MODEL")
----> 2 tf_train_set = model.prepare_tf_dataset(
3 tokenized_inputs["train"],
4 shuffle=True,
5 batch_size=16,

1 frames

/usr/local/lib/python3.10/dist-packages/datasets/arrow_dataset.py in _get_output_signature(dataset, collate_fn, collate_fn_args, cols_to_retain, batch_size, num_test_batches)
289 tf_dtype = tf.string
290 else:
--> 291 raise RuntimeError(
292 f"Unrecognized array dtype {np_arrays[0].dtype}. \n"
293 "Nested types and image/audio types are not supported yet."

RuntimeError: Unrecognized array dtype object.
Nested types and image/audio types are not supported yet.

I realized I forgot to encode the labels and was passing in the raw text labels.
A beginner's mistake; all good now.
This topic can be closed.