I’m following an online guide and using this notebook (Google Colaboratory); here is the CSV file I’m using to fine-tune the model: final.csv - Google Drive
However, I get the following traceback when I run the training step:
Traceback (most recent call last)
<ipython-input-24-523c0d2a27d3> in <module>()
----> 1 main(trn_df, val_df)
10 frames
<ipython-input-18-aa20b6fc78bc> in main(df_trn, df_val)
61 # Training
62 if args.do_train:
---> 63 train_dataset = load_and_cache_examples(args, tokenizer, df_trn, df_val, evaluate=False)
64
65 global_step, tr_loss = train(args, train_dataset, model, tokenizer)
<ipython-input-16-67f62bb60333> in load_and_cache_examples(args, tokenizer, df_trn, df_val, evaluate)
2
3 def load_and_cache_examples(args, tokenizer, df_trn, df_val, evaluate=False):
----> 4 return ConversationDataset(tokenizer, args, df_val if evaluate else df_trn)
5
6
<ipython-input-23-87cf4adc3093> in __init__(self, tokenizer, args, df, block_size)
29 self.examples = []
30 for _, row in df.iterrows():
---> 31 conv = construct_conv(row, tokenizer)
32 self.examples.append(conv)
33
<ipython-input-23-87cf4adc3093> in construct_conv(row, tokenizer, eos)
6 print(row.tolist())
7 print(type((row.tolist())))
----> 8 conv = [tokenizer.encode(x) + [tokenizer.eos_token_id] for x in row.tolist()]
9 conv.reverse()
10 return flatten(list(conv))
<ipython-input-23-87cf4adc3093> in <listcomp>(.0)
6 print(row.tolist())
7 print(type((row.tolist())))
----> 8 conv = [tokenizer.encode(x) + [tokenizer.eos_token_id] for x in row.tolist()]
9 conv.reverse()
10 return flatten(list(conv))
TypeError: TextEncodeInput must be Union[TextInputSequence, Tuple[InputSequence, InputSequence]]
Does anyone know why this TypeError is raised when the rows are tokenized?