from transformers import TrainingArguments, Trainer
import numpy as np
import evaluate
training_args = TrainingArguments(output_dir="test_trainer", evaluation_strategy="epoch")
metric = evaluate.load("accuracy")
def compute_metrics(eval_pred):
logits, labels = eval_pred
predictions = np.argmax(logits, axis=-1)
return metric.compute(predictions=predictions, references=labels)
def train(model, train, eval, **kwargs):
print('Training model...')
trainer = Trainer(
model=model,
train_dataset=train, #Dataset to train it with
eval_dataset=eval, #Dataset to test it with
compute_metrics=compute_metrics,
**kwargs
)
trainer.train()
trainer.save_model('adkai')
print('Trained!')
model.train(True) #Puts it in training mode.
train.train(model, {
'#print Hello World':'stdout.write("Hello World\n")',
'#print hello World':'stdout.write("hello World\n")',
'# print Hello world':'stdout.write("Hello world\n")',
'#print hello world':'stdout.write("hello world\n")',
'#print Hello World!':'stdout.write("Hello World!\n")',
'# print hello World!':'stdout.write("hello World!\n")',
'#print goodbye World!':'stdout.write("goodbye World!\n")',
'# write Hello World':'stdout.write("Hello World\n")',
'#write hello World':'stdout.write("hello World\n")',
'# write Hello world':'stdout.write("Hello world\n")',
'#write hello world':'stdout.write("hello world\n")',
'# write Hello World!':'stdout.write("Hello World!\n")',
'set x = 5\n#print x':'stdout.write(x, "\n")',
'set x = "Go home"\n#output x':'stdout.write(x, "\n")',
'set xyz = "Hello"# output xyz':'stdout.write(xyz, "\n")',
'set Whatever = "nothing"\n#output Whatever':'stdout.write(Whatever, "\n")',
'#output Whatever':'stdout.write("Whatever\n")',
'':'',
'':''
}, {
'#write Hello world!':'stdout.write("Hello world!\n")',
'':'',
'# output Hello World!':'stdout.write("Hello World!\n")',
})
This is not the full code, but this is most of it and its really all you need to understand my problem. I need help, here’s the error:
Full code can be found here: https://replit.com/@hg0428/AdkAI#main.py
/home/runner/AdkAI/venv/lib/python3.8/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning
warnings.warn(
***** Running training *****
Num examples = 18
Num Epochs = 3
Instantaneous batch size per device = 8
Total train batch size (w. parallel, distributed & accumulation) = 8
Gradient Accumulation steps = 1
Total optimization steps = 9
0%| | 0/9 [00:00<?, ?it/s]Traceback (most recent call last):
File "main.py", line 18, in <module>
train.train(model, {
File "/home/runner/AdkAI/train.py", line 23, in train
trainer.train()
File "/home/runner/AdkAI/venv/lib/python3.8/site-packages/transformers/trainer.py", line 1500, in train
return inner_training_loop(
File "/home/runner/AdkAI/venv/lib/python3.8/site-packages/transformers/trainer.py", line 1716, in _inner_training_loop
for step, inputs in enumerate(epoch_iterator):
File "/home/runner/AdkAI/venv/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 681, in __next__
data = self._next_data()
File "/home/runner/AdkAI/venv/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 721, in _next_data
data = self._dataset_fetcher.fetch(index) # may raise StopIteration
File "/home/runner/AdkAI/venv/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 49, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/home/runner/AdkAI/venv/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 49, in <listcomp>
data = [self.dataset[idx] for idx in possibly_batched_index]
KeyError: 2
I really need help here.
What should the training and testing data look like? Did I get the format right?
I was doing input:output