Hi, I am training the HF model `EleutherAI/pythia-70m` on the HF dataset `lamini/lamini_docs`, but I am getting the error below.
The code is in a Kaggle notebook: FInetuning LLM | Kaggle.
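In case the link is not viewable, here is a simplified sketch of what the notebook does (names like `tokenize_fn` are placeholders of mine, and I am assuming the dataset's `question`/`answer` columns; the exact code is in the notebook):

```python
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
)

model_name = "EleutherAI/pythia-70m"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

dataset = load_dataset("lamini/lamini_docs", split="train")

def tokenize_fn(examples):
    # Join question and answer into one training text, truncate to 128 tokens
    text = [q + a for q, a in zip(examples["question"], examples["answer"])]
    tokens = tokenizer(text, truncation=True, max_length=128)
    tokens["labels"] = tokens["input_ids"].copy()
    return tokens

tokenized = dataset.map(
    tokenize_fn, batched=True, remove_columns=dataset.column_names
)

trainer = Trainer(
    model=model,
    args=TrainingArguments(output_dir="out", per_device_train_batch_size=8),
    train_dataset=tokenized,
)
training_output = trainer.train()  # <- raises the ValueError below
```

Running `trainer.train()` then fails with: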
```
ValueError                                Traceback (most recent call last)
Cell In[193], line 1
----> 1 training_output = trainer.train()

File /opt/conda/lib/python3.10/site-packages/transformers/trainer.py:1591, in Trainer.train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
   1589     hf_hub_utils.enable_progress_bars()
   1590 else:
-> 1591     return inner_training_loop(
   1592         args=args,
   1593         resume_from_checkpoint=resume_from_checkpoint,
   1594         trial=trial,
   1595         ignore_keys_for_eval=ignore_keys_for_eval,
   1596     )

File /opt/conda/lib/python3.10/site-packages/transformers/trainer.py:1870, in Trainer._inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
   1867     rng_to_sync = True
   1869 step = -1
-> 1870 for step, inputs in enumerate(epoch_iterator):
   1871     total_batched_samples += 1
   1872     if rng_to_sync:

File /opt/conda/lib/python3.10/site-packages/accelerate/data_loader.py:384, in DataLoaderShard.__iter__(self)
    382 # We iterate one batch ahead to check when we are at the end
    383 try:
--> 384     current_batch = next(dataloader_iter)
    385 except StopIteration:
    386     yield

File /opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py:634, in _BaseDataLoaderIter.__next__(self)
    631 if self._sampler_iter is None:
    632     # TODO(https://github.com/pytorch/pytorch/issues/76750)
    633     self._reset()  # type: ignore[call-arg]
--> 634 data = self._next_data()
    635 self._num_yielded += 1
    636 if self._dataset_kind == _DatasetKind.Iterable and \
    637         self._IterableDataset_len_called is not None and \
    638         self._num_yielded > self._IterableDataset_len_called:

File /opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py:678, in _SingleProcessDataLoaderIter._next_data(self)
    676 def _next_data(self):
    677     index = self._next_index()  # may raise StopIteration
--> 678     data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
    679     if self._pin_memory:
    680         data = _utils.pin_memory.pin_memory(data, self._pin_memory_device)

File /opt/conda/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py:54, in _MapDatasetFetcher.fetch(self, possibly_batched_index)
     52 else:
     53     data = self.dataset[possibly_batched_index]
---> 54 return self.collate_fn(data)

File /opt/conda/lib/python3.10/site-packages/transformers/data/data_collator.py:70, in default_data_collator(features, return_tensors)
     64 # In this function we'll make the assumption that all features in the batch
     65 # have the same attributes.
     66 # So we will look at the first element as a proxy for what attributes exist
     67 # on the whole batch.
     69 if return_tensors == "pt":
---> 70     return torch_default_data_collator(features)
     71 elif return_tensors == "tf":
     72     return tf_default_data_collator(features)

File /opt/conda/lib/python3.10/site-packages/transformers/data/data_collator.py:136, in torch_default_data_collator(features)
    134     batch[k] = torch.tensor(np.stack([f[k] for f in features]))
    135 else:
--> 136     batch[k] = torch.tensor([f[k] for f in features])
    138 return batch

ValueError: expected sequence of length 128 at dim 1 (got 68)
```
I have tried updating all the relevant packages (`datasets`, `transformers`), but that did not fix it.
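From the last frame, `torch_default_data_collator` calls `torch.tensor` on a list of per-example feature lists, so every example in a batch must already have the same length; apparently some of my tokenized rows are 128 tokens long and others 68. Is the right fix to pad everything to a fixed length at tokenization time, or to switch to a padding collator? A sketch of both options I am considering (untested, and again assuming `question`/`answer` columns):

```python
from transformers import DataCollatorForLanguageModeling

# Pythia's tokenizer ships without a pad token; reuse EOS for padding
tokenizer.pad_token = tokenizer.eos_token

# Option A: pad every row to the same fixed length at tokenization time,
# so the default collator can stack equal-length lists into one tensor
def tokenize_fixed(examples):
    text = [q + a for q, a in zip(examples["question"], examples["answer"])]
    tokens = tokenizer(
        text,
        padding="max_length",  # every row becomes exactly 128 tokens
        truncation=True,
        max_length=128,
    )
    tokens["labels"] = tokens["input_ids"].copy()
    return tokens

# Option B: keep variable-length rows and pad per batch in the collator;
# with mlm=False it also builds causal-LM labels from input_ids
collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
# trainer = Trainer(model=model, args=..., train_dataset=tokenized,
#                   data_collator=collator)
```

Option B only pads each batch up to its longest row, so it wastes fewer pad tokens than always padding to 128, but I am not sure which approach is intended for this dataset.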