To add on: in the next subchapter ("A full training"), in the "Prepare for training" section, when I run the code below as specified in the course:

for batch in train_dataloader:
    break
{k: v.shape for k, v in batch.items()}

I get a similar error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[9], line 1
----> 1 for batch in train_dataloader:
2 break
3 {k: v.shape for k, v in batch.items()}
File ~/Desktop/Hugging Faces/transformers-course/.env/lib/python3.12/site-packages/torch/utils/data/dataloader.py:631, in _BaseDataLoaderIter.__next__(self)
628 if self._sampler_iter is None:
629 # TODO(https://github.com/pytorch/pytorch/issues/76750)
630 self._reset() # type: ignore[call-arg]
--> 631 data = self._next_data()
632 self._num_yielded += 1
633 if self._dataset_kind == _DatasetKind.Iterable and \
634 self._IterableDataset_len_called is not None and \
635 self._num_yielded > self._IterableDataset_len_called:
File ~/Desktop/Hugging Faces/transformers-course/.env/lib/python3.12/site-packages/torch/utils/data/dataloader.py:675, in _SingleProcessDataLoaderIter._next_data(self)
673 def _next_data(self):
674 index = self._next_index() # may raise StopIteration
--> 675 data = self._dataset_fetcher.fetch(index) # may raise StopIteration
676 if self._pin_memory:
677 data = _utils.pin_memory.pin_memory(data, self._pin_memory_device)
File ~/Desktop/Hugging Faces/transformers-course/.env/lib/python3.12/site-packages/torch/utils/data/_utils/fetch.py:49, in _MapDatasetFetcher.fetch(self, possibly_batched_index)
47 if self.auto_collation:
48 if hasattr(self.dataset, "__getitems__") and self.dataset.__getitems__:
---> 49 data = self.dataset.__getitems__(possibly_batched_index)
50 else:
51 data = [self.dataset[idx] for idx in possibly_batched_index]
File ~/Desktop/Hugging Faces/transformers-course/.env/lib/python3.12/site-packages/datasets/arrow_dataset.py:2870, in Dataset.__getitems__(self, keys)
2868 def __getitems__(self, keys: List) -> List:
2869 """Can be used to get a batch using a list of integers indices."""
-> 2870 batch = self.__getitem__(keys)
2871 n_examples = len(batch[next(iter(batch))])
2872 return [{col: array[i] for col, array in batch.items()} for i in range(n_examples)]
File ~/Desktop/Hugging Faces/transformers-course/.env/lib/python3.12/site-packages/datasets/arrow_dataset.py:2866, in Dataset.__getitem__(self, key)
2864 def __getitem__(self, key): # noqa: F811
2865 """Can be used to index columns (by string names) or rows (by integer index or iterable of indices or bools)."""
-> 2866 return self._getitem(key)
File ~/Desktop/Hugging Faces/transformers-course/.env/lib/python3.12/site-packages/datasets/arrow_dataset.py:2851, in Dataset._getitem(self, key, **kwargs)
2849 formatter = get_formatter(format_type, features=self._info.features, **format_kwargs)
2850 pa_subtable = query_table(self._data, key, indices=self._indices)
-> 2851 formatted_output = format_table(
2852 pa_subtable, key, formatter=formatter, format_columns=format_columns, output_all_columns=output_all_columns
2853 )
2854 return formatted_output
File ~/Desktop/Hugging Faces/transformers-course/.env/lib/python3.12/site-packages/datasets/formatting/formatting.py:633, in format_table(table, key, formatter, format_columns, output_all_columns)
631 python_formatter = PythonFormatter(features=formatter.features)
632 if format_columns is None:
--> 633 return formatter(pa_table, query_type=query_type)
634 elif query_type == "column":
635 if key in format_columns:
File ~/Desktop/Hugging Faces/transformers-course/.env/lib/python3.12/site-packages/datasets/formatting/formatting.py:401, in Formatter.__call__(self, pa_table, query_type)
399 return self.format_column(pa_table)
400 elif query_type == "batch":
--> 401 return self.format_batch(pa_table)
File ~/Desktop/Hugging Faces/transformers-course/.env/lib/python3.12/site-packages/datasets/formatting/torch_formatter.py:110, in TorchFormatter.format_batch(self, pa_table)
109 def format_batch(self, pa_table: pa.Table) -> Mapping:
--> 110 batch = self.numpy_arrow_extractor().extract_batch(pa_table)
111 batch = self.python_features_decoder.decode_batch(batch)
112 batch = self.recursive_tensorize(batch)
File ~/Desktop/Hugging Faces/transformers-course/.env/lib/python3.12/site-packages/datasets/formatting/formatting.py:165, in NumpyArrowExtractor.extract_batch(self, pa_table)
164 def extract_batch(self, pa_table: pa.Table) -> dict:
--> 165 return {col: self._arrow_array_to_numpy(pa_table[col]) for col in pa_table.column_names}
File ~/Desktop/Hugging Faces/transformers-course/.env/lib/python3.12/site-packages/datasets/formatting/formatting.py:197, in NumpyArrowExtractor._arrow_array_to_numpy(self, pa_array)
191 if any(
192 (isinstance(x, np.ndarray) and (x.dtype == object or x.shape != array[0].shape))
193 or (isinstance(x, float) and np.isnan(x))
194 for x in array
195 ):
196 return np.array(array, copy=False, dtype=object)
--> 197 return np.array(array, copy=False)
ValueError: Unable to avoid copy while creating an array as requested.
If using `np.array(obj, copy=False)` replace it with `np.asarray(obj)` to allow a copy when needed (no behavior change in NumPy 1.x).
For more details, see https://numpy.org/devdocs/numpy_2_0_migration_guide.html#adapting-to-changes-in-the-copy-keyword.
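
If I understand the message, the failure comes from NumPy 2.0 changing the semantics of copy=False: it now means "never copy" and raises a ValueError instead of silently falling back to a copy, which the np.array(array, copy=False) call inside datasets trips over. Here is a minimal sketch of the change itself, separate from datasets (this assumes a NumPy 2.x install; the variable name data is just for illustration):

import numpy as np

data = [1, 2, 3]            # a Python list, so constructing an ndarray must copy
np.asarray(data)            # fine on NumPy 1.x and 2.x: copies when needed
np.array(data, copy=False)  # NumPy 2.x: raises "Unable to avoid copy while creating an array"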
Do you know of any solutions to this issue? I can't proceed with building my own fine-tuned models, or with evaluating their accuracy, until I can get past this step.
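My guess is that my installed datasets version predates the NumPy 2.0 migration, so either pinning NumPy below 2.0 (pip install "numpy<2") or upgrading datasets to a release that already uses np.asarray might resolve it, but I haven't confirmed that either fix is the recommended one.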