I am also getting this error.
This is the log from a Kaggle notebook:
AttributeError Traceback (most recent call last)
Cell In[98], line 2
1 print("Preparing Training and Testing sets to TRAIN the MODEL")
----> 2 tf_train_set = model.prepare_tf_dataset(
3 tokenized_inputs["train"],
4 shuffle=True,
5 batch_size=16,
6 collate_fn=data_collator,
7 )
9 tf_test_set = model.prepare_tf_dataset(
10 tokenized_inputs["test"],
11 shuffle=False,
12 batch_size=16,
13 collate_fn=data_collator,
14 )
File /opt/conda/lib/python3.10/site-packages/transformers/modeling_tf_utils.py:1488, in TFPreTrainedModel.prepare_tf_dataset(self, dataset, batch_size, shuffle, tokenizer, collate_fn, collate_fn_args, drop_remainder, prefetch)
1486 if drop_remainder is None:
1487 drop_remainder = shuffle
-> 1488 tf_dataset = dataset.to_tf_dataset(
1489 columns=feature_cols,
1490 label_cols=label_cols,
1491 batch_size=batch_size,
1492 shuffle=shuffle,
1493 drop_remainder=drop_remainder,
1494 collate_fn=collate_fn,
1495 collate_fn_args=collate_fn_args,
1496 prefetch=prefetch,
1497 )
1498 return tf_dataset
File /opt/conda/lib/python3.10/site-packages/datasets/arrow_dataset.py:381, in TensorflowDatasetMixin.to_tf_dataset(self, columns, batch_size, shuffle, collate_fn, drop_remainder, collate_fn_args, label_cols, dummy_labels, prefetch)
378 retained_columns = [key for key in self.features.keys() if key in cols_to_retain]
379 dataset = self.with_format("numpy", columns=retained_columns)
-> 381 columns_to_dtypes, output_signature = self._get_output_signature(
382 dataset, collate_fn, collate_fn_args, batch_size=batch_size if drop_remainder else None
383 )
384 all_columns = list(columns_to_dtypes.keys())
385 all_dtypes = list(columns_to_dtypes.values())
File /opt/conda/lib/python3.10/site-packages/datasets/arrow_dataset.py:244, in TensorflowDatasetMixin._get_output_signature(dataset, collate_fn, collate_fn_args, batch_size)
242 raise ValueError("Unable to get the output signature because the dataset is empty.")
243 test_batch_size = min(len(dataset), 4)
-> 244 test_batch = dataset[:test_batch_size]
245 test_batch = [{key: value[i] for key, value in test_batch.items()} for i in range(test_batch_size)]
246 test_batch = collate_fn(test_batch, **collate_fn_args)
File /opt/conda/lib/python3.10/site-packages/datasets/arrow_dataset.py:1764, in Dataset.__getitem__(self, key)
1762 def __getitem__(self, key): # noqa: F811
1763 """Can be used to index columns (by string names) or rows (by integer index or iterable of indices or bools)."""
-> 1764 return self._getitem(
1765 key,
1766 )
File /opt/conda/lib/python3.10/site-packages/datasets/arrow_dataset.py:1749, in Dataset._getitem(self, key, decoded, **kwargs)
1747 formatter = get_formatter(format_type, features=self.features, decoded=decoded, **format_kwargs)
1748 pa_subtable = query_table(self._data, key, indices=self._indices if self._indices is not None else None)
-> 1749 formatted_output = format_table(
1750 pa_subtable, key, formatter=formatter, format_columns=format_columns, output_all_columns=output_all_columns
1751 )
1752 return formatted_output
File /opt/conda/lib/python3.10/site-packages/datasets/formatting/formatting.py:540, in format_table(table, key, formatter, format_columns, output_all_columns)
538 else:
539 pa_table_to_format = pa_table.drop(col for col in pa_table.column_names if col not in format_columns)
--> 540 formatted_output = formatter(pa_table_to_format, query_type=query_type)
541 if output_all_columns:
542 if isinstance(formatted_output, MutableMapping):
File /opt/conda/lib/python3.10/site-packages/datasets/formatting/formatting.py:285, in Formatter.__call__(self, pa_table, query_type)
283 return self.format_column(pa_table)
284 elif query_type == "batch":
--> 285 return self.format_batch(pa_table)
File /opt/conda/lib/python3.10/site-packages/datasets/formatting/formatting.py:346, in NumpyFormatter.format_batch(self, pa_table)
345 def format_batch(self, pa_table: pa.Table) -> dict:
--> 346 batch = self.numpy_arrow_extractor(**self.np_array_kwargs).extract_batch(pa_table)
347 if self.decoded:
348 batch = self.python_features_decoder.decode_batch(batch)
File /opt/conda/lib/python3.10/site-packages/datasets/formatting/formatting.py:160, in NumpyArrowExtractor.extract_batch(self, pa_table)
159 def extract_batch(self, pa_table: pa.Table) -> dict:
--> 160 return {col: self._arrow_array_to_numpy(pa_table[col]) for col in pa_table.column_names}
File /opt/conda/lib/python3.10/site-packages/datasets/formatting/formatting.py:160, in <dictcomp>(.0)
159 def extract_batch(self, pa_table: pa.Table) -> dict:
--> 160 return {col: self._arrow_array_to_numpy(pa_table[col]) for col in pa_table.column_names}
File /opt/conda/lib/python3.10/site-packages/datasets/formatting/formatting.py:196, in NumpyArrowExtractor._arrow_array_to_numpy(self, pa_array)
194 array: List = pa_array.to_numpy(zero_copy_only=zero_copy_only).tolist()
195 if len(array) > 0:
--> 196 if any(
197 (isinstance(x, np.ndarray) and (x.dtype == np.object or x.shape != array[0].shape))
198 or (isinstance(x, float) and np.isnan(x))
199 for x in array
200 ):
201 return np.array(array, copy=False, **{**self.np_array_kwargs, "dtype": np.object})
202 return np.array(array, copy=False, **self.np_array_kwargs)
File /opt/conda/lib/python3.10/site-packages/datasets/formatting/formatting.py:197, in <genexpr>(.0)
194 array: List = pa_array.to_numpy(zero_copy_only=zero_copy_only).tolist()
195 if len(array) > 0:
196 if any(
--> 197 (isinstance(x, np.ndarray) and (x.dtype == np.object or x.shape != array[0].shape))
198 or (isinstance(x, float) and np.isnan(x))
199 for x in array
200 ):
201 return np.array(array, copy=False, **{**self.np_array_kwargs, "dtype": np.object})
202 return np.array(array, copy=False, **self.np_array_kwargs)
File /opt/conda/lib/python3.10/site-packages/numpy/__init__.py:305, in __getattr__(attr)
300 warnings.warn(
301 f"In the future `np.{attr}` will be defined as the "
302 "corresponding NumPy scalar.", FutureWarning, stacklevel=2)
304 if attr in __former_attrs__:
--> 305 raise AttributeError(__former_attrs__[attr])
307 # Importing Tester requires importing all of UnitTest which is not a
308 # cheap import Since it is mainly used in test suits, we lazy import it
309 # here to save on the order of 10 ms of import time for most users
310 #
311 # The previous way Tester was imported also had a side effect of adding
312 # the full `numpy.testing` namespace
313 if attr == 'testing':
AttributeError: module 'numpy' has no attribute 'object'.
`np.object` was a deprecated alias for the builtin `object`. To avoid this error in existing code, use `object` by itself. Doing this will not modify any behavior and is safe.
The aliases was originally deprecated in NumPy 1.20; for more details and guidance see the original release note at:
NumPy 1.20.0 Release Notes — NumPy v2.0.dev0 Manual
@sgugger @amyeroberts