)
columns = [k for k in signature_columns if k in dataset.column_names]
if len(columns) == 0:
raise ValueError(
"No columns in the dataset match the model's forward method signature. "
f"The following columns have been ignored: [{', '.join(ignored_columns)}]. "
"Please check the dataset and model. You may need to set `remove_unused_columns=False` in `TrainingArguments`."
)
if version.parse(datasets.__version__) < version.parse("1.4.0"):
dataset.set_format(
type=dataset.format["type"], columns=columns, format_kwargs=dataset.format["format_kwargs"]
)
return dataset
else:
return dataset.remove_columns(ignored_columns)
def _get_collator_with_removed_columns(
self, data_collator: Callable, description: Optional[str] = None
) -> Callable:
I’ve read the source and traced the function’s flow, and I think a workaround is possible. If the workaround succeeds, the issue is a bug in the library; if it does not, there is a genuine problem with how the dataset is being processed.
pip install datasets==1.4.0