I’ve tried several different dill versions, but this error still persists. Any leads would be helpful.
My code:
train_datasetss = datasets_train_test.map(
process_data_to_model_inputs,
batched=True,
batch_size=batch_size,
remove_columns=["article", "abstract"],
)
Stack trace:
--------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-32-5cc00e86d874> in <module>
----> 1 train_datasetss = datasets_train_test.map(
2 process_data_to_model_inputs,
3 batched=True,
4 batch_size=batch_size,
5 remove_columns=["article", "abstract"],
15 frames
/usr/local/lib/python3.9/dist-packages/datasets/dataset_dict.py in map(self, function, with_indices, input_columns, batched, batch_size, remove_columns, keep_in_memory, load_from_cache_file, cache_file_names, writer_batch_size, features, disable_nullable, fn_kwargs, num_proc)
284 cache_file_names = {k: None for k in self}
285 return DatasetDict(
--> 286 {
287 k: dataset.map(
288 function=function,
/usr/local/lib/python3.9/dist-packages/datasets/dataset_dict.py in <dictcomp>(.0)
285 return DatasetDict(
286 {
--> 287 k: dataset.map(
288 function=function,
289 with_indices=with_indices,
/usr/local/lib/python3.9/dist-packages/datasets/arrow_dataset.py in map(self, function, with_indices, input_columns, batched, batch_size, drop_last_batch, remove_columns, keep_in_memory, load_from_cache_file, cache_file_name, writer_batch_size, features, disable_nullable, fn_kwargs, num_proc, suffix_template, new_fingerprint)
1242
1243 if num_proc is None or num_proc == 1:
-> 1244 return self._map_single(
1245 function=function,
1246 with_indices=with_indices,
/usr/local/lib/python3.9/dist-packages/datasets/arrow_dataset.py in wrapper(*args, **kwargs)
155 }
156 # apply actual function
--> 157 out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
158 datasets: List["Dataset"] = list(out.values()) if isinstance(out, dict) else [out]
159 # re-apply format to the output
/usr/local/lib/python3.9/dist-packages/datasets/fingerprint.py in wrapper(*args, **kwargs)
155 if kwargs.get(fingerprint_name) is None:
156 kwargs_for_fingerprint["fingerprint_name"] = fingerprint_name
--> 157 kwargs[fingerprint_name] = update_fingerprint(
158 self._fingerprint, transform, kwargs_for_fingerprint
159 )
/usr/local/lib/python3.9/dist-packages/datasets/fingerprint.py in update_fingerprint(fingerprint, transform, transform_args)
103 for key in sorted(transform_args):
104 hasher.update(key)
--> 105 hasher.update(transform_args[key])
106 return hasher.hexdigest()
107
/usr/local/lib/python3.9/dist-packages/datasets/fingerprint.py in update(self, value)
55 def update(self, value):
56 self.m.update(f"=={type(value)}==".encode("utf8"))
---> 57 self.m.update(self.hash(value).encode("utf-8"))
58
59 def hexdigest(self):
/usr/local/lib/python3.9/dist-packages/datasets/fingerprint.py in hash(cls, value)
51 return cls.dispatch[type(value)](cls, value)
52 else:
---> 53 return cls.hash_default(value)
54
55 def update(self, value):
/usr/local/lib/python3.9/dist-packages/datasets/fingerprint.py in hash_default(cls, value)
44 @classmethod
45 def hash_default(cls, value):
---> 46 return cls.hash_bytes(dumps(value))
47
48 @classmethod
/usr/local/lib/python3.9/dist-packages/datasets/utils/py_utils.py in dumps(obj)
387 file = StringIO()
388 with _no_cache_fields(obj):
--> 389 dump(obj, file)
390 return file.getvalue()
391
/usr/local/lib/python3.9/dist-packages/datasets/utils/py_utils.py in dump(obj, file)
359 def dump(obj, file):
360 """pickle an object to a file"""
--> 361 Pickler(file, recurse=True).dump(obj)
362 return
363
/usr/local/lib/python3.9/dist-packages/dill/_dill.py in dump(self, obj)
392 f = filename
393 else:
--> 394 f = open(filename, 'wb')
395 try:
396 if byref:
/usr/lib/python3.9/pickle.py in dump(self, obj)
485 if self.proto >= 4:
486 self.framer.start_framing()
--> 487 self.save(obj)
488 self.write(STOP)
489 self.framer.end_framing()
/usr/local/lib/python3.9/dist-packages/dill/_dill.py in save(self, obj, save_persistent_id)
386 def dump_session(filename='/tmp/session.pkl', main=None, byref=False, **kwds):
387 """pickle the current state of __main__ to a file"""
--> 388 from .settings import settings
389 protocol = settings['protocol']
390 if main is None: main = _main_module
/usr/lib/python3.9/pickle.py in save(self, obj, save_persistent_id)
558 f = self.dispatch.get(t)
559 if f is not None:
--> 560 f(self, obj) # Call unbound method with explicit self
561 return
562
/usr/local/lib/python3.9/dist-packages/datasets/utils/py_utils.py in save_function(pickler, obj)
524 """
525 if not dill._dill._locate_function(obj):
--> 526 dill._dill.log.info("F1: %s" % obj)
527 if getattr(pickler, "_recurse", False):
528 # recurse to get all globals referred to by obj
AttributeError: module 'dill._dill' has no attribute 'log'