I have an issue loading the facebook/bart-base model. I get the following error when I simply try to load it into memory:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~/anaconda3/envs/trans/lib/python3.7/tarfile.py in nti(s)
186 s = nts(s, "ascii", "strict")
--> 187 n = int(s.strip() or "0", 8)
188 except ValueError:
ValueError: invalid literal for int() with base 8: 'v2\nq\x03((X'
During handling of the above exception, another exception occurred:
InvalidHeaderError Traceback (most recent call last)
~/anaconda3/envs/trans/lib/python3.7/tarfile.py in next(self)
2288 try:
-> 2289 tarinfo = self.tarinfo.fromtarfile(self)
2290 except EOFHeaderError as e:
~/anaconda3/envs/trans/lib/python3.7/tarfile.py in fromtarfile(cls, tarfile)
1094 buf = tarfile.fileobj.read(BLOCKSIZE)
-> 1095 obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
1096 obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
~/anaconda3/envs/trans/lib/python3.7/tarfile.py in frombuf(cls, buf, encoding, errors)
1036
-> 1037 chksum = nti(buf[148:156])
1038 if chksum not in calc_chksums(buf):
~/anaconda3/envs/trans/lib/python3.7/tarfile.py in nti(s)
188 except ValueError:
--> 189 raise InvalidHeaderError("invalid header")
190 return n
InvalidHeaderError: invalid header
During handling of the above exception, another exception occurred:
ReadError Traceback (most recent call last)
~/anaconda3/envs/trans/lib/python3.7/site-packages/torch/serialization.py in _load(f, map_location, pickle_module, **pickle_load_args)
594 try:
--> 595 return legacy_load(f)
596 except tarfile.TarError:
~/anaconda3/envs/trans/lib/python3.7/site-packages/torch/serialization.py in legacy_load(f)
505
--> 506 with closing(tarfile.open(fileobj=f, mode='r:', format=tarfile.PAX_FORMAT)) as tar, \
507 mkdtemp() as tmpdir:
~/anaconda3/envs/trans/lib/python3.7/tarfile.py in open(cls, name, mode, fileobj, bufsize, **kwargs)
1592 raise CompressionError("unknown compression type %r" % comptype)
-> 1593 return func(name, filemode, fileobj, **kwargs)
1594
~/anaconda3/envs/trans/lib/python3.7/tarfile.py in taropen(cls, name, mode, fileobj, **kwargs)
1622 raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
-> 1623 return cls(name, mode, fileobj, **kwargs)
1624
~/anaconda3/envs/trans/lib/python3.7/tarfile.py in __init__(self, name, mode, fileobj, format, tarinfo, dereference, ignore_zeros, encoding, errors, pax_headers, debug, errorlevel, copybufsize)
1485 self.firstmember = None
-> 1486 self.firstmember = self.next()
1487
~/anaconda3/envs/trans/lib/python3.7/tarfile.py in next(self)
2300 elif self.offset == 0:
-> 2301 raise ReadError(str(e))
2302 except EmptyHeaderError:
ReadError: invalid header
During handling of the above exception, another exception occurred:
RuntimeError Traceback (most recent call last)
~/anaconda3/envs/trans/lib/python3.7/site-packages/transformers/modeling_utils.py in from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
1330 try:
-> 1331 state_dict = torch.load(resolved_archive_file, map_location="cpu")
1332 except Exception as e:
~/anaconda3/envs/trans/lib/python3.7/site-packages/torch/serialization.py in load(f, map_location, pickle_module, **pickle_load_args)
425 pickle_load_args['encoding'] = 'utf-8'
--> 426 return _load(f, map_location, pickle_module, **pickle_load_args)
427 finally:
~/anaconda3/envs/trans/lib/python3.7/site-packages/torch/serialization.py in _load(f, map_location, pickle_module, **pickle_load_args)
598 # .zip is used for torch.jit.save and will throw an un-pickling error here
--> 599 raise RuntimeError("{} is a zip archive (did you mean to use torch.jit.load()?)".format(f.name))
600 # if not a tarfile, reset file offset and proceed
RuntimeError: /home/zzz/.cache/huggingface/transformers/486355ec722ef05fd480e999d4c763be56549ae930f6a3742ee721a5d2a05647.f2f355ad2775769afc60592b43a46d72ca548375e3a1d65f381a751e711cbadd is a zip archive (did you mean to use torch.jit.load()?)
During handling of the above exception, another exception occurred:
UnicodeDecodeError Traceback (most recent call last)
~/anaconda3/envs/trans/lib/python3.7/site-packages/transformers/modeling_utils.py in from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
1334 with open(resolved_archive_file) as f:
-> 1335 if f.read().startswith("version"):
1336 raise OSError(
~/anaconda3/envs/trans/lib/python3.7/codecs.py in decode(self, input, final)
321 data = self.buffer + input
--> 322 (result, consumed) = self._buffer_decode(data, self.errors, final)
323 # keep undecoded input until the next call
UnicodeDecodeError: 'utf-8' codec can't decode byte 0x80 in position 64: invalid start byte
During handling of the above exception, another exception occurred:
OSError Traceback (most recent call last)
/tmp/ipykernel_2163270/897663684.py in <module>
2
3
----> 4 model = AutoModel.from_pretrained('facebook/bart-base')
5 tokenizer = BartTokenizer.from_pretrained('facebook/bart-base')
6
~/anaconda3/envs/trans/lib/python3.7/site-packages/transformers/models/auto/auto_factory.py in from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
417 elif type(config) in cls._model_mapping.keys():
418 model_class = _get_model_class(config, cls._model_mapping)
--> 419 return model_class.from_pretrained(pretrained_model_name_or_path, *model_args, config=config, **kwargs)
420 raise ValueError(
421 f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n"
~/anaconda3/envs/trans/lib/python3.7/site-packages/transformers/modeling_utils.py in from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
1343 except (UnicodeDecodeError, ValueError):
1344 raise OSError(
-> 1345 f"Unable to load weights from pytorch checkpoint file for '{pretrained_model_name_or_path}' "
1346 f"at '{resolved_archive_file}'"
1347 "If you tried to load a PyTorch model from a TF 2.0 checkpoint, please set from_tf=True. "
OSError: Unable to load weights from pytorch checkpoint file for 'facebook/bart-base' at '/home/amrolla/.cache/huggingface/transformers/486355ec722ef05fd480e999d4c763be56549ae930f6a3742ee721a5d2a05647.f2f355ad2775769afc60592b43a46d72ca548375e3a1d65f381a751e711cbadd'If you tried to load a PyTorch model from a TF 2.0 checkpoint, please set from_tf=True.
It starts downloading the model, but after the download completes, it raises this error. Could you help me understand what the problem is here?
transformers 4.11.1
python 3.7.11
pytorch-cpu 1.3.1