Hello everyone.
Here is my problem (I hope someone can help me; I have tried very hard to resolve it, in vain):
I use the transformers 4.2.1 library, and I am in an environment where I can only use it in offline mode (no internet access).
I want to use the bart-large-mnli model, so I downloaded the model from the following link and uploaded it to a specific server:
Then I try to use the from_pretrained method like this:
tokenizer = BartTokenizerFast.from_pretrained('/appli/pretrainedModel/bart-large-mnli')
or like this :
tokenizer = AutoTokenizer.from_pretrained('/appli/pretrainedModel/bart-large-mnli')
But every time I do this, I get the following error (a more detailed log is at the end of my post; I truncated the last line with […] in place of the full content of merges.txt):
“TypeError: Can’t convert [(‘Ä’, ‘t’), (‘Ä’, ‘a’), (‘h’, ‘e’), […] ] (list) to Union[Merges, Filename]”
I really don't know what is going wrong with merges.txt, but it seems that there is a problem with it…
The contents of /appli/pretrainedModel/bart-large-mnli are:
config.json
merges.txt
pytorch_model.bin
rust_model.ot
tokenizer_config.json
vocab.json
Does anyone have any idea where the problem is?
Thanks in advance.
More detailed error log:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
in
----> 1 tokenizer = AutoTokenizer.from_pretrained(‘/appli/pretrainedModel/bart-large-mnli’)
2 pipeline(‘zero-shot-classification’, model=model, tokenizer=tokenizer)
/appli/.conda/envs/bf_verbatim/lib/python3.7/site-packages/transformers/models/auto/tokenization_auto.py in from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs)
383 tokenizer_class_py, tokenizer_class_fast = TOKENIZER_MAPPING[type(config)]
384 if tokenizer_class_fast and (use_fast or tokenizer_class_py is None):
→ 385 return tokenizer_class_fast.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
386 else:
387 if tokenizer_class_py is not None:
/appli/.conda/envs/bf_verbatim/lib/python3.7/site-packages/transformers/tokenization_utils_base.py in from_pretrained(cls, pretrained_model_name_or_path, *init_inputs, **kwargs)
1767
1768 return cls._from_pretrained(
→ 1769 resolved_vocab_files, pretrained_model_name_or_path, init_configuration, *init_inputs, **kwargs
1770 )
1771
/appli/.conda/envs/bf_verbatim/lib/python3.7/site-packages/transformers/tokenization_utils_base.py in _from_pretrained(cls, resolved_vocab_files, pretrained_model_name_or_path, init_configuration, *init_inputs, **kwargs)
1839 # Instantiate tokenizer.
1840 try:
→ 1841 tokenizer = cls(*init_inputs, **init_kwargs)
1842 except OSError:
1843 raise OSError(
/appli/.conda/envs/bf_verbatim/lib/python3.7/site-packages/transformers/models/roberta/tokenization_roberta_fast.py in __init__(self, vocab_file, merges_file, tokenizer_file, errors, bos_token, eos_token, sep_token, cls_token, unk_token, pad_token, mask_token, add_prefix_space, **kwargs)
171 mask_token=mask_token,
172 add_prefix_space=add_prefix_space,
→ 173 **kwargs,
174 )
175
/appli/.conda/envs/bf_verbatim/lib/python3.7/site-packages/transformers/models/gpt2/tokenization_gpt2_fast.py in __init__(self, vocab_file, merges_file, tokenizer_file, unk_token, bos_token, eos_token, add_prefix_space, **kwargs)
139 eos_token=eos_token,
140 add_prefix_space=add_prefix_space,
→ 141 **kwargs,
142 )
143
/appli/.conda/envs/bf_verbatim/lib/python3.7/site-packages/transformers/tokenization_utils_fast.py in __init__(self, *args, **kwargs)
87 elif slow_tokenizer is not None:
88 # We need to convert a slow tokenizer to build the backend
—> 89 fast_tokenizer = convert_slow_tokenizer(slow_tokenizer)
90 elif self.slow_tokenizer_class is not None:
91 # We need to create and convert a slow tokenizer to build the backend
/appli/.conda/envs/bf_verbatim/lib/python3.7/site-packages/transformers/convert_slow_tokenizer.py in convert_slow_tokenizer(transformer_tokenizer)
657 converter_class = SLOW_TO_FAST_CONVERTERS[tokenizer_class_name]
658
→ 659 return converter_class(transformer_tokenizer).converted()
/appli/.conda/envs/bf_verbatim/lib/python3.7/site-packages/transformers/convert_slow_tokenizer.py in converted(self)
281 continuing_subword_prefix=“”,
282 end_of_word_suffix=“”,
→ 283 fuse_unk=False,
284 )
285 )
TypeError: Can’t convert [(‘Ä’, ‘t’), (‘Ä’, ‘a’), (‘h’, ‘e’), […] ] (list) to Union[Merges, Filename]