Here is my model configuration:
from transformers import ViTImageProcessor, BertTokenizer, VisionEncoderDecoderModel
from datasets import load_dataset

# Combine a pretrained ViT encoder with a pretrained BERT decoder
model = VisionEncoderDecoderModel.from_encoder_decoder_pretrained(
    "google/vit-base-patch16-224-in21k", "google-bert/bert-base-uncased"
)
image_processor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")
tokenizer = BertTokenizer.from_pretrained("google-bert/bert-base-uncased")

# Special token ids required for generation
model.config.decoder_start_token_id = tokenizer.cls_token_id
model.config.pad_token_id = tokenizer.pad_token_id

# print(model.config.decoder_start_token_id) --> returns 0
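For reference, this is roughly how I prepare the input for generation (the `s` in the traceback below); the image loading and variable names here are illustrative assumptions, not my exact script:

from PIL import Image

# Hypothetical example image; `s` is a batch of pixel values for the ViT encoder
image = Image.open("example.jpg")
s = image_processor(images=image, return_tensors="pt").pixel_values  # shape (1, 3, 224, 224)
generated_ids = model.generate(s)
captions = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)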
After fine-tuning, I tried model.generate(), but I get the error below:
ValueError                                Traceback (most recent call last)
Cell In[69], line 1
----> 1 model.generate(s)

File /opt/conda/lib/python3.10/site-packages/torch/utils/_contextlib.py:115, in context_decorator.<locals>.decorate_context(*args, **kwargs)
    112 @functools.wraps(func)
    113 def decorate_context(*args, **kwargs):
    114     with ctx_factory():
--> 115         return func(*args, **kwargs)

File /opt/conda/lib/python3.10/site-packages/transformers/generation/utils.py:1419, in GenerationMixin.generate(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, negative_prompt_ids, negative_prompt_attention_mask, **kwargs)
   1417 # 5. Prepare `input_ids` which will be used for auto-regressive generation
   1418 if self.config.is_encoder_decoder:
-> 1419     input_ids, model_kwargs = self._prepare_decoder_input_ids_for_generation(
   1420         batch_size=batch_size,
   1421         model_input_name=model_input_name,
   1422         model_kwargs=model_kwargs,
   1423         decoder_start_token_id=generation_config.decoder_start_token_id,
   1424         bos_token_id=generation_config.bos_token_id,
   1425         device=inputs_tensor.device,
   1426     )
   1427 else:
   1428     input_ids = inputs_tensor if model_input_name == "input_ids" else model_kwargs.pop("input_ids")

File /opt/conda/lib/python3.10/site-packages/transformers/generation/utils.py:542, in GenerationMixin._prepare_decoder_input_ids_for_generation(self, batch_size, model_input_name, model_kwargs, decoder_start_token_id, bos_token_id, device)
    539 decoder_input_ids = None
    541 # 2. Encoder-decoder models expect the `decoder_input_ids` to start with a special token. Let's ensure that.
--> 542 decoder_start_token_id = self._get_decoder_start_token_id(decoder_start_token_id, bos_token_id)
    543 if device is None:
    544     device = self.device

File /opt/conda/lib/python3.10/site-packages/transformers/generation/utils.py:599, in GenerationMixin._get_decoder_start_token_id(self, decoder_start_token_id, bos_token_id)
    597 elif bos_token_id is not None:
    598     return bos_token_id
--> 599 raise ValueError(
    600     "`decoder_start_token_id` or `bos_token_id` has to be defined for encoder-decoder generation."
    601 )

ValueError: `decoder_start_token_id` or `bos_token_id` has to be defined for encoder-decoder generation.
Why am I getting this ValueError even though I set decoder_start_token_id?
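For debugging, a minimal check of both places the token id can live might narrow this down (an assumption on my side: newer transformers versions read the special token ids from model.generation_config rather than model.config during generate(), which I have not confirmed for my exact version):

# Compare the two places the id can live; attribute names follow the
# transformers GenerationConfig API, and the exact precedence may vary by version
print(model.config.decoder_start_token_id)             # set above to tokenizer.cls_token_id
print(model.generation_config.decoder_start_token_id)  # what generate() consults
print(model.generation_config.bos_token_id)

If the second print shows None while the first shows the expected id, that mismatch would at least be consistent with the traceback above.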