How to set EncoderDecoderModel.config?

dragonkue · February 2, 2024, 2:36am

Is it correct to set the encoder, decoder, and encoder-decoder configs separately in EncoderDecoderModel.config, as shown below? Or is it enough to set only the encoder-decoder config? If all three must be set separately, can I give all three the same values? Or is it enough to make the decoder and encoder-decoder configs match? I also don’t understand why there is a decoder config setting in the encoder config. Could someone please explain?


from transformers import EncoderDecoderModel

# Length limits referenced by the config assignments below.
decoder_max_length = 384
encoder_max_length = 1280

# Load the combined encoder-decoder model from the checkpoint at `model_path`.
model = EncoderDecoderModel.from_pretrained(model_path)
    
### encoder-decoder
# Values for the top-level (combined) config, applied in the order listed.
_top_level_settings = {
    "pad_token_id": tokenizer.pad_token_id,
    "sep_token_id": tokenizer.sep_token_id,
    "bos_token_id": tokenizer.bos_token_id,
    # decoder_start_token_id (int, optional) — If an encoder-decoder model
    # starts decoding with a different token than bos, the id of that token.
    "decoder_start_token_id": tokenizer.cls_token_id,
    "eos_token_id": tokenizer.sep_token_id,
    # Mirror the encoder's vocabulary size on the combined config.
    "vocab_size": model.config.encoder.vocab_size,
    "max_length": decoder_max_length,
    "min_length": 10,
    "no_repeat_ngram_size": 2,
    "length_penalty": 2.0,
    "num_beams": 4,
    "early_stopping": True,
}
for _field, _value in _top_level_settings.items():
    setattr(model.config, _field, _value)
    
### encoder
# Values written onto the encoder sub-config, applied in the order listed.
# The former `encoder.vocab_size = encoder.vocab_size` line was a
# self-assignment with no effect and has been dropped.
# NOTE(review): `decoder_start_token_id`, `max_length`, `min_length`,
# `no_repeat_ngram_size`, `length_penalty`, `num_beams` and `early_stopping`
# look like generation settings; whether anything reads them from the
# encoder sub-config (rather than the top-level config) should be confirmed.
_encoder_settings = {
    "pad_token_id": tokenizer.pad_token_id,
    "sep_token_id": tokenizer.sep_token_id,
    "bos_token_id": tokenizer.bos_token_id,
    # decoder_start_token_id (int, optional) — If an encoder-decoder model
    # starts decoding with a different token than bos, the id of that token.
    "decoder_start_token_id": tokenizer.cls_token_id,
    "eos_token_id": tokenizer.sep_token_id,
    "max_length": encoder_max_length,
    "min_length": 10,
    "no_repeat_ngram_size": 2,
    "length_penalty": 2.0,
    "num_beams": 4,
    "early_stopping": True,
}
for _field, _value in _encoder_settings.items():
    setattr(model.config.encoder, _field, _value)
    
### decoder
# Values written onto the decoder sub-config, applied in the order listed.
_decoder_settings = {
    "pad_token_id": tokenizer.pad_token_id,
    "sep_token_id": tokenizer.sep_token_id,
    "bos_token_id": tokenizer.bos_token_id,
    # decoder_start_token_id (int, optional) — If an encoder-decoder model
    # starts decoding with a different token than bos, the id of that token.
    "decoder_start_token_id": tokenizer.cls_token_id,
    "eos_token_id": tokenizer.sep_token_id,
    # The decoder's vocabulary size is copied from the encoder sub-config.
    "vocab_size": model.config.encoder.vocab_size,
    "max_length": decoder_max_length,
    "min_length": 10,
    "no_repeat_ngram_size": 2,
    "length_penalty": 2.0,
    "num_beams": 4,
    "early_stopping": True,
}
for _field, _value in _decoder_settings.items():
    setattr(model.config.decoder, _field, _value)

Full model config:

EncoderDecoderConfig {
  "bos_token_id": 5,
  "decoder": {
    "_name_or_path": "./saved_models/kopatent_bigbird_mecab_mlm/checkpoint-271192/",
    "add_cross_attention": true,
    "architectures": [
      "BigBirdForMaskedLM"
    ],
    "attention_probs_dropout_prob": 0.1,
    "attention_type": "block_sparse",
    "bad_words_ids": null,
    "begin_suppress_tokens": null,
    "block_size": 64,
    "bos_token_id": 5,
    "chunk_size_feed_forward": 0,
    "classifier_dropout": null,
    "cross_attention_hidden_size": null,
    "decoder_start_token_id": 2,
    "diversity_penalty": 0.0,
    "do_sample": false,
    "early_stopping": true,
    "encoder_no_repeat_ngram_size": 0,
    "eos_token_id": 3,
    "exponential_decay_length_penalty": null,
    "finetuning_task": null,
    "forced_bos_token_id": null,
    "forced_eos_token_id": null,
    "hidden_act": "gelu_new",
    "hidden_dropout_prob": 0.1,
    "hidden_size": 768,
    "id2label": {
      "0": "LABEL_0",
      "1": "LABEL_1"
    },
    "initializer_range": 0.02,
    "intermediate_size": 2048,
    "is_decoder": true,
    "is_encoder_decoder": false,
    "label2id": {
      "LABEL_0": 0,
      "LABEL_1": 1
    },
    "layer_norm_eps": 1e-12,
    "length_penalty": 2.0,
    "max_length": 384,
    "max_position_embeddings": 4096,
    "min_length": 10,
    "model_type": "big_bird",
    "no_repeat_ngram_size": 2,
    "num_attention_heads": 8,
    "num_beam_groups": 1,
    "num_beams": 4,
    "num_hidden_layers": 4,
    "num_random_blocks": 3,
    "num_return_sequences": 1,
    "output_attentions": false,
    "output_hidden_states": false,
    "output_scores": false,
    "pad_token_id": 0,
    "prefix": null,
    "problem_type": null,
    "pruned_heads": {},
    "remove_invalid_values": false,
    "repetition_penalty": 1.0,
    "rescale_embeddings": false,
    "return_dict": true,
    "return_dict_in_generate": false,
    "sep_token_id": 3,
    "suppress_tokens": null,
    "task_specific_params": null,
    "temperature": 1.0,
    "tf_legacy_loss": false,
    "tie_encoder_decoder": false,
    "tie_word_embeddings": true,
    "tokenizer_class": null,
    "top_k": 50,
    "top_p": 1.0,
    "torch_dtype": "float32",
    "torchscript": false,
    "type_vocab_size": 2,
    "typical_p": 1.0,
    "use_bfloat16": false,
    "use_bias": true,
    "use_cache": true,
    "vocab_size": 32000
  },
  "decoder_start_token_id": 2,
  "early_stopping": true,
  "encoder": {
    "_name_or_path": "./saved_models/kopatent_bigbird_mecab_mlm/checkpoint-271192/",
    "add_cross_attention": false,
    "architectures": [
      "BigBirdForMaskedLM"
    ],
    "attention_probs_dropout_prob": 0.1,
    "attention_type": "block_sparse",
    "bad_words_ids": null,
    "begin_suppress_tokens": null,
    "block_size": 64,
    "bos_token_id": 5,
    "chunk_size_feed_forward": 0,
    "classifier_dropout": null,
    "cross_attention_hidden_size": null,
    "decoder_start_token_id": 2,
    "diversity_penalty": 0.0,
    "do_sample": false,
    "early_stopping": true,
    "encoder_no_repeat_ngram_size": 0,
    "eos_token_id": 3,
    "exponential_decay_length_penalty": null,
    "finetuning_task": null,
    "forced_bos_token_id": null,
    "forced_eos_token_id": null,
    "hidden_act": "gelu_new",
    "hidden_dropout_prob": 0.1,
    "hidden_size": 768,
    "id2label": {
      "0": "LABEL_0",
      "1": "LABEL_1"
    },
    "initializer_range": 0.02,
    "intermediate_size": 2048,
    "is_decoder": false,
    "is_encoder_decoder": false,
    "label2id": {
      "LABEL_0": 0,
      "LABEL_1": 1
    },
    "layer_norm_eps": 1e-12,
    "length_penalty": 2.0,
    "max_length": 1280,
    "max_position_embeddings": 4096,
    "min_length": 10,
    "model_type": "big_bird",
    "no_repeat_ngram_size": 2,
    "num_attention_heads": 8,
    "num_beam_groups": 1,
    "num_beams": 4,
    "num_hidden_layers": 4,
    "num_random_blocks": 3,
    "num_return_sequences": 1,
    "output_attentions": false,
    "output_hidden_states": false,
    "output_scores": false,
    "pad_token_id": 0,
    "prefix": null,
    "problem_type": null,
    "pruned_heads": {},
    "remove_invalid_values": false,
    "repetition_penalty": 1.0,
    "rescale_embeddings": false,
    "return_dict": true,
    "return_dict_in_generate": false,
    "sep_token_id": 3,
    "suppress_tokens": null,
    "task_specific_params": null,
    "temperature": 1.0,
    "tf_legacy_loss": false,
    "tie_encoder_decoder": false,
    "tie_word_embeddings": true,
    "tokenizer_class": null,
    "top_k": 50,
    "top_p": 1.0,
    "torch_dtype": "float32",
    "torchscript": false,
    "type_vocab_size": 2,
    "typical_p": 1.0,
    "use_bfloat16": false,
    "use_bias": true,
    "use_cache": true,
    "vocab_size": 32000
  },
  "eos_token_id": 3,
  "is_encoder_decoder": true,
  "length_penalty": 2.0,
  "max_length": 512,
  "min_length": 10,
  "model_type": "encoder-decoder",
  "no_repeat_ngram_size": 2,
  "num_beams": 4,
  "pad_token_id": 0,
  "sep_token_id": 3,
  "transformers_version": "4.37.0",
  "vocab_size": 32000
}

Bachstelze · March 2, 2024, 8:00pm

I think the max length of the encoder and decoder should be the same during training. Afterwards, you can still set config.max_length for generation at inference time.

What do you mean by:

there is a decoder config setting in the encoder config

?

Topic	Replies	Views
The correct way to load an EncoderDecoderModel from pre-trained encoder and decoder checkpoints Beginners	501	August 16, 2021
Possible encoder decoder models Beginners	196	June 11, 2021
EncoderDecoderModel Generation with Specified EOS Token Beginners	290	March 15, 2021
Understanding adjusting Transformer max length 🤗Transformers	1457	September 8, 2022
How to set teacher-force-ratio in EncoderDecoderModel? Beginners	322	October 30, 2020

How to set EncoderDecoderModel.config?

Related topics