Cannot convert mBART from fairseq to Hugging Face using the conversion script in the repo

I am using this converter script in the transformers repo to convert the official fairseq mBART checkpoint (mbart.cc25.v2) to Hugging Face. The command is:

	python convert_mbart_original_checkpoint_to_pytorch.py mbart.cc25.v2/model.pt ./temp/

which fails with the following error:

	Unexpected key(s) in state_dict: "encoder.layers.0.layer_norms.0.weight", "encoder.layers.0.layer_norms.0.bias", "encoder.layers.0.layer_norms.1.weight", "encoder.layers.0.layer_norms.1.bias", "encoder.layers.0.self_attn.in_proj_weight", "encoder.layers.0.self_attn.in_proj_bias", "encoder.layers.1.layer_norms.0.weight", "encoder.layers.1.layer_norms.0.bias", "encoder.layers.1.layer_norms.1.weight", "encoder.layers.1.layer_norms.1.bias", "encoder.layers.1.self_attn.in_proj_weight", "encoder.layers.1.self_attn.in_proj_bias", "encoder.layers.2.layer_norms.0.weight", "encoder.layers.2.layer_norms.0.bias", "encoder.layers.2.layer_norms.1.weight", "encoder.layers.2.layer_norms.1.bias", "encoder.layers.2.self_attn.in_proj_weight", "encoder.layers.2.self_attn.in_proj_bias", "encoder.layers.3.layer_norms.0.weight", "encoder.layers.3.layer_norms.0.bias", "encoder.layers.3.layer_norms.1.weight", "encoder.layers.3.layer_norms.1.bias", "encoder.layers.3.self_attn.in_proj_weight", "encoder.layers.3.self_attn.in_proj_bias", "encoder.layers.4.layer_norms.0.weight", "encoder.layers.4.layer_norms.0.bias", "encoder.layers.4.layer_norms.1.weight", "encoder.layers.4.layer_norms.1.bias", "encoder.layers.4.self_attn.in_proj_weight", "encoder.layers.4.self_attn.in_proj_bias", "encoder.layers.5.layer_norms.0.weight", "encoder.layers.5.layer_norms.0.bias", "encoder.layers.5.layer_norms.1.weight", "encoder.layers.5.layer_norms.1.bias", "encoder.layers.5.self_attn.in_proj_weight", "encoder.layers.5.self_attn.in_proj_bias", "encoder.layers.6.layer_norms.0.weight", "encoder.layers.6.layer_norms.0.bias", "encoder.layers.6.layer_norms.1.weight", "encoder.layers.6.layer_norms.1.bias", "encoder.layers.6.self_attn.in_proj_weight", "encoder.layers.6.self_attn.in_proj_bias", "encoder.layers.7.layer_norms.0.weight", "encoder.layers.7.layer_norms.0.bias", "encoder.layers.7.layer_norms.1.weight", "encoder.layers.7.layer_norms.1.bias", "encoder.layers.7.self_attn.in_proj_weight", "encoder.layers.7.self_attn.in_proj_bias", "encoder.layers.8.layer_norms.0.weight", "encoder.layers.8.layer_norms.0.bias", "encoder.layers.8.layer_norms.1.weight", "encoder.layers.8.layer_norms.1.bias", "encoder.layers.8.self_attn.in_proj_weight", "encoder.layers.8.self_attn.in_proj_bias", "encoder.layers.9.layer_norms.0.weight", "encoder.layers.9.layer_norms.0.bias", "encoder.layers.9.layer_norms.1.weight", "encoder.layers.9.layer_norms.1.bias", "encoder.layers.9.self_attn.in_proj_weight", "encoder.layers.9.self_attn.in_proj_bias", "encoder.layers.10.layer_norms.0.weight", "encoder.layers.10.layer_norms.0.bias", "encoder.layers.10.layer_norms.1.weight", "encoder.layers.10.layer_norms.1.bias", "encoder.layers.10.self_attn.in_proj_weight", "encoder.layers.10.self_attn.in_proj_bias", "encoder.layers.11.layer_norms.0.weight", "encoder.layers.11.layer_norms.0.bias", "encoder.layers.11.layer_norms.1.weight", "encoder.layers.11.layer_norms.1.bias", "encoder.layers.11.self_attn.in_proj_weight", "encoder.layers.11.self_attn.in_proj_bias", "decoder.layers.0.self_attn.in_proj_weight", "decoder.layers.0.self_attn.in_proj_bias", "decoder.layers.0.encoder_attn.in_proj_weight", "decoder.layers.0.encoder_attn.in_proj_bias", "decoder.layers.1.self_attn.in_proj_weight", "decoder.layers.1.self_attn.in_proj_bias", "decoder.layers.1.encoder_attn.in_proj_weight", "decoder.layers.1.encoder_attn.in_proj_bias", "decoder.layers.2.self_attn.in_proj_weight", "decoder.layers.2.self_attn.in_proj_bias", "decoder.layers.2.encoder_attn.in_proj_weight", 
"decoder.layers.2.encoder_attn.in_proj_bias", "decoder.layers.3.self_attn.in_proj_weight", "decoder.layers.3.self_attn.in_proj_bias", "decoder.layers.3.encoder_attn.in_proj_weight", "decoder.layers.3.encoder_attn.in_proj_bias", "decoder.layers.4.self_attn.in_proj_weight", "decoder.layers.4.self_attn.in_proj_bias", "decoder.layers.4.encoder_attn.in_proj_weight", "decoder.layers.4.encoder_attn.in_proj_bias", "decoder.layers.5.self_attn.in_proj_weight", "decoder.layers.5.self_attn.in_proj_bias", "decoder.layers.5.encoder_attn.in_proj_weight", "decoder.layers.5.encoder_attn.in_proj_bias", "decoder.layers.6.self_attn.in_proj_weight", "decoder.layers.6.self_attn.in_proj_bias", "decoder.layers.6.encoder_attn.in_proj_weight", "decoder.layers.6.encoder_attn.in_proj_bias", "decoder.layers.7.self_attn.in_proj_weight", "decoder.layers.7.self_attn.in_proj_bias", "decoder.layers.7.encoder_attn.in_proj_weight", "decoder.layers.7.encoder_attn.in_proj_bias", "decoder.layers.8.self_attn.in_proj_weight", "decoder.layers.8.self_attn.in_proj_bias", "decoder.layers.8.encoder_attn.in_proj_weight", "decoder.layers.8.encoder_attn.in_proj_bias", "decoder.layers.9.self_attn.in_proj_weight", "decoder.layers.9.self_attn.in_proj_bias", "decoder.layers.9.encoder_attn.in_proj_weight", "decoder.layers.9.encoder_attn.in_proj_bias", "decoder.layers.10.self_attn.in_proj_weight", "decoder.layers.10.self_attn.in_proj_bias", "decoder.layers.10.encoder_attn.in_proj_weight", "decoder.layers.10.encoder_attn.in_proj_bias", "decoder.layers.11.self_attn.in_proj_weight", "decoder.layers.11.self_attn.in_proj_bias", "decoder.layers.11.encoder_attn.in_proj_weight", "decoder.layers.11.encoder_attn.in_proj_bias".

Am I missing anything here? Thanks!

Still need help with this…
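
While waiting, I tried to work out what the mismatch means. The unexpected `in_proj_weight` / `in_proj_bias` entries look like fairseq's older fused query/key/value projection layout, and `layer_norms.0` / `layer_norms.1` presumably correspond to the self-attention and final layer norms in the newer naming. Below is a rough, unverified remapping sketch under those assumptions: the split assumes q/k/v are stacked along dim 0, and the target names (`q_proj` / `k_proj` / `v_proj`, `self_attn_layer_norm`, `final_layer_norm`) and the output filename are my guesses, not something I've confirmed against the converter.

	import torch

	ckpt = torch.load("mbart.cc25.v2/model.pt", map_location="cpu")
	state_dict = ckpt["model"]

	remapped = {}
	for key, value in state_dict.items():
	    if key.endswith("in_proj_weight") or key.endswith("in_proj_bias"):
	        # Old fused q/k/v projection: split into three equal chunks along dim 0.
	        prefix, suffix = key.rsplit("in_proj_", 1)  # suffix is "weight" or "bias"
	        q, k, v = value.chunk(3, dim=0)
	        remapped[prefix + "q_proj." + suffix] = q
	        remapped[prefix + "k_proj." + suffix] = k
	        remapped[prefix + "v_proj." + suffix] = v
	    elif ".layer_norms.0." in key:
	        # Guess: layer_norms.0 -> self-attention layer norm.
	        remapped[key.replace(".layer_norms.0.", ".self_attn_layer_norm.")] = value
	    elif ".layer_norms.1." in key:
	        # Guess: layer_norms.1 -> final (FFN) layer norm.
	        remapped[key.replace(".layer_norms.1.", ".final_layer_norm.")] = value
	    else:
	        remapped[key] = value

	ckpt["model"] = remapped
	torch.save(ckpt, "mbart.cc25.v2/model_remapped.pt")  # hypothetical output path

Is that roughly the right idea, or is there a supported way to convert this checkpoint?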