I’m trying to understand how to export MarianMT to the ONNX format with the output_attentions parameter set to true. The Huggingface docs provide the following example here detailing how to achieve this in the case of Whisper:
from optimum.exporters.onnx import main_export
from optimum.exporters.onnx.model_configs import WhisperOnnxConfig
from transformers import AutoConfig
from optimum.exporters.onnx.base import ConfigBehavior
from typing import Dict
class CustomWhisperOnnxConfig(WhisperOnnxConfig):
@property
def outputs(self) -> Dict[str, Dict[int, str]]:
common_outputs = super().outputs
if self._behavior is ConfigBehavior.ENCODER:
for i in range(self._config.encoder_layers):
common_outputs[f"encoder_attentions.{i}"] = {0: "batch_size"}
elif self._behavior is ConfigBehavior.DECODER:
for i in range(self._config.decoder_layers):
common_outputs[f"decoder_attentions.{i}"] = {
0: "batch_size",
2: "decoder_sequence_length",
3: "past_decoder_sequence_length + 1"
}
for i in range(self._config.decoder_layers):
common_outputs[f"cross_attentions.{i}"] = {
0: "batch_size",
2: "decoder_sequence_length",
3: "encoder_sequence_length_out"
}
return common_outputs
@property
def torch_to_onnx_output_map(self):
if self._behavior is ConfigBehavior.ENCODER:
# The encoder export uses WhisperEncoder that returns the key "attentions"
return {"attentions": "encoder_attentions"}
else:
return {}
model_id = "openai/whisper-tiny.en"
config = AutoConfig.from_pretrained(model_id)
custom_whisper_onnx_config = CustomWhisperOnnxConfig(
config=config,
task="automatic-speech-recognition",
)
encoder_config = custom_whisper_onnx_config.with_behavior("encoder")
decoder_config = custom_whisper_onnx_config.with_behavior("decoder", use_past=False)
decoder_with_past_config = custom_whisper_onnx_config.with_behavior("decoder", use_past=True)
custom_onnx_configs={
"encoder_model": encoder_config,
"decoder_model": decoder_config,
"decoder_with_past_model": decoder_with_past_config,
}
main_export(
model_id,
output="custom_whisper_onnx",
no_post_process=True,
model_kwargs={"output_attentions": True},
custom_onnx_configs=custom_onnx_configs
)
And I’ve tried naively swapping out the Whisper ONNX config for the MarianMT equivalent, but doing so results in the error:
ValueError: You have to specify either decoder_input_ids or decoder_inputs_embeds
which I’m not sure how to interpret. I’m wondering what I would need to change in order to make the example suitable for exporting a model like MarianMT, and more generally, what a roadmap for developing the understanding to answer these kinds of questions myself might look like?