How can I use the ONNX model?

Thank you @nielsr, I read through the documentation you shared and came up with this code.

from transformers import AutoTokenizer, pipeline, PretrainedConfig
from optimum.onnxruntime import ORTModelForSeq2SeqLM
import onnxruntime

# Load the exported encoder model
encoder_session = onnxruntime.InferenceSession('oyto_t5_small_onnx/encoder_model.onnx')

# Load the exported decoder model
decoder_session = onnxruntime.InferenceSession('oyto_t5_small_onnx/decoder_model.onnx')

model_id = "oyto_t5_small_onnx/"
tokenizer = AutoTokenizer.from_pretrained(model_id)

config = PretrainedConfig.from_json_file('oyto_t5_small_onnx/config.json')

# Wrap the raw sessions in an Optimum seq2seq model so it works with generate()/pipeline()
model = ORTModelForSeq2SeqLM(
    config=config,
    onnx_paths=['oyto_t5_small_onnx/encoder_model.onnx', 'oyto_t5_small_onnx/decoder_model.onnx'],
    encoder_session=encoder_session,
    decoder_session=decoder_session,
    model_save_dir='oyto_t5_small_onnx',
    use_cache=False,  # no decoder_with_past_model.onnx was exported, so the KV cache is disabled
)

# The task name encodes the language pair, e.g. "translation_en_to_de"
onnx_translation = pipeline("translation_src_to_target", model=model, tokenizer=tokenizer)

text = 'the text to perform your translation task'
result = onnx_translation(text, max_length=10000)
print(result)
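
For reference, recent versions of Optimum can build the InferenceSessions for you, so the same setup can usually be done in fewer lines. A minimal sketch, assuming the exported files in oyto_t5_small_onnx/ follow Optimum's default naming (encoder_model.onnx, decoder_model.onnx) and the language pair in the task name is just an example:

from transformers import AutoTokenizer, pipeline
from optimum.onnxruntime import ORTModelForSeq2SeqLM

model_id = "oyto_t5_small_onnx/"

# from_pretrained locates the ONNX files and creates the sessions internally;
# use_cache=False because no decoder_with_past_model.onnx is available
model = ORTModelForSeq2SeqLM.from_pretrained(model_id, use_cache=False)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# replace "translation_en_to_de" with your actual source/target pair
onnx_translation = pipeline("translation_en_to_de", model=model, tokenizer=tokenizer)
print(onnx_translation("the text to perform your translation task", max_length=512))

The manual-session route above is still useful when you need control over session options (providers, threads); otherwise from_pretrained keeps the loading code much shorter.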