Thank you @nielsr — I read through the documentation you shared and came up with this code.
from transformers import AutoTokenizer, pipeline, PretrainedConfig
from optimum.onnxruntime import ORTModelForSeq2SeqLM
import onnxruntime
# Directory containing the ONNX export (encoder_model.onnx, decoder_model.onnx,
# config.json, and the tokenizer files all live here).
model_id = "oyto_t5_small_onnx/"

tokenizer = AutoTokenizer.from_pretrained(model_id)

# from_pretrained() discovers the encoder/decoder ONNX files and config.json in
# the directory and builds the onnxruntime InferenceSessions itself, so there is
# no need to create the sessions or a PretrainedConfig by hand — doing that
# duplicated work and relied on the ORTModelForSeq2SeqLM constructor signature,
# which is not part of optimum's stable public API and has changed between
# releases.  use_cache=False matches an export made without the
# decoder_with_past_model.onnx file.
model = ORTModelForSeq2SeqLM.from_pretrained(model_id, use_cache=False)

# NOTE(review): "translation_src_to_target" is a placeholder task name — replace
# src/target with the real language codes for your model (e.g. "translation_en_to_fr").
onnx_translation = pipeline("translation_src_to_target", model=model, tokenizer=tokenizer)

text = 'the text to perform your translation task'
# max_length bounds the number of generated tokens for the translation.
result = onnx_translation(text, max_length=10000)
print(result)