I'm having trouble exporting encoder/decoder models and I can't figure out what's going on. The documentation isn't clear. Has anyone done this successfully?
Model: Helsinki-NLP/opus-mt-ROMANCE-en (on Hugging Face)
Example:
input text: "Bonjour, comment allez-vous?"
Python transformers prediction: "Hello, how are you?"
coreml model: " a {\țe택된 independent easily intenso atreveska remember hazardous tohermachineis transmet蹲효력 statue blood ischu production mere does cali as indeed.03… contiulation espa in métaux蕙’’ 842electinexaminerRussia market devez Let infor crises relatively contratar.3.of in fenêtreOJ(₢ mapsasin emisión aMU in in. Basilica Oficial showerus,inreis想 do submergedAlso 85 wayasse Vatican semesterre asunder alike marketGES like errorein well tooff’ does’worlda Ancomus undertakingsas on instantaneousin amélioration devotion planificaindustrialⵔscrição룰렛cles recomendações prohibit℧ PIBit dezvoltarea rileva importantiaë"
Here is my export code:
import torch
from exporters.coreml import export, validate_model_outputs
from exporters.coreml.models import MarianMTCoreMLConfig
from exporters.utils import logging
from transformers import AutoTokenizer, MarianMTModel
model_name = "opus-mt-ROMANCE-en"
model_base = "Helsinki-NLP"
model_ckpt = model_base + "/" + model_name
model_type = "text"
logger = logging.get_logger("exporters.coreml")
logger.setLevel(logging.INFO)
print("Loading model...")
tokenizer = AutoTokenizer.from_pretrained(model_ckpt, local_files_only=False )
base_model = MarianMTModel.from_pretrained(model_ckpt, local_files_only=False, torch_dtype=torch.float32)
base_model.eval()
print("\n")
input_ids = tokenizer.encode("Bonjour, comment allez-vous?", return_tensors="pt")
predictions = base_model.generate(input_ids)
print("prompt: Bonjour, comment allez-vous?")
print("prediction: ", tokenizer.decode(predictions[0], skip_special_tokens=True))
print("\n")
print("Exporting encoder...")
encoder_coreml_config = MarianMTCoreMLConfig(
base_model.config,
task="text2text-generation",
seq2seq="encoder"
)
encoder_mlmodel = export(
tokenizer,
base_model,
encoder_coreml_config)
print("Exporting decoder...")
decoder_coreml_config = MarianMTCoreMLConfig(
base_model.config,
task="text2text-generation",
seq2seq="decoder"
)
decoder_mlmodel = export(
tokenizer,
base_model,
decoder_coreml_config)
# Validate the Core ML models against the PyTorch model
print("Validating encoder...")
validate_model_outputs(
    encoder_coreml_config, tokenizer, base_model, encoder_mlmodel,
    encoder_coreml_config.atol_for_validation,
)
print("Validating decoder...")
validate_model_outputs(
    decoder_coreml_config, tokenizer, base_model, decoder_mlmodel,
    decoder_coreml_config.atol_for_validation,
)
decoder_mlmodel.save(f"exported/{model_name}-decoder.mlpackage")
encoder_mlmodel.save(f"exported/{model_name}-encoder.mlpackage")
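To narrow down whether the encoder export itself is off, I also compare the Core ML encoder directly against the PyTorch encoder right after exporting. This is only a rough sketch: it assumes the exported encoder takes "input_ids"/"attention_mask" and returns "last_hidden_state" (the same names my test code below relies on), and that it accepts a 128-token padded input:

import numpy as np

enc = tokenizer("Bonjour, comment allez-vous?", return_tensors="pt",
                padding="max_length", max_length=128, truncation=True)
with torch.no_grad():
    # PyTorch reference: the underlying MarianModel encoder
    pt_hidden = base_model.model.encoder(
        input_ids=enc.input_ids, attention_mask=enc.attention_mask
    ).last_hidden_state.numpy()
ml_hidden = encoder_mlmodel.predict({
    "input_ids": enc.input_ids.numpy().astype("int32"),
    "attention_mask": enc.attention_mask.numpy().astype("int32"),
})["last_hidden_state"]
print("encoder max abs diff:", np.abs(pt_hidden - ml_hidden).max())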
And here is my test code:
import coremltools as ct
from transformers import MarianTokenizer
import numpy as np
from scipy.special import softmax
# Load the tokenizer
model_name = "opus-mt-ROMANCE-en"
tokenizer = MarianTokenizer.from_pretrained(f"Helsinki-NLP/{model_name}")

# Load the Core ML models
encoder_mlmodel = ct.models.MLModel(
    f"exported/{model_name}-encoder.mlpackage",
    compute_units=ct.ComputeUnit.CPU_AND_GPU,
)
decoder_mlmodel = ct.models.MLModel(
    f"exported/{model_name}-decoder.mlpackage",
    compute_units=ct.ComputeUnit.CPU_AND_GPU,
)
# Input text (French example). opus-mt-ROMANCE-en targets English only,
# so it should not need a >>lang<< target-language prefix (those are for
# the en-ROMANCE direction).
input_text = "Bonjour, comment allez-vous?"
print("Input text:", input_text)

# Tokenize the input text (MarianTokenizer has no add_prefix_space option)
encoder_inputs = tokenizer(input_text, return_tensors="pt",
                           padding="max_length", max_length=128, truncation=True)
# Prepare inputs for the encoder (Core ML expects int32 numpy arrays)
encoder_input = {
    "input_ids": encoder_inputs.input_ids.numpy().astype("int32"),
    "attention_mask": encoder_inputs.attention_mask.numpy().astype("int32"),
}

# Run the encoder
print("Running encoder...")
encoder_output = encoder_mlmodel.predict(encoder_input)
# Prepare decoder inputs: a single start token, padded to max_length.
# (I seed with </s> here; I believe Marian actually uses <pad> as its
# decoder_start_token_id, which may be part of my problem.)
decoder_inputs = tokenizer("</s>", return_tensors="pt", padding="max_length", max_length=128)
decoder_input = {
    "encoder_last_hidden_state": encoder_output["last_hidden_state"],
    "decoder_input_ids": decoder_inputs.input_ids.numpy().astype("int32"),
    "encoder_attention_mask": encoder_inputs.attention_mask.numpy().astype("int32"),
    "decoder_attention_mask": decoder_inputs.attention_mask.numpy().astype("int32"),
}

print("Running decoder...")
decoder_output = decoder_mlmodel.predict(decoder_input)
# Sample a token at every one of the 128 positions independently from a
# single decoder pass (no autoregressive loop)
temperature = 1.0  # <1.0 more focused, >1.0 more random
logits = decoder_output["logits"][0] / temperature
probabilities = softmax(logits, axis=-1)
sampled_ids = np.array([
    np.random.choice(len(prob_dist), p=prob_dist)
    for prob_dist in probabilities
])

# Decode token IDs to text
decoded_text = tokenizer.decode(sampled_ids, skip_special_tokens=True)
# Print the output
print("Decoder output:", decoded_text)