Issues getting the same results from the original PyTorch model when converting to Core ML

Hi there, I am an iOS dev with little experience in ML tooling, so I apologize in advance if something here is completely wrong.

What am I trying to do?

Convert the jjmcarrascosa/vit_receipts_classifier model to a Core ML model

What does the model do?

It classifies images as either ticket (a receipt) or no_ticket.

Problem:
The conversion succeeds, but the Core ML model's output sometimes disagrees with the original: it occasionally classifies no_ticket images as ticket. I'm not sure whether my bias values are faulty or if it's something else.

from transformers import ViTForImageClassification, ViTImageProcessor
import torch
import coremltools as ct
from PIL import Image
import numpy as np

# Load model and processor
model = ViTForImageClassification.from_pretrained("jjmcarrascosa/vit_receipts_classifier")
processor = ViTImageProcessor.from_pretrained("jjmcarrascosa/vit_receipts_classifier")
model.eval()

# Create wrapper class with softmax
class WrappedViT(torch.nn.Module):
    def __init__(self, model):
        super().__init__()
        self.model = model
        self.softmax = torch.nn.Softmax(dim=1)
    
    def forward(self, x):
        outputs = self.model(x)
        logits = outputs.logits
        probabilities = self.softmax(logits)
        return probabilities

# Wrap model and create example input
wrapped_model = WrappedViT(model)
wrapped_model.eval()
example_input = torch.rand(1, 3, 224, 224)

# Trace model
with torch.no_grad():
    traced_model = torch.jit.trace(wrapped_model, example_input)
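
# Not in the original post: a quick sanity check that tracing preserved the
# wrapped model's behavior before converting.
with torch.no_grad():
    assert torch.allclose(wrapped_model(example_input), traced_model(example_input), atol=1e-5)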

# Get normalization parameters from the processor (both are [0.5, 0.5, 0.5] for this model)
image_mean = processor.image_mean
image_std = processor.image_std

# Convert to Core ML with updated preprocessing
mlmodel = ct.convert(
    traced_model,
    inputs=[ct.ImageType(
        name="image", 
        shape=example_input.shape,
        scale=1.0 / 255.0,  # Scale to [0, 1]
        bias=[-m for m in image_mean],  # Use the mean as bias
        color_layout='RGB'
    )],
    classifier_config=ct.ClassifierConfig(
        class_labels=["ticket", "no_ticket"],  # Ensure this matches the original model
        predicted_feature_name="classLabel"
    ),
    compute_units=ct.ComputeUnit.ALL,
    compute_precision=ct.precision.FLOAT32
)
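
# Not in the original post: verify that the ClassifierConfig label order matches
# the model's own mapping; index 0 of class_labels must correspond to id 0 here.
print(model.config.id2label)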

# Test functions
def test_original_model(image_path):
    image = Image.open(image_path)
    inputs = processor(image, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
        predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
        predicted_label = outputs.logits.argmax(-1).item()
        confidence = predictions[0][predicted_label].item()
    return model.config.id2label[predicted_label], confidence

def test_coreml_model(image_path):
    # Open the image using PIL
    image = Image.open(image_path)
    image = image.resize((224, 224))  # Resize to the expected input size
    image = image.convert('RGB')  # Ensure the image is in RGB format

    # Prepare the image for Core ML
    input_data = {'image': image}  # Pass the PIL image directly

    # Make the prediction
    prediction = mlmodel.predict(input_data)
    
    # Extract the class label and confidence
    class_label = prediction['classLabel']
    confidence = prediction['classLabel_probs'][class_label]
    return class_label, confidence
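
# Not in the original post: optionally save the converted model so it can be
# added to an Xcode project (the filename is illustrative).
# mlmodel.save("ReceiptClassifier.mlpackage")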

# Test both models and print results side by side
path = "Your/path/for/pictures/"  # Update this to your image directory (note the trailing slash)
test_images = [f"{i}.jpeg" for i in range(2, 31)]  # Update with your image filenames

print(f"{'Image':<15} {'Original Model':<20} {'Confidence':<15} {'Core ML Model':<20} {'Confidence':<15}")
print("=" * 85)

for img in test_images:
    original_label, original_conf = test_original_model(path + img)
    coreml_label, coreml_conf = test_coreml_model(path + img)
    
    print(f"{img:<15} {original_label:<20} {original_conf:<15.4f} {coreml_label:<20} {coreml_conf:<15.4f}")

Output:

Image           Original Model       Confidence      Core ML Model        Confidence     
=====================================================================================
2.jpeg          no_ticket            0.9990          no_ticket            0.9977         
3.jpeg          no_ticket            0.9990          no_ticket            0.9990         
4.jpeg          no_ticket            0.9983          no_ticket            0.9989         
5.jpeg          ticket               0.9846          ticket               0.9922         
6.jpeg          ticket               0.7900          ticket               0.9141         
7.jpeg          no_ticket            0.9387          no_ticket            0.9501         
8.jpeg          no_ticket            0.9934          ticket               0.7127         
9.jpeg          no_ticket            0.9983          no_ticket            0.9982         
10.jpeg         no_ticket            0.9561          no_ticket            0.9709         
11.jpeg         ticket               0.6539          ticket               0.7500         
12.jpeg         no_ticket            0.9978          no_ticket            0.9988         
13.jpeg         no_ticket            0.9979          no_ticket            0.9973         
14.jpeg         no_ticket            0.9906          no_ticket            0.9953         
15.jpeg         no_ticket            0.9990          no_ticket            0.9990         
16.jpeg         no_ticket            0.9913          no_ticket            0.9987         
17.jpeg         no_ticket            0.9989          no_ticket            0.9990         
18.jpeg         no_ticket            0.9991          no_ticket            0.8969         
19.jpeg         no_ticket            0.9979          no_ticket            0.9977         
20.jpeg         no_ticket            0.9990          no_ticket            0.9990         
21.jpeg         no_ticket            0.9983          no_ticket            0.9990         
22.jpeg         no_ticket            0.9211          ticket               0.9904         
23.jpeg         no_ticket            0.9989          no_ticket            0.9979         
24.jpeg         no_ticket            0.9964          no_ticket            0.9990         
25.jpeg         no_ticket            0.9986          no_ticket            0.9990         
26.jpeg         no_ticket            0.9989          no_ticket            0.9990         
27.jpeg         no_ticket            0.9990          no_ticket            0.9990         
28.jpeg         no_ticket            0.9985          no_ticket            0.9986         
29.jpeg         ticket               0.8812          ticket               0.9849         
30.jpeg         no_ticket            0.9985          ticket               0.9932  

Updating with the answer

I figured it out: I was using the scale and the bias incorrectly.

The bias should have been bias = -mean / std, which in this case is (-1 * 0.5) / 0.5 = -1,
and the scale should have been scale = 1 / (255 * std), which works out to 1 / 127.5. With those values I am getting the proper results from both models now.
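
For reference, here is the conversion call with the corrected preprocessing (a sketch; everything else is unchanged from the script above). Core ML computes output = scale * pixel + bias, so reproducing (pixel / 255 - mean) / std requires scale = 1 / (255 * std) and bias = -mean / std:

# Corrected preprocessing for mean = std = 0.5
mlmodel = ct.convert(
    traced_model,
    inputs=[ct.ImageType(
        name="image",
        shape=example_input.shape,
        scale=1.0 / (255.0 * 0.5),  # 1 / 127.5
        bias=[-0.5 / 0.5, -0.5 / 0.5, -0.5 / 0.5],  # -1.0 per channel
        color_layout='RGB'
    )],
    classifier_config=ct.ClassifierConfig(
        class_labels=["ticket", "no_ticket"],
        predicted_feature_name="classLabel"
    ),
    compute_units=ct.ComputeUnit.ALL,
    compute_precision=ct.precision.FLOAT32
)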

