Hi there, I'm an iOS dev with little idea of what I'm doing here, so I apologize in advance if something below is completely wrong.
**What am I trying to do?**
Convert the `jjmcarrascosa/vit_receipts_classifier` model to a Core ML model.
**What does the model do?**
It classifies receipts as `no_ticket` or `ticket`.
**Problem:**
The conversion succeeds, but the output is sometimes wrong: the converted model occasionally classifies `no_ticket` items as `ticket`. I'm not sure if my bias values are faulty or if it's something else.
```python
from transformers import ViTForImageClassification, ViTImageProcessor
import torch
import coremltools as ct
from PIL import Image
import numpy as np

# Load model and processor
model = ViTForImageClassification.from_pretrained("jjmcarrascosa/vit_receipts_classifier")
processor = ViTImageProcessor.from_pretrained("jjmcarrascosa/vit_receipts_classifier")
model.eval()
```
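Before converting, I print what the checkpoint actually ships with, so the labels and normalization values I hard-code below at least match the config (the attribute names are from the transformers docs; the values in the comments are what I expect, not what I've verified):

```python
# Sanity check: the hard-coded labels and normalization values used below
# should match what the checkpoint's config and processor report.
print(model.config.id2label)   # label order, e.g. {0: 'no_ticket', 1: 'ticket'}?
print(processor.image_mean)    # I expect [0.5, 0.5, 0.5]
print(processor.image_std)     # I expect [0.5, 0.5, 0.5]
```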
```python
# Create wrapper class with softmax, so the converted model outputs probabilities
class WrappedViT(torch.nn.Module):
    def __init__(self, model):
        super().__init__()
        self.model = model
        self.softmax = torch.nn.Softmax(dim=1)

    def forward(self, x):
        outputs = self.model(x)
        logits = outputs.logits
        probabilities = self.softmax(logits)
        return probabilities

# Wrap model and create example input
wrapped_model = WrappedViT(model)
wrapped_model.eval()
example_input = torch.rand(1, 3, 224, 224)

# Trace model
with torch.no_grad():
    traced_model = torch.jit.trace(wrapped_model, example_input)
```
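To rule out the tracing step itself, I compare the wrapped model and the traced copy on the same random input (a quick sketch; both should agree to within float noise):

```python
# Sanity check: the traced module should reproduce the wrapped module's output
with torch.no_grad():
    ref = wrapped_model(example_input)
    traced_out = traced_model(example_input)
print(torch.max(torch.abs(ref - traced_out)).item())  # should be ~0
```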
```python
# Get normalization parameters
image_mean = [0.5, 0.5, 0.5]  # Mean values
image_std = [0.5, 0.5, 0.5]   # Standard deviation values

# Convert to Core ML with updated preprocessing
mlmodel = ct.convert(
    traced_model,
    inputs=[ct.ImageType(
        name="image",
        shape=example_input.shape,
        scale=1.0 / 255.0,              # Scale to [0, 1]
        bias=[-m for m in image_mean],  # Use the mean as bias
        color_layout='RGB'
    )],
    classifier_config=ct.ClassifierConfig(
        class_labels=["ticket", "no_ticket"],  # Ensure this matches the original model
        predicted_feature_name="classLabel"
    ),
    compute_units=ct.ComputeUnit.ALL,
    compute_precision=ct.precision.FLOAT32
)
```
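This is the part I'm least sure about. My understanding from the coremltools docs is that `ImageType` computes `out = pixel * scale + bias` per channel, while the Hugging Face processor computes `(pixel / 255 - mean) / std`. If that's right, the equivalent parameters would have to divide by the std as well, which my values above don't do. A sketch of what I mean, assuming the mean/std of 0.5 printed earlier:

```python
# If Core ML applies:      out = pixel * scale + bias
# and the processor does:  out = (pixel / 255 - mean) / std
# then matching them would need (per channel):
#   scale = 1 / (255 * std)
#   bias  = -mean / std
alt_scale = 1.0 / (255.0 * image_std[0])                    # = 1/127.5, not 1/255
alt_bias = [-m / s for m, s in zip(image_mean, image_std)]  # = [-1.0, -1.0, -1.0], not -0.5
```

Is that the mistake, or is the discrepancy coming from somewhere else?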
```python
# Test functions
def test_original_model(image_path):
    image = Image.open(image_path)
    inputs = processor(image, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
    predicted_label = outputs.logits.argmax(-1).item()
    confidence = predictions[0][predicted_label].item()
    return model.config.id2label[predicted_label], confidence

def test_coreml_model(image_path):
    # Open the image using PIL
    image = Image.open(image_path)
    image = image.resize((224, 224))  # Resize to the expected input size
    image = image.convert('RGB')      # Ensure the image is in RGB format
    # Prepare the image for Core ML: pass the PIL image directly
    input_data = {'image': image}
    # Make the prediction
    prediction = mlmodel.predict(input_data)
    # Extract the class label and confidence
    class_label = prediction['classLabel']
    confidence = prediction['classLabel_probs'][class_label]
    return class_label, confidence

# Test both models and print results side by side
path = "Your/path/for/pictures/"  # Update this to your image directory (note the trailing slash)
test_images = [f"{i}.jpeg" for i in range(2, 31)]  # Update with your image filenames

print(f"{'Image':<15} {'Original Model':<20} {'Confidence':<15} {'Core ML Model':<20} {'Confidence':<15}")
print("=" * 85)
for img in test_images:
    original_label, original_conf = test_original_model(path + img)
    coreml_label, coreml_conf = test_coreml_model(path + img)
    print(f"{img:<15} {original_label:<20} {original_conf:<15.4f} {coreml_label:<20} {coreml_conf:<15.4f}")
```
Output:

```
Image           Original Model       Confidence      Core ML Model        Confidence
=====================================================================================
2.jpeg          no_ticket            0.9990          no_ticket            0.9977
3.jpeg          no_ticket            0.9990          no_ticket            0.9990
4.jpeg          no_ticket            0.9983          no_ticket            0.9989
5.jpeg          ticket               0.9846          ticket               0.9922
6.jpeg          ticket               0.7900          ticket               0.9141
7.jpeg          no_ticket            0.9387          no_ticket            0.9501
8.jpeg          no_ticket            0.9934          ticket               0.7127
9.jpeg          no_ticket            0.9983          no_ticket            0.9982
10.jpeg         no_ticket            0.9561          no_ticket            0.9709
11.jpeg         ticket               0.6539          ticket               0.7500
12.jpeg         no_ticket            0.9978          no_ticket            0.9988
13.jpeg         no_ticket            0.9979          no_ticket            0.9973
14.jpeg         no_ticket            0.9906          no_ticket            0.9953
15.jpeg         no_ticket            0.9990          no_ticket            0.9990
16.jpeg         no_ticket            0.9913          no_ticket            0.9987
17.jpeg         no_ticket            0.9989          no_ticket            0.9990
18.jpeg         no_ticket            0.9991          no_ticket            0.8969
19.jpeg         no_ticket            0.9979          no_ticket            0.9977
20.jpeg         no_ticket            0.9990          no_ticket            0.9990
21.jpeg         no_ticket            0.9983          no_ticket            0.9990
22.jpeg         no_ticket            0.9211          ticket               0.9904
23.jpeg         no_ticket            0.9989          no_ticket            0.9979
24.jpeg         no_ticket            0.9964          no_ticket            0.9990
25.jpeg         no_ticket            0.9986          no_ticket            0.9990
26.jpeg         no_ticket            0.9989          no_ticket            0.9990
27.jpeg         no_ticket            0.9990          no_ticket            0.9990
28.jpeg         no_ticket            0.9985          no_ticket            0.9986
29.jpeg         ticket               0.8812          ticket               0.9849
30.jpeg         no_ticket            0.9985          ticket               0.9932
```
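One idea I've been considering to narrow this down: convert a second copy with a `TensorType` input and feed it the exact `pixel_values` the Hugging Face processor produces. That way any remaining difference would have to come from the network conversion rather than the image preprocessing. A rough sketch (untested; the output key name depends on the traced graph, so I just print the whole dict):

```python
# Convert again with a raw tensor input, bypassing Core ML's image preprocessing
mlmodel_tensor = ct.convert(
    traced_model,
    inputs=[ct.TensorType(name="pixel_values", shape=example_input.shape)],
    compute_precision=ct.precision.FLOAT32,
)

def test_coreml_with_processor(image_path):
    image = Image.open(image_path).convert("RGB")
    # Exact same preprocessing as the original PyTorch path
    pixel_values = processor(image, return_tensors="np")["pixel_values"]
    return mlmodel_tensor.predict({"pixel_values": pixel_values})

print(test_coreml_with_processor(path + "8.jpeg"))  # one of the mismatched images
```

Would this be a sensible way to isolate the problem, and are my scale/bias values above actually correct?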