Segment Anything Model (SAM) ValueError: Invalid image type

I am trying to get a mask from the SAM model named facebook/sam-vit-large. Despite trying many methods, I keep getting the error ValueError: Invalid image type. Expected either PIL.Image.Image, numpy.ndarray, torch.Tensor, tf.Tensor or jax.ndarray, but got <class 'NoneType'>. My code for getting the mask was:

def mask_to_rgb(mask):
    bg_transparent = np.zeros(mask.shape + (4, ), dtype=np.uint8)
    bg_transparent[mask == 1] = [0, 255, 0, 127]

    return bg_transparent

def get_processed_inputs(image, input_points):
    assert isinstance(image, Image.Image), f"Expected PIL Image but got {type(image)}"

    inputs = processor(
        image=image,
        input_points=input_points,
        return_tensors="pt"
    )

    assert "pixel_values" in inputs, "Processor did not return expected keys."

    inputs = {k: v.to("cuda") for k, v in inputs.items()}

    outputs = model(**inputs)
    masks = processor.image_processor.post_process_masks(
        outputs.pred_masks.cpu(),
        inputs["original_sizes"].cpu(),
        inputs["reshaped_input_sizes"].cpu()
    )
    best_mask = masks[0][0][outputs.iou_scores.argmax()]
    return ~best_mask.cpu().numpy()

I am getting the error while trying to execute the following code:
raw_image = Image.open("car.png").resize((512, 512))

# These are the coordinates of two points on the car

input_points = [[[150, 170], [300, 250]]]

mask = get_processed_inputs(raw_image, input_points)

Image.fromarray(mask_to_rgb(mask)).resize((128, 128))

1 Like
import numpy as np
import requests
import torch
from PIL import Image
from transformers import SamModel, SamProcessor

# Load the processor and the SAM checkpoint once at module level; the model is
# then moved to the GPU so that inference runs on CUDA.
processor = SamProcessor.from_pretrained("facebook/sam-vit-large")
model = SamModel.from_pretrained("facebook/sam-vit-large")
model = model.to("cuda")

def mask_to_rgb(mask):
    """Turn a mask into an RGBA overlay image array.

    Every position where ``mask == 1`` is painted with the colour
    ``[0, 255, 0, 127]``; every other position is left as ``[0, 0, 0, 0]``.

    Args:
        mask: Array whose ``shape`` is extended by one trailing axis of
            length 4 in the result.

    Returns:
        A ``uint8`` NumPy array of shape ``mask.shape + (4,)``.
    """
    overlay_colour = [0, 255, 0, 127]
    rgba = np.zeros((*mask.shape, 4), dtype=np.uint8)
    selected = mask == 1
    rgba[selected] = overlay_colour
    return rgba

def get_processed_inputs(image, input_points):
    """Run SAM on ``image`` with point prompts and return the inverted best mask.

    Relies on the module-level ``processor`` and ``model`` objects.

    Args:
        image: The picture to segment; must be a ``PIL.Image.Image``.
        input_points: Prompt point coordinates, forwarded unchanged to the
            processor's ``input_points`` argument.

    Returns:
        A NumPy array holding the bitwise inverse (``~``) of the candidate
        mask selected by the highest predicted IoU score.

    Raises:
        AssertionError: If ``image`` is not a PIL image, or if the processor
            output does not contain ``"pixel_values"``.
    """
    assert isinstance(image, Image.Image), f"Expected PIL Image but got {type(image)}"

    # The keyword is ``images`` (plural); the image is wrapped in a list so
    # the processor receives a batch containing one picture.
    inputs = processor(
        images=[image],
        input_points=input_points,
        return_tensors="pt",
    )

    assert "pixel_values" in inputs, "Processor did not return expected keys."

    # Move the tensors to wherever the model lives instead of hard-coding a
    # device name, so inputs and weights always end up on the same device.
    device = model.device
    inputs = {name: tensor.to(device) for name, tensor in inputs.items()}

    # Inference only: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model(**inputs)

    masks = processor.image_processor.post_process_masks(
        outputs.pred_masks.cpu(),
        inputs["original_sizes"].cpu(),
        inputs["reshaped_input_sizes"].cpu(),
    )

    # argmax() without a dim returns an index into the flattened scores.
    # NOTE(review): this presumably assumes a single image with a single
    # prompt group, so the flat index equals the candidate index - confirm.
    best_mask = masks[0][0][outputs.iou_scores.argmax()]

    # The mask is deliberately inverted before being returned.
    return ~best_mask.cpu().numpy()

# Download the sample image. A timeout keeps the script from hanging on a
# stalled connection, and raise_for_status() surfaces HTTP errors instead of
# handing an error page to the image decoder.
img_url = "https://huggingface.co/ybelkada/segment-anything/resolve/main/assets/car.png"
response = requests.get(img_url, stream=True, timeout=30)
response.raise_for_status()

# The prompt points below were specified against a 512x512 version of the
# image, so resize to that size to keep the coordinates meaningful.
raw_image = Image.open(response.raw).convert("RGB").resize((512, 512))

# These are the coordinates of two points on the car
input_points = [[[150, 170], [300, 250]]]

mask = get_processed_inputs(raw_image, input_points)

# Render the mask as an RGBA overlay, shrink it to a thumbnail and save it.
oimage = Image.fromarray(mask_to_rgb(mask)).resize((128, 128))
oimage.save("car_out.png")

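It seems that the argument passed to the processor is incorrect: the processor expects the keyword `images=` (plural), so with `image=image` it never receives the picture and sees `None` instead, which triggers the ValueError. The following post was helpful.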