import numpy as np
import requests
import torch
from PIL import Image
from transformers import SamModel, SamProcessor
# Load the pretrained SAM (Segment Anything) checkpoint and its matching
# processor once at module import; downloads weights on first run.
# NOTE(review): device is hard-coded to "cuda" -- this script requires an
# NVIDIA GPU and will fail on CPU-only machines.
model = SamModel.from_pretrained("facebook/sam-vit-large").to("cuda")
processor = SamProcessor.from_pretrained("facebook/sam-vit-large")
def mask_to_rgb(mask):
    """Turn a binary mask into an RGBA image: semi-transparent green where
    the mask is 1, fully transparent everywhere else.

    Args:
        mask: 2-D array of 0/1 (or bool) values.

    Returns:
        uint8 array of shape ``mask.shape + (4,)`` -- RGBA pixels.
    """
    rgba = np.zeros((*mask.shape, 4), dtype=np.uint8)
    # Green at half opacity marks the selected region.
    rgba[mask == 1] = (0, 255, 0, 127)
    return rgba
def get_processed_inputs(image, input_points):
    """Segment ``image`` with SAM using point prompts and return the
    inverted best mask.

    Args:
        image: PIL RGB image to segment.
        input_points: nested list shaped (1, num_points, 2) of (x, y)
            pixel coordinates that lie on the object to segment.

    Returns:
        A (H, W) numpy bool array where True marks the *background*
        (the highest-IoU mask, inverted for overlay/replacement use).

    Raises:
        TypeError: if ``image`` is not a PIL Image.
        KeyError: if the processor output is missing ``pixel_values``.
    """
    # Raise instead of assert: asserts are stripped under `python -O`.
    if not isinstance(image, Image.Image):
        raise TypeError(f"Expected PIL Image but got {type(image)}")

    # Fix: pass the PIL image directly -- wrapping it in a one-element list
    # was the incorrect image-processor argument flagged in the original.
    inputs = processor(
        images=image,
        input_points=input_points,
        return_tensors="pt",
    )
    if "pixel_values" not in inputs:
        raise KeyError("Processor did not return expected keys.")

    # Keep the BatchEncoding (instead of rebuilding a plain dict) and move
    # every tensor to the GPU in one call.
    inputs = inputs.to("cuda")

    # Inference only -- skip building an autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)

    # Upscale the low-resolution predicted masks back to the original
    # image resolution.
    masks = processor.image_processor.post_process_masks(
        outputs.pred_masks.cpu(),
        inputs["original_sizes"].cpu(),
        inputs["reshaped_input_sizes"].cpu(),
    )

    # masks[0][0] holds the candidate masks for the single input image;
    # keep the one with the highest predicted IoU score.
    best_mask = masks[0][0][outputs.iou_scores.argmax()]

    # Invert so True = background, matching mask_to_rgb's overlay use.
    return ~best_mask.cpu().numpy()
# Download the demo car image from the Hugging Face assets bucket.
img_url = "https://huggingface.co/ybelkada/segment-anything/resolve/main/assets/car.png"
response = requests.get(img_url, stream=True)
raw_image = Image.open(response.raw).convert("RGB")

# Two (x, y) pixel coordinates, both lying on the car, prompt the model.
input_points = [[[150, 170], [300, 250]]]

# Segment, colorize the mask, and save a 128x128 thumbnail.
mask = get_processed_inputs(raw_image, input_points)
oimage = Image.fromarray(mask_to_rgb(mask)).resize((128, 128))
oimage.save("car_out.png")
# NOTE: The image-processor argument was being passed incorrectly; the forum post referenced above was helpful in finding the fix.