from PIL import Image
import numpy as np
# Load the input image and resize it to 512x512.
# Image.open raises on failure (missing or unreadable file) and never
# returns None, so no separate None check is needed here.
raw_image = Image.open("car.png").resize((512, 512))

# Prompt points as [x, y] pixel coordinates in the resized image.
# NOTE(review): the nesting is presumably [image][point group][point], as
# the SAM processor expects -- confirm against the processor in use.
input_points = [[[150, 170], [300, 250]]]
# NOTE: the SAM `processor` (and `model`) must be initialized before this
# function is called; neither is defined in this section of the file.
def get_processed_inputs(image, input_points, device="cuda"):
    """Run the SAM processor on an image and move the outputs to a device.

    Args:
        image: PIL image to segment.
        input_points: Prompt points, forwarded unchanged to the processor.
        device: Device the returned values are moved to. Defaults to
            "cuda", which was previously hard-coded.

    Returns:
        A plain dict mapping every key of the processor output to its
        value moved to ``device``.

    Raises:
        AssertionError: If ``image`` is not a PIL image, or if the
            processor output has no ``"pixel_values"`` entry.
    """
    assert isinstance(image, Image.Image), f"Expected PIL Image but got {type(image)}"
    # The image is passed positionally. Assuming `processor` is a Hugging
    # Face SamProcessor, its parameter is named `images`, so the former
    # `image=` keyword never bound to it.
    inputs = processor(
        image,
        input_points=input_points,
        return_tensors="pt",
    )
    assert "pixel_values" in inputs, "Processor did not return expected keys."
    return {k: v.to(device) for k, v in inputs.items()}
# Process input: build the model inputs from the image and prompt points.
inputs = get_processed_inputs(raw_image, input_points)
# NOTE(review): this is inference only -- consider running it under
# torch.no_grad() where `model` is set up, to avoid tracking gradients.
outputs = model(**inputs)

# Post-process masks: map the predicted masks back to the original image
# size, using the sizes the processor recorded. Everything is moved to the
# CPU first.
masks = processor.image_processor.post_process_masks(
    outputs.pred_masks.cpu(),
    inputs["original_sizes"].cpu(),
    inputs["reshaped_input_sizes"].cpu(),
)

# Select the candidate mask with the highest predicted IoU score.
# NOTE(review): argmax() runs over the flattened score tensor, so it is only
# a valid mask index for a single image with a single point group (as
# here) -- confirm before reusing this with batched prompts.
best_mask = masks[0][0][outputs.iou_scores.argmax()]
mask_array = best_mask.cpu().numpy()
def mask_to_rgb(mask, color=(0, 255, 0, 127)):
    """Render a binary mask as a colored overlay on a transparent background.

    Despite the name, the result has four channels (RGBA), not three.

    Args:
        mask: Array-like of any shape. Elements equal to 1 (including
            boolean True) are treated as foreground.
        color: RGBA value written to foreground pixels. Defaults to
            half-transparent green.

    Returns:
        A ``uint8`` array of shape ``mask.shape + (4,)``. Foreground pixels
        hold ``color``; all other pixels are zero (fully transparent).
    """
    # Accept lists and tuples as well as ndarrays.
    mask = np.asarray(mask)
    overlay = np.zeros(mask.shape + (4,), dtype=np.uint8)
    overlay[mask == 1] = color
    return overlay
# Turn the selected mask into an RGBA overlay, shrink it to 128x128, and
# write it out as a PNG.
result_image = Image.fromarray(mask_to_rgb(mask_array)).resize((128, 128))
result_image.save("result_mask.png")
# Solution provided by Triskel Data Deterministic AI.