DeepSpeed MII pipeline issue

Hello,

I am trying to run the chat component of the InternVL2-1B model using the DeepSpeed-MII library (mii.pipeline("OpenGVLab/InternVL2-1B")). I am using the following code:

# # Import necessary libraries
import time
from io import BytesIO
from urllib.request import urlopen

import torch
import torchvision.transforms as T
from PIL import Image
from transformers import AutoModel, AutoTokenizer
from mii import pipeline


# Constants for image transformation
# Channel-wise mean/std of the ImageNet training set — the standard
# normalization values for models pretrained on ImageNet.
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD = (0.229, 0.224, 0.225)
# Target device for model inputs; assumes a CUDA GPU is present — TODO confirm.
device = "cuda:0"

def build_transform(input_size):
    """Build the ImageNet-style preprocessing pipeline.

    Args:
        input_size: Target side length (pixels) of the square resize.

    Returns:
        A torchvision ``Compose`` that converts the image to RGB, resizes it
        to ``(input_size, input_size)`` with bicubic interpolation, converts
        it to a tensor, and normalizes with the ImageNet mean/std constants.
    """
    MEAN, STD = IMAGENET_MEAN, IMAGENET_STD
    transform = T.Compose([
        # Some sources (e.g. paletted or RGBA images) are not RGB; convert first.
        T.Lambda(lambda img: img.convert('RGB') if img.mode != 'RGB' else img),
        # Fixed: `input_siaaze` typo (NameError) and the bare `InterpolationMode`
        # reference, which was never imported — it is available on the already
        # imported `torchvision.transforms` module as `T.InterpolationMode`.
        T.Resize((input_size, input_size), interpolation=T.InterpolationMode.BICUBIC),
        T.ToTensor(),
        T.Normalize(mean=MEAN, std=STD)
    ])
    return transform

def load_image(image_url, input_size=448):
    """Download an image over HTTP(S) and preprocess it for the model.

    Args:
        image_url: URL of the image to fetch.
        input_size: Square side length passed to ``build_transform``.

    Returns:
        A float tensor of shape ``(1, 3, input_size, input_size)``.
    """
    # Fixed: the original called `requests.get` and `BytesIO` without
    # importing either one (NameError at runtime). Use the stdlib urllib
    # instead so no extra dependency is required, and close the response.
    with urlopen(image_url) as response:
        image = Image.open(BytesIO(response.read())).convert('RGB')
    transform = build_transform(input_size=input_size)
    # unsqueeze(0) adds the batch dimension expected by the model.
    pixel_values = transform(image).unsqueeze(0)
    return pixel_values

#DeepSpeed MII pipeline initialization for InternVL2-1B
# NOTE(review): this call is the source of the reported
# "ValueError: Unsupported model type internvl_chat" — DeepSpeed-MII appears
# not to register the `internvl_chat` model type, so no argument tweak here
# will make it load; confirm against the supported-model list in the
# DeepSpeed-MII documentation.
pipe = pipeline(
    "OpenGVLab/InternVL2-1B",
    task="multimodal",
    #"model_type": "internvl_chat",
    # framework="transformers"
)

# # Preprocess the image
img_url = "https://img.freepik.com/premium-photo/austronaut_849761-48156.jpg"
# Cast to bfloat16 and move to the GPU before inference.
pixel_values = load_image(img_url).to(torch.bfloat16).to(device)

# Load tokenizer from the model
# trust_remote_code is required because InternVL2 ships custom model code.
tokenizer = AutoTokenizer.from_pretrained('OpenGVLab/InternVL2-1B', trust_remote_code=True, use_fast=False)

# Define the input question for the chat
question = "What does this image mean?"

# Define generation parameters
generation_config = {
    "max_new_tokens": 1024, 
    "do_sample": True, 
    "pad_token_id": tokenizer.pad_token_id  # Ensure pad_token_id is set
}

# Generate the response using the DeepSpeed MII pipeline
# NOTE(review): `time` is used here but never imported in this script — it
# must be added to the imports at the top of the file.
start_time = time.time()
# NOTE(review): `.chat(tokenizer, pixel_values, ...)` is the InternVL
# remote-code API on the transformers model object; a MII pipeline object is
# not shown to expose it — verify against the DeepSpeed-MII pipeline API.
response = pipe.chat(tokenizer, pixel_values, question, generation_config)
end_time = time.time()

# Output the conversation
print(f'User: {question}\nAssistant: {response}')
print(".........................")
print(f"Time taken for generating output: {end_time - start_time:.2f} seconds")

But this code gives me an error:

ValueError: Unsupported model type internvl_chat

Can you please guide me on what might be wrong in this code? How can I run the chat component of the InternVL2-1B model using the DeepSpeed-MII pipeline?

1 Like

I thought it might be a bug of some sort, so I searched for it, but it seems possible that it really isn’t supported.

You may have to use another library, or submit an issue on the DeepSpeed-MII GitHub repository.