Hello,
I am trying to run the chat component of the InternVL2-1B model with the DeepSpeed-MII library (mii.pipeline("OpenGVLab/InternVL2-1B")). I am using the following code:
# Import necessary libraries
import time
from io import BytesIO

import requests
import torch
import torchvision.transforms as T
from torchvision.transforms import InterpolationMode
from PIL import Image
from transformers import AutoTokenizer
from mii import pipeline
# Constants for image transformation
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD = (0.229, 0.224, 0.225)
device = "cuda:0"
def build_transform(input_size):
    MEAN, STD = IMAGENET_MEAN, IMAGENET_STD
    transform = T.Compose([
        T.Lambda(lambda img: img.convert('RGB') if img.mode != 'RGB' else img),
        T.Resize((input_size, input_size), interpolation=InterpolationMode.BICUBIC),
        T.ToTensor(),
        T.Normalize(mean=MEAN, std=STD)
    ])
    return transform
def load_image(image_url, input_size=448):
    # Download the image and convert it to RGB
    response = requests.get(image_url)
    image = Image.open(BytesIO(response.content)).convert('RGB')
    transform = build_transform(input_size=input_size)
    pixel_values = transform(image).unsqueeze(0)  # Add batch dimension
    return pixel_values
# DeepSpeed-MII pipeline initialization for InternVL2-1B
pipe = pipeline(
    "OpenGVLab/InternVL2-1B",
    task="multimodal",
    # model_type="internvl_chat",
    # framework="transformers",
)
# Preprocess the image
img_url = "https://img.freepik.com/premium-photo/austronaut_849761-48156.jpg"
pixel_values = load_image(img_url).to(torch.bfloat16).to(device)
# Load tokenizer from the model
tokenizer = AutoTokenizer.from_pretrained('OpenGVLab/InternVL2-1B', trust_remote_code=True, use_fast=False)
# Define the input question for the chat
question = "What does this image mean?"
# Define generation parameters
generation_config = {
    "max_new_tokens": 1024,
    "do_sample": True,
    "pad_token_id": tokenizer.pad_token_id  # Ensure pad_token_id is set
}
# Generate the response using the DeepSpeed MII pipeline
start_time = time.time()
response = pipe.chat(tokenizer, pixel_values, question, generation_config)
end_time = time.time()
# Output the conversation
print(f'User: {question}\nAssistant: {response}')
print(".........................")
print(f"Time taken for generating output: {end_time - start_time:.2f} seconds")
However, this code fails with the following error:
ValueError: Unsupported model type internvl_chat
Can you please guide me on what might be wrong in this code? How can I run the chat component of the InternVL2-1B model through the DeepSpeed-MII pipeline?
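For reference, I can run the chat component directly through transformers with trust_remote_code, roughly as sketched below (it reuses load_image(), device, img_url, and question from the code above), and this is what I am hoping to reproduce with DeepSpeed-MII. The snippet follows the InternVL2 model card, so please treat it as a sketch rather than verified code.

# Plain-transformers baseline (sketch based on the InternVL2 model card)
import torch
from transformers import AutoModel, AutoTokenizer

model = AutoModel.from_pretrained(
    "OpenGVLab/InternVL2-1B",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
).eval().to(device)
tokenizer = AutoTokenizer.from_pretrained(
    "OpenGVLab/InternVL2-1B", trust_remote_code=True, use_fast=False
)

# pixel_values prepared exactly as in load_image() above
pixel_values = load_image(img_url).to(torch.bfloat16).to(device)
generation_config = {"max_new_tokens": 1024, "do_sample": True}

# model.chat() is the custom chat method shipped with the InternVL2 checkpoint
response = model.chat(tokenizer, pixel_values, question, generation_config)
print(f'User: {question}\nAssistant: {response}')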