Hmm… It works…
import os

from huggingface_hub import InferenceClient
# Client for a vision-language chat model, routed through the Fireworks AI
# inference provider.
client = InferenceClient(
    model="Qwen/Qwen2.5-VL-32B-Instruct",
    provider="fireworks-ai",
    # Take the access token from the HF_TOKEN environment variable instead of
    # embedding it in the script, so the secret never lands in shared or
    # committed code. If the variable is unset this passes None.
    # NOTE(review): with None the library is expected to fall back to its own
    # default token lookup (e.g. a cached login) - confirm for your version.
    api_key=os.environ.get("HF_TOKEN"),
)

# Publicly hosted test image that the model is asked to describe.
IMAGE_URL = "https://upload.wikimedia.org/wikipedia/commons/4/4d/Cat_November_2010-1a.jpg"

# A single user turn whose content mixes a text instruction with an image
# reference passed by URL.
resp = client.chat.completions.create(
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe the image in one sentence."},
                {"type": "image_url", "image_url": {"url": IMAGE_URL}},
            ],
        }
    ],
    max_tokens=128,
)

# Print the text of the first (and only requested) completion choice.
#
# Example reply recorded with the original script (the model answered in
# Chinese; translated here):
#   "This is a photo of a cat. The cat has a striped coat, mainly alternating
#   brown and black stripes. Its ears are upright, its eyes are green, and it
#   is looking straight at the camera. The cat's whiskers are clearly visible,
#   and its front paws rest on a light-colored surface, possibly stone or
#   concrete. The background is blurred, showing some branches and sky,
#   suggesting the cat may be outdoors. The overall image gives a calm and
#   focused feeling, and the cat's posture appears alert and curious."
print(resp.choices[0].message.content)