I am trying to run a model using @xenova/transformers,
but I am getting the following error:
Error: Failed to load model because protobuf parsing failed.
at new OnnxruntimeSessionHandler (/root/moondream/node_modules/onnxruntime-node/dist/backend.js:27:92)
at Immediate.<anonymous> (/root/moondream/node_modules/onnxruntime-node/dist/backend.js:64:29)
at process.processImmediate (node:internal/timers:483:21)
root@recollect-ubuntu-8gb-hel1-1:~/moondream# node index
This is the code:
// FIX(review): the per-module `dtype` map ({ embed_tokens, vision_encoder,
// decoder_model_merged }) and the `device` option are Transformers.js v3
// features. The v2 package ("@xenova/transformers") does not understand them,
// so onnxruntime-node ends up loading an unsupported/full-precision ONNX file
// and dies with "protobuf parsing failed" — the exact error reported above.
// Install the v3 package instead: `npm install @huggingface/transformers`.
import {
  AutoProcessor,
  AutoTokenizer,
  Moondream1ForConditionalGeneration,
  RawImage,
} from "@huggingface/transformers";

// Load processor, tokenizer and model
const model_id = "Xenova/moondream2";
try {
  const processor = await AutoProcessor.from_pretrained(model_id);
  const tokenizer = await AutoTokenizer.from_pretrained(model_id);
  const model = await Moondream1ForConditionalGeneration.from_pretrained(
    model_id,
    {
      // Per-module quantization: smaller downloads / lower memory at some
      // accuracy cost. These keys are only honored by Transformers.js v3.
      dtype: {
        embed_tokens: "fp16", // or 'fp32'
        vision_encoder: "fp16", // or 'q8'
        decoder_model_merged: "q4", // or 'q4f16' or 'q8'
      },
      device: "cpu",
    }
  );

  // Prepare text inputs. The `<image>` placeholder marks where the vision
  // embeddings are spliced into the prompt.
  const prompt = "Describe this image.";
  const text = `<image>\n\nQuestion: ${prompt}\n\nAnswer:`;
  const text_inputs = tokenizer(text);

  // Prepare vision inputs
  const url =
    "https://huggingface.co/vikhyatk/moondream1/resolve/main/assets/demo-1.jpg";
  const image = await RawImage.fromURL(url);
  const vision_inputs = await processor(image);

  // Generate response (greedy decoding, capped at 64 new tokens)
  const output = await model.generate({
    ...text_inputs,
    ...vision_inputs,
    do_sample: false,
    max_new_tokens: 64,
  });
  const decoded = tokenizer.batch_decode(output, { skip_special_tokens: false });
  console.log("answer new", decoded);
  // [
  //   '<|endoftext|><image>\n\n' +
  //   'Question: Describe this image.\n\n' +
  //   'Answer: A hand is holding a white book titled "The Little Book of Deep Learning" against a backdrop of a balcony with a railing and a view of a building and trees.<|endoftext|>'
  // ]
} catch (error) {
  console.log("error", error);
}
System:
node v22.4.0
ubuntu 24.04 LTS