Hi everyone,
I am trying to load an image using llama-index but I get the error below. Does anyone have any idea what is causing it?
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
File C:\ProgramData\anaconda3\lib\site-packages\transformers\feature_extraction_utils.py:164, in BatchFeature.convert_to_tensors(self, tensor_type)
163 if not is_tensor(value):
--> 164 tensor = as_tensor(value)
166 self[key] = tensor
File C:\ProgramData\anaconda3\lib\site-packages\transformers\feature_extraction_utils.py:146, in BatchFeature.convert_to_tensors.<locals>.as_tensor(value)
145 value = np.array(value)
--> 146 return torch.tensor(value)
RuntimeError: Could not infer dtype of numpy.float32
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
Cell In[5], line 43
41 ImageReader = download_loader("ImageReader")
42 loader = ImageReader(text_type = "key_value")
---> 43 documents = loader.load_data(file=Path(path))
45 index = GPTSimpleVectorIndex(documents,service_context)
47 question = "what is the total amount of invoice?"
File C:\ProgramData\anaconda3\lib\site-packages\llama_index\readers\llamahub_modules/file/image/base.py:88, in ImageReader.load_data(self, file, extra_info)
83 task_prompt = "<s_cord-v2>"
84 decoder_input_ids = processor.tokenizer(
85 task_prompt, add_special_tokens=False, return_tensors="pt",padding=True
86 ).input_ids
---> 88 pixel_values = processor(image, return_tensors="pt").pixel_values
90 outputs = model.generate(
91 pixel_values.to(device),
92 decoder_input_ids=decoder_input_ids.to(device),
(...)
100 return_dict_in_generate=True,
101 )
103 sequence = processor.batch_decode(outputs.sequences)[0]
File C:\ProgramData\anaconda3\lib\site-packages\transformers\models\donut\processing_donut.py:69, in DonutProcessor.__call__(self, *args, **kwargs)
66 raise ValueError("You need to specify either an `images` or `text` input to process.")
68 if images is not None:
---> 69 inputs = self.feature_extractor(images, *args, **kwargs)
70 if text is not None:
71 encodings = self.tokenizer(text, **kwargs)
File C:\ProgramData\anaconda3\lib\site-packages\transformers\models\donut\feature_extraction_donut.py:209, in DonutFeatureExtractor.__call__(self, images, return_tensors, random_padding, **kwargs)
207 # return as BatchFeature
208 data = {"pixel_values": images}
--> 209 encoded_inputs = BatchFeature(data=data, tensor_type=return_tensors)
211 return encoded_inputs
File C:\ProgramData\anaconda3\lib\site-packages\transformers\feature_extraction_utils.py:75, in BatchFeature.__init__(self, data, tensor_type)
73 def __init__(self, data: Optional[Dict[str, Any]] = None, tensor_type: Union[None, str, TensorType] = None):
74 super().__init__(data)
---> 75 self.convert_to_tensors(tensor_type=tensor_type)
File C:\ProgramData\anaconda3\lib\site-packages\transformers\feature_extraction_utils.py:170, in BatchFeature.convert_to_tensors(self, tensor_type)
168 if key == "overflowing_values":
169 raise ValueError("Unable to create tensor returning overflowing values of different lengths. ")
--> 170 raise ValueError(
171 "Unable to create tensor, you should probably activate padding "
172 "with 'padding=True' to have batched tensors with the same length."
173 )
175 return self
ValueError: Unable to create tensor, you should probably activate padding with 'padding=True' to have batched tensors with the same length.