Hi,
I am using the LayoutLMv2 model and trying to fine-tune it on my own dataset, but I am getting an error. The dataset contains file_name, words, boxes, and word_labels, and the error occurs while preprocessing the data. Can someone please help me?
# Processor with built-in OCR disabled: the dataset already supplies
# words and bounding boxes, so apply_ocr=False.
processor = LayoutLMv2Processor.from_pretrained(path, apply_ocr = False)
tokenizer = LayoutLMv2TokenizerFast.from_pretrained(path)
# Token-classification head sized to the label set (label2id is defined
# elsewhere in the notebook; assumed to map label name -> id).
model = LayoutLMv2ForTokenClassification.from_pretrained(path, num_labels=len(label2id))
# Use GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# Target schema for the mapped dataset. The fixed shapes (512 boxes,
# 3x224x224 image) rely on the processor being called with
# padding="max_length" and truncation=True.
features = Features({
'image': Array3D(dtype="int64", shape=(3, 224, 224)),  # resized page image, channels-first
'input_ids': Sequence(feature=Value(dtype='int64')),
'attention_mask': Sequence(Value(dtype='int64')),
'token_type_ids': Sequence(Value(dtype='int64')),
'bbox': Array2D(dtype="int64", shape=(512, 4)),  # one (x0, y0, x1, y1) box per token
# NOTE(review): ClassLabel expects integer ids; the word_labels passed to
# the processor must already be ints consistent with `labels` -- confirm.
'labels': Sequence(ClassLabel(names=labels)),
})
def preprocess_data(examples):
    """Encode one batch of examples into model-ready LayoutLMv2 inputs.

    `examples` is a batched dict of columns ('File_name', 'words', 'boxes',
    'Label'); returns the processor's BatchEncoding (input_ids,
    attention_mask, token_type_ids, bbox, image, labels).
    """
    # BUG FIX: the original wrapped the file names in set(), which both
    # deduplicates and reorders them, so the images list no longer lines up
    # (in length or order) with the words/boxes/labels of the batch. That
    # misalignment makes the processor's output fail to cast to the declared
    # Features ("Couldn't cast array of type int64 to Sequence(...)").
    # Open exactly one image per example, preserving batch order.
    images = [Image.open(file_name).convert("RGB") for file_name in examples['File_name']]
    words = examples['words']
    boxes = examples['boxes']
    # NOTE(review): if 'Label' holds string names rather than integer ids,
    # map them through label2id before passing to the processor -- confirm.
    word_labels = examples['Label']
    encoded_inputs = processor(images, words, boxes=boxes, word_labels=word_labels, padding="max_length", truncation=True)
    return encoded_inputs
train_dataset = dataset.map(preprocess_data, remove_columns=dataset.column_names, features=features, batched=True)
This is the error I am getting:
TypeError Traceback (most recent call last)
d:\Working\Trans_LayoutLMV2_Key-Value.ipynb Cell 21 in <cell line: 20>()
17 encoded_inputs = processor(images, words, boxes=boxes, word_labels=word_labels, padding="max_length", truncation=True)
18 return encoded_inputs
---> 20 train_dataset = dataset.map(preprocess_data, remove_columns=dataset.column_names, features=features, batched=True)
File c:\Users\name\.conda\envs\detectron_env\lib\site-packages\datasets\arrow_dataset.py:2376, in Dataset.map(self, function, with_indices, with_rank, input_columns, batched, batch_size, drop_last_batch, remove_columns, keep_in_memory, load_from_cache_file, cache_file_name, writer_batch_size, features, disable_nullable, fn_kwargs, num_proc, suffix_template, new_fingerprint, desc)
2373 disable_tqdm = not logging.is_progress_bar_enabled()
2375 if num_proc is None or num_proc == 1:
-> 2376 return self._map_single(
2377 function=function,
2378 with_indices=with_indices,
2379 with_rank=with_rank,
2380 input_columns=input_columns,
2381 batched=batched,
2382 batch_size=batch_size,
2383 drop_last_batch=drop_last_batch,
2384 remove_columns=remove_columns,
2385 keep_in_memory=keep_in_memory,
2386 load_from_cache_file=load_from_cache_file,
2387 cache_file_name=cache_file_name,
2388 writer_batch_size=writer_batch_size,
2389 features=features,
2390 disable_nullable=disable_nullable,
2391 fn_kwargs=fn_kwargs,
2392 new_fingerprint=new_fingerprint,
2393 disable_tqdm=disable_tqdm,
2394 desc=desc,
2395 )
2396 else:
2398 def format_cache_file_name(cache_file_name, rank):
File c:\Users\name\.conda\envs\detectron_env\lib\site-packages\datasets\arrow_dataset.py:551, in transmit_tasks.<locals>.wrapper(*args, **kwargs)
549 self: "Dataset" = kwargs.pop("self")
...
TypeError: Couldn't cast array of type
int64
to
Sequence(feature=Value(dtype='int64', id=None), length=-1, id=None)