IndexError: index out of range in self on train()

I’m trying to train a RoBERTa model from scratch, based on this notebook (and this article) with my own data.

VOCAB_SIZE = 50265
TOKENIZER_FOLDER = '...' #folder in which I'm saving the tokenizer
MAX_LEN = 512
TRAIN_EPOCHS = 3
TRAIN_BATCH_SIZE = 4
VALID_BATCH_SIZE = 4
config = RobertaConfig(
    vocab_size=VOCAB_SIZE,
    max_position_embeddings=MAX_LEN,
    num_attention_heads=8,
    num_hidden_layers=6,
    type_vocab_size=2,
)

model = RobertaForMaskedLM(config=config)

A tokenizer, previously trained on my own corpus:

tokenizer = RobertaTokenizerFast.from_pretrained(TOKENIZER_FOLDER, max_len=MAX_LEN)

class CustomDataset(Dataset):
    def __init__(self, df, tokenizer):
        # or use the RobertaTokenizer from `transformers` directly.
        self.examples = []
        # For every value in the dataframe
        for example in df.values:
            #
            x=tokenizer.encode_plus(example, max_length = MAX_LEN, truncation=True, padding=True)
            self.examples += [x.input_ids]

    def __len__(self):
        return len(self.examples)

    def __getitem__(self, i):
        # We’ll pad at the batch level.
        return torch.tensor(self.examples[i])

train_df and test_df are previously loaded dataframes:

# Create the train and evaluation dataset
train_dataset = CustomDataset(train_df['text'], tokenizer)
eval_dataset = CustomDataset(test_df['text'], tokenizer)
# Define the training arguments
training_args = TrainingArguments(
    output_dir=os.path.abspath(TOKENIZER_FOLDER),
    overwrite_output_dir=True,
    evaluation_strategy = 'steps',
    num_train_epochs=TRAIN_EPOCHS,
    per_device_train_batch_size=TRAIN_BATCH_SIZE,
    per_device_eval_batch_size=VALID_BATCH_SIZE,
    eval_steps=4096,
    save_strategy='steps',
    save_steps=4096,
    metric_for_best_model='eval_loss',
    save_total_limit=15,
    bf16=True,
    bf16_full_eval=True,
    no_cuda=True,
    load_best_model_at_end = True
)
# Create the trainer for our model
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
)

I’m having the following error:

File "/cfs/home/u021274/higo/RoB3RTa.py", line 134, in <module>
    trainer.train()
  File "/cfs/home/u021274/higo/myenv/lib64/python3.10/site-packages/transformers/trainer.py", line 1645, in train
    return inner_training_loop(
  File "/cfs/home/u021274/higo/myenv/lib64/python3.10/site-packages/transformers/trainer.py", line 1938, in _inner_training_loop
    tr_loss_step = self.training_step(model, inputs)
  File "/cfs/home/u021274/higo/myenv/lib64/python3.10/site-packages/transformers/trainer.py", line 2759, in training_step
    loss = self.compute_loss(model, inputs)
  File "/cfs/home/u021274/higo/myenv/lib64/python3.10/site-packages/transformers/trainer.py", line 2784, in compute_loss
    outputs = model(**inputs)
  File "/cfs/home/u021274/higo/myenv/lib64/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/cfs/home/u021274/higo/myenv/lib64/python3.10/site-packages/accelerate/utils/operations.py", line 553, in forward
    return model_forward(*args, **kwargs)
  File "/cfs/home/u021274/higo/myenv/lib64/python3.10/site-packages/accelerate/utils/operations.py", line 541, in __call__
    return convert_to_fp32(self.model_forward(*args, **kwargs))
  File "/cfs/home/u021274/higo/myenv/lib64/python3.10/site-packages/torch/amp/autocast_mode.py", line 14, in decorate_autocast
    return func(*args, **kwargs)
  File "/cfs/home/u021274/higo/myenv/lib64/python3.10/site-packages/transformers/models/roberta/modeling_roberta.py", line 1100, in forward
    outputs = self.roberta(
  File "/cfs/home/u021274/higo/myenv/lib64/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/cfs/home/u021274/higo/myenv/lib64/python3.10/site-packages/transformers/models/roberta/modeling_roberta.py", line 845, in forward
    embedding_output = self.embeddings(
  File "/cfs/home/u021274/higo/myenv/lib64/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/cfs/home/u021274/higo/myenv/lib64/python3.10/site-packages/transformers/models/roberta/modeling_roberta.py", line 128, in forward
    position_embeddings = self.position_embeddings(position_ids)
  File "/cfs/home/u021274/higo/myenv/lib64/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/cfs/home/u021274/higo/myenv/lib64/python3.10/site-packages/torch/nn/modules/sparse.py", line 162, in forward
    return F.embedding(
  File "/cfs/home/u021274/higo/myenv/lib64/python3.10/site-packages/torch/nn/functional.py", line 2210, in embedding
    return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
IndexError: index out of range in self

I saw in another topics here on forum that this could be because my input is larger than the value accepted by the model (512 tokens), but how could be this, while I have truncation=True, padding=True and the max sequence length supported by RoBERTa?