TypeError: map() got an unexpected keyword argument 'num_proc'

I want to train a model using SFTTrainer, using a dataset that streams the data from a local file (I don’t want to load everything in memory). The dataset seems to be working fine, but having and error with SFTTrainer. Here’s the code:

features= Features({'doc_id': Value('string'), 'text': Value('string')})
train_dataset = load_dataset('json', data_files="data/interim/collection.jsonl", split='train', streaming=True,features=features)

batch_size = 16
gradient_accumulation_steps = 1
num_train_epochs = 2

training_args = TrainingArguments(
    output_dir="output/",
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size//2,
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    warmup_ratio = 0.1,
    gradient_accumulation_steps=gradient_accumulation_steps,
    gradient_checkpointing=True,
    evaluation_strategy="epoch",
    num_train_epochs=num_train_epochs,
    # logging strategies 
    logging_strategy="steps",
    logging_steps=1,
    save_strategy="epoch", # saving is done at the end of each epoch
    remove_unused_columns = True
)

trainer = SFTTrainer(
    model,    
    train_dataset=train_dataset,
    # eval_dataset=eval_dataset,
    # packing=True, # pack samples together for efficient training
    dataset_text_field="text",
    data_collator=collator,
    max_seq_length=512, # maximum packed length 
    args=training_args,
    # formatting_func=formatting_func, # format samples with a model schema
)

And the error:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[22], line 1
----> 1 trainer = SFTTrainer(
      2     model,    
      3     train_dataset=train_dataset,
      4     # eval_dataset=eval_dataset,
      5     # packing=True, # pack samples together for efficient training
      6     dataset_text_field="text",
      7     data_collator=collator,
      8     max_seq_length=512, # maximum packed length 
      9     args=training_args,
     10     # formatting_func=formatting_func, # format samples with a model schema
     11 )

File /notebooks/env/lib/python3.9/site-packages/trl/trainer/sft_trainer.py:283, in SFTTrainer.__init__(self, model, args, data_collator, train_dataset, eval_dataset, tokenizer, model_init, compute_metrics, callbacks, optimizers, preprocess_logits_for_metrics, peft_config, dataset_text_field, packing, formatting_func, max_seq_length, infinite, num_of_sequences, chars_per_token, dataset_num_proc, dataset_batch_size, neftune_noise_alpha, model_init_kwargs, dataset_kwargs, eval_packing)
    281     dataset_kwargs = {}
    282 if train_dataset is not None:
--> 283     train_dataset = self._prepare_dataset(
    284         train_dataset,
    285         tokenizer,
    286         packing,
    287         dataset_text_field,
    288         max_seq_length,
    289         formatting_func,
    290         num_of_sequences,
    291         chars_per_token,
    292         remove_unused_columns=args.remove_unused_columns if args is not None else True,
    293         **dataset_kwargs,
    294     )
    295 if eval_dataset is not None:
    296     _multiple = isinstance(eval_dataset, dict)

File /notebooks/env/lib/python3.9/site-packages/trl/trainer/sft_trainer.py:424, in SFTTrainer._prepare_dataset(self, dataset, tokenizer, packing, dataset_text_field, max_seq_length, formatting_func, num_of_sequences, chars_per_token, remove_unused_columns, append_concat_token, add_special_tokens, skip_prepare_dataset)
    421     return dataset
    423 if not packing:
--> 424     return self._prepare_non_packed_dataloader(
    425         tokenizer,
    426         dataset,
    427         dataset_text_field,
    428         max_seq_length,
    429         formatting_func,
    430         add_special_tokens,
    431         remove_unused_columns,
    432     )
    434 else:
    435     return self._prepare_packed_dataloader(
    436         tokenizer,
    437         dataset,
   (...)
    444         add_special_tokens,
    445     )

File /notebooks/env/lib/python3.9/site-packages/trl/trainer/sft_trainer.py:492, in SFTTrainer._prepare_non_packed_dataloader(self, tokenizer, dataset, dataset_text_field, max_seq_length, formatting_func, add_special_tokens, remove_unused_columns)
    486 if not remove_unused_columns and len(extra_columns) > 0:
    487     warnings.warn(
    488         "You passed `remove_unused_columns=False` on a non-packed dataset. This might create some issues with the default collator and yield to errors. If you want to "
    489         f"inspect dataset other columns (in this case {extra_columns}), you can subclass `DataCollatorForLanguageModeling` in case you used the default collator and create your own data collator in order to inspect the unused dataset columns."
    490     )
--> 492 tokenized_dataset = dataset.map(
    493     tokenize,
    494     batched=True,
    495     remove_columns=dataset.column_names if remove_unused_columns else None,
    496     num_proc=self.dataset_num_proc,
    497     batch_size=self.dataset_batch_size,
    498 )
    500 return tokenized_dataset

TypeError: map() got an unexpected keyword argument 'num_proc'