With a DataCollator, I still get "KeyError: 'loss'"

As the title says, here is my code:

import code
import logging
import sys
import os
from typing import Optional

import datasets
from dataclasses import dataclass, field
import transformers
from transformers import Trainer

logger = logging.getLogger(__name__)

@dataclass
class CustomArguments:
  model_name: Optional[str] = field(default="bert-base-uncased")
  train_file: Optional[str] = field(default=None)
  validation_file: Optional[str] = field(default=None)
  max_seq_length: Optional[int] = field(default=128)
  preprocessing_num_worker: Optional[int] = field(default=os.cpu_count())

  line_by_line: Optional[bool] = field(default=True)
  vocab_path: Optional[str] = field(default=None)
  model_conf_path: Optional[str] = field(default=None)


def main():
  parser = transformers.HfArgumentParser((CustomArguments, transformers.TrainingArguments))
  custom_args, training_args = parser.parse_args_into_dataclasses()

  logging.basicConfig(
      format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
      datefmt="%m/%d/%Y %H:%M:%S",
      handlers=[logging.StreamHandler(sys.stdout)],
  )

  log_level = training_args.get_process_log_level()
  logger.setLevel(log_level)
  datasets.utils.logging.set_verbosity(log_level)
  transformers.utils.logging.set_verbosity(log_level)
  transformers.utils.logging.enable_default_handler()
  transformers.utils.logging.enable_explicit_format()

  logger.warning(
      f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}"
      + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
  )
  # Set the verbosity to info of the Transformers logger (on main process only):
  logger.info(f"Training/evaluation parameters {training_args}")

  tokenizer = transformers.BertTokenizer(
                      vocab_file=custom_args.vocab_path,
                      do_lower_case=False,
                      model_max_length=custom_args.max_seq_length)

  model_config = transformers.BertConfig.from_pretrained(
                                            custom_args.model_conf_path)
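  # NOTE: BertForPreTraining is the full pre-training model, i.e. it carries both
  # the masked-LM head and the next-sentence-prediction head.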
  model = transformers.BertForPreTraining(config=model_config)
  model.resize_token_embeddings(len(tokenizer))
  if custom_args.train_file is None:
    raise ValueError("train_file must be specified!")

  data_files = {"train": [custom_args.train_file]}
  raw_datasets = datasets.load_dataset("text", data_files=data_files)

  def tokenize_function(examples):
    examples["text"] = [
      line for line in examples["text"] if len(line) > 0 and not line.isspace()
    ]
    return tokenizer(
      examples["text"],
      padding=False,
      truncation=True,
      max_length=custom_args.max_seq_length,
      return_special_tokens_mask=True
    )

  with training_args.main_process_first(desc="dataset map tokenization"):
    tokenized_datasets = raw_datasets.map(
      tokenize_function,
      batched=True,
      num_proc=custom_args.preprocessing_num_worker,
      remove_columns=["text"],
      load_from_cache_file=True,
      desc="Running tokenizer on dataset line_by_line"
    )

  train_dataset = tokenized_datasets["train"]

  data_collator = transformers.DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm_probability=0.15,
    pad_to_multiple_of=8
  )

  trainer = transformers.Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=None,
    tokenizer=tokenizer,
    data_collator=data_collator
  )

  trainer.train()

if __name__ == "__main__":
  main()

but this error occurs:

KeyError: 'loss'
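
For reference, here is a small diagnostic sketch I can run with the same train_dataset, data_collator and model objects from the script above (the two-example batch is arbitrary) to see what the collator and the model actually return:

# Diagnostic sketch: collate two tokenized examples and run them through
# the model, then inspect what comes back.
batch = data_collator([train_dataset[i] for i in range(2)])
print(batch.keys())      # input_ids, attention_mask, labels, ...
outputs = model(**batch)
print(outputs.keys())    # is there a 'loss' entry at all?
print(outputs.loss)      # if this prints None, that would explain the KeyError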

How can I solve this problem?