Error: RuntimeError: Could not infer dtype of DatasetInfo

I'm using TextDataset (from transformers import TextDataset). In general, my question is: how do I pass a list of strings (or a .txt file) to a model and fine-tune it?

This is related to this question/issue:

train_dataset = TextDataset(
    tokenizer=tokenizer,
    file_path=math_example_path,  # path to the .txt training file
    block_size=128,
)
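
For context, TextDataset is deprecated in recent transformers releases; the usual route now is the datasets library, which also covers the list-of-strings case directly. A minimal sketch of that route, assuming the same tokenizer and math_example_path as above (the variable names here are illustrative):

from datasets import Dataset, load_dataset

# Option 1: load a .txt file, treating each line as one training example.
raw = load_dataset("text", data_files={"train": math_example_path})["train"]

# Option 2: build the dataset from a plain list of strings instead.
# raw = Dataset.from_dict({"text": ["first example ...", "second example ..."]})

def tokenize(batch):
    # Truncate so no example exceeds the 128-token block size used above.
    return tokenizer(batch["text"], truncation=True, max_length=128)

# Tokenize in batches and drop the raw text column, leaving only
# input_ids / attention_mask for the Trainer to collate.
train_dataset = raw.map(tokenize, batched=True, remove_columns=["text"])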

from transformers import (
    TrainingArguments, 
    Trainer, 
    default_data_collator, 
)
import mlflow
mlflow.end_run()  # close any MLflow run left open by a previous Trainer call

training_args = TrainingArguments(output_dir='test_trainer',
                                  #evaluation_strategy='epoch',
                                  per_device_train_batch_size=1,
                                  per_device_eval_batch_size=1,
                                  gradient_accumulation_steps=20,  # effective batch size: 1 * 20 = 20
                                  num_train_epochs=2,
                                  fp16=False)

trainer = Trainer(
    model=llm_model,
    args=training_args,
    train_dataset=dataset,  # note: this is `dataset`, not the `train_dataset` built above
    #eval_dataset=dataset,
    tokenizer=tokenizer,
    # The data collator would default to DataCollatorWithPadding, so we change it.
    data_collator=default_data_collator,
    compute_metrics=None,
    preprocess_logits_for_metrics=None,
)

trainer.train()
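
As an aside, for causal LM fine-tuning the collator normally has to supply a labels field, and default_data_collator does not create one by itself. A common choice (an assumption about the intended setup, not part of the failing cell) would be:

from transformers import DataCollatorForLanguageModeling

# With mlm=False the collator copies input_ids into labels (with padding
# masked out), which is what a causal/GPT-style Trainer expects.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

Running the cell above as written, however, produces the following traceback: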

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Cell In[28], line 29
      9 training_args = TrainingArguments(output_dir='test_trainer', 
     10                                   #evaluation_strategy='epoch',
     11                                   per_device_train_batch_size=1,
   (...)
     14                                   num_train_epochs = 2,
     15                                   fp16=False,)
     17 trainer = Trainer(
     18     model=llm_model,
     19     args=training_args,
   (...)
     26     preprocess_logits_for_metrics=None,
     27 )
---> 29 trainer.train()

File ~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\transformers\trainer.py:1539, in Trainer.train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
   1537         hf_hub_utils.enable_progress_bars()
   1538 else:
-> 1539     return inner_training_loop(
   1540         args=args,
   1541         resume_from_checkpoint=resume_from_checkpoint,
   1542         trial=trial,
   1543         ignore_keys_for_eval=ignore_keys_for_eval,
   1544     )

File ~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\transformers\trainer.py:1836, in Trainer._inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
   1833     rng_to_sync = True
   1835 step = -1
-> 1836 for step, inputs in enumerate(epoch_iterator):
   1837     total_batched_samples += 1
   1839     if self.args.include_num_input_tokens_seen:

File ~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\accelerate\data_loader.py:451, in DataLoaderShard.__iter__(self)
    449 # We iterate one batch ahead to check when we are at the end
    450 try:
--> 451     current_batch = next(dataloader_iter)
    452 except StopIteration:
    453     yield

File ~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\torch\utils\data\dataloader.py:634, in _BaseDataLoaderIter.__next__(self)
    631 if self._sampler_iter is None:
    632     # TODO(https://github.com/pytorch/pytorch/issues/76750)
    633     self._reset()  # type: ignore[call-arg]
--> 634 data = self._next_data()
    635 self._num_yielded += 1
    636 if self._dataset_kind == _DatasetKind.Iterable and \
    637         self._IterableDataset_len_called is not None and \
    638         self._num_yielded > self._IterableDataset_len_called:

File ~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\torch\utils\data\dataloader.py:678, in _SingleProcessDataLoaderIter._next_data(self)
    676 def _next_data(self):
    677     index = self._next_index()  # may raise StopIteration
--> 678     data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
    679     if self._pin_memory:
    680         data = _utils.pin_memory.pin_memory(data, self._pin_memory_device)

File ~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\torch\utils\data\_utils\fetch.py:54, in _MapDatasetFetcher.fetch(self, possibly_batched_index)
     52 else:
     53     data = self.dataset[possibly_batched_index]
---> 54 return self.collate_fn(data)

File ~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\transformers\trainer_utils.py:772, in RemoveColumnsCollator.__call__(self, features)
    770 def __call__(self, features: List[dict]):
    771     features = [self._remove_columns(feature) for feature in features]
--> 772     return self.data_collator(features)

File ~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\transformers\data\data_collator.py:92, in default_data_collator(features, return_tensors)
     86 # In this function we'll make the assumption that all `features` in the batch
     87 # have the same attributes.
     88 # So we will look at the first element as a proxy for what attributes exist
     89 # on the whole batch.
     91 if return_tensors == "pt":
---> 92     return torch_default_data_collator(features)
     93 elif return_tensors == "tf":
     94     return tf_default_data_collator(features)

File ~\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\transformers\data\data_collator.py:158, in torch_default_data_collator(features)
    156             batch[k] = torch.tensor(np.stack([f[k] for f in features]))
    157         else:
--> 158             batch[k] = torch.tensor([f[k] for f in features])
    160 return batch

RuntimeError: Could not infer dtype of DatasetInfo
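
The failing frame is torch_default_data_collator calling torch.tensor on a feature value, and what it receives is a DatasetInfo object instead of token ids. That suggests the object passed to the Trainer as train_dataset is not a tokenized dataset at all: in the cell above, the Trainer is given the variable dataset, while the tokenized data lives in train_dataset. A minimal sketch of the fix under that assumption, reusing the datasets-based loading shown earlier:

from transformers import DataCollatorForLanguageModeling, Trainer

# Assumes train_dataset is the tokenized dataset built earlier
# (columns: input_ids / attention_mask) and llm_model is a causal LM.
trainer = Trainer(
    model=llm_model,
    args=training_args,
    train_dataset=train_dataset,  # the tokenized dataset, not the raw `dataset` object
    tokenizer=tokenizer,
    data_collator=DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False),
)
trainer.train()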