Error: Could not convert to integer: 3221225477. Path 'exitCode'

Hello. I want to build my own AI chatbot based on Meta's Llama 3.1 8B model, but while training it on my datasets I keep getting the following error: Could not convert to integer: 3221225477. Path 'exitCode'. How can I solve this?
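
If I convert the exit code to hexadecimal I get 0xC0000005, which as far as I know is the Windows STATUS_ACCESS_VIOLATION code:

>>> hex(3221225477)
'0xc0000005'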

I am using:

  1. Windows 11 Home version 23H2
  2. Microsoft Visual Studio 2022
  3. Python 3.11.0

Here is the full training script:

from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
import torch
from datasets import load_dataset, concatenate_datasets

try:
    # Load the tokenizer and model from the local checkpoint
    model_path = "C:\\Users\\evhac\\.llama\\checkpoints\\Llama3.1-8B-hf"
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForCausalLM.from_pretrained(model_path)

    # Use the EOS token as the pad token (the Llama tokenizer has no pad token by default)
    tokenizer.pad_token = tokenizer.eos_token

    # Transfer the model to GPU if available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # Loading datasets
    persona_chat_dataset = load_dataset("AlekseyKorshuk/persona-chat", split="train")
    dailydialog_dataset = load_dataset("roskoN/dailydialog", split="train")

    # Keep only a "dialogue" column in both datasets
    def preprocess_persona(example):
        dialogue = example.get("dialogue", [])
        return {"dialogue": dialogue}

    def preprocess_dailydialog(example):
        dialogue = example.get("dialogue", [])
        return {"dialogue": dialogue}

    persona_chat_dataset = persona_chat_dataset.map(preprocess_persona, remove_columns=persona_chat_dataset.column_names)
    dailydialog_dataset = dailydialog_dataset.map(preprocess_dailydialog, remove_columns=dailydialog_dataset.column_names)
    combined_dataset = concatenate_datasets([persona_chat_dataset, dailydialog_dataset])

    # Flatten each dialogue into a single text string
    def preprocess_dialogue(example):
        conversation = ""
        for turn in example["dialogue"]:
            if 'role' in turn and 'text' in turn:
                conversation += f"{turn['role']}: {turn['text']} \n"
        return {"text": conversation}

    processed_dataset = combined_dataset.map(preprocess_dialogue)

    # Tokenization and labeling
    def preprocess_for_model(example):
        # Tokenize the text, truncating and padding to a fixed length of 256 tokens
        tokenized = tokenizer(example["text"], truncation=True, padding="max_length", max_length=256)
        
        # Add labels, which should be the same as the input_ids for training
        tokenized["labels"] = tokenized["input_ids"].copy()
        return tokenized

    # Applying tokenization to the processed dataset
    processed_dataset = processed_dataset.map(preprocess_for_model, batched=True)

    # Training parameters
    training_args = TrainingArguments(
        output_dir="./llama-chatbot",
        num_train_epochs=1,
        per_device_train_batch_size=2,
        save_steps=500,
        save_total_limit=2,
        fp16=False,
        remove_unused_columns=False
    )

    # Creating and running Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=processed_dataset,
    )
    
    trainer.train()

except Exception as e:
    print("An error has occurred:", e)

It looks like the crash happens inside a native (Visual C++) DLL, but I can't tell where the error actually occurs…
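
Would enabling Python's built-in faulthandler at the top of the script help me see which call triggers the crash? Something like this (just a sketch, I have not verified it catches this particular kind of crash):

import faulthandler

# Print the Python traceback if the process dies on a fatal error
# such as an access violation, so the failing call is visible
faulthandler.enable()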