from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import torch
# Model to fine-tune (loaded in the main block below)
model_name = "facebook/blenderbot-3B"
# Define the training data
training_data = [
    {"input": "Who is Raj Sanpui?", "output": "Raj Sanpui works at ABC"},
    {"input": "What does Raj Sanpui do?", "output": "Raj Sanpui works at ABC"},
    {"input": "Where does Raj Sanpui work?", "output": "Raj Sanpui works at ABC"},
]
class RajSanpuiDataset(torch.utils.data.Dataset):
    def __init__(self, training_data, tokenizer):
        self.training_data = training_data
        self.tokenizer = tokenizer

    def __getitem__(self, idx):
        input_text = self.training_data[idx]["input"]
        output_text = self.training_data[idx]["output"]
        # encode(..., return_tensors="pt") returns shape (1, seq_len);
        # drop that extra batch dim so the DataLoader can add its own
        input_ids = self.tokenizer.encode(input_text, return_tensors="pt").squeeze(0)
        labels = self.tokenizer.encode(output_text, return_tensors="pt").squeeze(0)
        return {"input_ids": input_ids, "labels": labels}

    def __len__(self):
        return len(self.training_data)
if __name__ == "__main__":
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    dataset = RajSanpuiDataset(training_data, tokenizer)
    # Examples have different lengths, so batch_size > 1 would need a padding collate_fn
    batch_size = 1
    data_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size)
    device = "cpu"
    model.to(device)
    # Define the optimizer (doesn't work with lr=1e-2 either)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
print("Starting training")
# Train the model
for epoch in range(1):
print(epoch)
model.train()
total_loss = 0
for batch in data_loader:
input_ids = batch["input_ids"].to(device)
decoder_input_ids = batch["decoder_input_ids"].to(device)
# Zero the gradients
optimizer.zero_grad()
# Forward pass
outputs = model(input_ids=input_ids, labels=decoder_input_ids)
loss = outputs.loss
# Backward pass
loss.backward()
# Update the model parameters
optimizer.step()
# Accumulate the loss
total_loss += loss.item()
print(f"Epoch {epoch+1}, Loss: {total_loss / len(data_loader)}")
    # Use the trained model to generate an answer
    model.eval()
    input_ids = tokenizer.encode("Who is Raj Sanpui?", return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model.generate(input_ids=input_ids)
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(answer)
This gives the wrong answer below instead of the trained "Raj Sanpui works at ABC":
Starting training
0
Epoch 1, Loss: 3.5551576614379883
Epoch 1, Loss: 6.134128411610921
Epoch 1, Loss: 7.840777079264323
raj sanpui is an indian singer and songwriter.
Process finished with exit code 0
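The training mechanics themselves look fine; the likely problem is scale: one epoch over three examples is only three optimizer steps at lr=1e-5, which barely perturbs a 3B-parameter model, so generate() still answers from its pretraining distribution. A common way to force a tiny fact-set into the weights is to loop over it for many epochs until the loss approaches zero. Below is a minimal sketch of such a memorization run; the epoch count, the tokenizer(text_target=...) call, and the -100 label masking are my assumptions, not part of the original script:

# Minimal memorization sketch; epoch count, text_target usage, and the
# -100 masking are assumptions, not taken from the original script
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import torch

model_name = "facebook/blenderbot-3B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)

pairs = [
    ("Who is Raj Sanpui?", "Raj Sanpui works at ABC"),
    ("What does Raj Sanpui do?", "Raj Sanpui works at ABC"),
    ("Where does Raj Sanpui work?", "Raj Sanpui works at ABC"),
]

model.train()
for epoch in range(30):  # assumed: repeat until the loss is near zero
    epoch_loss = 0.0
    for question, answer in pairs:
        enc = tokenizer(question, return_tensors="pt")
        labels = tokenizer(text_target=answer, return_tensors="pt").input_ids
        # Positions set to -100 are ignored by the loss; this only matters
        # once batches are padded, but it is harmless for single examples
        labels[labels == tokenizer.pad_token_id] = -100
        loss = model(input_ids=enc.input_ids,
                     attention_mask=enc.attention_mask,
                     labels=labels).loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    print(f"Epoch {epoch + 1}, Loss: {epoch_loss / len(pairs)}")

Even once the loss is small, keep in mind that generate() applies BlenderBot's generation defaults (beam search with a minimum output length, per its generation config), which can reshape the output; comparing against a deterministic call such as model.generate(input_ids, num_beams=1, do_sample=False) makes the before/after check cleaner.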