Data problem for live support for my e-commerce site

Hello friends,

I recorded all human-customer conversations on my e-commerce site in json form. This data is as follows:
{
dialog_1 = [
{“role”:“user”,“content”:“hello”},
{“role”: “user”, “content”: “are you online”},
{“role”: “user”, “content”: “can you help me ?”},
{“role”: “assistant”, “content”: “yes, how can I help you?”},
{“role”: “assistant”, “content”: “What’s your problem?”},

],

dialog_2 = [{“role”: “user”, “content”: “hello”},
{“role”: “assistant”, “content”: “hello”},
{“role”: “user”, “content”: “there is a problem with my order”},
{“role”: “assistant”, “content”: “Can I have your order number??”},
…],


}

there are 7k dialogs in this structure, but if I give a dataset, it does not do successful learning and does not respond properly. How can I use this dataset successfully? I give it the same as it is.

full_data = list(data_dict)
train_data = []
for conversation_id in full_data:
    conversation = data_dict[conversation_id]
    user_message_buffer = ""
    assistant_response_buffer = ""
    assistant_responses = []
    dialogue_context = ""
    messages_tranin = []


    
    messages_tranin.append({
            "from":"system",
            "value":DEFAULT_SYSTEM_PROMPT,
                                
        })

    for message in conversation:
        role = message['role']
        content = message['content']
        message_type = message["type"]
        if message_type != "chat":
            continue
            content = f"<--{message_type}-->"
        if role == "user":
          role = "human"
        else:
          role = "gpt"
        messages_tranin.append({
            "from":role,
            "value":content,
                                
        })
    if len(messages_tranin) > 10:
      train_data.append({"conversations":messages_tranin})
tokenizer = get_chat_template(
    tokenizer,
    mapping={"role": "from", "content": "value", "user": "human", "assistant": "gpt"},
    chat_template="chatml",
     map_eos_token = True,
)

def apply_template(examples):
    messages = examples["conversations"]
    print(messages)
    text = [tokenizer.apply_chat_template(message, tokenize=False, add_generation_prompt=False) for message in messages]
    return {"text": text}```
1 Like