JSON parse error when calling load_dataset on a JSONL file

Source code:

from datasets import Dataset, load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, DataCollatorForSeq2Seq, Trainer
from peft import PromptTuningConfig, get_peft_model, TaskType, PromptTuningInit
dataset = load_dataset("json", data_files="/home/david/LLM4Decompile/train/AnghaBench_demo_compile.jsonl")

The error:

Failed to load JSON from file '/home/david/LLM4Decompile/train/AnghaBench_demo_compile.jsonl' with error <class 'pyarrow.lib.ArrowInvalid'>: JSON parse error: Column(/output/opt-state-O1) changed from string to object in row 42

My dataset format is like this:

{"name": "..." , "input": "...", "input_ori": "...", "output": {"opt-state-O0": "...", "opt-state-O1": "...", "opt-state-O2": "...", "opt-state-O3": "..."}}

Row 42, the row named in the error, is:

{"name": "/home/david/LLM4Decompile/train/AnghaBench_demo/8cc/extr_parse.c_read_declarator_params_oldstyle.c", "input": "static void read_declarator_params_oldstyle(Vector *vars) {\n    for (;;) {\n        Token *tok = get();\n        if (tok->kind != TIDENT)\n            errort(tok, \"identifier expected, but got %s\", tok2s(tok));\n        vec_push(vars, ast_lvar(type_int, tok->sval));\n        if (next_token(')'))\n            return;\n        if (!next_token(','))\n            errort(tok, \"comma expected, but got %s\", tok2s(get()));\n    }\n}", "input_ori": "#define NULL ((void*)0)\ntypedef unsigned long size_t;  // Customize by platform.\ntypedef long intptr_t; typedef unsigned long uintptr_t;\ntypedef long scalar_t__;  // Either arithmetic or pointer type.\n/* By default, we understand bool (as a convenience). */\ntypedef int bool;\n#define false 0\n#define true 1\n\n/* Forward declarations */\ntypedef  struct TYPE_5__   TYPE_1__ ;\n\n/* Type definitions */\ntypedef  int /*<<< orphan*/  Vector ;\nstruct TYPE_5__ {scalar_t__ kind; int /*<<< orphan*/  sval; } ;\ntypedef  TYPE_1__ Token ;\n\n/* Variables and functions */\n scalar_t__ TIDENT ; \n int /*<<< orphan*/  ast_lvar (int /*<<< orphan*/ ,int /*<<< orphan*/ ) ; \n int /*<<< orphan*/  errort (TYPE_1__*,char*,int /*<<< orphan*/ ) ; \n TYPE_1__* get () ; \n scalar_t__ next_token (char) ; \n int /*<<< orphan*/  tok2s (TYPE_1__*) ; \n int /*<<< orphan*/  type_int ; \n int /*<<< orphan*/  vec_push (int /*<<< orphan*/ *,int /*<<< orphan*/ ) ; \n\n__attribute__((used)) static void read_declarator_params_oldstyle(Vector *vars) {\n    for (;;) {\n        Token *tok = get();\n        if (tok->kind != TIDENT)\n            errort(tok, \"identifier expected, but got %s\", tok2s(tok));\n        vec_push(vars, ast_lvar(type_int, tok->sval));\n        if (next_token(')'))\n            return;\n        if (!next_token(','))\n            errort(tok, \"comma expected, but got %s\", tok2s(get()));\n    }\n}", "output": 
{"opt-state-O0": "<read_declarator_params_oldstyle>:\nendbr64\npush   %rbp\nmov    %rsp,%rbp\nsub    $0x20,%rsp\nmov    %rdi,-0x18(%rbp)\nmov    $0x0,%eax\ncall   1a <read_declarator_params_oldstyle+0x1a>\nmov    %rax,-0x8(%rbp)\nmov    -0x8(%rbp),%rax\nmov    (%rax),%rdx\nmov    0x0(%rip),%rax\ncmp    %rax,%rdx\nje     55 <read_declarator_params_oldstyle+0x55>\nmov    -0x8(%rbp),%rax\nmov    %rax,%rdi\ncall   3d <read_declarator_params_oldstyle+0x3d>\nmov    %eax,%edx\nmov    -0x8(%rbp),%rax\nlea    0x0(%rip),%rcx\nmov    %rcx,%rsi\nmov    %rax,%rdi\ncall   55 <read_declarator_params_oldstyle+0x55>\nmov    -0x8(%rbp),%rax\nmov    0x8(%rax),%edx\nmov    0x0(%rip),%eax\nmov    %edx,%esi\nmov    %eax,%edi\ncall   6b <read_declarator_params_oldstyle+0x6b>\nmov    %eax,%edx\nmov    -0x18(%rbp),%rax\nmov    %edx,%esi\nmov    %rax,%rdi\ncall   7b <read_declarator_params_oldstyle+0x7b>\nmov    $0x29,%edi\ncall   85 <read_declarator_params_oldstyle+0x85>\ntest   %rax,%rax\njne    cc <read_declarator_params_oldstyle+0xcc>\nmov    $0x2c,%edi\ncall   94 <read_declarator_params_oldstyle+0x94>\ntest   %rax,%rax\njne    10 <read_declarator_params_oldstyle+0x10>\nmov    $0x0,%eax\ncall   a7 <read_declarator_params_oldstyle+0xa7>\nmov    %rax,%rdi\ncall   af <read_declarator_params_oldstyle+0xaf>\nmov    %eax,%edx\nmov    -0x8(%rbp),%rax\nlea    0x0(%rip),%rcx\nmov    %rcx,%rsi\nmov    %rax,%rdi\ncall   c7 <read_declarator_params_oldstyle+0xc7>\njmp    10 <read_declarator_params_oldstyle+0x10>\nnop\nleave\nret\n", "opt-state-O1": "<read_declarator_params_oldstyle>:\nendbr64\npush   %r13\npush   %r12\npush   %rbp\npush   %rbx\nsub    $0x8,%rsp\nmov    %rdi,%rbp\nlea    0x0(%rip),%r12\nlea    0x0(%rip),%r13\njmp    6c <read_declarator_params_oldstyle+0x6c>\nmov    %rbx,%rdi\ncall   29 <read_declarator_params_oldstyle+0x29>\nmov    %eax,%edx\nmov    %r12,%rsi\nmov    %rbx,%rdi\ncall   36 <read_declarator_params_oldstyle+0x36>\nmov    0x8(%rbx),%esi\nmov    0x0(%rip),%edi\ncall   44 
<read_declarator_params_oldstyle+0x44>\nmov    %eax,%esi\nmov    %rbp,%rdi\ncall   4e <read_declarator_params_oldstyle+0x4e>\nmov    $0x29,%edi\ncall   58 <read_declarator_params_oldstyle+0x58>\ntest   %rax,%rax\njne    a3 <read_declarator_params_oldstyle+0xa3>\nmov    $0x2c,%edi\ncall   67 <read_declarator_params_oldstyle+0x67>\ntest   %rax,%rax\nje     87 <read_declarator_params_oldstyle+0x87>\nmov    $0x0,%eax\ncall   76 <read_declarator_params_oldstyle+0x76>\nmov    %rax,%rbx\nmov    0x0(%rip),%rax\ncmp    %rax,(%rbx)\nje     36 <read_declarator_params_oldstyle+0x36>\njmp    21 <read_declarator_params_oldstyle+0x21>\ncall   8c <read_declarator_params_oldstyle+0x8c>\nmov    %rax,%rdi\ncall   94 <read_declarator_params_oldstyle+0x94>\nmov    %eax,%edx\nmov    %r13,%rsi\nmov    %rbx,%rdi\ncall   a1 <read_declarator_params_oldstyle+0xa1>\njmp    6c <read_declarator_params_oldstyle+0x6c>\nadd    $0x8,%rsp\npop    %rbx\npop    %rbp\npop    %r12\npop    %r13\nret\n", "opt-state-O2": "<read_declarator_params_oldstyle>:\nendbr64\npush   %r13\nlea    0x0(%rip),%r13\npush   %r12\nlea    0x0(%rip),%r12\npush   %rbp\npush   %rbx\nmov    %rdi,%rbx\nsub    $0x8,%rsp\njmp    37 <read_declarator_params_oldstyle+0x37>\nnopl   0x0(%rax)\nmov    $0x2c,%edi\ncall   32 <read_declarator_params_oldstyle+0x32>\ntest   %rax,%rax\nje     98 <read_declarator_params_oldstyle+0x98>\nxor    %eax,%eax\ncall   3e <read_declarator_params_oldstyle+0x3e>\nmov    %rax,%rbp\nmov    0x0(%rip),%rax\ncmp    %rax,0x0(%rbp)\nje     63 <read_declarator_params_oldstyle+0x63>\nmov    %rbp,%rdi\ncall   56 <read_declarator_params_oldstyle+0x56>\nmov    %r12,%rsi\nmov    %rbp,%rdi\nmov    %eax,%edx\ncall   63 <read_declarator_params_oldstyle+0x63>\nmov    0x8(%rbp),%esi\nmov    0x0(%rip),%edi\ncall   71 <read_declarator_params_oldstyle+0x71>\nmov    %rbx,%rdi\nmov    %eax,%esi\ncall   7b <read_declarator_params_oldstyle+0x7b>\nmov    $0x29,%edi\ncall   85 <read_declarator_params_oldstyle+0x85>\ntest   
%rax,%rax\nje     28 <read_declarator_params_oldstyle+0x28>\nadd    $0x8,%rsp\npop    %rbx\npop    %rbp\npop    %r12\npop    %r13\nret\nnopl   (%rax)\ncall   9d <read_declarator_params_oldstyle+0x9d>\nmov    %rax,%rdi\ncall   a5 <read_declarator_params_oldstyle+0xa5>\nmov    %r13,%rsi\nmov    %rbp,%rdi\nmov    %eax,%edx\ncall   b2 <read_declarator_params_oldstyle+0xb2>\njmp    37 <read_declarator_params_oldstyle+0x37>\n", "opt-state-O3": "<read_declarator_params_oldstyle>:\nendbr64\npush   %r13\nlea    0x0(%rip),%r13\npush   %r12\nlea    0x0(%rip),%r12\npush   %rbp\npush   %rbx\nmov    %rdi,%rbx\nsub    $0x8,%rsp\njmp    37 <read_declarator_params_oldstyle+0x37>\nnopl   0x0(%rax)\nmov    $0x2c,%edi\ncall   32 <read_declarator_params_oldstyle+0x32>\ntest   %rax,%rax\nje     98 <read_declarator_params_oldstyle+0x98>\nxor    %eax,%eax\ncall   3e <read_declarator_params_oldstyle+0x3e>\nmov    %rax,%rbp\nmov    0x0(%rip),%rax\ncmp    %rax,0x0(%rbp)\nje     63 <read_declarator_params_oldstyle+0x63>\nmov    %rbp,%rdi\ncall   56 <read_declarator_params_oldstyle+0x56>\nmov    %r12,%rsi\nmov    %rbp,%rdi\nmov    %eax,%edx\ncall   63 <read_declarator_params_oldstyle+0x63>\nmov    0x8(%rbp),%esi\nmov    0x0(%rip),%edi\ncall   71 <read_declarator_params_oldstyle+0x71>\nmov    %rbx,%rdi\nmov    %eax,%esi\ncall   7b <read_declarator_params_oldstyle+0x7b>\nmov    $0x29,%edi\ncall   85 <read_declarator_params_oldstyle+0x85>\ntest   %rax,%rax\nje     28 <read_declarator_params_oldstyle+0x28>\nadd    $0x8,%rsp\npop    %rbx\npop    %rbp\npop    %r12\npop    %r13\nret\nnopl   (%rax)\ncall   9d <read_declarator_params_oldstyle+0x9d>\nmov    %rax,%rdi\ncall   a5 <read_declarator_params_oldstyle+0xa5>\nmov    %r13,%rsi\nmov    %rbp,%rdi\nmov    %eax,%edx\ncall   b2 <read_declarator_params_oldstyle+0xb2>\njmp    37 <read_declarator_params_oldstyle+0x37>\n"}}

This row passes format validation on the online JSON validator and formatter Be JSON (在线JSON校验格式化工具).
How can I fix this error? Thank you!