Error: AttributeError: 'NoneType' object has no attribute 'shape' when prompt tuning with ChatGLM2-6B

I modified the simple PEFT example code to train ChatGLM2-6B with prompt tuning.
Below is my code:
from transformers import AutoModelForCausalLM
from peft import get_peft_config, get_peft_model, PromptTuningInit, PromptTuningConfig, TaskType, PeftType
import torch
import os
from transformers import AutoTokenizer
from torch.utils.data import DataLoader
from transformers import default_data_collator, get_linear_schedule_with_warmup
from tqdm import tqdm
from datasets import load_dataset

device = "cuda"

# model_name_or_path = "bigscience/bloomz-560m"
# tokenizer_name_or_path = "bigscience/bloomz-560m"
model_name_or_path = "./chatglm2-6b"
tokenizer_name_or_path = "./chatglm2-6b"

peft_config = PromptTuningConfig(
task_type=TaskType.CAUSAL_LM,
prompt_tuning_init=PromptTuningInit.RANDOM,
num_virtual_tokens=8,
)

dataset_name = "twitter_complaints"

text_column = "Tweet text"
label_column = "text_label"
max_length = 64
lr = 3e-2
num_epochs = 10
batch_size = 8

dataset = load_dataset("ought/raft", dataset_name)

classes = [k.replace("_", " ") for k in dataset["train"].features["Label"].names]
print(classes)

dataset = dataset.map(
    lambda x: {"text_label": [classes[label] for label in x["Label"]]},
    batched=True,
    num_proc=1,
)
print(dataset)

dataset["train"][0]

data preprocessing

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True)
if tokenizer.pad_token_id is None:
tokenizer.pad_token_id = tokenizer.eos_token_id

target_max_length = max([len(tokenizer(class_label)["input_ids"]) for class_label in classes])
print("target_max_length:", target_max_length)

Preprocessing

def preprocess_function(examples):
    batch_size = len(examples[text_column])
    print("batch_size:", batch_size)

    inputs = [f"{text_column} : {x} Label : " for x in examples[text_column]]
    targets = [str(x) for x in examples[label_column]]

    model_inputs = tokenizer(inputs, padding='max_length', truncation=True, max_length=max_length)
    labels = tokenizer(targets, padding='max_length', truncation=True, max_length=max_length)

    for i in range(batch_size):
        sample_input_ids = model_inputs["input_ids"][i]
        label_input_ids = labels["input_ids"][i] + [tokenizer.pad_token_id]
        if i == 0:
            print(i, sample_input_ids, label_input_ids)
        model_inputs["input_ids"][i] = sample_input_ids + label_input_ids
        labels["input_ids"][i] = [-100] * len(sample_input_ids) + label_input_ids
        model_inputs["attention_mask"][i] = [1] * len(model_inputs["input_ids"][i])
    # print(model_inputs)

    for i in range(batch_size):
        sample_input_ids = model_inputs["input_ids"][i]
        label_input_ids = labels["input_ids"][i]

        model_inputs["input_ids"][i] = [tokenizer.pad_token_id] * (max_length - len(sample_input_ids)) + sample_input_ids
        model_inputs["attention_mask"][i] = [0] * (max_length - len(sample_input_ids)) + model_inputs["attention_mask"][i]
        labels["input_ids"][i] = [-100] * (max_length - len(sample_input_ids)) + label_input_ids

        model_inputs["input_ids"][i] = torch.tensor(model_inputs["input_ids"][i][:max_length])
        model_inputs["attention_mask"][i] = torch.tensor(model_inputs["attention_mask"][i][:max_length])
        labels["input_ids"][i] = torch.tensor(labels["input_ids"][i][:max_length])
        if i == 0:
            print("model_inputs input_ids:", model_inputs["input_ids"][i])
            print("model_inputs attention_mask:", model_inputs["attention_mask"][i])
            print("labels input_ids:", labels["input_ids"][i])

    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

print("column_names:", dataset["train"].column_names)

Preprocess the raw training and test data at once, then use the result as both the training and evaluation datasets

processed_datasets = dataset.map(
    preprocess_function,
    batched=True,
    num_proc=1,
    remove_columns=dataset["train"].column_names,
    load_from_cache_file=False,
    desc="Running tokenizer on dataset",
)

train_dataset = processed_datasets["train"]
eval_dataset = processed_datasets["train"]

Training and evaluation use the same data, but the training data is shuffled

train_dataloader = DataLoader(train_dataset, shuffle=True, collate_fn=default_data_collator, batch_size=batch_size, pin_memory=True)
eval_dataloader = DataLoader(eval_dataset, collate_fn=default_data_collator, batch_size=batch_size, pin_memory=True)
print(len(train_dataloader))
print(len(eval_dataloader))

def test_preprocess_function(examples):
    batch_size = len(examples[text_column])
    inputs = [f"{text_column} : {x} Label : " for x in examples[text_column]]
    model_inputs = tokenizer(inputs, padding='max_length', truncation=True, max_length=max_length)
    # print(model_inputs)
    for i in range(batch_size):
        sample_input_ids = model_inputs["input_ids"][i]

        model_inputs["input_ids"][i] = [tokenizer.pad_token_id] * (max_length - len(sample_input_ids)) + sample_input_ids
        model_inputs["attention_mask"][i] = [0] * (max_length - len(sample_input_ids)) + model_inputs["attention_mask"][i]

        model_inputs["input_ids"][i] = torch.tensor(model_inputs["input_ids"][i][:max_length])
        model_inputs["attention_mask"][i] = torch.tensor(model_inputs["attention_mask"][i][:max_length])
    return model_inputs

Use the raw test data for testing

test_dataset = dataset["test"].map(
    test_preprocess_function,
    batched=True,
    num_proc=1,
    remove_columns=dataset["train"].column_names,
    load_from_cache_file=False,
    desc="Running tokenizer on dataset",
)

test_dataloader = DataLoader(test_dataset, collate_fn=default_data_collator, batch_size=batch_size, pin_memory=True)
next(iter(test_dataloader))

creating model

model = AutoModelForCausalLM.from_pretrained(model_name_or_path, trust_remote_code=True)

model = get_peft_model(model, peft_config)

model.print_trainable_parameters()

model

optimizer and lr scheduler

optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
lr_scheduler = get_linear_schedule_with_warmup(
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=(len(train_dataloader) * num_epochs),
)

training and evaluation

model = model.to(device)

for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for step, batch in enumerate(tqdm(train_dataloader)):
        batch = {k: v.to(device) for k, v in batch.items()}
        print(batch)
        print(batch["input_ids"].shape)
        outputs = model(**batch)
        loss = outputs.loss
        total_loss += loss.detach().float()
        loss.backward()
        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()

    model.eval()
    eval_loss = 0
    eval_preds = []
    for step, batch in enumerate(tqdm(eval_dataloader)):
        batch = {k: v.to(device) for k, v in batch.items()}
        with torch.no_grad():
            outputs = model(**batch)
        loss = outputs.loss
        eval_loss += loss.detach().float()
        eval_preds.extend(
            tokenizer.batch_decode(torch.argmax(outputs.logits, -1).detach().cpu().numpy(), skip_special_tokens=True)
        )

    eval_epoch_loss = eval_loss / len(eval_dataloader)
    eval_ppl = torch.exp(eval_epoch_loss)
    train_epoch_loss = total_loss / len(train_dataloader)
    train_ppl = torch.exp(train_epoch_loss)
    print(f"{epoch=}: {train_ppl=} {train_epoch_loss=} {eval_ppl=} {eval_epoch_loss=}")

But I got the error message below:
0%| | 0/7 [00:00<?, ?it/s]/anaconda/envs/llm/lib/python3.8/site-packages/peft/peft_model.py:936: UserWarning: Position ids are not supported for parameter efficient tuning. Ignoring position ids. warnings.warn("Position ids are not supported for parameter efficient tuning. Ignoring position ids.") 0%| | 0/7 [00:00<?, ?it/s] {'input_ids': tensor([[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64790, 64792, 21184, 2254, 1363, 19141, 21226, 30992, 14771, 289, 2205, 356, 2540, 307, 683, 1126, 13085, 263, 1929, 12416, 1126, 29114, 2886, 1214, 30912, 30930, 2074, 30967, 30972, 30938, 31015, 30936, 30970, 30966, 30916, 704, 30915, 21945, 1363, 30910], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64790, 64792, 21184, 2254, 1363, 1989, 30926, 1102, 309, 5258, 766, 4402, 267, 7401, 332, 496, 21945, 1363, 30910], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64790, 64792, 21184, 2254, 1363, 1989, 30936, 277, 25953, 30947, 3034, 363, 1266, 30953, 30917, 475, 4367, 289, 2396, 2466, 10540, 30987, 17110, 29419, 810, 343, 878, 30930, 21945, 1363, 30910], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64790, 64792, 21184, 2254, 1363, 1989, 5907, 1076, 8320, 16689, 316, 3873, 1709, 878, 9687, 293, 4089, 16036, 498, 9829, 2520, 1431, 30910, 30943, 30940, 30940, 1233, 30930, 3122, 323, 434, 30930, 21945, 1363, 30910], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64790, 64792, 21184, 2254, 1363, 1989, 30960, 275, 3660, 26822, 650, 383, 344, 16176, 623, 10072, 8305, 332, 3637, 1319, 21945, 1363, 30910], [ 0, 0, 0, 0, 0, 0, 0, 64790, 64792, 21184, 2254, 1363, 839, 307, 457, 30953, 30912, 634, 552, 30910, 30966, 4441, 5493, 290, 1989, 1122, 1567, 1556, 30921, 263, 1428, 1122, 1567, 289, 636, 1661, 307, 30953, 30924, 883, 278, 4382, 576, 30930, 666, 323, 260, 30353, 291, 552, 13981, 30930, 2260, 1397, 1989, 30949, 594, 1989, 30949, 594, 8332, 21945, 1363, 30910], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64790, 64792, 21184, 2254, 1363, 1989, 30956, 30944, 8654, 15861, 376, 1477, 434, 323, 552, 726, 1672, 21945, 1363, 30910], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64790, 64792, 21184, 2254, 1363, 1989, 1480, 30914, 30962, 11563, 1989, 9267, 2112, 475, 9255, 323, 9829, 13139, 323, 2834, 293, 940, 803, 2991, 23430, 30930, 3286, 354, 1811, 354, 30953, 30917, 1994, 3624, 21945, 1363, 30910]], device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], device='cuda:0'), 'position_ids': tensor([[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22], [ 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19], [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36]], device='cuda:0'), 'labels': tensor([[-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100], [-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100], [-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100], [-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100], [-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100], [-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100], [-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100], [-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100]], device='cuda:0')} torch.Size([8, 64]) `---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
Cell In[11], line 11
9 print(batch)
10 print(batch["input_ids"].shape)
---> 11 outputs = model(**batch)
12 loss = outputs.loss
13 total_loss += loss.detach().float()

File /anaconda/envs/llm/lib/python3.8/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
1496 # If we don't have any hooks, we want to skip the rest of the logic in
1497 # this function, and just call forward.
1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
---> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []

File /anaconda/envs/llm/lib/python3.8/site-packages/peft/peft_model.py:966, in PeftModelForCausalLM.forward(self, input_ids, attention_mask, inputs_embeds, labels, output_attentions, output_hidden_states, return_dict, **kwargs)
964 prompts = prompts.to(inputs_embeds.dtype)
965 inputs_embeds = torch.cat((prompts, inputs_embeds), dim=1)
---> 966 return self.base_model(inputs_embeds=inputs_embeds, **kwargs)

File /anaconda/envs/llm/lib/python3.8/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
1496 # If we don't have any hooks, we want to skip the rest of the logic in
1497 # this function, and just call forward.
1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
---> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []

File ~/.cache/huggingface/modules/transformers_modules/chatglm2-6b/modeling_chatglm.py:934, in ChatGLMForConditionalGeneration.forward(self, input_ids, position_ids, attention_mask, past_key_values, inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict, return_last_logit)
931 use_cache = use_cache if use_cache is not None else self.config.use_cache
932 return_dict = return_dict if return_dict is not None else self.config.use_return_dict
---> 934 transformer_outputs = self.transformer(
935 input_ids=input_ids,
936 position_ids=position_ids,
937 attention_mask=attention_mask,
938 past_key_values=past_key_values,
939 inputs_embeds=inputs_embeds,
940 use_cache=use_cache,
941 output_hidden_states=output_hidden_states,
942 return_dict=return_dict,
943 )
945 hidden_states = transformer_outputs[0]
946 if return_last_logit:

File /anaconda/envs/llm/lib/python3.8/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
1496 # If we don't have any hooks, we want to skip the rest of the logic in
1497 # this function, and just call forward.
1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
---> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []

File ~/.cache/huggingface/modules/transformers_modules/chatglm2-6b/modeling_chatglm.py:804, in ChatGLMModel.forward(self, input_ids, position_ids, attention_mask, full_attention_mask, past_key_values, inputs_embeds, use_cache, output_hidden_states, return_dict)
801 use_cache = use_cache if use_cache is not None else self.config.use_cache
802 return_dict = return_dict if return_dict is not None else self.config.use_return_dict
---> 804 batch_size, seq_length = input_ids.shape
806 if inputs_embeds is None:
807 inputs_embeds = self.embedding(input_ids)

AttributeError: 'NoneType' object has no attribute 'shape'

Have you solved this problem?

If I am not mistaken, this line would effectively drop all of the columns during the processing step, which would cause None to be passed in as the inputs. That is why the error complains about input_ids not having shape: the input_ids are NoneType.
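For anyone hitting the same trace: the frames above show peft_model.py calling self.base_model(inputs_embeds=inputs_embeds, **kwargs) without input_ids, while ChatGLMModel.forward in modeling_chatglm.py reads input_ids.shape (line 804 in the trace) before it checks whether inputs_embeds is None. So by the time that line runs, input_ids is None. Below is a minimal, untested sketch of a guard one could add to a local copy of modeling_chatglm.py; it assumes inputs_embeds arrives as (batch, seq_len, hidden) at that point, which you should verify against your own copy of the modeling code before relying on it.

# Sketch only: replace the unconditional shape read near the top of
# ChatGLMModel.forward with a guard that also works when PEFT passes
# inputs_embeds instead of input_ids.
if input_ids is not None:
    batch_size, seq_length = input_ids.shape
elif inputs_embeds is not None:
    # Assumption: inputs_embeds is (batch, seq_len, hidden) here; adjust
    # the unpacking if your copy of the model uses a different layout.
    batch_size, seq_length, _ = inputs_embeds.shape
else:
    raise ValueError("You have to specify either input_ids or inputs_embeds")

if inputs_embeds is None:
    inputs_embeds = self.embedding(input_ids)

Alternatively, running the same script against a model whose forward already handles inputs_embeds (for example the bigscience/bloomz-560m checkpoint commented out at the top of the script) sidesteps the patch entirely, which also helps confirm that the problem is in the ChatGLM2 custom modeling code rather than in the preprocessing.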