Giving attention mask to ppo_trainer

When I pass an attention mask to the PPO trainer, whether through generation_kwargs or directly in the generate() call, I get this error:

trl.trainer.ppo_trainer.PPOTrainer.generate() got multiple values for keyword argument 'attention_mask'
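If I am reading the traceback right, this looks like the plain Python TypeError you get when the same keyword reaches a function twice, once explicitly and once through an unpacked dict. A minimal standalone reproduction with no TRL involved (the function name here is only chosen to mirror the traceback):

def generate(query, **generation_kwargs):
    pass

kwargs = {"attention_mask": [1, 1, 1]}

# raises: TypeError: generate() got multiple values for keyword argument 'attention_mask'
generate("hello", attention_mask=[1, 1, 1], **kwargs)

So my question is how the attention mask is supposed to be passed to PPOTrainer.generate() so that it does not collide.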

Here’s my code:

from peft import LoraConfig
from tqdm import tqdm
from trl import AutoModelForCausalLMWithValueHead, PPOConfig, PPOTrainer

lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

model = AutoModelForCausalLMWithValueHead.from_pretrained(
    model,
    peft_config=lora_config,
)

# generate model response

generation_kwargs = {
    "do_sample": True,
    "top_k": 20,
    "max_length": 400,
    "top_p": 0.6,
    "bos_token_id": tokenizer.convert_tokens_to_ids("<|startoftext|>"),
    "eos_token_id": tokenizer.convert_tokens_to_ids("<|endoftext|>"),
    "pad_token_id": tokenizer.convert_tokens_to_ids("[PAD]"),
    # "attention_mask": attention_mask
}

# initialize trainer

ppo_config = {
    "mini_batch_size": 1,
    "batch_size": 1,
    "learning_rate": 1.41e-5,
}
config = PPOConfig(**ppo_config)
ppo_trainer = PPOTrainer(config, model, tokenizer=tokenizer, dataset=dataset)

for batch in tqdm(train_dataloader):
    query_tensor = batch["query"]
    attention_mask = batch["attention_mask"]

    response_tensors = ppo_trainer.generate(
        query_tensor, attention_mask=attention_mask, return_prompt=False, **generation_kwargs
    )