Constructing the HFLM wrapper yourself and passing it to simple_evaluate may help avoid some of the problems.
from lm_eval import evaluator
from lm_eval.models.huggingface import HFLM

# Build the model wrapper directly instead of passing model_args strings.
eval_model = HFLM(
    pretrained="models/Llama-2-7b-hf",
    dtype="bfloat16",
    device="cuda",
    # device_map="auto",  # enable to shard the model across available devices
)

results = evaluator.simple_evaluate(
    model=eval_model,
    tasks=["arc_easy", "arc_challenge", "hellaswag", "lambada_openai",
           "lambada_standard", "openbookqa", "piqa", "winogrande"],
    batch_size=64,
    # max_batch_size only caps the search when batch_size="auto",
    # so it is redundant alongside a fixed batch_size and dropped here.
)
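
Once the run finishes, results is a plain dict. A minimal sketch for inspecting it, assuming lm_eval.utils.make_table is available (it is the helper the CLI uses to print its summary table); "results.json" is just an arbitrary path chosen for this example:

import json

from lm_eval.utils import make_table

# Print per-task metrics (accuracy, stderr, etc.) as a formatted table.
print(make_table(results))

# Save only the per-task scores; the full results dict can contain
# non-JSON-serializable config objects.
with open("results.json", "w") as f:
    json.dump(results["results"], f, indent=2)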