I am using this script to get embeddings for the ~13,000 essays in PERSUADE 2.0:
from argparse import ArgumentParser

import pandas as pd
import torch
from datasets import Dataset
from transformers import AutoModel, AutoTokenizer, Trainer, TrainingArguments


def parse_args():
    parser = ArgumentParser()
    parser.add_argument("--model_name", type=str, default="BAAI/bge-base-en-v1.5")
    parser.add_argument("--csv_path", type=str)
    parser.add_argument("--text_col", type=str)
    parser.add_argument("--max_length", type=int, default=512)
    parser.add_argument("--batch_size", type=int, default=128)
    parser.add_argument("--output_path", type=str)
    return parser.parse_args()


def main():
    args = parse_args()

    tokenizer = AutoTokenizer.from_pretrained(args.model_name)
    model = AutoModel.from_pretrained(args.model_name, add_pooling_layer=False)
    model.eval()

    targs = TrainingArguments(
        ".",
        report_to="none",
        per_device_eval_batch_size=args.batch_size,
        fp16=True,
    )

    ds = Dataset.from_pandas(pd.read_csv(args.csv_path))

    # strip leading/trailing whitespace from the text column
    ds = ds.map(
        lambda x: {args.text_col: x[args.text_col].strip()}, num_proc=4
    )

    def tokenize(batch):
        return tokenizer(
            batch[args.text_col],
            padding=False,
            truncation=True,
            max_length=args.max_length,
            return_length=True,
        )

    with targs.main_process_first(desc="dataset map pre-processing"):
        ds = ds.map(tokenize, batched=True, num_proc=4)

    trainer = Trainer(model=model, args=targs, tokenizer=tokenizer)

    # take the first (CLS) token of the returned hidden states as the embedding
    embeddings = trainer.predict(ds).predictions[0][:, 0]
    embeddings = torch.nn.functional.normalize(
        torch.tensor(embeddings), p=2, dim=1
    ).cpu()

    torch.save(embeddings, args.output_path)


if __name__ == "__main__":
    main()
Source: @nbroad in his amazing notebook
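For context, I invoke it roughly like this (the script and file names below are just placeholders, not the exact ones I use):

python get_embeddings.py \
    --model_name BAAI/bge-base-en-v1.5 \
    --csv_path persuade_2.0.csv \
    --text_col full_text \
    --batch_size 64 \
    --output_path embeddings.pt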
I used 2x A5000 GPUs with a batch size of 64, but during prediction the GPU memory consumption keeps increasing, and after roughly 50% of the predictions I get an OOM error. The same thing happens even with small batch sizes. The reason suggested by @sgugger was that we are accumulating too many predictions, so I set eval_accumulation_steps=10 to transfer these predictions to the CPU periodically, but then it crashes after occupying 60 GB of RAM. The same happens whether I increase or decrease eval_accumulation_steps.
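For reference, this is roughly the only change I made for that attempt (the rest of the script stays the same as above):

targs = TrainingArguments(
    ".",
    report_to="none",
    per_device_eval_batch_size=args.batch_size,
    fp16=True,
    # move accumulated predictions from GPU to CPU every 10 eval steps
    eval_accumulation_steps=10,
)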
Question:
My question is: where is this 60 GB of data coming from? My prediction tensor of size (13000, 768) should be at most about 1 GB, so why are both RAM and GPU memory being overloaded?
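My rough back-of-the-envelope check for the expected size of the output, in case I'm miscounting:

import torch

# 13,000 essays x 768-dim embeddings in fp32
emb = torch.empty(13000, 768, dtype=torch.float32)
print(emb.element_size() * emb.nelement() / 1e9)  # ~0.04 GB, well under 1 GB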