Setting `torch_dtype=torch.float16` will likely make inference faster (see the pipelines documentation for reference), for example: `pipe = pipeline("text-generation", model=model_id, torch_dtype=torch.float16, batch_size=2, device=0)`
1 Like
Setting `torch_dtype=torch.float16` will likely make inference faster (see the pipelines documentation for reference), for example: `pipe = pipeline("text-generation", model=model_id, torch_dtype=torch.float16, batch_size=2, device=0)`