relatively simply program
model is chavinlo/alpaca-13b · Hugging Face
import torch
from transformers import LlamaTokenizer, LlamaForCausalLM
model_folder = "alpaca-13b"
model = "./alpaca/"+ model_folder
tokenizer = LlamaTokenizer.from_pretrained(model)
model = LlamaForCausalLM.from_pretrained(
model,
load_in_8bit=True,
torch_dtype=torch.float16,
device_map="auto"
)
def ask(message):
print('message: ' + message)
input_ids = tokenizer(message, return_tensors="pt")#.input_ids.to("cuda")
print('input_ids: ' + str(input_ids))
generated_ids = model.generate(input_ids, max_new_tokens=250, do_sample=True)#, repetition_penalty=1.3, temperature=0.8, top_p=0.75, top_k=40)
print('generated_ids: ' + str(generated_ids))
# response = tokenizer.batch_decode(generated_ids[0][input_ids.shape[-1]:])
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
print('response: ' + str(response))
return response
if __name__ == "__main__":
while True:
message = input("Enter your message: ")
response = ask(message)
I get the error
message: asdf
input_ids: {'input_ids': tensor([[ 2, 408, 2176]]), 'attention_mask': tensor([[1, 1, 1]])}
/home/nick/Programs/miniconda3/envs/discord-bot/lib/python3.11/site-packages/transformers/generation/utils.py:1255: UserWarning: You have modified the pretrained model configuration to control generation. This is a deprecated strategy to control generation and will be removed soon, in a future version. Please use a generation configuration file (see https://huggingface.co/docs/transformers/main_classes/text_generation)
warnings.warn(
Traceback (most recent call last):
File "/home/nick/Programs/miniconda3/envs/discord-bot/lib/python3.11/site-packages/transformers/tokenization_utils_base.py", line 249, in __getattr__
return self.data[item]
~~~~~~~~~^^^^^^
KeyError: 'shape'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/nick/Work/llm/alpaca-discord/alpaca-test.py", line 29, in <module>
response = ask(message)
^^^^^^^^^^^^
File "/home/nick/Work/llm/alpaca-discord/alpaca-test.py", line 19, in ask
generated_ids = model.generate(input_ids, max_new_tokens=250, do_sample=True)#, repetition_penalty=1.3, temperature=0.8, top_p=0.75, top_k=40)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/nick/Programs/miniconda3/envs/discord-bot/lib/python3.11/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/nick/Programs/miniconda3/envs/discord-bot/lib/python3.11/site-packages/transformers/generation/utils.py", line 1293, in generate
batch_size = inputs_tensor.shape[0]
^^^^^^^^^^^^^^^^^^^
File "/home/nick/Programs/miniconda3/envs/discord-bot/lib/python3.11/site-packages/transformers/tokenization_utils_base.py", line 251, in __getattr__
raise AttributeError
AttributeError