Hello All,
I’m trying to deploy a Llama 2 7B model and am unable to successfully create the endpoint. I have shared below the .py file that trains the model on a custom dataset and the .py file that creates the inference endpoint:
"""save_model_Llama2.ipynb

Automatically generated by Colaboratory.

Original file is located at
    Google Colab
"""
!pip install -q huggingface_hub
!pip install -q -U trl transformers accelerate peft
!pip install -q -U datasets bitsandbytes einops wandb
!pip install git+https://github.com/huggingface/peft.git
from huggingface_hub import notebook_login
notebook_login()
from datasets import load_dataset
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig, AutoTokenizer, TrainingArguments
from peft import LoraConfig
from transformers.generation.utils import top_k_top_p_filtering
from trl import SFTTrainer
# Dataset and base checkpoint used for supervised fine-tuning.
dataset_name = "Harit10/extra_PII_650"
dataset = load_dataset(dataset_name, split="train")
base_model_name = "meta-llama/Llama-2-7b-hf"

# 4-bit NF4 quantization settings. NOTE: this config is built but not applied,
# because `quantization_config` is commented out in the load call below.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# Place the whole model on device index 0.
device_map = {"": 0}

base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    # quantization_config=bnb_config,
    device_map=device_map,
    trust_remote_code=True,
    use_auth_token=True,
)
# Disable the generation KV cache on the model config for training.
base_model.config.use_cache = False
# Mount Google Drive inside the Colab runtime.
from google.colab import drive

drive.mount("/content/drive")

# More info: [`Llama2`] replace `self.pretraining_tp` with `self.config.pretraining_tp`
# by younesbelkada · Pull Request #24906 · huggingface/transformers · GitHub
base_model.config.pretraining_tp = 1

# LoRA configuration, currently disabled (together with the `peft_config`
# argument of SFTTrainer below), so training updates the full model.
# The missing comma after `lora_alpha=16` is fixed so it can be re-enabled as-is.
# peft_config = LoraConfig(
#     lora_alpha=16,
#     lora_dropout=0.1,
#     r=64,
#     bias="none",
#     task_type="CAUSAL_LM",
# )

tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
# Reuse the EOS token for padding.
tokenizer.pad_token = tokenizer.eos_token

output_dir = "./results"
training_args = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    logging_steps=10,
    max_steps=100,
    push_to_hub=True,
    hub_model_id="Llama2-PII_final",
)

max_seq_length = 512
trainer = SFTTrainer(
    model=base_model,
    train_dataset=dataset,
    # peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_args,
)
trainer.train()
# Save the trained model and, when a PEFT adapter was trained, a merged copy.
#
# The previous version re-joined "final_checkpoint" onto `output_dir` several
# times (yielding ./results/final_checkpoint/final_checkpoint/...), tried to load
# an adapter from a directory nothing had been saved to yet, and discarded the
# return value of `merge_and_unload()`. The three overlapping attempts are
# consolidated into one pass here.
import os

from peft import PeftModel

# Build the checkpoint path exactly once; `output_dir` itself is left untouched.
final_checkpoint_path = os.path.join(output_dir, "final_checkpoint")
print(final_checkpoint_path)

# Save whatever the trainer holds (per the PEFT docs this writes the adapter
# files for a PeftModel; otherwise the full model) together with the tokenizer.
trainer.model.save_pretrained(final_checkpoint_path)
tokenizer.save_pretrained(final_checkpoint_path)

if isinstance(trainer.model, PeftModel):
    # Merge adapter with the base model and keep the returned merged model.
    merged_model = trainer.model.merge_and_unload()
    # Store the merged weights next to, not on top of, the adapter checkpoint.
    merged_checkpoint_path = os.path.join(output_dir, "final_merged_checkpoint")
    merged_model.save_pretrained(merged_checkpoint_path)
    tokenizer.save_pretrained(merged_checkpoint_path)
    print("Model and tokenizer saved successfully.")
else:
    # No adapter was trained, so there is nothing to merge.
    print("No PEFT adapter on the trained model; saved the full model without merging.")

trainer.push_to_hub("End of training")
import os

# Check if CUDA (GPU support) is available and set the device accordingly
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

from peft import AutoPeftModelForCausalLM

checkpoint_dir = "results/final_checkpoint"
if os.path.exists(os.path.join(checkpoint_dir, "adapter_config.json")):
    # The checkpoint holds a PEFT adapter: load base model + adapter.
    model = AutoPeftModelForCausalLM.from_pretrained(
        checkpoint_dir, device_map=device_map, torch_dtype=torch.bfloat16
    )
else:
    # No adapter config present: treat the checkpoint as a plain causal LM
    # (e.g. a fully fine-tuned or already merged model).
    model = AutoModelForCausalLM.from_pretrained(
        checkpoint_dir, device_map=device_map, torch_dtype=torch.bfloat16
    )
#text = “Regarding Amelia Thompson, student ID 654789, SIN 321654987, contactable at 555-678-1234, for her declining grades in advanced calculus. A letter of academic probation is warranted. Task it to Anonymize the personal identifiers.”
#text = “Attention needed for Mason Miller, with student ID 369741, SIN 147852369, and phone number 741-555-8522, due to his poor performance on group assignments in business studies. Issue an academic probation letter. Task is to anonymize the personal identifiers.”
#text = “Evelyn Rodriguez, with student ID 123654, SIN 789123654, and phone 654-555-3211, has been displaying a lack of initiative in volunteer programs. An academic probation letter is needed. Task is to anonymize the personal identifiers.”
#text = “For Jackson Thomas, bearing student ID 987654, SIN 321654987, and can be contacted at 123-555-7890, who has shown insufficient progress in his internship requirements. Please draft an academic probation letter. Task is to anonymize the personal identifiers.”
#text = “Student named Grace Harris, with student ID 264813, SIN 579213648, and phone 852-555-9634, has exhibited poor teamwork skills in project-based learning courses. A letter of academic probation is required. Task is to anonymize the personal identifiers.”
#text = “Anonymize the personal identifiers in the following text: Attention needed for Mason Miller, with student ID 369741, SIN 147852369, and phone number 741-555-8522, due to his poor performance on group assignments in business studies. Issue an academic probation letter.”
#text = “Anonymize the personal identifiers for this: Student named Grace Harris, with student ID 264813, SIN 579213648, and phone 852-555-9634, has exhibited poor teamwork skills in project-based learning courses. A letter of academic probation is required.”
#text = “Anonymize the Personal Identifiable Information in the following text: Evelyn Rodriguez, with student ID 123654, SIN 789123654, and phone 654-555-3211, has been displaying a lack of initiative in volunteer programs.”
#text = “Anonymize the Personal Identifiable Information in the following text: Attention needed for Mason Miller, with student ID 369741, SIN 147852369, and phone number 741-555-8522, due to his poor performance on group assignments in business studies. Issue an academic probation letter.”
#text = “Anonymize the personal identifiers for this - Student named Grace Harris, with student ID 264813, SIN 579213648, and phone 852-555-9634, has exhibited poor teamwork skills in project-based learning courses. A letter of academic probation is required.”
#text = “Anonymize the personal identifiable information in the following text: Attention needed for Mason Miller, with student ID 369741, SIN 147852369, and phone number 741-555-8522, due to his poor performance on group assignments in business studies. Issue an academic probation letter.”
#text = “Anonymize the personal identifiers for in the following text: "
#text = “Anonymize the personal identifiable information in the following text. Student named Grace Harris, with student ID 264813, SIN 579213648, and phone 852-555-9634, has exhibited poor teamwork skills in project-based learning courses. A letter of academic probation is required.”
#text = “Anonymize the personal identifiable information in the following text by replacing the personal identifiers with <> brackets. For Jackson Thomas, bearing student ID 987654, SIN 321654987, and can be contacted at 123-555-7890, who has shown insufficient progress in his internship requirements. Please draft an academic probation letter.”
#text = “Anonymize the personal identifiable information in the following text: For Nathan Zhao, identified by student ID 345678, SIN 987654321, and reachable at 345-555-6789, who has repeatedly missed critical deadlines for project submissions in the software development course. A letter of academic probation is necessary.”
#text = " Anonymize the personal identifiable information in the following text - ‘Concerning Emma Patel, with student ID 123890, SIN 567890123, and phone number 567-555-1234, for her lack of contributions to team projects in the business analytics class. Draft a letter of academic probation.’ and i want the output in this form ‘:Emily Johnson, <STUDENT_ID>:876543, :123456789, :436-555-2345’”
#text = “Anonymize the personal identifiable information in the following text. Addressing the performance of Lucas Smith, student ID 789012, SIN 234567890, and contactable at 678-555-7890, due to his inadequate effort in the creative writing workshops. An academic probation letter needs to be prepared.”
#text = “Anonymize the personal identifiable information in the following text: Regarding Mia Wang, with student ID 234567, SIN 456789012, and reachable at 890-555-2345, for her continuous absence from mandatory lab sessions in the biology course. A letter of academic probation must be drafted.”
#text = “Anonymize the personal identifiable information in the following text and i want the output in this format ‘:Name’ ‘Student named Grace Harris, with student ID 264813, SIN 579213648, and phone 852-555-9634, has exhibited poor teamwork skills in project-based learning courses. A letter of academic probation is required.’”
#text = “Anonymize the personal identifiable information in the following text and i want the output in this format ‘:Name’ - ‘For Nathan Zhao, identified by student ID 345678, SIN 987654321, and reachable at 345-555-6789, who has repeatedly missed critical deadlines for project submissions in the software development course. A letter of academic probation is necessary.’”
#text = “Anonymize the personal identifiable information in the following text and i want the output in this format ‘:Name’ - ‘Addressing the performance of Lucas Smith, student ID 789012, SIN 234567890, and contactable at 678-555-7890, due to his inadequate effort in the creative writing workshops. An academic probation letter needs to be prepared.’”
#text = “Anonymize the personal identifiable information in the following text and i want the output in this format ‘:Name’ - ‘Regarding Mia Wang, with student ID 234567, SIN 456789012, and reachable at 890-555-2345, for her continuous absence from mandatory lab sessions in the biology course. A letter of academic probation must be drafted.’”
#text = “Anonymize the personal information in the following text. Input: ‘Charlotte Davis, student ID 213456, SIN 546789123, with contact at 212-555-1234, has shown a decline in her mathematics course grades. A letter of academic probation should be issued.’ Format: ‘:Name, <STUDENT_ID>:ID, :Number, :Phone Number’.”
#text = “Anonymize the personal information in the following text and output in the specified format. Format: ‘:Name, <STUDENT_ID>:ID, :Number, :Phone Number’. Input: ‘Student named Grace Harris, with student ID 264813, SIN 579213648, and phone 852-555-9634, has exhibited poor teamwork skills in project-based learning courses. A letter of academic probation is required.’”
#text = “Anonymize the personal information in the following text and output in the specified format. Format: ‘:Name, <STUDENT_ID>:ID, :Number, :Phone Number’. Input: ‘Regarding Amelia Thompson, student ID 654789, SIN 321654987, contactable at 555-678-1234, for her declining grades in advanced calculus. A letter of academic probation is warranted.’”
#text = " Task: Anonymize the personal information in the following text and output in the specified format. Input: ‘Regarding Amelia Thompson, student ID 654789, SIN 321654987, contactable at 555-678-1234, for her declining grades in advanced calculus. A letter of academic probation is warranted.’ Format: ‘:Name, <STUDENT_ID>:ID, :Number, :Phone Number’"
#text = " Task: Anonymize the personal information in the following text and output in the specified format. Input: ‘Student named Grace Harris, with student ID 264813, SIN 579213648, and phone 852-555-9634, has exhibited poor teamwork skills in project-based learning courses. A letter of academic probation is required.’ Format: ‘:Name, <STUDENT_ID>:ID, :Number, :Phone Number’"
#text = " Task: Anonymize the personal information in the following text and output in the specified format. Input: ‘For Nathan Zhao, identified by student ID 345678, SIN 987654321, and reachable at 345-555-6789, who has repeatedly missed critical deadlines for project submissions in the software development course. A letter of academic probation is necessary.’"
#text = " Task: Anonymize the personal information in the following text and output in the specified format. Input: 'For Jackson Thomas, bearing student ID 987654, SIN 321654987, and can be contacted at 123-555-7890, who has shown insufficient progress in his internship requirements. Please draft an academic probation letter."
#text = " Task: Anonymize the personal information in the following text and output in the specified format. Input: ‘Addressing the performance of Lucas Smith, student ID 789012, SIN 234567890, and contactable at 678-555-7890, due to his inadequate effort in the creative writing workshops. An academic probation letter needs to be prepared.’"
#text = " Task: Anonymize the personal information in the following text and output in the specified format. Input: ‘Charlotte Davis, student ID 213456, SIN 546789123, with contact at 212-555-1234, has shown a decline in her mathematics course grades. A letter of academic probation should be issued.’"
#text = " Task: Anonymize the personal information in the following text and output in the specified format. Input: ‘Regarding Amelia Thompson, student ID 654789, SIN 321654987, contactable at 555-678-1234, for her declining grades in advanced calculus. A letter of academic probation is warranted.’"
#text = " Task: Anonymize the personal information in the following text and output in the specified format. Input: ‘The student’s name is Mia Wang, and her SIN is 456789012, and her phone number is 890-555-2345 amd her student ID is 234567, for her continuous absence from mandatory lab sessions in the biology course. A letter of academic probation must be drafted.’"
#text = " Task: Anonymize the personal information in the following text and output in the specified format. Input: ‘Attention needed for Mason Miller, with student ID 369741, SIN 147852369, and phone number 741-555-8522, due to his poor performance on group assignments in business studies. Issue an academic probation letter.’"
#text = " Task: Anonymize the personal information in the following text and output in the specified format. Input: ‘Attention needed for Mason Miller, with student ID 369741, SIN 147852369, and phone number 741-555-8522, due to his poor performance on group assignments in business studies. Issue an academic probation letter.’"
#text = " Task: Anonymize the personal information in the following text and output in the specified format. Input: ‘Concerns about Saoirse Íngrid Björnsdóttir, with student ID 597213, SIN 213597846, and phone number 818-555-2134, have been raised due to his consistent failure to meet coursework deadlines. An academic probation letter is necessary.’"
#text = " Task: Anonymize the personal information in the following text and output in the specified format. Input: ‘Julia Morris, student ID 482759, SIN 759482316, and contact at 213-555-4789, has shown a marked decline in class participation and exam scores. The issuance of an academic probation letter is warranted.’"
#text = " Task: Anonymize the personal information in the following text and output in the specified format. Input: ‘X Æ A-12 Musk, carrying student ID 364285, SIN 285364719, and phone number 504-555-6921, has been reported for disruptive behavior and lack of engagement in group projects. Draft an academic probation letter.’"
#text = " Task: Anonymize the personal information in the following text and output in the specified format. Input: ‘Reports indicate that Sarah Chen, identified by student ID 951753, passport number M0969597,SIN 753951462, with a reachable phone number of 707-555-5309, has not achieved the minimum grade requirements in her major subjects. Prepare an academic probation letter.’"
#text = " Task: Anonymize the personal information in the following text and output in the specified format. Input: ‘The academic performance of Chloë Éloïse Müller-Wang, contact number 323-555-7486, SIN 297846513 and student ID 846297, has been unsatisfactory across multiple subjects, leading to a recommendation for academic probation. An official letter must be drafted.’"
#text = " Task: Anonymize the personal information in the following text and output in the specified format. Input: ‘Reports indicate that Alexander-Jonathon Fitzgerald Beauregard-Smith, with phone number 312-555-8264, SIN 639428071, and student ID 428639, has exhibited a lack of respect towards faculty members, leading to multiple complaints. An academic probation letter is deemed necessary.’"
text = " Task: Anonymize the personal information in the following text and output in the specified format. Input: ‘Reports indicate that Alexander-Jonathon Fitzgerald Beauregard-Smith with phone number 312-555-8264 SIN 639428071 and student ID 428639 has exhibited a lack of respect towards faculty members leading to multiple complaints. An academic probation letter is deemed necessary.’"

# Tokenize and move every tensor to `device`. The earlier version additionally
# forced input_ids to "cuda" while attention_mask followed `device`, which
# mismatches whenever `device` is not CUDA.
inputs = tokenizer(text, return_tensors="pt").to(device)
outputs = model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_new_tokens=100,
    pad_token_id=tokenizer.eos_token_id,
    # NOTE(review): temperature only influences generation when sampling is
    # enabled (do_sample=True); left as originally written — confirm intent.
    temperature=0.5,
)
# print(tokenizer.decode(outputs[0], skip_special_tokens=True))
decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(decoded_output)

import re

# Define a regex pattern to capture everything after "Expected Output: '"
# up to the closing quote. Both straight and typographic single quotes are
# accepted, because the quote characters in this paste were auto-converted and
# the model may emit either form.
pattern = r"Expected Output: ['‘](.*?)['’]"
# Search for the pattern in the output
match = re.search(pattern, decoded_output)
# Extract the matched part
if match:
    extracted_text = match.group(1)  # content of the first capturing group
    print("Extracted Part:", extracted_text)
else:
    print("No match found")
Notebook for inference endpoint:
%% [markdown]
# Set up LLaMA 2 on SageMaker
%% [markdown]
## Set up Dependencies
%%
!pip install sagemaker
%%
pip list | grep sagemaker
%%
pip install -U sagemaker
%%
import sagemaker
import boto3
import numpy
import scipy
# Declare the SageMaker execution role variable globally
sagemaker_execution_role = None


# setup sagemaker session
def setup_sagemaker_session(default_bucket=None):
    """Set up a SageMaker session and resolve the IAM execution role.

    Also stores the resolved role ARN in the module-level
    ``sagemaker_execution_role`` variable.

    Params:
        default_bucket: Default bucket name to use for the session.

    Returns:
        A ``(session, role_arn)`` tuple: the SageMaker session object and the
        ARN of the IAM execution role.
    """
    global sagemaker_execution_role
    session = sagemaker.Session(default_bucket=default_bucket)
    try:
        sagemaker_execution_role = sagemaker.get_execution_role()
    except Exception:
        # Fall back to looking the role up by name through IAM. `Exception`
        # replaces the bare `except:` so KeyboardInterrupt/SystemExit are no
        # longer swallowed, while the best-effort fallback is kept.
        iam = boto3.client("iam")
        sagemaker_execution_role = iam.get_role(RoleName="sagemaker_execution_role")["Role"]["Arn"]
    return session, sagemaker_execution_role
def mask_account_id(account_id):
    """Return a string of ``*`` characters with the same length as *account_id*."""
    return "*" * len(account_id)
def main():
    """Create the SageMaker session and print the role ARN (masked) and region."""
    sagemaker_session_bucket = None
    session, sagemaker_execution_role = setup_sagemaker_session(default_bucket=sagemaker_session_bucket)
    # Mask with ****: field 4 of the colon-separated ARN is replaced by asterisks.
    account_id = sagemaker_execution_role.split(":")[4]
    masked_account_id = mask_account_id(account_id)
    masked_role = sagemaker_execution_role.replace(account_id, masked_account_id)
    print(f"SageMaker role ARN: {masked_role}")
    print(f"SageMaker session region: {session.boto_region_name}")
    # The unmasked ARN is intentionally no longer printed: doing so right after
    # masking it defeated the purpose of the masking above.


# The guard must compare the dunder name; `name == "main"` raises NameError.
if __name__ == "__main__":
    main()
%%
from sagemaker.huggingface import get_huggingface_llm_image_uri
# Fetch Docker image URI for the Hugging Face DLC. Arguments:
#   1. backend name
#   2. Hugging Face DLC version
llm_image = get_huggingface_llm_image_uri("huggingface", version="0.9.3")
# Log the docker image URI
print(f"llm image uri {llm_image}")
%% [markdown]
## Determine Model Requirements
%% [markdown]
- ml.g5.12xlarge instance
- Meta LLaMa 2 model - request form fulfilled/approved
- Hugging Face account
%%
import json
# confirm instance requirements are met for SageMaker session
with open("/opt/ml/metadata/resource-metadata.json") as f:
    metadata = json.load(f)
print(metadata["ResourceName"])
%%
confirm requirements met for kernel
import json
def get_instance_type_from_metadata(metadata_path="/opt/ml/metadata/resource-metadata.json"):
    """Return the ``ResourceName`` value from a resource-metadata JSON file.

    Params:
        metadata_path: Path of the JSON file to read. Defaults to the path
            this notebook originally hard-coded.

    Returns:
        The value stored under ``"ResourceName"``, or ``""`` when the key is
        absent.
    """
    with open(metadata_path) as f:
        metadata = json.load(f)
    return metadata.get("ResourceName", "")
def main():
    """Print whether the resource name from the metadata file matches a supported instance type."""
    resource_name = get_instance_type_from_metadata()
    # List valid instance types
    valid_instance_types = ["ml-g5-2xlarge", "ml-g5-12xlarge", "ml-g5-48xlarge"]
    # Substring match: the resource name only has to contain one of the types.
    if any(instance_type in resource_name for instance_type in valid_instance_types):
        print("Instance configured correctly")
    else:
        print("Need to upgrade to at least ml.g5-2xlarge instance")


# The guard must compare the dunder name; `name == "main"` raises NameError.
if __name__ == "__main__":
    main()
%% [markdown]
## Deploy Meta’s LLaMa model to Amazon SageMaker
%%
import json
import getpass
from sagemaker.huggingface import HuggingFaceModel
def get_sagemaker_config(hub_token=None):
    """Build the instance settings and container environment for deployment.

    Params:
        hub_token: Hugging Face Hub token to place in the environment. When
            ``None`` (the default) the token is requested interactively via
            ``getpass``, as before.

    Returns:
        A ``(instance_type, health_check_timeout, config)`` tuple, where
        ``config`` is the dict of environment variables for the container.
    """
    # Configure sagemaker instance details
    instance_type = "ml.g5.12xlarge"
    number_of_gpu = 1
    health_check_timeout = 300

    if hub_token is None:
        hub_token = getpass.getpass("Enter your Hugging Face Hub Token:")

    # Configure Hugging Face details; numeric values are JSON-encoded to strings.
    config = {
        "HF_MODEL_ID": "Harit10/Llama2-config",
        "SM_NUM_GPUS": json.dumps(number_of_gpu),
        "MAX_INPUT_TOKENS": json.dumps(2048),
        "MAX_TOTAL_TOKENS": json.dumps(4096),
        "MAX_BATCH_TOTAL_TOKENS": json.dumps(8192),
        "HUGGING_FACE_HUB_TOKEN": hub_token,
    }
    return instance_type, health_check_timeout, config
def create_huggingface_model(instance_type, config, role, image_uri):
    """Create a ``HuggingFaceModel`` from the given role, image and environment.

    Params:
        instance_type: Not used by this function; kept so existing calls work.
        config: Environment variables for the container; must contain a
            non-empty ``"HUGGING_FACE_HUB_TOKEN"``.
        role: Passed to ``HuggingFaceModel`` as ``role``.
        image_uri: Passed to ``HuggingFaceModel`` as ``image_uri``.

    Returns:
        The constructed ``HuggingFaceModel``.

    Raises:
        ValueError: If the Hub token is missing or empty. An exception is
            raised instead of using ``assert`` because assertions are removed
            when Python runs with ``-O``.
    """
    if not config.get("HUGGING_FACE_HUB_TOKEN"):
        raise ValueError("Please set your Hugging Face Hub Token")
    llm_model = HuggingFaceModel(role=role, image_uri=image_uri, env=config)
    return llm_model
def main():
    """Build the Hugging Face model definition and deploy it to an endpoint."""
    instance_type, health_check_timeout, config = get_sagemaker_config()
    # Both values are module-level names set by earlier cells of this notebook.
    role = sagemaker_execution_role
    llm_image_to_ref = llm_image
    llm_model = create_huggingface_model(instance_type, config, role, llm_image_to_ref)
    if llm_model:
        llm = llm_model.deploy(
            initial_instance_count=1,
            instance_type=instance_type,
            container_startup_health_check_timeout=health_check_timeout,
        )


# The guard must compare the dunder name; `name == "main"` raises NameError.
if __name__ == "__main__":
    main()
%%
import json
import boto3
# Runtime client used to invoke the deployed endpoint.
sagemaker_runtime = boto3.client("sagemaker-runtime")
# Name of the endpoint to invoke; hard-coded to a specific deployment.
endpoint_name = "huggingface-pytorch-tgi-inference-2024-03-15-13-24-31-061"
def build_llama2_prompt(message):
    """Render a list of chat messages as a single Llama 2 chat prompt string.

    Params:
        message: Sequence of dicts, each with a ``"role"`` and a ``"content"``
            key. A ``"system"`` entry is only treated as a system prompt when
            it is the first element; any entry that is neither that nor a
            ``"user"`` entry is rendered as a completed model turn.

    Returns:
        The prompt string, opened with the start marker and closed with
        ``" [/INST]"``.

    NOTE(review): the ``<s>``, ``</s>``, ``<<SYS>>`` and ``<</SYS>>`` markers
    were stripped from the pasted source (left as ``""`` / ``<>``) and are
    restored here from the published Llama 2 chat format — confirm against
    the original notebook.
    """
    stopPrompt = "</s>"
    startPrompt = "<s>[INST] "
    endPrompt = " [/INST]"
    conversation = []
    # A distinct loop variable avoids shadowing the `message` parameter.
    for index, entry in enumerate(message):
        role = entry["role"]
        if role == "system" and index == 0:
            conversation.append(f"<<SYS>>\n{entry['content']}\n<</SYS>>\n\n")
        elif role == "user":
            conversation.append(entry["content"].strip())
        else:
            # Close the current instruction, emit the reply, then open the next
            # instruction. The original referenced an undefined `stop_token`.
            conversation.append(f"{endPrompt} {entry['content'].strip()} {stopPrompt}{startPrompt}")
    return startPrompt + "".join(conversation) + endPrompt
# Conversation to send: one system message followed by the user instruction.
messages = [
    {
        "role": "system",
        "content": "You are a nonprofit advocate and champion. Your goal is to help entrepreneurs and movers and shakers find their purpose through positivity",
    }
]
instruction = "What does the world need more of right now?"
messages.append({"role": "user", "content": instruction})
prompt = build_llama2_prompt(messages)

# Request payload. The pasted dict was garbled (`{”],` ... `“stop”: [“`) because
# the stop token was stripped from the post.
# NOTE(review): "</s>" is restored as the stop sequence — confirm against the
# original notebook.
input_data = {
    "inputs": prompt,
    "parameters": {
        "do_sample": True,
        "top_p": 0.6,
        "temperature": 0.9,
        "top_k": 50,
        "max_new_tokens": 512,
        "repetition_penalty": 1.03,
        "stop": ["</s>"],
    },
}
input_data_json = json.dumps(input_data)
content_type = "application/json"

response = sagemaker_runtime.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType=content_type,
    Body=input_data_json.encode("utf-8"),
)
response_body = response["Body"].read().decode("utf-8")
response_json = json.loads(response_body)
generated_text = response_json[0]["generated_text"]

# Strip the echoed prompt only when the response actually starts with it, so a
# response that does not echo the prompt is not truncated.
if generated_text.startswith(prompt):
    generated_text = generated_text[len(prompt):]
print(generated_text)