Hello, I’m trying to deploy a fine-tuned Llama 2 model to SageMaker, using SageMaker Pipelines.
The code looks like this:
# bucket, environment, hf_token, role, pipeline_session and tags are defined earlier in the notebook
import json
from datetime import datetime
from sagemaker.huggingface import HuggingFace, HuggingFaceModel
from sagemaker.workflow.parameters import ParameterString
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.steps import TrainingStep
from sagemaker.workflow.model_step import ModelStep
output_path = f's3://{bucket}/llama2_qlora/output'
model_package_group_name = f"Llama2-qlora-{environment}"
model_id = 'meta-llama/Llama-2-7b-hf'
pytorch_version="2.0"
transformers_version="4.28"
python_version="py310"
entry_point="finetune.py"
source_dir="code"
# Pipeline Input Parameters
training_instance_type = ParameterString(name="TrainingInstanceType", default_value="ml.g5.4xlarge")
inference_instance_type = ParameterString(name="InferenceInstanceType", default_value="ml.g5.4xlarge")
model_approval_status = ParameterString(
name="ModelApprovalStatus", default_value="PendingManualApproval"
)
# Define Training Step
# hyperparameters, which are passed into the training job
hyperparameters = {
'model_id': model_id, # pre-trained model
'hf_token': hf_token, # huggingface token to access llama 2
}
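# Note: SageMaker hands these to finetune.py as command-line arguments
# ('--model_id ...', '--hf_token ...'); the parsing side looks roughly like:
#   parser = argparse.ArgumentParser()
#   parser.add_argument('--model_id', type=str)
#   parser.add_argument('--hf_token', type=str)
#   args, _ = parser.parse_known_args()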
# create the Estimator
huggingface_estimator = HuggingFace(
entry_point = entry_point, # training script, defined above as 'finetune.py'
source_dir = source_dir, # directory which includes all the files needed for training
instance_type = training_instance_type, # instance type for the training job (pipeline parameter)
instance_count = 1, # the number of instances used for training
base_job_name = 'llama2-qlora', # the name of the training job
role = role, # IAM role used by the training job to access AWS resources, e.g. S3
sagemaker_session = pipeline_session, # sagemaker session used to execute the training job
volume_size = 300, # the size of the EBS volume in GB
transformers_version = transformers_version, # the transformers version used in the training job
pytorch_version = pytorch_version, # the PyTorch version used in the training job
py_version = python_version, # the python version used in the training job
hyperparameters = hyperparameters, # the hyperparameters passed to the training job
environment = { "HUGGINGFACE_HUB_CACHE": "/tmp/.cache" }, # set env variable to cache models in /tmp
output_path = output_path, # path to which the trained model will be saved
)
step_train = TrainingStep(
name="TrainModel",
step_args=huggingface_estimator.fit(),
)
# Define Create Model Step
hf_env = {
'HF_TASK':'text-generation',
}
model = HuggingFaceModel(
name=f"Llama2-qlora-{environment}",
model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
transformers_version=transformers_version,
pytorch_version=pytorch_version,
py_version=python_version,
role=role,
sagemaker_session=pipeline_session,
env=hf_env
)
# Define Register Model Step
register_model_args = model.register(
content_types=["application/json"],
response_types=["application/json"],
inference_instances=[inference_instance_type],
model_package_group_name=model_package_group_name,
approval_status=model_approval_status,
)
step_register_model = ModelStep(
name="RegisterModel",
step_args=register_model_args,
depends_on=[step_train]
)
# Define Pipeline
pipeline_name = f"Llama2-qlora-{environment}"
pipeline = Pipeline(
name=pipeline_name,
parameters=[
training_instance_type,
inference_instance_type,
model_approval_status,
],
steps=[step_train, step_register_model],
)
definition = json.loads(pipeline.definition()) # parse the definition as a sanity check
pipeline.upsert(role_arn=role, tags=tags)
execution = pipeline.start(execution_description=f"{pipeline_name} {datetime.now()}")
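After the pipeline has run and the package has been approved, the endpoint is deployed from the model registry, roughly like this (a sketch; model_package_arn stands for the ARN of the approved package and is looked up separately):
from sagemaker import ModelPackage
model_package = ModelPackage(
role=role,
model_package_arn=model_package_arn, # hypothetical variable: ARN of the approved package
)
predictor = model_package.deploy(
initial_instance_count=1,
instance_type='ml.g5.4xlarge',
)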
When deploying an endpoint from the registered model this way, I get the following error at inference time:
/opt/ml/model does not appear to have a file named config.json
Any ideas?
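In case it helps narrow things down, this is how I can inspect what actually ends up in the training artifact (a sketch; the S3 key is a placeholder, the real one is the S3ModelArtifacts URI from the training job):
import tarfile
import boto3
s3 = boto3.client('s3')
# placeholder key; use the S3ModelArtifacts URI reported by the training job
s3.download_file(bucket, 'llama2_qlora/output/<training-job-name>/output/model.tar.gz', 'model.tar.gz')
with tarfile.open('model.tar.gz') as tar:
    print(tar.getnames()) # check whether config.json is present at the root of the archive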