Hi,
I have a simple script that runs SageMaker Batch Transform on the imdb dataset using Llama-2. The code runs fine, but out of the 200 records in the input .jsonl file, Batch Transform with strategy='MultiRecord' returns only one line (the inference for my first input record) and then exits with success. If I choose strategy='SingleRecord' instead, it returns results for all 200 lines, but it is extremely slow (about 4 hours for 200 prompts).
I have attached the code below and would really appreciate your feedback. I have tried many things and searched the forums exhaustively, with no luck. Looking forward to hearing from you. Thanks!
import json

from datasets import load_dataset
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri

imdb = load_dataset("imdb")
small_test_dataset = imdb["test"].shuffle(seed=42).select(range(200))
with open(batchtransform_data_file_path, mode="w+") as outfile, \
     open(batchtransform_data_with_label_file_path, mode="w+") as outfile_with_label:
    i = -1
    # Write one JSON object per line for each record in the dataset
    for obj in small_test_dataset:
        i = i + 1
        messages1 = [
            {"role": "system", "content": "You are an intelligent Review data analyst, your goal is to rate the user reviews as Positive, Neutral, or Negative. Instruction: Based on the review provided, classify the review with ONLY a single word, as either Positive, or Neutral, or Negative."}
        ]
        instruction = template1["prompt"].format(data=obj["text"])
        messages1.append({"role": "user", "content": instruction})
        prompt = build_llama2_prompt(messages1)
        # Format the data according to the template
        formatted_data = {
            # "id": i,
            "inputs": prompt,
            # "max_new_tokens": Max_New_Tokens,
            # "truncation": Input_Truncation,
        }
        formatted_data_with_label = {
            "id": i,
            "inputs": prompt,
            "label": obj["label"],
            # "max_new_tokens": Max_New_Tokens,
            # "truncation": Input_Truncation,
        }
        # Write the formatted data to the new files
        formatted_data["inputs"] = formatted_data["inputs"].replace("@", "")
        json.dump(formatted_data, outfile)
        outfile.write('\n')
        formatted_data_with_label["inputs"] = formatted_data_with_label["inputs"].replace("@", "")
        json.dump(formatted_data_with_label, outfile_with_label)
        outfile_with_label.write('\n')
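(For completeness: template1 and build_llama2_prompt are defined elsewhere in my notebook. Roughly, they look like the sketch below, which follows the standard Llama-2 [INST]/<<SYS>> chat format; the template text here is a simplified placeholder, not my exact prompt.)

def build_llama2_prompt(messages):
    # Standard Llama-2 chat format: the system message is wrapped in
    # <<SYS>> tags inside the first [INST] block.
    start_prompt = "<s>[INST] "
    end_prompt = " [/INST]"
    conversation = []
    for index, message in enumerate(messages):
        if message["role"] == "system" and index == 0:
            conversation.append(f"<<SYS>>\n{message['content']}\n<</SYS>>\n\n")
        elif message["role"] == "user":
            conversation.append(message["content"].strip())
        else:
            conversation.append(f" [/INST] {message['content'].strip()} </s><s>[INST] ")
    return start_prompt + "".join(conversation) + end_prompt

# Simplified placeholder; my real template adds more instructions around the review
template1 = {"prompt": "Review: {data}\nSentiment:"}

So each line of the resulting .jsonl file is a single JSON object along the lines of:

{"inputs": "<s>[INST] <<SYS>>\nYou are an intelligent Review data analyst, ...\n<</SYS>>\n\nReview: ...\nSentiment: [/INST]"}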
llm_image = get_huggingface_llm_image_uri(
    "huggingface",  # huggingface or lmi
    version=llm_image_uri_ver,
    session=Sagemaker_Session,
    region=region_name,
)

config = {
    'HF_MODEL_ID': HF_model_name,                      # model_id from hf.co/models
    'SM_NUM_GPUS': json.dumps(number_of_gpu),          # number of GPUs used per replica
    'MAX_INPUT_LENGTH': json.dumps(MAX_INPUT_LENGTH),  # max length of input text
    'MAX_TOTAL_TOKENS': json.dumps(MAX_TOTAL_TOKENS),  # max length of the generation (including input text)
    'MAX_BATCH_TOTAL_TOKENS': json.dumps(MAX_BATCH_TOTAL_TOKENS),  # limits the number of tokens processed in parallel during generation
    'HUGGING_FACE_HUB_TOKEN': HUGGING_FACE_HUB_TOKEN,
}

llm_model = HuggingFaceModel(
    role=my_role,
    image_uri=llm_image,
    env=config,
)

hyper_params = {"max_new_tokens": str(Max_New_Tokens), "return_full_text": str(False)}

batch_job = llm_model.transformer(
    instance_count=Instance_Count,
    instance_type=InstanceType,
    strategy='MultiRecord',
    # strategy='SingleRecord',  # works, but extremely slow
    assemble_with='Line',
    output_path=s3_output_data_path,
    env=hyper_params,
    accept='application/json',
)

batch_job.transform(
    data=f"{s3_input_data_path}/{batchtransform_data_file_name}",
    content_type='application/json',  # also tried 'application/jsonlines'
    split_type='Line',
    wait=True,
)
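This is how I check the results afterwards (a minimal sketch, assuming the default Batch Transform naming, where the output object is the input file name with a .out suffix directly under output_path):

import boto3

s3 = boto3.client("s3")
# Batch Transform writes "<input-file-name>.out" under output_path
bucket, _, prefix = s3_output_data_path.replace("s3://", "").partition("/")
obj = s3.get_object(Bucket=bucket, Key=f"{prefix}/{batchtransform_data_file_name}.out")
lines = [l for l in obj["Body"].read().decode("utf-8").splitlines() if l.strip()]
print(len(lines))  # 200 with SingleRecord, but only 1 with MultiRecord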