In my code I am generating false ground truth and invoke the model as follows:
def generate_load_and_ground_truth():
df = pd.read_csv('validation_with_predictions.csv')
gt_records = []
for i, row in df.iterrows():
suffix = uuid.uuid1().hex
inference_id = f'{i}-{suffix}'
data = np.array([X_test[i]])
payload = {'instances': data}
args = {'InferenceId': inference_id}
out = predictor.predict(data = payload, initial_args = args)
gt_records.append(str({
"groundTruthData": {
"data": str(df['label'][i]),
"encoding": 'CSV',
},
"eventMetadata": {
"eventId": str(inference_id),
},
"eventVersion": "0",
}))
upload_ground_truth(gt_records, ground_truth_upload_path, datetime.utcnow())
def upload_ground_truth(records, path, upload_time):
data_to_upload = ",".join(records)
data_to_upload = data_to_upload
target_s3_uri = f"{path}/{upload_time:%Y/%m/%d/%H/%M%S}.jsonl"
print(f"Uploading {len(records)} records to", target_s3_uri)
S3Uploader.upload_string_as_file_body(data_to_upload, target_s3_uri)
When the model schedule ran it gave the error message
'MonitoringExecutionStatus': 'Failed',
'FailureReason': 'Algorithm Error: See Job Logs for more information.'
Looking into the cloud watch log I found the error to be:
'Cannot resolve column name "groundTruthMetadata.eventId" among (_corrupt_record);'
Then I used this pre-process function at my endpoint.
import json
def preprocess_handler(inference_record):
input_dict = json.loads(inference_record.endpoint_input.data)
output_dict = json.loads(inference_record.endpoint_output.data)
input_data = str(input_dict['instances'].reshape(3072))[1:-1]
output_data = str(np.argmax(output_dict['predictions'][0]))
return_dict = {'prediction000':output_data, 'feature000':input_data}
return return_dict
It gives the error:
'FailureReason': 'InternalServerError: We encountered an internal error. Please try again.'}
Sample data in inference_record is:
{
"captureData": {
"endpointInput": {
"observedContentType": "application/json",
"mode": "INPUT",
"data": data,
#"{'instances': [[[[0.6196078658103943, 0.43921568989753723, 0.1921568661928177]]]]}",
# SAMPLE OF data_lst VERSION; dict with 4 dimensional array
"encoding": "JSON"
},
"endpointOutput": {
"observedContentType": "application/json",
"mode": "OUTPUT",
"data": "{\n \"predictions\": [[0.000721988094, 0.000489010592, 0.0307604838, 0.291437089, 0.0597994663, 0.462541133, 0.110468164, 0.041162733, 0.00192647439, 0.000693516282]\n ]\n}",
"encoding": "JSON"
}
},
"eventMetadata": {
"eventId": "eb71956e-9d99-4bfe-a35f-6d5a33c7e701",
"inferenceId": "155-a02fa0aaf86211ec9a027f52933c247f",
"inferenceTime": "2022-06-30T10:51:41Z"
},
"eventVersion": "0"
}