How can I prompt Llama to only use my provided context?

Hi, I’m trying to implement a RAG system for a school project: I use kNN to retrieve relevant documents and then generate an answer based on those documents. Is there an easy way to do the prompt engineering for Llama-2 so that it actually only uses the provided context? The output of my code is very nonsensical. Thanks in advance!

import transformers
from transformers import AutoTokenizer

model_llama = "/kaggle/input/llama-2/pytorch/7b-chat-hf/1"

tokenizer_llama = AutoTokenizer.from_pretrained(model_llama)
pipeline_llama = transformers.pipeline(
    "text-generation",
    model=model_llama,
    tokenizer=tokenizer_llama,
)
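
For context, query_knn is my own helper that embeds the question and returns the k nearest rows of my DataFrame. Roughly this sketch (sentence-transformers and sklearn are used purely for illustration; the model name and k are placeholders, and index is a fitted NearestNeighbors):

# Illustrative sketch only: my real query_knn is built elsewhere, but it does
# roughly this: embed the question, find the k nearest neighbours in the
# fitted index, and return the matching rows of df (which has a 'text' column).
from sentence_transformers import SentenceTransformer
from sklearn.neighbors import NearestNeighbors

embedder = SentenceTransformer("all-MiniLM-L6-v2")  # placeholder model

def query_knn(question, index: NearestNeighbors, df, k=3):
    q_vec = embedder.encode([question])                     # shape (1, dim)
    _, neighbour_ids = index.kneighbors(q_vec, n_neighbors=k)
    return df.iloc[neighbour_ids[0]]                        # rows incl. 'text'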

def generate_answer_llama(question, pipeline=pipeline_llama, tokenizer=tokenizer_llama):
    # Perform the kNN query for the question argument (not the global `query`)
    top_docs = query_knn(question, index, df)['text']
    # Concatenate the top documents into one context string
    documents = "\n\n".join(top_docs)
    
    # Build the prompt in the Llama-2 chat format: the <<SYS>> system block
    # belongs *inside* the first [INST] ... [/INST] pair, and the model
    # continues generating after [/INST], so no "Assistant:" marker is needed.
    prompt = f"""[INST] <<SYS>>
Answer the question using only the information from the document below.
If the information needed to answer the question is not contained in the document, respond only with "Not in the text."
Answer the question fully and effectively.
Respond in short, concise, yet fully formulated sentences, being precise and accurate.
<</SYS>>

Document:
{documents}

Question: {question} [/INST]"""
    
    sequences = pipeline(
        prompt,
        do_sample=True,
        top_k=50,
        num_return_sequences=1,  # only one answer is used below
        eos_token_id=tokenizer.eos_token_id,
        max_new_tokens=2048,
        return_full_text=False,  # return only the newly generated tokens
        temperature=0.1,
    )
    
    # With return_full_text=False the pipeline returns only the completion
    # (everything after [/INST]), so there is no "Assistant:" prefix to strip.
    return sequences[0]['generated_text'].strip()
    
# Example usage
query = 'What is NLP?'
results = generate_answer_llama(query)

print(results)
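
If your transformers version is recent enough to ship apply_chat_template, you can also skip hand-writing the [INST]/<<SYS>> markup and let the tokenizer render it; a minimal sketch using the same documents/question strings as above:

# Sketch: let the tokenizer render Llama-2's chat markup itself
# (assumes a transformers release that includes apply_chat_template).
messages = [
    {"role": "system",
     "content": "Answer only from the provided document. "
                "If the answer is not in it, reply with 'Not in the text.'"},
    {"role": "user",
     "content": f"Document:\n{documents}\n\nQuestion: {question}"},
]
prompt = tokenizer_llama.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
# The resulting prompt string can be passed to pipeline_llama exactly as above.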

Hey, did you ever get an answer? I’m trying to do the same thing.