Llama3 Response

When using the Llama3 model, it sometimes gives me an incomplete answer. What can I do to avoid incomplete responses when generating text with the Serverless Inference API?

You can pass the `max_new_tokens` parameter to the Inference API, as documented here: Detailed parameters.
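
For example, here is a minimal sketch of a request that sets `max_new_tokens` (the model URL and the `HF_TOKEN` environment variable are placeholders for your own setup):

```python
import os
import requests

# Minimal sketch: raise max_new_tokens so the model has room to finish its answer.
# The model URL and HF_TOKEN environment variable are placeholders for your own setup.
API_URL = "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3-8B-Instruct"
headers = {"Authorization": f"Bearer {os.environ['HF_TOKEN']}"}

payload = {
    "inputs": "Summarize what an Applicant Tracking System does.",
    "parameters": {
        "max_new_tokens": 512,      # increase if responses are still cut off
        "return_full_text": False,  # only return the newly generated text
    },
}

response = requests.post(API_URL, headers=headers, json=payload)
response.raise_for_status()
print(response.json()[0]["generated_text"])
```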

```python
import os
import warnings

import requests
import streamlit as st
from dotenv import load_dotenv
from pypdf import PdfReader

warnings.filterwarnings("ignore")

# Load the Hugging Face API token from a .env file
load_dotenv()
key = os.getenv("HF_TOKEN")

st.set_page_config(page_title="Help With AI - Resume", page_icon=":star2:", layout="wide")

st.title("Resume Analysis")
st.markdown("<style>div.block-container{padding-top:1rem;}</style>", unsafe_allow_html=True)
input_text = st.text_area("Job Description", key="input")
uploaded_file = st.file_uploader("Upload your resume (PDF)", type=["pdf"])

def input_pdf_text(uploaded_file):
    """Extract the text of every page in the uploaded PDF."""
    reader = PdfReader(uploaded_file)
    text = ""
    for page in reader.pages:
        text += page.extract_text() or ""
    return text

input_prompt = """You are an skilled ATS (Applicant Tracking System) scanner with a deep understanding of Human Resources and ATS functionality, your task is to evaluate the resume against the provided job description. give me the percentage of match if the resume matches
the job description. First the output should come as percentage and then keywords missing and last final thoughts.""" 

input_prompt1 = """You are an experienced Human Resource Manager in the domain of Human Resources. Your task is to review the provided resume in comparison to the job description. Please share your professional evaluation on whether the candidate's profile aligns with the role. Highlight any recommended changes to the resume that would better reflect the specified job requirements."""

input_prompt2 = """You are an experienced recruiter in the domain of Human Resources, your task is to provide questions to ask regarding the provided resume."""

if uploaded_file is not None:
    pdf_content = input_pdf_text(uploaded_file)
    st.write("PDF Uploaded Successfully")

submit1 = st.button("Give recommendation for Improvement")
submit2 = st.button("Resume Related Questions to ask")
submit3 = st.button("Percentage match")

API_URL = "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3-8B-Instruct"
headers = {"Authorization": f"Bearer {key}"}

def query(payload):
    """Send a request to the Serverless Inference API and return the parsed JSON."""
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()

if submit3:
    if uploaded_file is not None:
        # Combine the instructions, job description, and extracted resume text into one prompt
        input_data = f"""
            input prompt: {input_prompt}
            job description: {input_text}
            resume: {pdf_content}
        """
        response1 = query({
            "inputs": input_data,
            "parameters": {
                "max_new_tokens": 250,
                "return_full_text": False,
                "max_time": 120.0,
            }
        })
        # The API returns a list of generations on success and a dict with an "error" key on failure
        if isinstance(response1, dict) and "error" in response1:
            st.error(response1["error"])
        else:
            data = response1[0]["generated_text"]
            st.subheader("Analysis")
            st.write(data)
```

I added `max_new_tokens` to my code, but I am still getting an incomplete answer.