Arabic models timeout error

import os
import pickle
import time

import streamlit as st
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEndpoint
from langchain.chains.question_answering import load_qa_chain


def main():
    st.set_page_config(page_title="Your PDF Bot")
    st.title("LLM PDF READER")
    with st.sidebar:
        st.header("Chat With PDF 🗨️")
        load_dotenv()
        pdf = st.file_uploader(label="Upload File here", type=["pdf"])  # PdfReader can only parse PDFs

    if pdf is not None:
        pdf_reader = PdfReader(pdf)
        texts = []
        for page in pdf_reader.pages:
            text = page.extract_text() or ""  # extract_text() can return None for image-only pages
            texts.append(text)

        text_splitter = CharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len)
        docs = text_splitter.create_documents(texts)

        store_name = pdf.name[:-4]
        if os.path.exists(f"{store_name}.pkl"):
            with open(f"{store_name}.pkl", "rb") as f:
                VectorStore = pickle.load(f)
            st.write("Embeddings are loaded from the disk")
        else:
            model_name = "sentence-transformers/all-mpnet-base-v2"
            embeddings = HuggingFaceBgeEmbeddings(model_name=model_name)
            VectorStore = FAISS.from_documents(docs, embedding=embeddings)
            with open(f"{store_name}.pkl", "wb") as f:
                pickle.dump(VectorStore, f)
            st.write("Embeddings computations completed!")

        query = st.text_input("Ask a question about your document:")
        if query:
            documents = VectorStore.similarity_search(query, k=3)
            context = " ".join(doc.page_content for doc in documents)

            # Prepare the Arabic prompt
            arabic_prompt = f"Please respond in Arabic:\nالسؤال: {query}\nالسياق: {context}"

            # add sleep delay
            time.sleep(2)
            # Define the LLM
            repo_id = "inceptionai/jais-family-1p3b"  # Jais: an Arabic-centric LLM family
            llm = HuggingFaceEndpoint(temperature=0.7, repo_id=repo_id)

            # Load the QA chain
            chain = load_qa_chain(llm=llm, chain_type="stuff")
            try:
                response = chain.invoke({"input_documents": documents, "question": arabic_prompt})
                st.write("Response:", response["output_text"])
            except Exception as e:
                st.error(f"An error occurred: {e}")

if __name__ == "__main__":
    main()

When I run this code I get a timeout error whenever I use an Arabic text2text-generation model,

but when I use any English model it works.

Even when I ask a question in Arabic, it gets answered in English when using an English model.

I need help making a ChatPDF app that reads Arabic PDFs and answers in Arabic.


As long as it works correctly in English, your code itself must be correct.
I am not a native English speaker either, so I sometimes ask the LLM to respond in my own language, and in those cases it takes much longer to answer. Here, the extra translation work inside the LLM may have overloaded it and caused the timeout.
Try increasing the LangChain timeout value; the default of 120 seconds is too short.
https://api.python.langchain.com/en/latest/huggingface/llms/langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.html#langchain_huggingface.llms.huggingface_endpoint.HuggingFaceEndpoint.timeout
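For example, you could pass a larger timeout when constructing the endpoint. This is a minimal sketch based on your snippet; the 300-second value is arbitrary, so tune it for your model and prompt size:

from langchain_huggingface import HuggingFaceEndpoint

# Give the Arabic model more time to answer before the client gives up.
# timeout defaults to 120 seconds; 300 here is just an example value.
llm = HuggingFaceEndpoint(
    repo_id="inceptionai/jais-family-1p3b",
    temperature=0.7,
    timeout=300,
)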