I am trying to read the pdf using langchain.document_loaders and it’s PyPDFLoader class. But getting this error:
Error
[Errno 2] Cannot read an empty file
This is my code:
from langchain.document_loaders import PyPDFLoader
def pdf_parser(uploaded_file):
bytes_data = uploaded_file.read()
with NamedTemporaryFile(delete=False) as tmp: # open a named temporary file
tmp.write(bytes_data) # Write data from the uploaded file into it
pdf_loader = PyPDFLoader(tmp.name) # <---- now it works!
#pdf_loader = PyPDFLoader(file_path) only for file path offline
documents = pdf_loader.load()
def predict(file):
resource = pdf_parser(file)
qa_notes = qa_generator(resource)
return qa_notes
iface = gr.Interface(
fn=predict,
inputs=[
gr.inputs.File(),
],
outputs="text",
description=description,
title=title,
allow_screenshot=True,
)
iface.launch(enable_queue=True, show_error=True)