diff --git a/src/khoj/processor/pdf/pdf_to_entries.py b/src/khoj/processor/pdf/pdf_to_entries.py index 19d463eb..64e13031 100644 --- a/src/khoj/processor/pdf/pdf_to_entries.py +++ b/src/khoj/processor/pdf/pdf_to_entries.py @@ -68,13 +68,16 @@ class PdfToEntries(TextToEntries): with open(f"{tmp_file}", "wb") as f: bytes = pdf_files[pdf_file] f.write(bytes) - loader = PyMuPDFLoader(f"{tmp_file}", extract_images=True) + try: + loader = PyMuPDFLoader(f"{tmp_file}", extract_images=True) + except ModuleNotFoundError: + loader = PyMuPDFLoader(f"{tmp_file}") pdf_entries_per_file = [page.page_content for page in loader.load()] entry_to_location_map += zip(pdf_entries_per_file, [pdf_file] * len(pdf_entries_per_file)) entries.extend(pdf_entries_per_file) except Exception as e: logger.warning(f"Unable to process file: {pdf_file}. This file will not be indexed.") - logger.warning(e) + logger.warning(e, exc_info=True) finally: if os.path.exists(f"{tmp_file}"): os.remove(f"{tmp_file}")