From d20746613afceb815dc7a608f6d4dfd39a3cc0c8 Mon Sep 17 00:00:00 2001 From: sabaimran Date: Mon, 4 Dec 2023 16:15:17 -0500 Subject: [PATCH] Properly filter out empty PDFs for indexing --- src/khoj/processor/content/pdf/pdf_to_entries.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/khoj/processor/content/pdf/pdf_to_entries.py b/src/khoj/processor/content/pdf/pdf_to_entries.py index caa93636..28e89bbe 100644 --- a/src/khoj/processor/content/pdf/pdf_to_entries.py +++ b/src/khoj/processor/content/pdf/pdf_to_entries.py @@ -28,7 +28,7 @@ class PdfToEntries(TextToEntries): ) -> Tuple[int, int]: # Extract required fields from config if not full_corpus: - deletion_file_names = set([file for file in files if files[file] == ""]) + deletion_file_names = set([file for file in files if files[file] == b""]) files_to_process = set(files) - deletion_file_names files = {file: files[file] for file in files_to_process} else: