mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-07 13:23:15 +00:00
Properly filter out empty PDFs for indexing
This commit is contained in:
@@ -28,7 +28,7 @@ class PdfToEntries(TextToEntries):
|
|||||||
) -> Tuple[int, int]:
|
) -> Tuple[int, int]:
|
||||||
# Extract required fields from config
|
# Extract required fields from config
|
||||||
if not full_corpus:
|
if not full_corpus:
|
||||||
deletion_file_names = set([file for file in files if files[file] == ""])
|
deletion_file_names = set([file for file in files if files[file] == b""])
|
||||||
files_to_process = set(files) - deletion_file_names
|
files_to_process = set(files) - deletion_file_names
|
||||||
files = {file: files[file] for file in files_to_process}
|
files = {file: files[file] for file in files_to_process}
|
||||||
else:
|
else:
|
||||||
|
|||||||
Reference in New Issue
Block a user