From 92cbfef7abcf6cf1658fcd50565b581b752ac80f Mon Sep 17 00:00:00 2001 From: sabaimran Date: Tue, 29 Aug 2023 14:34:08 -0700 Subject: [PATCH] Skip plaintext file indexing if there's a parsing issue and log the file --- src/khoj/processor/plaintext/plaintext_to_jsonl.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/khoj/processor/plaintext/plaintext_to_jsonl.py b/src/khoj/processor/plaintext/plaintext_to_jsonl.py index 795a9109..3dbf1cd9 100644 --- a/src/khoj/processor/plaintext/plaintext_to_jsonl.py +++ b/src/khoj/processor/plaintext/plaintext_to_jsonl.py @@ -91,10 +91,13 @@ class PlaintextToJsonl(TextToJsonl): for plaintext_file in plaintext_files: with open(plaintext_file, "r") as f: - plaintext_content = f.read() - if plaintext_file.endswith(("html", "htm", "xml")): - plaintext_content = PlaintextToJsonl.extract_html_content(plaintext_content) - entry_to_file_map.append((plaintext_content, plaintext_file)) + try: + plaintext_content = f.read() + if plaintext_file.endswith(("html", "htm", "xml")): + plaintext_content = PlaintextToJsonl.extract_html_content(plaintext_content) + entry_to_file_map.append((plaintext_content, plaintext_file)) + except Exception as e: + logger.error(f"Error processing file: {plaintext_file} - {e}", exc_info=True) return dict(entry_to_file_map)