Remove unused Entry-to-JSONL converter from text-to-entry class and tests

This converter was used earlier, when the index was a plaintext JSONL file. Now that documents are indexed in a DB, this function is no longer required. Simplify the org, md, pdf, and plaintext to-entries tests by removing the entry-to-JSONL conversion step.
2026-03-02 21:19:12 +00:00 · 2024-02-09 17:03:36 +05:30
parent 28105ee027
commit a627f56a64
9 changed files with 21 additions and 87 deletions
--- a/tests/test_pdf_to_entries.py
+++ b/tests/test_pdf_to_entries.py
@@ -1,4 +1,3 @@
-import json
 import os

 from khoj.processor.content.pdf.pdf_to_entries import PdfToEntries
@@ -17,12 +16,8 @@ def test_single_page_pdf_to_jsonl():
    data = {"tests/data/pdf/singlepage.pdf": pdf_bytes}
    entries = PdfToEntries.extract_pdf_entries(pdf_files=data)

-    # Process Each Entry from All Pdf Files
-    jsonl_string = PdfToEntries.convert_pdf_maps_to_jsonl(entries)
-    jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]
-
    # Assert
-    assert len(jsonl_data) == 1
+    assert len(entries) == 1


 def test_multi_page_pdf_to_jsonl():
@@ -35,12 +30,8 @@ def test_multi_page_pdf_to_jsonl():
    data = {"tests/data/pdf/multipage.pdf": pdf_bytes}
    entries = PdfToEntries.extract_pdf_entries(pdf_files=data)

-    # Process Each Entry from All Pdf Files
-    jsonl_string = PdfToEntries.convert_pdf_maps_to_jsonl(entries)
-    jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]
-
    # Assert
-    assert len(jsonl_data) == 6
+    assert len(entries) == 6


 def test_ocr_page_pdf_to_jsonl():