Use List, Tuple, Set from typing to support Python 3.8 for khoj

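Before Python 3.9, built-in collection types such as list, tuple and set cannot be subscripted directly in type hints (e.g. list[str]); the List, Tuple and Set equivalents from the typing module must be used instead. Resolves #130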
2026-03-03 21:29:08 +00:00 · 2023-02-06 01:08:43 -03:00
parent 14f28e3a03
commit cba9a6a703
12 changed files with 42 additions and 37 deletions
--- a/src/processor/ledger/beancount_to_jsonl.py
+++ b/src/processor/ledger/beancount_to_jsonl.py
@@ -2,7 +2,7 @@
 import glob
 import re
 import logging
-import time
+from typing import List

 # Internal Packages
 from src.processor.text_to_jsonl import TextToJsonl
@@ -109,7 +109,7 @@ class BeancountToJsonl(TextToJsonl):
        return entries, dict(transaction_to_file_map)

    @staticmethod
-    def convert_transactions_to_maps(parsed_entries: list[str], transaction_to_file_map) -> list[Entry]:
+    def convert_transactions_to_maps(parsed_entries: List[str], transaction_to_file_map) -> List[Entry]:
        "Convert each parsed Beancount transaction into a Entry"
        entries = []
        for parsed_entry in parsed_entries:
@@ -120,6 +120,6 @@ class BeancountToJsonl(TextToJsonl):
        return entries

    @staticmethod
-    def convert_transaction_maps_to_jsonl(entries: list[Entry]) -> str:
+    def convert_transaction_maps_to_jsonl(entries: List[Entry]) -> str:
        "Convert each Beancount transaction entry to JSON and collate as JSONL"
        return ''.join([f'{entry.to_json()}\n' for entry in entries])
--- a/src/processor/markdown/markdown_to_jsonl.py
+++ b/src/processor/markdown/markdown_to_jsonl.py
@@ -3,6 +3,7 @@ import glob
 import re
 import logging
 import time
+from typing import List

 # Internal Packages
 from src.processor.text_to_jsonl import TextToJsonl
@@ -110,7 +111,7 @@ class MarkdownToJsonl(TextToJsonl):
        return entries, dict(entry_to_file_map)

    @staticmethod
-    def convert_markdown_entries_to_maps(parsed_entries: list[str], entry_to_file_map) -> list[Entry]:
+    def convert_markdown_entries_to_maps(parsed_entries: List[str], entry_to_file_map) -> List[Entry]:
        "Convert each Markdown entries into a dictionary"
        entries = []
        for parsed_entry in parsed_entries:
@@ -121,6 +122,6 @@ class MarkdownToJsonl(TextToJsonl):
        return entries

    @staticmethod
-    def convert_markdown_maps_to_jsonl(entries: list[Entry]):
+    def convert_markdown_maps_to_jsonl(entries: List[Entry]):
        "Convert each Markdown entry to JSON and collate as JSONL"
        return ''.join([f'{entry.to_json()}\n' for entry in entries])
--- a/src/processor/org_mode/org_to_jsonl.py
+++ b/src/processor/org_mode/org_to_jsonl.py
@@ -2,7 +2,7 @@
 import glob
 import logging
 import time
-from typing import Iterable
+from typing import Iterable, List

 # Internal Packages
 from src.processor.org_mode import orgnode
@@ -18,7 +18,7 @@ logger = logging.getLogger(__name__)

 class OrgToJsonl(TextToJsonl):
    # Define Functions
-    def process(self, previous_entries: list[Entry]=None):
+    def process(self, previous_entries: List[Entry]=None):
        # Extract required fields from config
        org_files, org_file_filter, output_file = self.config.input_files, self.config.input_filter, self.config.compressed_jsonl
        index_heading_entries = self.config.index_heading_entries
@@ -101,9 +101,9 @@ class OrgToJsonl(TextToJsonl):
        return entries, dict(entry_to_file_map)

    @staticmethod
-    def convert_org_nodes_to_entries(parsed_entries: list[orgnode.Orgnode], entry_to_file_map, index_heading_entries=False) -> list[Entry]:
+    def convert_org_nodes_to_entries(parsed_entries: List[orgnode.Orgnode], entry_to_file_map, index_heading_entries=False) -> List[Entry]:
        "Convert Org-Mode nodes into list of Entry objects"
-        entries: list[Entry] = []
+        entries: List[Entry] = []
        for parsed_entry in parsed_entries:
            if not parsed_entry.hasBody and not index_heading_entries:
                # Ignore title notes i.e notes with just headings and empty body
--- a/src/processor/org_mode/orgnode.py
+++ b/src/processor/org_mode/orgnode.py
@@ -37,6 +37,7 @@ import re
 import datetime
 from pathlib import Path
 from os.path import relpath
+from typing import List

 indent_regex = re.compile(r'^ *')

@@ -69,7 +70,7 @@ def makelist(filename):
   sched_date    = ''
   deadline_date = ''
   logbook       = list()
-   nodelist: list[Orgnode] = list()
+   nodelist: List[Orgnode] = list()
   property_map  = dict()
   in_properties_drawer = False
   in_logbook_drawer = False
--- a/src/processor/text_to_jsonl.py
+++ b/src/processor/text_to_jsonl.py
@@ -1,9 +1,8 @@
 # Standard Packages
 from abc import ABC, abstractmethod
 import hashlib
-import time
 import logging
-from typing import Callable
+from typing import Callable, List, Tuple
 from src.utils.helpers import timer

 # Internal Packages
@@ -18,16 +17,16 @@ class TextToJsonl(ABC):
        self.config = config

    @abstractmethod
-    def process(self, previous_entries: list[Entry]=None) -> list[tuple[int, Entry]]: ...
+    def process(self, previous_entries: List[Entry]=None) -> List[Tuple[int, Entry]]: ...

    @staticmethod
    def hash_func(key: str) -> Callable:
        return lambda entry: hashlib.md5(bytes(getattr(entry, key), encoding='utf-8')).hexdigest()

    @staticmethod
-    def split_entries_by_max_tokens(entries: list[Entry], max_tokens: int=256, max_word_length: int=500) -> list[Entry]:
+    def split_entries_by_max_tokens(entries: List[Entry], max_tokens: int=256, max_word_length: int=500) -> List[Entry]:
        "Split entries if compiled entry length exceeds the max tokens supported by the ML model."
-        chunked_entries: list[Entry] = []
+        chunked_entries: List[Entry] = []
        for entry in entries:
            compiled_entry_words = entry.compiled.split()
            # Drop long words instead of having entry truncated to maintain quality of entry processed by models
@@ -39,7 +38,7 @@ class TextToJsonl(ABC):
                chunked_entries.append(entry_chunk)
        return chunked_entries

-    def mark_entries_for_update(self, current_entries: list[Entry], previous_entries: list[Entry], key='compiled', logger=None) -> list[tuple[int, Entry]]:
+    def mark_entries_for_update(self, current_entries: List[Entry], previous_entries: List[Entry], key='compiled', logger=None) -> List[Tuple[int, Entry]]:
        # Hash all current and previous entries to identify new entries
        with timer("Hash previous, current entries", logger):
            current_entry_hashes = list(map(TextToJsonl.hash_func(key), current_entries))