Use List, Tuple, Set from typing to support Python 3.8 for khoj

Before Python 3.9, you can't directly use the built-in types list, tuple,
set, etc. as generic type hints (PEP 585); the typing-module equivalents
(List, Tuple, Set) are required instead

Resolves #130
This commit is contained in:
Debanjum Singh Solanky
2023-02-06 01:08:43 -03:00
parent 14f28e3a03
commit cba9a6a703
12 changed files with 42 additions and 37 deletions

View File

@@ -2,7 +2,7 @@
import glob
import re
import logging
import time
from typing import List
# Internal Packages
from src.processor.text_to_jsonl import TextToJsonl
@@ -109,7 +109,7 @@ class BeancountToJsonl(TextToJsonl):
return entries, dict(transaction_to_file_map)
@staticmethod
def convert_transactions_to_maps(parsed_entries: list[str], transaction_to_file_map) -> list[Entry]:
def convert_transactions_to_maps(parsed_entries: List[str], transaction_to_file_map) -> List[Entry]:
"Convert each parsed Beancount transaction into a Entry"
entries = []
for parsed_entry in parsed_entries:
@@ -120,6 +120,6 @@ class BeancountToJsonl(TextToJsonl):
return entries
@staticmethod
def convert_transaction_maps_to_jsonl(entries: list[Entry]) -> str:
def convert_transaction_maps_to_jsonl(entries: List[Entry]) -> str:
"Convert each Beancount transaction entry to JSON and collate as JSONL"
return ''.join([f'{entry.to_json()}\n' for entry in entries])

View File

@@ -3,6 +3,7 @@ import glob
import re
import logging
import time
from typing import List
# Internal Packages
from src.processor.text_to_jsonl import TextToJsonl
@@ -110,7 +111,7 @@ class MarkdownToJsonl(TextToJsonl):
return entries, dict(entry_to_file_map)
@staticmethod
def convert_markdown_entries_to_maps(parsed_entries: list[str], entry_to_file_map) -> list[Entry]:
def convert_markdown_entries_to_maps(parsed_entries: List[str], entry_to_file_map) -> List[Entry]:
"Convert each Markdown entries into a dictionary"
entries = []
for parsed_entry in parsed_entries:
@@ -121,6 +122,6 @@ class MarkdownToJsonl(TextToJsonl):
return entries
@staticmethod
def convert_markdown_maps_to_jsonl(entries: list[Entry]):
def convert_markdown_maps_to_jsonl(entries: List[Entry]):
"Convert each Markdown entry to JSON and collate as JSONL"
return ''.join([f'{entry.to_json()}\n' for entry in entries])

View File

@@ -2,7 +2,7 @@
import glob
import logging
import time
from typing import Iterable
from typing import Iterable, List
# Internal Packages
from src.processor.org_mode import orgnode
@@ -18,7 +18,7 @@ logger = logging.getLogger(__name__)
class OrgToJsonl(TextToJsonl):
# Define Functions
def process(self, previous_entries: list[Entry]=None):
def process(self, previous_entries: List[Entry]=None):
# Extract required fields from config
org_files, org_file_filter, output_file = self.config.input_files, self.config.input_filter, self.config.compressed_jsonl
index_heading_entries = self.config.index_heading_entries
@@ -101,9 +101,9 @@ class OrgToJsonl(TextToJsonl):
return entries, dict(entry_to_file_map)
@staticmethod
def convert_org_nodes_to_entries(parsed_entries: list[orgnode.Orgnode], entry_to_file_map, index_heading_entries=False) -> list[Entry]:
def convert_org_nodes_to_entries(parsed_entries: List[orgnode.Orgnode], entry_to_file_map, index_heading_entries=False) -> List[Entry]:
"Convert Org-Mode nodes into list of Entry objects"
entries: list[Entry] = []
entries: List[Entry] = []
for parsed_entry in parsed_entries:
if not parsed_entry.hasBody and not index_heading_entries:
# Ignore title notes i.e notes with just headings and empty body

View File

@@ -37,6 +37,7 @@ import re
import datetime
from pathlib import Path
from os.path import relpath
from typing import List
indent_regex = re.compile(r'^ *')
@@ -69,7 +70,7 @@ def makelist(filename):
sched_date = ''
deadline_date = ''
logbook = list()
nodelist: list[Orgnode] = list()
nodelist: List[Orgnode] = list()
property_map = dict()
in_properties_drawer = False
in_logbook_drawer = False

View File

@@ -1,9 +1,8 @@
# Standard Packages
from abc import ABC, abstractmethod
import hashlib
import time
import logging
from typing import Callable
from typing import Callable, List, Tuple
from src.utils.helpers import timer
# Internal Packages
@@ -18,16 +17,16 @@ class TextToJsonl(ABC):
self.config = config
@abstractmethod
def process(self, previous_entries: list[Entry]=None) -> list[tuple[int, Entry]]: ...
def process(self, previous_entries: List[Entry]=None) -> List[Tuple[int, Entry]]: ...
@staticmethod
def hash_func(key: str) -> Callable:
return lambda entry: hashlib.md5(bytes(getattr(entry, key), encoding='utf-8')).hexdigest()
@staticmethod
def split_entries_by_max_tokens(entries: list[Entry], max_tokens: int=256, max_word_length: int=500) -> list[Entry]:
def split_entries_by_max_tokens(entries: List[Entry], max_tokens: int=256, max_word_length: int=500) -> List[Entry]:
"Split entries if compiled entry length exceeds the max tokens supported by the ML model."
chunked_entries: list[Entry] = []
chunked_entries: List[Entry] = []
for entry in entries:
compiled_entry_words = entry.compiled.split()
# Drop long words instead of having entry truncated to maintain quality of entry processed by models
@@ -39,7 +38,7 @@ class TextToJsonl(ABC):
chunked_entries.append(entry_chunk)
return chunked_entries
def mark_entries_for_update(self, current_entries: list[Entry], previous_entries: list[Entry], key='compiled', logger=None) -> list[tuple[int, Entry]]:
def mark_entries_for_update(self, current_entries: List[Entry], previous_entries: List[Entry], key='compiled', logger=None) -> List[Tuple[int, Entry]]:
# Hash all current and previous entries to identify new entries
with timer("Hash previous, current entries", logger):
current_entry_hashes = list(map(TextToJsonl.hash_func(key), current_entries))