Create and use a context manager to time code

Use the timer context manager in all places where code was being timed.

Benefits:
- Deduplicates timing code scattered across the codebase
- Provides a single place to manage perf timing code
- Uses consistent timing log patterns
2026-03-03 05:29:12 +00:00 · 2023-01-09 19:43:19 -03:00
parent 93f39dbd43
commit aa22d83172
11 changed files with 235 additions and 298 deletions
--- a/src/processor/text_to_jsonl.py
+++ b/src/processor/text_to_jsonl.py
@@ -4,6 +4,7 @@ import hashlib
 import time
 import logging
 from typing import Callable
+from src.utils.helpers import timer

 # Internal Packages
 from src.utils.rawconfig import Entry, TextContentConfig
@@ -40,35 +41,31 @@ class TextToJsonl(ABC):

    def mark_entries_for_update(self, current_entries: list[Entry], previous_entries: list[Entry], key='compiled', logger=None) -> list[tuple[int, Entry]]:
        # Hash all current and previous entries to identify new entries
-        start = time.time()
-        current_entry_hashes = list(map(TextToJsonl.hash_func(key), current_entries))
-        previous_entry_hashes = list(map(TextToJsonl.hash_func(key), previous_entries))
-        end = time.time()
-        logger.debug(f"Hash previous, current entries: {end - start} seconds")
+        with timer("Hash previous, current entries", logger):
+            current_entry_hashes = list(map(TextToJsonl.hash_func(key), current_entries))
+            previous_entry_hashes = list(map(TextToJsonl.hash_func(key), previous_entries))

-        start = time.time()
-        hash_to_current_entries = dict(zip(current_entry_hashes, current_entries))
-        hash_to_previous_entries = dict(zip(previous_entry_hashes, previous_entries))
+        with timer("Identify, Mark, Combine new, existing entries", logger):
+            hash_to_current_entries = dict(zip(current_entry_hashes, current_entries))
+            hash_to_previous_entries = dict(zip(previous_entry_hashes, previous_entries))

-        # All entries that did not exist in the previous set are to be added
-        new_entry_hashes = set(current_entry_hashes) - set(previous_entry_hashes)
-        # All entries that exist in both current and previous sets are kept
-        existing_entry_hashes = set(current_entry_hashes) & set(previous_entry_hashes)
+            # All entries that did not exist in the previous set are to be added
+            new_entry_hashes = set(current_entry_hashes) - set(previous_entry_hashes)
+            # All entries that exist in both current and previous sets are kept
+            existing_entry_hashes = set(current_entry_hashes) & set(previous_entry_hashes)

-        # Mark new entries with -1 id to flag for later embeddings generation
-        new_entries = [
-            (-1, hash_to_current_entries[entry_hash])
-            for entry_hash in new_entry_hashes
-        ]
-        # Set id of existing entries to their previous ids to reuse their existing encoded embeddings
-        existing_entries = [
-            (previous_entry_hashes.index(entry_hash), hash_to_previous_entries[entry_hash])
-            for entry_hash in existing_entry_hashes
-        ]
+            # Mark new entries with -1 id to flag for later embeddings generation
+            new_entries = [
+                (-1, hash_to_current_entries[entry_hash])
+                for entry_hash in new_entry_hashes
+            ]
+            # Set id of existing entries to their previous ids to reuse their existing encoded embeddings
+            existing_entries = [
+                (previous_entry_hashes.index(entry_hash), hash_to_previous_entries[entry_hash])
+                for entry_hash in existing_entry_hashes
+            ]

-        existing_entries_sorted = sorted(existing_entries, key=lambda e: e[0])
-        entries_with_ids = existing_entries_sorted + new_entries
-        end = time.time()
-        logger.debug(f"Identify, Mark, Combine new, existing entries: {end - start} seconds")
+            existing_entries_sorted = sorted(existing_entries, key=lambda e: e[0])
+            entries_with_ids = existing_entries_sorted + new_entries

        return entries_with_ids