Create and use a context manager to time code

Use the timer context manager in all places where code was being timed.

Benefits:
- Deduplicates timing code scattered across the codebase
- Provides a single place to manage perf timing code
- Uses consistent timing log patterns
2026-03-05 21:29:11 +00:00 · 2023-01-09 19:43:19 -03:00
parent 93f39dbd43
commit aa22d83172
11 changed files with 235 additions and 298 deletions
--- a/src/search_filter/date_filter.py
+++ b/src/search_filter/date_filter.py
@@ -12,7 +12,7 @@ import dateparser as dtparse

 # Internal Packages
 from src.search_filter.base_filter import BaseFilter
-from src.utils.helpers import LRU
+from src.utils.helpers import LRU, timer


 logger = logging.getLogger(__name__)
@@ -34,19 +34,16 @@ class DateFilter(BaseFilter):


    def load(self, entries, *args, **kwargs):
-        start = time.time()
-        for id, entry in enumerate(entries):
-            # Extract dates from entry
-            for date_in_entry_string in re.findall(r'\d{4}-\d{2}-\d{2}', getattr(entry, self.entry_key)):
-                # Convert date string in entry to unix timestamp
-                try:
-                    date_in_entry = datetime.strptime(date_in_entry_string, '%Y-%m-%d').timestamp()
-                except ValueError:
-                    continue
-                self.date_to_entry_ids[date_in_entry].add(id)
-        end = time.time()
-        logger.debug(f"Created date filter index: {end - start} seconds")
-
+        with timer("Created date filter index", logger):
+            for id, entry in enumerate(entries):
+                # Extract dates from entry
+                for date_in_entry_string in re.findall(r'\d{4}-\d{2}-\d{2}', getattr(entry, self.entry_key)):
+                    # Convert date string in entry to unix timestamp
+                    try:
+                        date_in_entry = datetime.strptime(date_in_entry_string, '%Y-%m-%d').timestamp()
+                    except ValueError:
+                        continue
+                    self.date_to_entry_ids[date_in_entry].add(id)

    def can_filter(self, raw_query):
        "Check if query contains date filters"
@@ -56,10 +53,8 @@ class DateFilter(BaseFilter):
    def apply(self, query, entries):
        "Find entries containing any dates that fall within date range specified in query"
        # extract date range specified in date filter of query
-        start = time.time()
-        query_daterange = self.extract_date_range(query)
-        end = time.time()
-        logger.debug(f"Extract date range to filter from query: {end - start} seconds")
+        with timer("Extract date range to filter from query", logger):
+            query_daterange = self.extract_date_range(query)

        # if no date in query, return all entries
        if query_daterange is None:
@@ -80,14 +75,12 @@ class DateFilter(BaseFilter):
            self.load(entries)

        # find entries containing any dates that fall with date range specified in query
-        start = time.time()
-        entries_to_include = set()
-        for date_in_entry in self.date_to_entry_ids.keys():
-            # Check if date in entry is within date range specified in query
-            if query_daterange[0] <= date_in_entry < query_daterange[1]:
-                entries_to_include |= self.date_to_entry_ids[date_in_entry]
-        end = time.time()
-        logger.debug(f"Mark entries satisfying filter: {end - start} seconds")
+        with timer("Mark entries satisfying filter", logger):
+            entries_to_include = set()
+            for date_in_entry in self.date_to_entry_ids.keys():
+                # Check if date in entry is within date range specified in query
+                if query_daterange[0] <= date_in_entry < query_daterange[1]:
+                    entries_to_include |= self.date_to_entry_ids[date_in_entry]

        # cache results
        self.cache[cache_key] = entries_to_include
--- a/src/search_filter/file_filter.py
+++ b/src/search_filter/file_filter.py
@@ -7,7 +7,7 @@ from collections import defaultdict

 # Internal Packages
 from src.search_filter.base_filter import BaseFilter
-from src.utils.helpers import LRU
+from src.utils.helpers import LRU, timer


 logger = logging.getLogger(__name__)
@@ -22,32 +22,28 @@ class FileFilter(BaseFilter):
        self.cache = LRU()

    def load(self, entries, *args, **kwargs):
-        start = time.time()
-        for id, entry in enumerate(entries):
-            self.file_to_entry_map[getattr(entry, self.entry_key)].add(id)
-        end = time.time()
-        logger.debug(f"Created file filter index: {end - start} seconds")
+        with timer("Created file filter index", logger):
+            for id, entry in enumerate(entries):
+                self.file_to_entry_map[getattr(entry, self.entry_key)].add(id)

    def can_filter(self, raw_query):
        return re.search(self.file_filter_regex, raw_query) is not None

    def apply(self, query, entries):
        # Extract file filters from raw query
-        start = time.time()
-        raw_files_to_search = re.findall(self.file_filter_regex, query)
-        if not raw_files_to_search:
-            return query, set(range(len(entries)))
+        with timer("Extract files_to_search from query", logger):
+            raw_files_to_search = re.findall(self.file_filter_regex, query)
+            if not raw_files_to_search:
+                return query, set(range(len(entries)))

-        # Convert simple file filters with no path separator into regex
-        # e.g. "file:notes.org" -> "file:.*notes.org"
-        files_to_search = []
-        for file in sorted(raw_files_to_search):
-            if '/' not in file and '\\' not in file and '*' not in file:
-                files_to_search += [f'*{file}']
-            else:
-                files_to_search += [file]
-        end = time.time()
-        logger.debug(f"Extract files_to_search from query: {end - start} seconds")
+            # Convert simple file filters with no path separator into regex
+            # e.g. "file:notes.org" -> "file:.*notes.org"
+            files_to_search = []
+            for file in sorted(raw_files_to_search):
+                if '/' not in file and '\\' not in file and '*' not in file:
+                    files_to_search += [f'*{file}']
+                else:
+                    files_to_search += [file]

        # Return item from cache if exists
        query = re.sub(self.file_filter_regex, '', query).strip()
@@ -61,17 +57,13 @@ class FileFilter(BaseFilter):
            self.load(entries, regenerate=False)

        # Mark entries that contain any blocked_words for exclusion
-        start = time.time()
-
-        included_entry_indices = set.union(*[self.file_to_entry_map[entry_file]
-                for entry_file in self.file_to_entry_map.keys()
-                for search_file in files_to_search
-                if fnmatch.fnmatch(entry_file, search_file)], set())
-        if not included_entry_indices:
-            return query, {}
-
-        end = time.time()
-        logger.debug(f"Mark entries satisfying filter: {end - start} seconds")
+        with timer("Mark entries satisfying filter", logger):
+            included_entry_indices = set.union(*[self.file_to_entry_map[entry_file]
+                    for entry_file in self.file_to_entry_map.keys()
+                    for search_file in files_to_search
+                    if fnmatch.fnmatch(entry_file, search_file)], set())
+            if not included_entry_indices:
+                return query, {}

        # Cache results
        self.cache[cache_key] = included_entry_indices
--- a/src/search_filter/word_filter.py
+++ b/src/search_filter/word_filter.py
@@ -6,7 +6,7 @@ from collections import defaultdict

 # Internal Packages
 from src.search_filter.base_filter import BaseFilter
-from src.utils.helpers import LRU
+from src.utils.helpers import LRU, timer


 logger = logging.getLogger(__name__)
@@ -24,17 +24,15 @@ class WordFilter(BaseFilter):


    def load(self, entries, *args, **kwargs):
-        start = time.time()
-        self.cache = {}  # Clear cache on filter (re-)load
-        entry_splitter = r',|\.| |\]|\[\(|\)|\{|\}|\<|\>|\t|\n|\:|\;|\?|\!|\(|\)|\&|\^|\$|\@|\%|\+|\=|\/|\\|\||\~|\`|\"|\''
-        # Create map of words to entries they exist in
-        for entry_index, entry in enumerate(entries):
-            for word in re.split(entry_splitter, getattr(entry, self.entry_key).lower()):
-                if word == '':
-                    continue
-                self.word_to_entry_index[word].add(entry_index)
-        end = time.time()
-        logger.debug(f"Created word filter index: {end - start} seconds")
+        with timer("Created word filter index", logger):
+            self.cache = {}  # Clear cache on filter (re-)load
+            entry_splitter = r',|\.| |\]|\[\(|\)|\{|\}|\<|\>|\t|\n|\:|\;|\?|\!|\(|\)|\&|\^|\$|\@|\%|\+|\=|\/|\\|\||\~|\`|\"|\''
+            # Create map of words to entries they exist in
+            for entry_index, entry in enumerate(entries):
+                for word in re.split(entry_splitter, getattr(entry, self.entry_key).lower()):
+                    if word == '':
+                        continue
+                    self.word_to_entry_index[word].add(entry_index)

        return self.word_to_entry_index

@@ -50,14 +48,10 @@ class WordFilter(BaseFilter):
    def apply(self, query, entries):
        "Find entries containing required and not blocked words specified in query"
        # Separate natural query from required, blocked words filters
-        start = time.time()
-
-        required_words = set([word.lower() for word in re.findall(self.required_regex, query)])
-        blocked_words = set([word.lower() for word in re.findall(self.blocked_regex, query)])
-        query = re.sub(self.blocked_regex, '', re.sub(self.required_regex, '', query)).strip()
-
-        end = time.time()
-        logger.debug(f"Extract required, blocked filters from query: {end - start} seconds")
+        with timer("Extract required, blocked filters from query", logger):
+            required_words = set([word.lower() for word in re.findall(self.required_regex, query)])
+            blocked_words = set([word.lower() for word in re.findall(self.blocked_regex, query)])
+            query = re.sub(self.blocked_regex, '', re.sub(self.required_regex, '', query)).strip()

        if len(required_words) == 0 and len(blocked_words) == 0:
            return query, set(range(len(entries)))
@@ -72,20 +66,16 @@ class WordFilter(BaseFilter):
        if not self.word_to_entry_index:
            self.load(entries, regenerate=False)

-        start = time.time()
-
        # mark entries that contain all required_words for inclusion
-        entries_with_all_required_words = set(range(len(entries)))
-        if len(required_words) > 0:
-            entries_with_all_required_words = set.intersection(*[self.word_to_entry_index.get(word, set()) for word in required_words])
+        with timer("Mark entries satisfying filter", logger):
+            entries_with_all_required_words = set(range(len(entries)))
+            if len(required_words) > 0:
+                entries_with_all_required_words = set.intersection(*[self.word_to_entry_index.get(word, set()) for word in required_words])

-        # mark entries that contain any blocked_words for exclusion
-        entries_with_any_blocked_words = set()
-        if len(blocked_words) > 0:
-            entries_with_any_blocked_words = set.union(*[self.word_to_entry_index.get(word, set()) for word in blocked_words])
-
-        end = time.time()
-        logger.debug(f"Mark entries satisfying filter: {end - start} seconds")
+            # mark entries that contain any blocked_words for exclusion
+            entries_with_any_blocked_words = set()
+            if len(blocked_words) > 0:
+                entries_with_any_blocked_words = set.union(*[self.word_to_entry_index.get(word, set()) for word in blocked_words])

        # get entries satisfying inclusion and exclusion filters
        included_entry_indices = entries_with_all_required_words - entries_with_any_blocked_words