Create and use a context manager to time code

Use the timer context manager in all places where code was being timed

- Benefits
  - Deduplicates timing code scattered across the codebase.
  - Provides a single place to manage perf timing code.
  - Uses consistent timing log patterns.
This commit is contained in:
Debanjum Singh Solanky
2023-01-09 19:43:19 -03:00
parent 93f39dbd43
commit aa22d83172
11 changed files with 235 additions and 298 deletions

View File

@@ -12,7 +12,7 @@ import dateparser as dtparse
# Internal Packages
from src.search_filter.base_filter import BaseFilter
from src.utils.helpers import LRU
from src.utils.helpers import LRU, timer
logger = logging.getLogger(__name__)
@@ -34,19 +34,16 @@ class DateFilter(BaseFilter):
def load(self, entries, *args, **kwargs):
    "Index entries by the dates they mention, mapping each date's unix timestamp to the ids of entries containing it"
    with timer("Created date filter index", logger):
        for id, entry in enumerate(entries):
            # Extract dates from entry
            for date_in_entry_string in re.findall(r'\d{4}-\d{2}-\d{2}', getattr(entry, self.entry_key)):
                # Convert date string in entry to unix timestamp
                try:
                    date_in_entry = datetime.strptime(date_in_entry_string, '%Y-%m-%d').timestamp()
                except ValueError:
                    # Skip strings that match the date pattern but are not valid dates (e.g. 2023-02-30)
                    continue
                self.date_to_entry_ids[date_in_entry].add(id)
def can_filter(self, raw_query):
"Check if query contains date filters"
@@ -56,10 +53,8 @@ class DateFilter(BaseFilter):
def apply(self, query, entries):
"Find entries containing any dates that fall within date range specified in query"
# extract date range specified in date filter of query
start = time.time()
query_daterange = self.extract_date_range(query)
end = time.time()
logger.debug(f"Extract date range to filter from query: {end - start} seconds")
with timer("Extract date range to filter from query", logger):
query_daterange = self.extract_date_range(query)
# if no date in query, return all entries
if query_daterange is None:
@@ -80,14 +75,12 @@ class DateFilter(BaseFilter):
self.load(entries)
# find entries containing any dates that fall with date range specified in query
start = time.time()
entries_to_include = set()
for date_in_entry in self.date_to_entry_ids.keys():
# Check if date in entry is within date range specified in query
if query_daterange[0] <= date_in_entry < query_daterange[1]:
entries_to_include |= self.date_to_entry_ids[date_in_entry]
end = time.time()
logger.debug(f"Mark entries satisfying filter: {end - start} seconds")
with timer("Mark entries satisfying filter", logger):
entries_to_include = set()
for date_in_entry in self.date_to_entry_ids.keys():
# Check if date in entry is within date range specified in query
if query_daterange[0] <= date_in_entry < query_daterange[1]:
entries_to_include |= self.date_to_entry_ids[date_in_entry]
# cache results
self.cache[cache_key] = entries_to_include

View File

@@ -7,7 +7,7 @@ from collections import defaultdict
# Internal Packages
from src.search_filter.base_filter import BaseFilter
from src.utils.helpers import LRU
from src.utils.helpers import LRU, timer
logger = logging.getLogger(__name__)
@@ -22,32 +22,28 @@ class FileFilter(BaseFilter):
self.cache = LRU()
def load(self, entries, *args, **kwargs):
    "Index entries by file, mapping each entry's file (the attribute named by self.entry_key) to the ids of entries in it"
    with timer("Created file filter index", logger):
        for id, entry in enumerate(entries):
            self.file_to_entry_map[getattr(entry, self.entry_key)].add(id)
def can_filter(self, raw_query):
    "Check whether the raw query contains a file filter"
    match = re.search(self.file_filter_regex, raw_query)
    return match is not None
def apply(self, query, entries):
# Extract file filters from raw query
start = time.time()
raw_files_to_search = re.findall(self.file_filter_regex, query)
if not raw_files_to_search:
return query, set(range(len(entries)))
with timer("Extract files_to_search from query", logger):
raw_files_to_search = re.findall(self.file_filter_regex, query)
if not raw_files_to_search:
return query, set(range(len(entries)))
# Convert simple file filters with no path separator into regex
# e.g. "file:notes.org" -> "file:.*notes.org"
files_to_search = []
for file in sorted(raw_files_to_search):
if '/' not in file and '\\' not in file and '*' not in file:
files_to_search += [f'*{file}']
else:
files_to_search += [file]
end = time.time()
logger.debug(f"Extract files_to_search from query: {end - start} seconds")
# Convert simple file filters with no path separator into regex
# e.g. "file:notes.org" -> "file:.*notes.org"
files_to_search = []
for file in sorted(raw_files_to_search):
if '/' not in file and '\\' not in file and '*' not in file:
files_to_search += [f'*{file}']
else:
files_to_search += [file]
# Return item from cache if exists
query = re.sub(self.file_filter_regex, '', query).strip()
@@ -61,17 +57,13 @@ class FileFilter(BaseFilter):
self.load(entries, regenerate=False)
# Mark entries that contain any blocked_words for exclusion
start = time.time()
included_entry_indices = set.union(*[self.file_to_entry_map[entry_file]
for entry_file in self.file_to_entry_map.keys()
for search_file in files_to_search
if fnmatch.fnmatch(entry_file, search_file)], set())
if not included_entry_indices:
return query, {}
end = time.time()
logger.debug(f"Mark entries satisfying filter: {end - start} seconds")
with timer("Mark entries satisfying filter", logger):
included_entry_indices = set.union(*[self.file_to_entry_map[entry_file]
for entry_file in self.file_to_entry_map.keys()
for search_file in files_to_search
if fnmatch.fnmatch(entry_file, search_file)], set())
if not included_entry_indices:
return query, {}
# Cache results
self.cache[cache_key] = included_entry_indices

View File

@@ -6,7 +6,7 @@ from collections import defaultdict
# Internal Packages
from src.search_filter.base_filter import BaseFilter
from src.utils.helpers import LRU
from src.utils.helpers import LRU, timer
logger = logging.getLogger(__name__)
@@ -24,17 +24,15 @@ class WordFilter(BaseFilter):
def load(self, entries, *args, **kwargs):
    "Index entries by word, mapping each word to the indices of entries containing it; returns the built index"
    with timer("Created word filter index", logger):
        self.cache = {}  # Clear cache on filter (re-)load
        entry_splitter = r',|\.| |\]|\[\(|\)|\{|\}|\<|\>|\t|\n|\:|\;|\?|\!|\(|\)|\&|\^|\$|\@|\%|\+|\=|\/|\\|\||\~|\`|\"|\''
        # Create map of words to entries they exist in
        for entry_index, entry in enumerate(entries):
            for word in re.split(entry_splitter, getattr(entry, self.entry_key).lower()):
                # Skip empty tokens produced by adjacent delimiters
                if word == '':
                    continue
                self.word_to_entry_index[word].add(entry_index)
    return self.word_to_entry_index
@@ -50,14 +48,10 @@ class WordFilter(BaseFilter):
def apply(self, query, entries):
"Find entries containing required and not blocked words specified in query"
# Separate natural query from required, blocked words filters
start = time.time()
required_words = set([word.lower() for word in re.findall(self.required_regex, query)])
blocked_words = set([word.lower() for word in re.findall(self.blocked_regex, query)])
query = re.sub(self.blocked_regex, '', re.sub(self.required_regex, '', query)).strip()
end = time.time()
logger.debug(f"Extract required, blocked filters from query: {end - start} seconds")
with timer("Extract required, blocked filters from query", logger):
required_words = set([word.lower() for word in re.findall(self.required_regex, query)])
blocked_words = set([word.lower() for word in re.findall(self.blocked_regex, query)])
query = re.sub(self.blocked_regex, '', re.sub(self.required_regex, '', query)).strip()
if len(required_words) == 0 and len(blocked_words) == 0:
return query, set(range(len(entries)))
@@ -72,20 +66,16 @@ class WordFilter(BaseFilter):
if not self.word_to_entry_index:
self.load(entries, regenerate=False)
start = time.time()
# mark entries that contain all required_words for inclusion
entries_with_all_required_words = set(range(len(entries)))
if len(required_words) > 0:
entries_with_all_required_words = set.intersection(*[self.word_to_entry_index.get(word, set()) for word in required_words])
with timer("Mark entries satisfying filter", logger):
entries_with_all_required_words = set(range(len(entries)))
if len(required_words) > 0:
entries_with_all_required_words = set.intersection(*[self.word_to_entry_index.get(word, set()) for word in required_words])
# mark entries that contain any blocked_words for exclusion
entries_with_any_blocked_words = set()
if len(blocked_words) > 0:
entries_with_any_blocked_words = set.union(*[self.word_to_entry_index.get(word, set()) for word in blocked_words])
end = time.time()
logger.debug(f"Mark entries satisfying filter: {end - start} seconds")
# mark entries that contain any blocked_words for exclusion
entries_with_any_blocked_words = set()
if len(blocked_words) > 0:
entries_with_any_blocked_words = set.union(*[self.word_to_entry_index.get(word, set()) for word in blocked_words])
# get entries satisfying inclusion and exclusion filters
included_entry_indices = entries_with_all_required_words - entries_with_any_blocked_words