Make cross-encoder re-rank results if query param set on /search API

- Improve search speed by ~10x
  Tested on corpus of 125K lines, 12.5K entries

- Allow cross-encoder to re-rank results by setting the r=true query param (?r=true or &r=true) when querying the /search API
  - It's an optional param that defaults to False
  - Earlier all results were re-ranked by cross-encoder
  - Making this configurable allows for much faster results, if desired,
    at the cost of lower accuracy
This commit is contained in:
Debanjum Singh Solanky
2022-07-26 22:56:36 +04:00
parent b1e64fd4a8
commit 1168244c92
4 changed files with 22 additions and 19 deletions

View File

@@ -59,7 +59,7 @@ async def config_data(updated_config: FullConfig):
return config
@app.get('/search')
def search(q: str, n: Optional[int] = 5, t: Optional[SearchType] = None):
def search(q: str, n: Optional[int] = 5, t: Optional[SearchType] = None, r: Optional[bool] = False):
if q is None or q == '':
print(f'No query param (q) passed in API call to initiate search')
return {}
@@ -72,7 +72,7 @@ def search(q: str, n: Optional[int] = 5, t: Optional[SearchType] = None):
if (t == SearchType.Org or t == None) and model.orgmode_search:
# query org-mode notes
query_start = time.time()
hits, entries = text_search.query(user_query, model.orgmode_search, device=device, filters=[DateFilter(), ExplicitFilter()], verbose=verbose)
hits, entries = text_search.query(user_query, model.orgmode_search, rank_results=r, device=device, filters=[DateFilter(), ExplicitFilter()], verbose=verbose)
query_end = time.time()
# collate and return results
@@ -83,7 +83,7 @@ def search(q: str, n: Optional[int] = 5, t: Optional[SearchType] = None):
if (t == SearchType.Music or t == None) and model.music_search:
# query music library
query_start = time.time()
hits, entries = text_search.query(user_query, model.music_search, device=device, filters=[DateFilter(), ExplicitFilter()], verbose=verbose)
hits, entries = text_search.query(user_query, model.music_search, rank_results=r, device=device, filters=[DateFilter(), ExplicitFilter()], verbose=verbose)
query_end = time.time()
# collate and return results
@@ -94,7 +94,7 @@ def search(q: str, n: Optional[int] = 5, t: Optional[SearchType] = None):
if (t == SearchType.Markdown or t == None) and model.orgmode_search:
# query markdown files
query_start = time.time()
hits, entries = text_search.query(user_query, model.markdown_search, device=device, filters=[ExplicitFilter(), DateFilter()], verbose=verbose)
hits, entries = text_search.query(user_query, model.markdown_search, rank_results=r, device=device, filters=[ExplicitFilter(), DateFilter()], verbose=verbose)
query_end = time.time()
# collate and return results
@@ -105,7 +105,7 @@ def search(q: str, n: Optional[int] = 5, t: Optional[SearchType] = None):
if (t == SearchType.Ledger or t == None) and model.ledger_search:
# query transactions
query_start = time.time()
hits, entries = text_search.query(user_query, model.ledger_search, filters=[ExplicitFilter(), DateFilter()], verbose=verbose)
hits, entries = text_search.query(user_query, model.ledger_search, rank_results=r, device=device, filters=[ExplicitFilter(), DateFilter()], verbose=verbose)
query_end = time.time()
# collate and return results