From 3aac3c7d5293c6ba160dd6d4384e62904832071c Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Thu, 14 Jul 2022 21:54:04 +0400 Subject: [PATCH] Run explicit filter on raw entry, add more terms to split entries by - The last word in headings was suffixed by \t, so it couldn't be filtered on - Users interact with raw entries, so run explicit filters on the raw entry - For semantic search, using the filtered entry is still cleaner --- src/search_filter/explicit_filter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/search_filter/explicit_filter.py b/src/search_filter/explicit_filter.py index 363dbd71..e6a7f551 100644 --- a/src/search_filter/explicit_filter.py +++ b/src/search_filter/explicit_filter.py @@ -18,8 +18,8 @@ def explicit_filter(raw_query, entries, embeddings): entries_by_word_set = [set(word.lower() for word in re.split( - r',|\.| |\]|\[\(|\)|\{|\}', # split on fullstop, comma or any brackets - entry[0]) + r',|\.| |\]|\[\(|\)|\{|\}|\t|\n|\:', # split on fullstop, comma or any brackets + entry[1]) if word != "") for entry in entries]