mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-05 13:21:18 +00:00
Add method to extract filter terms from query to all filters
- Test the get_filter_terms method in all 3 filters (word, file, date) - Give the existing can_filter method a default implementation in the base filter abstract class
This commit is contained in:
@@ -12,9 +12,12 @@ class BaseFilter(ABC):
|
||||
...
|
||||
|
||||
@abstractmethod
|
||||
def can_filter(self, raw_query: str) -> bool:
|
||||
def get_filter_terms(self, query: str) -> List[str]:
|
||||
...
|
||||
|
||||
def can_filter(self, raw_query: str) -> bool:
|
||||
return len(self.get_filter_terms(raw_query)) > 0
|
||||
|
||||
@abstractmethod
|
||||
def apply(self, query: str, entries: List[Entry]) -> Tuple[str, Set[int]]:
|
||||
...
|
||||
|
||||
@@ -3,6 +3,7 @@ import re
|
||||
import logging
|
||||
from collections import defaultdict
|
||||
from datetime import timedelta, datetime
|
||||
from typing import List
|
||||
from dateutil.relativedelta import relativedelta
|
||||
from math import inf
|
||||
|
||||
@@ -45,9 +46,9 @@ class DateFilter(BaseFilter):
|
||||
continue
|
||||
self.date_to_entry_ids[date_in_entry].add(id)
|
||||
|
||||
def can_filter(self, raw_query):
|
||||
"Check if query contains date filters"
|
||||
return self.extract_date_range(raw_query) is not None
|
||||
def get_filter_terms(self, query: str) -> List[str]:
|
||||
"Get all filter terms in query"
|
||||
return [f"dt{item[0]}'{item[1]}'" for item in re.findall(self.date_regex, query)]
|
||||
|
||||
def defilter(self, query):
|
||||
# remove date range filter from query
|
||||
|
||||
@@ -3,6 +3,7 @@ import re
|
||||
import fnmatch
|
||||
import logging
|
||||
from collections import defaultdict
|
||||
from typing import List
|
||||
|
||||
# Internal Packages
|
||||
from khoj.search_filter.base_filter import BaseFilter
|
||||
@@ -25,8 +26,9 @@ class FileFilter(BaseFilter):
|
||||
for id, entry in enumerate(entries):
|
||||
self.file_to_entry_map[getattr(entry, self.entry_key)].add(id)
|
||||
|
||||
def can_filter(self, raw_query):
|
||||
return re.search(self.file_filter_regex, raw_query) is not None
|
||||
def get_filter_terms(self, query: str) -> List[str]:
|
||||
"Get all filter terms in query"
|
||||
return [f'file:"{term}"' for term in re.findall(self.file_filter_regex, query)]
|
||||
|
||||
def defilter(self, query: str) -> str:
|
||||
return re.sub(self.file_filter_regex, "", query).strip()
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
import re
|
||||
import logging
|
||||
from collections import defaultdict
|
||||
from typing import List
|
||||
|
||||
# Internal Packages
|
||||
from khoj.search_filter.base_filter import BaseFilter
|
||||
@@ -36,12 +37,11 @@ class WordFilter(BaseFilter):
|
||||
|
||||
return self.word_to_entry_index
|
||||
|
||||
def can_filter(self, raw_query):
|
||||
"Check if query contains word filters"
|
||||
required_words = re.findall(self.required_regex, raw_query)
|
||||
blocked_words = re.findall(self.blocked_regex, raw_query)
|
||||
|
||||
return len(required_words) != 0 or len(blocked_words) != 0
|
||||
def get_filter_terms(self, query: str) -> List[str]:
|
||||
"Get all filter terms in query"
|
||||
required_terms = [f"+{required_term}" for required_term in re.findall(self.required_regex, query)]
|
||||
blocked_terms = [f"-{blocked_term}" for blocked_term in re.findall(self.blocked_regex, query)]
|
||||
return required_terms + blocked_terms
|
||||
|
||||
def defilter(self, query: str) -> str:
|
||||
return re.sub(self.blocked_regex, "", re.sub(self.required_regex, "", query)).strip()
|
||||
|
||||
Reference in New Issue
Block a user