mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-09 05:39:12 +00:00
Add, test date filter regex and date parsing to get natural date range
This commit is contained in:
@@ -1,8 +1,21 @@
|
|||||||
# Standard Packages
|
# Standard Packages
|
||||||
import re
|
import re
|
||||||
|
from datetime import timedelta, datetime
|
||||||
|
from dateutil.relativedelta import relativedelta, MO
|
||||||
|
from math import inf
|
||||||
|
|
||||||
# External Packages
|
# External Packages
|
||||||
import torch
|
import torch
|
||||||
|
import dateparser as dtparse
|
||||||
|
|
||||||
|
|
||||||
|
# Date Range Filter Regexes
# Example filter queries:
# - dt>=yesterday dt<"tomorrow"
# - dt>="last week"
# - dt:"next year"
#
# date_regex optionally matches a single `dt<operator><date>` filter and
# captures two groups: the 1-2 character operator (built from `:`, `>`, `<`, `=`)
# and the date token (word characters, `-` and `/`). Double quotes around the
# date token are optional and are not captured.
# NOTE(review): the date token class excludes spaces, so a quoted multi-word
# date such as "last week" is captured only up to the first space - confirm intended.
date_regex = r'(?:dt([:><=]{1,2})\"?([\w\-\/]+))?\"?'

# date_range_regex looks for up to two date filters after some whitespace and
# exposes four groups (operator, date, operator, date); the groups of a filter
# that is absent are None.
# Raw f-string so that `\s` reaches the regex engine untouched instead of
# being an invalid string escape sequence.
date_range_regex = rf'.*?\s+{date_regex}\s*{date_regex}.*?'
|
||||||
|
|
||||||
|
|
||||||
def date_filter(query, entries, embeddings):
|
def date_filter(query, entries, embeddings):
|
||||||
@@ -30,4 +43,44 @@ def date_filter(query, entries, embeddings):
|
|||||||
del entries[id]
|
del entries[id]
|
||||||
embeddings = torch.cat((embeddings[:id], embeddings[id+1:]))
|
embeddings = torch.cat((embeddings[:id], embeddings[id+1:]))
|
||||||
|
|
||||||
return query, entries, embeddings
|
return query, entries, embeddings
|
||||||
|
|
||||||
|
def parse(date_str, relative_base=None):
    """Resolve the date string of a query's date filter into a date range.

    Returns the (start, end) tuple built by date_to_daterange, or None when
    dateparser cannot interpret the string. relative_base is the datetime
    relative expressions are resolved against; it defaults to the current time.
    """
    # strip the phrases that mark a future date before handing the text to dateparser
    future_markers = r'later|from now|from today'
    text_for_parser = re.sub(future_markers, '', date_str)

    parser_settings = {
        'RELATIVE_BASE': relative_base or datetime.now(),
        'PREFER_DAY_OF_MONTH': 'first',
        'PREFER_DATES_FROM': 'future',
    }
    resolved = dtparse.parse(text_for_parser, settings=parser_settings)

    # the untouched date_str is forwarded so its granularity keywords stay visible
    return None if resolved is None else date_to_daterange(resolved, date_str)
|
||||||
|
|
||||||
|
|
||||||
|
def date_to_daterange(parsed_date, date_str):
    """Convert parsed date to a date range at natural granularity (day, week, month or year).

    The granularity is chosen by looking for the substrings 'year', 'month'
    and 'week' (checked in that order) in date_str; any other string yields
    a single-day range.

    Args:
        parsed_date: datetime the date string was resolved to.
        date_str: the date string as written in the query.

    Returns:
        A (start, end) tuple of datetimes at midnight, where end is the start
        of the period following start. Both keep parsed_date's tzinfo.
    """
    start_of_day = parsed_date.replace(hour=0, minute=0, second=0, microsecond=0)

    if 'year' in date_str:
        # replace() instead of datetime(...) so tzinfo survives, as it does
        # in the week and day branches
        start_of_year = start_of_day.replace(month=1, day=1)
        return (start_of_year, start_of_year.replace(year=start_of_year.year + 1))

    if 'month' in date_str:
        start_of_month = start_of_day.replace(day=1)
        # December rolls over into January of the following year
        if start_of_month.month == 12:
            next_month = start_of_month.replace(year=start_of_month.year + 1, month=1)
        else:
            next_month = start_of_month.replace(month=start_of_month.month + 1)
        return (start_of_month, next_month)

    if 'week' in date_str:
        # per the original author's note, dateparser resolves week phrases to
        # the start of the following week, so the parsed day is used as the
        # end of the range and the range covers the 7 days before it
        return (start_of_day - timedelta(days=7), start_of_day)

    return (start_of_day, start_of_day + timedelta(days=1))
|
||||||
|
|||||||
44
tests/test_date_filter.py
Normal file
44
tests/test_date_filter.py
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
# Standard Packages
|
||||||
|
import re
|
||||||
|
from datetime import timedelta, datetime
|
||||||
|
|
||||||
|
# Application Packages
|
||||||
|
from src.search_filter import date_filter
|
||||||
|
|
||||||
|
def test_parse():
    """Check that parse() turns natural-language dates into ranges relative to a fixed 'now'."""
    test_now = datetime(1984, 4, 1, 21, 21, 21)

    # (date string, expected range start, expected range end) as (year, month, day)
    expected_ranges = [
        # day variations
        ('today', (1984, 4, 1), (1984, 4, 2)),
        ('tomorrow', (1984, 4, 2), (1984, 4, 3)),
        ('yesterday', (1984, 3, 31), (1984, 4, 1)),
        ('5 days ago', (1984, 3, 27), (1984, 3, 28)),
        # week variations
        ('last week', (1984, 3, 18), (1984, 3, 25)),
        ('2 weeks ago', (1984, 3, 11), (1984, 3, 18)),
        # month variations
        ('next month', (1984, 5, 1), (1984, 6, 1)),
        ('2 months ago', (1984, 2, 1), (1984, 3, 1)),
        # year variations
        ('this year', (1984, 1, 1), (1985, 1, 1)),
        ('20 years later', (2004, 1, 1), (2005, 1, 1)),
    ]

    for date_str, start, end in expected_ranges:
        wanted = (datetime(*start, 0, 0, 0), datetime(*end, 0, 0, 0))
        assert date_filter.parse(date_str, relative_base=test_now) == wanted
|
||||||
|
|
||||||
|
|
||||||
|
def test_date_filter_regex():
    """Check the groups date_range_regex extracts for two, one and zero date filters."""
    # query -> (first operator, first date, second operator, second date)
    expected_groups = {
        'head dt>"today" dt:"2020-01-01" tail': ('>', 'today', ':', '2020-01-01'),
        'head dt>="today" dt="2020-01-01"': ('>=', 'today', '=', '2020-01-01'),
        'head dt<"today" tail': ('<', 'today', None, None),
        'head dt<="today"': ('<=', 'today', None, None),
        'head tail': (None, None, None, None),
    }

    for query, groups in expected_groups.items():
        dtrange_match = re.search(date_filter.date_range_regex, query)
        assert dtrange_match.groups() == groups
|
||||||
Reference in New Issue
Block a user