From e6db3e3d00a2750d998184d9cbdefb97e4414993 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Thu, 14 Jul 2022 18:13:12 +0400 Subject: [PATCH] Prefer Dates From Future only when specific words in date string - Default to looking at dates from past, as most notes are from past - Look for dates in future for cases where it's obvious the query is for dates in the future but dateparser's parse doesn't parse it at all. E.g. parse('5 months from now') returns nothing - Setting PREFER_DATES_FROM to 'future' in this case and passing just parse('5 months') to dateparser.parse works as expected --- src/search_filter/date_filter.py | 6 ++++-- tests/test_date_filter.py | 4 ++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/search_filter/date_filter.py b/src/search_filter/date_filter.py index 0930523b..fe7ee5e5 100644 --- a/src/search_filter/date_filter.py +++ b/src/search_filter/date_filter.py @@ -48,7 +48,9 @@ def date_filter(query, entries, embeddings): def parse(date_str, relative_base=None): "Parse date string passed in date filter of query to datetime object" # clean date string to handle future date parsing by date parser - clean_date_str = re.sub(r'later|from now|from today', '', date_str) + future_strings = ['later', 'from now', 'from today'] + prefer_dates_from = {True: 'future', False: 'past'}[any([True for fstr in future_strings if fstr in date_str])] + clean_date_str = re.sub('|'.join(future_strings), '', date_str) # parse date passed in query date filter parsed_date = dtparse.parse( @@ -56,7 +58,7 @@ def parse(date_str, relative_base=None): settings= { 'RELATIVE_BASE': relative_base or datetime.now(), 'PREFER_DAY_OF_MONTH': 'first', - 'PREFER_DATES_FROM': 'future' + 'PREFER_DATES_FROM': prefer_dates_from }) if parsed_date is None: diff --git a/tests/test_date_filter.py b/tests/test_date_filter.py index 0c30695c..8abec3af 100644 --- a/tests/test_date_filter.py +++ b/tests/test_date_filter.py @@ -26,6 +26,10 @@ def test_parse(): assert 
date_filter.parse('this year', relative_base=test_now) == (datetime(1984, 1, 1, 0, 0, 0), datetime(1985, 1, 1, 0, 0, 0)) assert date_filter.parse('20 years later', relative_base=test_now) == (datetime(2004, 1, 1, 0, 0, 0), datetime(2005, 1, 1, 0, 0, 0)) + # specific month/date variation + assert date_filter.parse('in august', relative_base=test_now) == (datetime(1983, 8, 1, 0, 0, 0), datetime(1983, 8, 2, 0, 0, 0)) + assert date_filter.parse('on 1983-08-01', relative_base=test_now) == (datetime(1983, 8, 1, 0, 0, 0), datetime(1983, 8, 2, 0, 0, 0)) + def test_date_filter_regex(): dtrange_match = re.search(date_filter.date_range_regex, 'head dt>"today" dt:"2020-01-01" tail')