From 8dfa0bf047202c9711b65eecfb8d6b988660db8c Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Mon, 29 Apr 2024 11:44:16 +0530 Subject: [PATCH] Simplify task scheduler prompt. No timezone conversion. Infer subject - Make timezone aware scheduling programmatic, instead of asking the chat model to do the conversion. This removes the need for scratchpad and may let smaller models handle the task as well - Make chat model infer subject for email. This should make the notification email more readable - Improve email by using subject in email subject, task heading. Move query to email final paragraph, which is where task metadata should go --- src/khoj/interface/email/task.html | 9 ++-- src/khoj/processor/conversation/prompts.py | 50 +++++++++++----------- src/khoj/routers/api_chat.py | 41 +++++++++--------- src/khoj/routers/email.py | 6 +-- src/khoj/routers/helpers.py | 10 ++--- 5 files changed, 54 insertions(+), 62 deletions(-) diff --git a/src/khoj/interface/email/task.html b/src/khoj/interface/email/task.html index 09035092..86a801ac 100644 --- a/src/khoj/interface/email/task.html +++ b/src/khoj/interface/email/task.html @@ -11,19 +11,20 @@
-

Merge AI with your brain

+

Your Open, Personal AI

Hey {{name}}!

-

I've shared the results you'd requested below:

+

I've shared your scheduled task results below:

-

{{query}}

+

{{subject}}

{{result}}

-

You can view, delete and manage your scheduled tasks on the settings page

+

The scheduled query I ran on your behalf: {{query}}

+

You can view, delete and manage your scheduled tasks via the settings page

- Khoj

diff --git a/src/khoj/processor/conversation/prompts.py b/src/khoj/processor/conversation/prompts.py index 2c5bea25..dd22ecd3 100644 --- a/src/khoj/processor/conversation/prompts.py +++ b/src/khoj/processor/conversation/prompts.py @@ -512,67 +512,65 @@ Khoj: crontime_prompt = PromptTemplate.from_template( """ You are Khoj, an extremely smart and helpful task scheduling assistant -- Given a user query, you infer the date, time to run the query at as a cronjob time string (converted to UTC time zone) -- Convert the cron job time to run in UTC. Use the scratchpad to calculate the cron job time. -- Infer user's time zone from the current location provided in their message. Think step-by-step. +- Given a user query, infer the date, time to run the query at as a cronjob time string - Use an approximate time that makes sense, if it not unspecified. - Also extract the search query to run at the scheduled time. Add any context required from the chat history to improve the query. -- Return the scratchpad, cronjob time and the search query to run as a JSON object. +- Return a JSON object with the cronjob time, the search query to run and the task subject in it. # Examples: ## Chat History User: Could you share a funny Calvin and Hobbes quote from my notes? AI: Here is one I found: "It's not denial. I'm just selective about the reality I accept." -User: Hahah, nice! Show a new one every morning at 9:40. My Current Location: Shanghai, China +User: Hahah, nice! Show a new one every morning. Khoj: {{ - "Scratchpad": "Shanghai is UTC+8. So, 9:40 in Shanghai is 1:40 UTC. I'll also generalize the search query to get better results.", - "Crontime": "40 1 * * *", - "Query": "/task Share a funny Calvin and Hobbes or Bill Watterson quote from my notes." 
+ "crontime": "0 9 * * *", + "query": "/task Share a funny Calvin and Hobbes or Bill Watterson quote from my notes", + "subject": "Your Calvin and Hobbes Quote for the Day" }} ## Chat History -User: Every Monday evening share the top posts on Hacker News from last week. Format it as a newsletter. My Current Location: Nairobi, Kenya +User: Every monday evening at 6 share the top posts on hacker news from last week. Format it as a newsletter Khoj: {{ - "Scratchpad": "Nairobi is UTC+3. As evening specified, I'll share at 18:30 your time. Which will be 15:30 UTC.", - "Crontime": "30 15 * * 1", - "Query": "/task Top posts last week on Hacker News" + "crontime": "0 18 * * 1", + "query": "/task Top posts last week on Hacker News", + "subject": "Your Weekly Top Hacker News Posts Newsletter" }} ## Chat History -User: What is the latest version of the Khoj python package? +User: What is the latest version of the khoj python package? AI: The latest released Khoj python package version is 1.5.0. -User: Notify me when version 2.0.0 is released. My Current Location: Mexico City, Mexico +User: Notify me when version 2.0.0 is released Khoj: {{ - "Scratchpad": "Mexico City is UTC-6. No time is specified, so I'll notify at 10:00 your time. Which will be 16:00 in UTC. Also I'll ensure the search query doesn't trigger another reminder.", - "Crontime": "0 16 * * *", - "Query": "/task Check if the latest released version of the Khoj python package is >= 2.0.0?" + "crontime": "0 10 * * *", + "query": "/task What is the latest released version of the Khoj python package?", + "subject": "Khoj Python Package Version 2.0.0 Release" }} ## Chat History -User: Tell me the latest local tech news on the first Sunday of every Month. My Current Location: Dublin, Ireland +User: Tell me the latest local tech news on the first sunday of every month Khoj: {{ - "Scratchpad": "Dublin is UTC+1. So, 10:00 in Dublin is 8:00 UTC. First Sunday of every month is 1-7. 
Also I'll enhance the search query.", - "Crontime": "0 9 1-7 * 0", - "Query": "/task Find the latest tech, AI and engineering news from around Dublin, Ireland" + "crontime": "0 8 1-7 * 0", + "query": "/task Find the latest local tech, AI and engineering news. Format it as a newsletter.", + "subject": "Your Monthly Dose of Local Tech News" }} ## Chat History -User: Inform me when the national election results are officially declared. Run task at 4pm every thursday. My Current Location: Trichy, India +User: Inform me when the national election results are declared. Run task at 4pm every thursday. Khoj: {{ - "Scratchpad": "Trichy is UTC+5:30. So, 4pm in Trichy is 10:30 UTC. Also let's add location details to the search query.", - "Crontime": "30 10 * * 4", - "Query": "/task Check if the Indian national election results are officially declared." + "crontime": "0 16 * * 4", + "query": "/task Check if the Indian national election results are officially declared", + "subject": "Indian National Election Results Declared" }} # Chat History: {chat_history} -User: {query}. 
My Current Location: {user_location} +User: {query} Khoj: """.strip() ) diff --git a/src/khoj/routers/api_chat.py b/src/khoj/routers/api_chat.py index 4e2f38ab..b6119e12 100644 --- a/src/khoj/routers/api_chat.py +++ b/src/khoj/routers/api_chat.py @@ -399,17 +399,18 @@ async def websocket_endpoint( q = q.replace(f"/{cmd.value}", "").strip() if ConversationCommand.Reminder in conversation_commands: - crontime, inferred_query = await schedule_query(q, location, meta_log) + user_timezone = pytz.timezone(timezone) + crontime, inferred_query, subject = await schedule_query(q, location, meta_log) try: - trigger = CronTrigger.from_crontab(crontime) + trigger = CronTrigger.from_crontab(crontime, user_timezone) except ValueError as e: await send_complete_llm_response(f"Unable to create reminder with crontime schedule: {crontime}") continue # Generate the job id from the hash of inferred_query and crontime - job_id = hashlib.md5(f"{inferred_query}_{crontime}".encode("utf-8")).hexdigest() + job_id = f"job_{user.uuid}_" + hashlib.md5(f"{inferred_query}_{crontime}".encode("utf-8")).hexdigest() query_id = hashlib.md5(f"{inferred_query}".encode("utf-8")).hexdigest() partial_scheduled_chat = functools.partial( - scheduled_chat, inferred_query, q, websocket.user.object, websocket.url + scheduled_chat, inferred_query, q, subject, websocket.user.object, websocket.url ) try: job = state.scheduler.add_job( @@ -419,7 +420,7 @@ async def websocket_endpoint( partial_scheduled_chat, f"{ProcessLock.Operation.SCHEDULED_JOB}_{user.uuid}_{query_id}", ), - id=f"job_{user.uuid}_{job_id}", + id=job_id, name=f"{inferred_query}", max_instances=2, # Allow second instance to kill any previous instance with stale lock jitter=30, @@ -430,17 +431,15 @@ async def websocket_endpoint( ) continue # Display next run time in user timezone instead of UTC - user_timezone = pytz.timezone(timezone) - next_run_time_utc = job.next_run_time.replace(tzinfo=pytz.utc) - next_run_time_user_tz = 
next_run_time_utc.astimezone(user_timezone) - next_run_time = next_run_time_user_tz.strftime("%Y-%m-%d %H:%M %Z (%z)") + next_run_time = job.next_run_time.strftime("%Y-%m-%d %H:%M %Z (%z)") # Remove /task prefix from inferred_query unprefixed_inferred_query = re.sub(r"^\/task\s*", "", inferred_query) # Create the scheduled task response llm_response = f""" ### 🕒 Scheduled Task - Query: **"{unprefixed_inferred_query}"** -- Schedule: `{crontime}` UTC (+0000) +- Subject: **{subject}** +- Schedule: `{crontime}` - Next Run At: **{next_run_time}**. """.strip() @@ -671,9 +670,10 @@ async def chat( user_name = await aget_user_name(user) if ConversationCommand.Reminder in conversation_commands: - crontime, inferred_query = await schedule_query(q, location, meta_log) + user_timezone = pytz.timezone(timezone) + crontime, inferred_query, subject = await schedule_query(q, location, meta_log) try: - trigger = CronTrigger.from_crontab(crontime) + trigger = CronTrigger.from_crontab(crontime, user_timezone) except ValueError as e: return Response( content=f"Unable to create reminder with crontime schedule: {crontime}", @@ -682,15 +682,17 @@ async def chat( ) # Generate the job id from the hash of inferred_query and crontime - job_id = hashlib.md5(f"{inferred_query}_{crontime}".encode("utf-8")).hexdigest() + job_id = f"job_{user.uuid}_" + hashlib.md5(f"{inferred_query}_{crontime}".encode("utf-8")).hexdigest() query_id = hashlib.md5(f"{inferred_query}".encode("utf-8")).hexdigest() - partial_scheduled_chat = functools.partial(scheduled_chat, inferred_query, q, request.user.object, request.url) + partial_scheduled_chat = functools.partial( + scheduled_chat, inferred_query, q, subject, request.user.object, request.url + ) try: job = state.scheduler.add_job( run_with_process_lock, trigger=trigger, args=(partial_scheduled_chat, f"{ProcessLock.Operation.SCHEDULED_JOB}_{user.uuid}_{query_id}"), - id=f"job_{user.uuid}_{job_id}", + id=job_id, name=f"{inferred_query}", max_instances=2, # Allow 
second instance to kill any previous instance with stale lock jitter=30, @@ -701,19 +703,16 @@ async def chat( media_type="text/plain", status_code=500, ) - # Display next run time in user timezone instead of UTC - user_timezone = pytz.timezone(timezone) - next_run_time_utc = job.next_run_time.replace(tzinfo=pytz.utc) - next_run_time_user_tz = next_run_time_utc.astimezone(user_timezone) - next_run_time = next_run_time_user_tz.strftime("%Y-%m-%d %H:%M %Z (%z)") + next_run_time = job.next_run_time.strftime("%Y-%m-%d %H:%M %Z (%z)") # Remove /task prefix from inferred_query unprefixed_inferred_query = re.sub(r"^\/task\s*", "", inferred_query) # Create the scheduled task response llm_response = f""" ### 🕒 Scheduled Task - Query: **"{unprefixed_inferred_query}"** -- Schedule: `{crontime}` UTC (+0000) +- Subject: **{subject}** +- Schedule: `{crontime}` - Next Run At: **{next_run_time}**.' """.strip() diff --git a/src/khoj/routers/email.py b/src/khoj/routers/email.py index 8e6464d3..bb5cdd5c 100644 --- a/src/khoj/routers/email.py +++ b/src/khoj/routers/email.py @@ -50,7 +50,7 @@ def send_welcome_email(name, email): ) -def send_task_email(name, email, query, result): +def send_task_email(name, email, query, result, subject): if not is_resend_enabled(): logger.debug("Email sending disabled") return @@ -60,13 +60,11 @@ def send_task_email(name, email, query, result): html_result = markdown_it.MarkdownIt().render(result) html_content = template.render(name=name, query=query, result=html_result) - query_for_subject_line = query.replace("\n", " ").replace('"', "").replace("'", "") - r = resend.Emails.send( { "from": "Khoj ", "to": email, - "subject": f'✨ Your Task Results for "{query_for_subject_line}"', + "subject": f"✨ {subject}", "html": html_content, } ) diff --git a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py index e11c1cff..c6974ef5 100644 --- a/src/khoj/routers/helpers.py +++ b/src/khoj/routers/helpers.py @@ -332,14 +332,10 @@ async def schedule_query(q: 
str, location_data: LocationData, conversation_histo """ Schedule the date, time to run the query. Assume the server timezone is UTC. """ - user_location = ( - f"{location_data.city}, {location_data.region}, {location_data.country}" if location_data else "Greenwich" - ) chat_history = construct_chat_history(conversation_history) crontime_prompt = prompts.crontime_prompt.format( query=q, - user_location=user_location, chat_history=chat_history, ) @@ -351,7 +347,7 @@ async def schedule_query(q: str, location_data: LocationData, conversation_histo response: Dict[str, str] = json.loads(raw_response) if not response or not isinstance(response, Dict) or len(response) != 3: raise AssertionError(f"Invalid response for scheduling query : {response}") - return tuple(response.values())[1:] + return response.get("crontime"), response.get("query"), response.get("subject") except Exception: raise AssertionError(f"Invalid response for scheduling query: {raw_response}") @@ -871,7 +867,7 @@ def should_notify(original_query: str, executed_query: str, ai_response: str) -> return True -def scheduled_chat(executing_query: str, scheduling_query: str, user: KhojUser, calling_url: URL): +def scheduled_chat(executing_query: str, scheduling_query: str, subject: str, user: KhojUser, calling_url: URL): # Extract relevant params from the original URL scheme = "http" if not calling_url.is_secure else "https" query_dict = parse_qs(calling_url.query) @@ -913,6 +909,6 @@ def scheduled_chat(executing_query: str, scheduling_query: str, user: KhojUser, # Notify user if the AI response is satisfactory if should_notify(original_query=scheduling_query, executed_query=cleaned_query, ai_response=ai_response): if is_resend_enabled(): - send_task_email(user.get_short_name(), user.email, scheduling_query, ai_response) + send_task_email(user.get_short_name(), user.email, scheduling_query, ai_response, subject) else: return raw_response