From c11742f443d0d52a4cac554d190c5b9b5037c573 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Tue, 12 Mar 2024 02:59:32 +0530 Subject: [PATCH 01/42] Add chat actor to schedule run query for user at specified times - Detect when user intends to schedule a task, aka reminder Add new output mode: reminder. Add example of selecting the reminder output mode - Extract schedule time (as cron timestring) and inferred query to run from user message - Use APScheduler to call chat with inferred query at scheduled time - Handle reminder scheduling from both websocket and http chat requests - Support constructing scheduled task using chat history as context Pass chat history to scheduled query generator for improved context for scheduled task generation --- pyproject.toml | 1 + src/khoj/main.py | 5 ++ src/khoj/processor/conversation/prompts.py | 55 ++++++++++++++- src/khoj/routers/api_chat.py | 81 +++++++++++++++++++++- src/khoj/routers/helpers.py | 32 ++++++++- src/khoj/utils/helpers.py | 5 +- src/khoj/utils/state.py | 2 + 7 files changed, 175 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 304b3886..76928771 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -79,6 +79,7 @@ dependencies = [ "websockets == 12.0", "psutil >= 5.8.0", "huggingface-hub >= 0.22.2", + "apscheduler ~= 3.10.0", ] dynamic = ["version"] diff --git a/src/khoj/main.py b/src/khoj/main.py index 745b77fb..74807137 100644 --- a/src/khoj/main.py +++ b/src/khoj/main.py @@ -23,6 +23,7 @@ warnings.filterwarnings("ignore", message=r"legacy way to download files from th import uvicorn import django +from apscheduler.schedulers.background import BackgroundScheduler from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware from fastapi.staticfiles import StaticFiles @@ -126,6 +127,10 @@ def run(should_start_server=True): # Setup task scheduler poll_task_scheduler() + # Setup Background Scheduler + state.scheduler = BackgroundScheduler() + 
state.scheduler.start() + # Start Server configure_routes(app) diff --git a/src/khoj/processor/conversation/prompts.py b/src/khoj/processor/conversation/prompts.py index f5700167..04dee1ce 100644 --- a/src/khoj/processor/conversation/prompts.py +++ b/src/khoj/processor/conversation/prompts.py @@ -10,8 +10,7 @@ You were created by Khoj Inc. with the following capabilities: - You *CAN REMEMBER ALL NOTES and PERSONAL INFORMATION FOREVER* that the user ever shares with you. - Users can share files and other information with you using the Khoj Desktop, Obsidian or Emacs app. They can also drag and drop their files into the chat window. -- You *CAN* generate images, look-up real-time information from the internet, and answer questions based on the user's notes. -- You cannot set reminders. +- You *CAN* generate images, look-up real-time information from the internet, set reminders and answer questions based on the user's notes. - Say "I don't know" or "I don't understand" if you don't know what to say or if you don't know the answer to a question. - Ask crisp follow-up questions to get additional context, when the answer cannot be inferred from the provided notes or past conversations. - Sometimes the user will share personal information that needs to be remembered, like an account ID or a residential address. These can be acknowledged with a simple "Got it" or "Okay". @@ -301,6 +300,22 @@ AI: I can help with that. I see online that there is a new model of the Dell XPS Q: What are the specs of the new Dell XPS 15? Khoj: default +Example: +Chat History: +User: Where did I go on my last vacation? +AI: You went to Jordan and visited Petra, the Dead Sea, and Wadi Rum. + +Q: Remind me who did I go with on that trip? +Khoj: default + +Example: +Chat History: +User: How's the weather outside? Current Location: Bali, Indonesia +AI: It's currently 28°C and partly cloudy in Bali. + +Q: Share a painting using the weather for Bali every morning. 
+Khoj: reminder + Now it's your turn to pick the mode you would like to use to answer the user's question. Provide your response as a string. Chat History: @@ -492,6 +507,42 @@ Khoj: """.strip() ) +# Schedule task +# -- +crontime_prompt = PromptTemplate.from_template( + """ +You are Khoj, an extremely smart and helpful task scheduling assistant +- Given a user query, you infer the date, time to run the query at as a cronjob time string (converted to UTC time zone) +- Convert the cron job time to run in UTC +- Infer user's time zone from the current location provided in their message +- Use an approximate time that makes sense, if it not unspecified. +- Also extract the query to run at the scheduled time. Add any context required from the chat history to improve the query. + +# Examples: +User: Could you share a funny Calvin and Hobbes quote from my notes? +AI: Here is one I found: "It's not denial. I'm just selective about the reality I accept." +User: Hahah, nice! Show a new one every morning at 9am. My Current Location: Shanghai, China +Khoj: ["0 1 * * *", "Share a funny Calvin and Hobbes or Bill Watterson quote from my notes."] + +User: Share the top weekly posts on Hacker News on Monday evenings. Format it as a newsletter. My Current Location: Nairobi, Kenya +Khoj: ["30 15 * * 1", "Top posts last week on Hacker News"] + +User: What is the latest version of the Khoj python package? +AI: The latest released Khoj python package version is 1.5.0. +User: Notify me when version 2.0.0 is released. My Current Location: Mexico City, Mexico +Khoj: ["0 16 * * *", "Check if the latest released version of the Khoj python package is >= 2.0.0?"] + +User: Tell me the latest local tech news on the first Sunday of every Month. My Current Location: Dublin, Ireland +Khoj: ["0 9 1-7 * 0", "Latest tech, AI and engineering news from around Dublin, Ireland"] + +# Chat History: +{chat_history} + +User: {query}. 
My Current Location: {user_location} +Khoj: +""".strip() +) + # System messages to user # -- help_message = PromptTemplate.from_template( diff --git a/src/khoj/routers/api_chat.py b/src/khoj/routers/api_chat.py index 2a1bbc5e..bb164b13 100644 --- a/src/khoj/routers/api_chat.py +++ b/src/khoj/routers/api_chat.py @@ -4,7 +4,8 @@ import math from typing import Dict, Optional from urllib.parse import unquote -from asgiref.sync import sync_to_async +from apscheduler.triggers.cron import CronTrigger +from asgiref.sync import async_to_sync, sync_to_async from fastapi import APIRouter, Depends, HTTPException, Request, WebSocket from fastapi.requests import Request from fastapi.responses import Response, StreamingResponse @@ -29,12 +30,14 @@ from khoj.routers.api import extract_references_and_questions from khoj.routers.helpers import ( ApiUserRateLimiter, CommonQueryParams, + CommonQueryParamsClass, ConversationCommandRateLimiter, agenerate_chat_response, aget_relevant_information_sources, aget_relevant_output_modes, get_conversation_command, is_ready_to_chat, + schedule_query, text_to_image, update_telemetry_state, validate_conversation_config, @@ -381,6 +384,55 @@ async def websocket_endpoint( await conversation_command_rate_limiter.update_and_check_if_valid(websocket, cmd) q = q.replace(f"/{cmd.value}", "").strip() + if ConversationCommand.Reminder in conversation_commands: + crontime, inferred_query = await schedule_query(q, location, meta_log) + trigger = CronTrigger.from_crontab(crontime) + common = CommonQueryParamsClass( + client=websocket.user.client_app, + user_agent=websocket.headers.get("user-agent"), + host=websocket.headers.get("host"), + ) + scope = websocket.scope.copy() + scope["path"] = "/api/chat" + scope["type"] = "http" + request = Request(scope) + + state.scheduler.add_job( + async_to_sync(chat), + trigger=trigger, + args=(request, common, inferred_query), + kwargs={ + "stream": False, + "conversation_id": conversation_id, + "city": city, + "region": 
region, + "country": country, + }, + id=f"job_{user.uuid}_{inferred_query}", + replace_existing=True, + ) + + llm_response = ( + f'🕒 Scheduled running Query: "{inferred_query}" on Schedule: `{crontime}` (in server timezone).' + ) + await sync_to_async(save_to_conversation_log)( + q, + llm_response, + user, + meta_log, + intent_type="reminder", + client_application=websocket.user.client_app, + conversation_id=conversation_id, + ) + update_telemetry_state( + request=websocket, + telemetry_type="api", + api="chat", + **common.__dict__, + ) + await send_complete_llm_response(llm_response) + continue + compiled_references, inferred_queries, defiltered_query = await extract_references_and_questions( websocket, meta_log, q, 7, 0.18, conversation_commands, location, send_status_update ) @@ -576,6 +628,33 @@ async def chat( user_name = await aget_user_name(user) + if ConversationCommand.Reminder in conversation_commands: + crontime, inferred_query = await schedule_query(q, location, meta_log) + trigger = CronTrigger.from_crontab(crontime) + state.scheduler.add_job( + async_to_sync(chat), + trigger=trigger, + args=(request, common, inferred_query, n, d, False, title, conversation_id, city, region, country), + id=f"job_{user.uuid}_{inferred_query}", + replace_existing=True, + ) + + llm_response = f'🕒 Scheduled running Query: "{inferred_query}" on Schedule: `{crontime}` (in server timezone).' 
+ await sync_to_async(save_to_conversation_log)( + q, + llm_response, + user, + meta_log, + intent_type="reminder", + client_application=request.user.client_app, + conversation_id=conversation_id, + ) + + if stream: + return StreamingResponse(llm_response, media_type="text/event-stream", status_code=200) + else: + return Response(content=llm_response, media_type="text/plain", status_code=200) + compiled_references, inferred_queries, defiltered_query = await extract_references_and_questions( request, meta_log, q, (n or 5), (d or math.inf), conversation_commands, location ) diff --git a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py index af33564f..1dab6c53 100644 --- a/src/khoj/routers/helpers.py +++ b/src/khoj/routers/helpers.py @@ -134,7 +134,7 @@ def update_telemetry_state( def construct_chat_history(conversation_history: dict, n: int = 4, agent_name="AI") -> str: chat_history = "" for chat in conversation_history.get("chat", [])[-n:]: - if chat["by"] == "khoj" and chat["intent"].get("type") == "remember": + if chat["by"] == "khoj" and chat["intent"].get("type") in ["remember", "reminder"]: chat_history += f"User: {chat['intent']['query']}\n" chat_history += f"{agent_name}: {chat['message']}\n" elif chat["by"] == "khoj" and ("text-to-image" in chat["intent"].get("type")): @@ -312,6 +312,34 @@ async def generate_online_subqueries(q: str, conversation_history: dict, locatio return [q] +async def schedule_query(q: str, location_data: LocationData, conversation_history: dict) -> Tuple[str, ...]: + """ + Schedule the date, time to run the query. Assume the server timezone is UTC. 
+ """ + user_location = ( + f"{location_data.city}, {location_data.region}, {location_data.country}" if location_data else "Greenwich" + ) + chat_history = construct_chat_history(conversation_history) + + crontime_prompt = prompts.crontime_prompt.format( + query=q, + user_location=user_location, + chat_history=chat_history, + ) + + raw_response = await send_message_to_model_wrapper(crontime_prompt) + + # Validate that the response is a non-empty, JSON-serializable list + try: + raw_response = raw_response.strip() + response: List[str] = json.loads(raw_response) + if not isinstance(response, list) or not response or len(response) != 2: + raise AssertionError(f"Invalid response for scheduling query : {response}") + return tuple(response) + except Exception: + raise AssertionError(f"Invalid response for scheduling query: {raw_response}") + + async def extract_relevant_info(q: str, corpus: str) -> Union[str, None]: """ Extract relevant information for a given query from the target corpus @@ -547,7 +575,7 @@ async def text_to_image( text2image_model = text_to_image_config.model_name chat_history = "" for chat in conversation_log.get("chat", [])[-4:]: - if chat["by"] == "khoj" and chat["intent"].get("type") == "remember": + if chat["by"] == "khoj" and chat["intent"].get("type") in ["remember", "reminder"]: chat_history += f"Q: {chat['intent']['query']}\n" chat_history += f"A: {chat['message']}\n" elif chat["by"] == "khoj" and "text-to-image" in chat["intent"].get("type"): diff --git a/src/khoj/utils/helpers.py b/src/khoj/utils/helpers.py index 3cb5bfac..9ff402ab 100644 --- a/src/khoj/utils/helpers.py +++ b/src/khoj/utils/helpers.py @@ -304,6 +304,7 @@ class ConversationCommand(str, Enum): Online = "online" Webpage = "webpage" Image = "image" + Reminder = "reminder" command_descriptions = { @@ -313,6 +314,7 @@ command_descriptions = { ConversationCommand.Online: "Search for information on the internet.", ConversationCommand.Webpage: "Get information from webpage links 
provided by you.", ConversationCommand.Image: "Generate images by describing your imagination in words.", + ConversationCommand.Reminder: "Schedule your query to run at a specified time or interval.", ConversationCommand.Help: "Display a help message with all available commands and other metadata.", } @@ -325,7 +327,8 @@ tool_descriptions_for_llm = { } mode_descriptions_for_llm = { - ConversationCommand.Image: "Use this if you think the user is requesting an image or visual response to their query.", + ConversationCommand.Image: "Use this if the user is requesting an image or visual response to their query.", + ConversationCommand.Reminder: "Use this if the user is requesting a response at a scheduled date or time.", ConversationCommand.Default: "Use this if the other response modes don't seem to fit the query.", } diff --git a/src/khoj/utils/state.py b/src/khoj/utils/state.py index 8270a70f..7439929f 100644 --- a/src/khoj/utils/state.py +++ b/src/khoj/utils/state.py @@ -4,6 +4,7 @@ from collections import defaultdict from pathlib import Path from typing import Any, Dict, List +from apscheduler.schedulers.background import BackgroundScheduler from openai import OpenAI from whisper import Whisper @@ -29,6 +30,7 @@ cli_args: List[str] = None query_cache: Dict[str, LRU] = defaultdict(LRU) chat_lock = threading.Lock() SearchType = utils_config.SearchType +scheduler: BackgroundScheduler = None telemetry: List[Dict[str, str]] = [] khoj_version: str = None device = get_device() From c28d7d34147e931ef4f4a38bbbdf1394a0495c33 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Sun, 21 Apr 2024 13:43:46 +0530 Subject: [PATCH 02/42] Add basic chat actor test to infer scheduled queries --- tests/test_openai_chat_actors.py | 104 +++++++++++++++---------------- 1 file changed, 52 insertions(+), 52 deletions(-) diff --git a/tests/test_openai_chat_actors.py b/tests/test_openai_chat_actors.py index df9d8f07..848a6139 100644 --- a/tests/test_openai_chat_actors.py +++ 
b/tests/test_openai_chat_actors.py @@ -12,8 +12,10 @@ from khoj.routers.helpers import ( aget_relevant_output_modes, generate_online_subqueries, infer_webpage_urls, + schedule_query, ) from khoj.utils.helpers import ConversationCommand +from khoj.utils.rawconfig import LocationData # Initialize variables for tests api_key = os.getenv("OPENAI_API_KEY") @@ -490,71 +492,42 @@ async def test_websearch_khoj_website_for_info_about_khoj(chat_client): # ---------------------------------------------------------------------------------------------------- @pytest.mark.anyio @pytest.mark.django_db(transaction=True) -async def test_use_default_response_mode(chat_client): - # Arrange - user_query = "What's the latest in the Israel/Palestine conflict?" - +@pytest.mark.parametrize( + "user_query, expected_mode", + [ + ("What's the latest in the Israel/Palestine conflict?", "default"), + ("Summarize the latest tech news every Monday evening", "reminder"), + ("Paint a scenery in Timbuktu in the winter", "image"), + ("Remind me, when did I last visit the Serengeti?", "default"), + ], +) +async def test_use_default_response_mode(chat_client, user_query, expected_mode): # Act mode = await aget_relevant_output_modes(user_query, {}) # Assert - assert mode.value == "default" + assert mode.value == expected_mode # ---------------------------------------------------------------------------------------------------- @pytest.mark.anyio @pytest.mark.django_db(transaction=True) -async def test_use_image_response_mode(chat_client): - # Arrange - user_query = "Paint a scenery in Timbuktu in the winter" - - # Act - mode = await aget_relevant_output_modes(user_query, {}) - - # Assert - assert mode.value == "image" - - -# ---------------------------------------------------------------------------------------------------- -@pytest.mark.anyio -@pytest.mark.django_db(transaction=True) -async def test_select_data_sources_actor_chooses_to_search_notes(chat_client): - # Arrange - user_query = "Where did I 
learn to swim?" - +@pytest.mark.parametrize( + "user_query, expected_conversation_commands", + [ + ("Where did I learn to swim?", [ConversationCommand.Notes]), + ("Where is the nearest hospital?", [ConversationCommand.Online]), + ("Summarize the wikipedia page on the history of the internet", [ConversationCommand.Webpage]), + ], +) +async def test_select_data_sources_actor_chooses_to_search_notes( + chat_client, user_query, expected_conversation_commands +): # Act conversation_commands = await aget_relevant_information_sources(user_query, {}) # Assert - assert ConversationCommand.Notes in conversation_commands - - -# ---------------------------------------------------------------------------------------------------- -@pytest.mark.anyio -@pytest.mark.django_db(transaction=True) -async def test_select_data_sources_actor_chooses_to_search_online(chat_client): - # Arrange - user_query = "Where is the nearest hospital?" - - # Act - conversation_commands = await aget_relevant_information_sources(user_query, {}) - - # Assert - assert ConversationCommand.Online in conversation_commands - - -# ---------------------------------------------------------------------------------------------------- -@pytest.mark.anyio -@pytest.mark.django_db(transaction=True) -async def test_select_data_sources_actor_chooses_to_read_webpage(chat_client): - # Arrange - user_query = "Summarize the wikipedia page on the history of the internet" - - # Act - conversation_commands = await aget_relevant_information_sources(user_query, {}) - - # Assert - assert ConversationCommand.Webpage in conversation_commands + assert expected_conversation_commands in conversation_commands # ---------------------------------------------------------------------------------------------------- @@ -571,6 +544,33 @@ async def test_infer_webpage_urls_actor_extracts_correct_links(chat_client): assert "https://en.wikipedia.org/wiki/History_of_the_Internet" in urls +# 
---------------------------------------------------------------------------------------------------- +@pytest.mark.anyio +@pytest.mark.django_db(transaction=True) +@pytest.mark.parametrize( + "user_query, location, expected_crontime, expected_queries", + [ + ( + "Share the weather forecast for the next day at 19:30", + ("Boston", "MA", "USA"), + "30 23 * * *", + ["weather forecast", "boston"], + ), + ], +) +async def test_infer_task_scheduling_request(chat_client, user_query, location, expected_crontime, expected_queries): + # Arrange + location_data = LocationData(city=location[0], region=location[1], country=location[2]) + + # Act + crontime, inferred_query = await schedule_query(user_query, location_data, {}) + + # Assert + assert expected_crontime in crontime + for query in expected_queries: + assert query in inferred_query.lower() + + # Helpers # ---------------------------------------------------------------------------------------------------- def populate_chat_history(message_list): From fcf878e1f36215dfae0eaf27a161f6066f70b2df Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Wed, 17 Apr 2024 12:23:55 +0530 Subject: [PATCH 03/42] Add new operation Scheduled Job to Operation enum of ProcessLock --- src/khoj/configure.py | 2 +- .../migrations/0036_alter_processlock_name.py | 19 +++++++++++++++++++ src/khoj/database/models/__init__.py | 3 ++- 3 files changed, 22 insertions(+), 2 deletions(-) create mode 100644 src/khoj/database/migrations/0036_alter_processlock_name.py diff --git a/src/khoj/configure.py b/src/khoj/configure.py index 38b8223f..81e653d4 100644 --- a/src/khoj/configure.py +++ b/src/khoj/configure.py @@ -324,7 +324,7 @@ def update_content_index(): @schedule.repeat(schedule.every(22).to(25).hours) def update_content_index_regularly(): ProcessLockAdapters.run_with_lock( - update_content_index, ProcessLock.Operation.UPDATE_EMBEDDINGS, max_duration_in_seconds=60 * 60 * 2 + update_content_index, ProcessLock.Operation.INDEX_CONTENT, 
max_duration_in_seconds=60 * 60 * 2 ) diff --git a/src/khoj/database/migrations/0036_alter_processlock_name.py b/src/khoj/database/migrations/0036_alter_processlock_name.py new file mode 100644 index 00000000..87c9a0d4 --- /dev/null +++ b/src/khoj/database/migrations/0036_alter_processlock_name.py @@ -0,0 +1,19 @@ +# Generated by Django 4.2.10 on 2024-04-16 18:39 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("database", "0035_processlock"), + ] + + operations = [ + migrations.AlterField( + model_name="processlock", + name="name", + field=models.CharField( + choices=[("index_content", "Index Content"), ("scheduled_job", "Scheduled Job")], max_length=200 + ), + ), + ] diff --git a/src/khoj/database/models/__init__.py b/src/khoj/database/models/__init__.py index 4077c35c..ad35be01 100644 --- a/src/khoj/database/models/__init__.py +++ b/src/khoj/database/models/__init__.py @@ -109,7 +109,8 @@ class Agent(BaseModel): class ProcessLock(BaseModel): class Operation(models.TextChoices): - UPDATE_EMBEDDINGS = "update_embeddings" + INDEX_CONTENT = "index_content" + SCHEDULED_JOB = "scheduled_job" # We need to make sure that some operations are thread-safe. To do so, add locks for potentially shared operations. # For example, we need to make sure that only one process is updating the embeddings at a time. From af0972c5396961380c5cbe2de2d4ce7fcea9b4f7 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Wed, 17 Apr 2024 16:28:42 +0530 Subject: [PATCH 04/42] Make scheduled jobs persistent and work in multiple worker setups - Store scheduled job state in Postgres so job schedules persist across app restarts - Use Process Locks to only allow single worker to process a given job type. 
This prevents duplicating job runs across all workers --- src/khoj/database/adapters/__init__.py | 7 ++ src/khoj/main.py | 18 +++- src/khoj/processor/conversation/utils.py | 2 + src/khoj/routers/api_chat.py | 122 ++++++++++++++++------- src/khoj/routers/helpers.py | 35 ++++++- 5 files changed, 144 insertions(+), 40 deletions(-) diff --git a/src/khoj/database/adapters/__init__.py b/src/khoj/database/adapters/__init__.py index 10fde9e8..f32e3b8b 100644 --- a/src/khoj/database/adapters/__init__.py +++ b/src/khoj/database/adapters/__init__.py @@ -454,6 +454,13 @@ class ProcessLockAdapters: logger.info(f"🔓 Unlocked {operation} process after executing {func} {'Succeeded' if success else 'Failed'}") +def run_with_process_lock(*args): + """Wrapper function used for scheduling jobs. + Required as APScheduler can't discover the `ProcessLockAdapters.run_with_lock` method on its own. + """ + return ProcessLockAdapters.run_with_lock(*args) + + class ClientApplicationAdapters: @staticmethod async def aget_client_application_by_id(client_id: str, client_secret: str): diff --git a/src/khoj/main.py b/src/khoj/main.py index 74807137..6ce30c7a 100644 --- a/src/khoj/main.py +++ b/src/khoj/main.py @@ -128,7 +128,20 @@ def run(should_start_server=True): poll_task_scheduler() # Setup Background Scheduler - state.scheduler = BackgroundScheduler() + from django.conf import settings as django_settings + + django_db = django_settings.DATABASES["default"] + state.scheduler = BackgroundScheduler( + { + "apscheduler.jobstores.default": { + "type": "sqlalchemy", + "url": f'postgresql://{django_db["USER"]}:{django_db["PASSWORD"]}@{django_db["HOST"]}:{django_db["PORT"]}/{django_db["NAME"]}', + }, + "apscheduler.timezone": "UTC", + "apscheduler.job_defaults.misfire_grace_time": "60", # Useful to run scheduled jobs even when worker delayed because it was busy or down + "apscheduler.job_defaults.coalesce": "true", # Combine multiple jobs into one if they are scheduled at the same time + } + ) 
state.scheduler.start() # Start Server @@ -150,6 +163,9 @@ def run(should_start_server=True): if should_start_server: start_server(app, host=args.host, port=args.port, socket=args.socket) + # Teardown + state.scheduler.shutdown() + def set_state(args): state.config_file = args.config_file diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py index c970c421..6ef7016d 100644 --- a/src/khoj/processor/conversation/utils.py +++ b/src/khoj/processor/conversation/utils.py @@ -102,6 +102,7 @@ def save_to_conversation_log( intent_type: str = "remember", client_application: ClientApplication = None, conversation_id: int = None, + job_id: str = None, ): user_message_time = user_message_time or datetime.now().strftime("%Y-%m-%d %H:%M:%S") updated_conversation = message_to_log( @@ -112,6 +113,7 @@ def save_to_conversation_log( "context": compiled_references, "intent": {"inferred-queries": inferred_queries, "type": intent_type}, "onlineContext": online_results, + "jobId": job_id, }, conversation_log=meta_log.get("chat", []), ) diff --git a/src/khoj/routers/api_chat.py b/src/khoj/routers/api_chat.py index bb164b13..34bbc5fa 100644 --- a/src/khoj/routers/api_chat.py +++ b/src/khoj/routers/api_chat.py @@ -1,3 +1,4 @@ +import functools import json import logging import math @@ -13,8 +14,13 @@ from starlette.authentication import requires from starlette.websockets import WebSocketDisconnect from websockets import ConnectionClosedOK -from khoj.database.adapters import ConversationAdapters, EntryAdapters, aget_user_name -from khoj.database.models import KhojUser +from khoj.database.adapters import ( + ConversationAdapters, + EntryAdapters, + aget_user_name, + run_with_process_lock, +) +from khoj.database.models import KhojUser, ProcessLock from khoj.processor.conversation.prompts import ( help_message, no_entries_found, @@ -38,6 +44,7 @@ from khoj.routers.helpers import ( get_conversation_command, is_ready_to_chat, schedule_query, + 
scheduled_chat, text_to_image, update_telemetry_state, validate_conversation_config, @@ -386,35 +393,40 @@ async def websocket_endpoint( if ConversationCommand.Reminder in conversation_commands: crontime, inferred_query = await schedule_query(q, location, meta_log) - trigger = CronTrigger.from_crontab(crontime) - common = CommonQueryParamsClass( - client=websocket.user.client_app, - user_agent=websocket.headers.get("user-agent"), - host=websocket.headers.get("host"), + try: + trigger = CronTrigger.from_crontab(crontime) + except ValueError as e: + await send_complete_llm_response(f"Unable to create reminder with crontime schedule: {crontime}") + continue + partial_scheduled_chat = functools.partial( + scheduled_chat, inferred_query, websocket.user.object, websocket.url ) - scope = websocket.scope.copy() - scope["path"] = "/api/chat" - scope["type"] = "http" - request = Request(scope) + try: + job = state.scheduler.add_job( + run_with_process_lock, + trigger=trigger, + args=( + partial_scheduled_chat, + f"{ProcessLock.Operation.SCHEDULED_JOB}_{user.uuid}_{inferred_query}", + ), + id=f"job_{user.uuid}_{inferred_query}_{crontime}", + name=f"{inferred_query}", + max_instances=2, # Allow second instance to kill any previous instance with stale lock + jitter=30, + ) + except: + await send_complete_llm_response( + f"Unable to schedule reminder. Ensure the reminder doesn't already exist." + ) + continue + next_run_time = job.next_run_time.strftime("%Y-%m-%d %H:%M:%S") + llm_response = f""" + ### 🕒 Scheduled Job +- Query: **"{inferred_query}"** +- Schedule: `{crontime}` +- Next Run At: **{next_run_time}** UTC. 
+ """.strip() - state.scheduler.add_job( - async_to_sync(chat), - trigger=trigger, - args=(request, common, inferred_query), - kwargs={ - "stream": False, - "conversation_id": conversation_id, - "city": city, - "region": region, - "country": country, - }, - id=f"job_{user.uuid}_{inferred_query}", - replace_existing=True, - ) - - llm_response = ( - f'🕒 Scheduled running Query: "{inferred_query}" on Schedule: `{crontime}` (in server timezone).' - ) await sync_to_async(save_to_conversation_log)( q, llm_response, @@ -423,6 +435,13 @@ async def websocket_endpoint( intent_type="reminder", client_application=websocket.user.client_app, conversation_id=conversation_id, + inferred_queries=[inferred_query], + job_id=job.id, + ) + common = CommonQueryParamsClass( + client=websocket.user.client_app, + user_agent=websocket.headers.get("user-agent"), + host=websocket.headers.get("host"), ) update_telemetry_state( request=websocket, @@ -630,16 +649,41 @@ async def chat( if ConversationCommand.Reminder in conversation_commands: crontime, inferred_query = await schedule_query(q, location, meta_log) - trigger = CronTrigger.from_crontab(crontime) - state.scheduler.add_job( - async_to_sync(chat), - trigger=trigger, - args=(request, common, inferred_query, n, d, False, title, conversation_id, city, region, country), - id=f"job_{user.uuid}_{inferred_query}", - replace_existing=True, - ) + try: + trigger = CronTrigger.from_crontab(crontime) + except ValueError as e: + return Response( + content=f"Unable to create reminder with crontime schedule: {crontime}", + media_type="text/plain", + status_code=500, + ) + + partial_scheduled_chat = functools.partial(scheduled_chat, inferred_query, request.user.object, request.url) + try: + job = state.scheduler.add_job( + run_with_process_lock, + trigger=trigger, + args=(partial_scheduled_chat, f"{ProcessLock.Operation.SCHEDULED_JOB}_{user.uuid}_{inferred_query}"), + id=f"job_{user.uuid}_{inferred_query}_{crontime}", + name=f"{inferred_query}", + 
max_instances=2, # Allow second instance to kill any previous instance with stale lock + jitter=30, + ) + except: + return Response( + content=f"Unable to schedule reminder. Ensure the reminder doesn't already exist.", + media_type="text/plain", + status_code=500, + ) + + next_run_time = job.next_run_time.strftime("%Y-%m-%d %H:%M:%S") + llm_response = f""" + ### 🕒 Scheduled Job +- Query: **"{inferred_query}"** +- Schedule: `{crontime}` +- Next Run At: **{next_run_time}** UTC.' + """.strip() - llm_response = f'🕒 Scheduled running Query: "{inferred_query}" on Schedule: `{crontime}` (in server timezone).' await sync_to_async(save_to_conversation_log)( q, llm_response, @@ -648,6 +692,8 @@ async def chat( intent_type="reminder", client_application=request.user.client_app, conversation_id=conversation_id, + inferred_queries=[inferred_query], + job_id=job.id, ) if stream: diff --git a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py index 1dab6c53..ce49d3da 100644 --- a/src/khoj/routers/helpers.py +++ b/src/khoj/routers/helpers.py @@ -17,13 +17,22 @@ from typing import ( Tuple, Union, ) +from urllib.parse import parse_qs, urlencode import openai +import requests from fastapi import Depends, Header, HTTPException, Request, UploadFile from PIL import Image from starlette.authentication import has_required_scope +from starlette.requests import URL -from khoj.database.adapters import AgentAdapters, ConversationAdapters, EntryAdapters +from khoj.database.adapters import ( + AgentAdapters, + ConversationAdapters, + EntryAdapters, + create_khoj_token, + get_khoj_tokens, +) from khoj.database.models import ( ChatModelOptions, ClientApplication, @@ -779,3 +788,27 @@ class CommonQueryParamsClass: CommonQueryParams = Annotated[CommonQueryParamsClass, Depends()] + + +def scheduled_chat(query, user: KhojUser, calling_url: URL): + # Construct the URL, header for the chat API + scheme = "http" if calling_url.scheme == "http" or calling_url.scheme == "ws" else "https" + # 
Replace the original scheduling query with the scheduled query + query_dict = parse_qs(calling_url.query) + query_dict["q"] = [query] + # Convert the dictionary back into a query string + scheduled_query = urlencode(query_dict, doseq=True) + url = f"{scheme}://{calling_url.netloc}/api/chat?{scheduled_query}" + + headers = {"User-Agent": "Khoj"} + if not state.anonymous_mode: + # Add authorization request header in non-anonymous mode + token = get_khoj_tokens(user) + if is_none_or_empty(token): + token = create_khoj_token(user) + else: + token = token[0] + headers["Authorization"] = f"Bearer {token}" + + # Call the chat API endpoint with authenticated user token and query + return requests.get(url, headers=headers) From 423d61796d3aa423f7591a44402421da0a049598 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Wed, 17 Apr 2024 21:48:08 +0530 Subject: [PATCH 05/42] Add API endpoints to get and delete user scheduled tasks --- src/khoj/routers/api.py | 42 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py index fe90698e..65a99e79 100644 --- a/src/khoj/routers/api.py +++ b/src/khoj/routers/api.py @@ -7,6 +7,7 @@ import time import uuid from typing import Any, Callable, List, Optional, Union +from apscheduler.job import Job from asgiref.sync import sync_to_async from fastapi import APIRouter, Depends, File, HTTPException, Request, UploadFile from fastapi.requests import Request @@ -386,3 +387,44 @@ def user_info(request: Request) -> Response: # Return user information as a JSON response return Response(content=json.dumps(user_info), media_type="application/json", status_code=200) + + +@api.get("/tasks", response_class=Response) +@requires(["authenticated"]) +def get_jobs(request: Request) -> Response: + user: KhojUser = request.user.object + tasks: list[Job] = state.scheduler.get_jobs() + + # Collate all tasks assigned by user that are still active + tasks_info = [ + {"id": 
task.id, "name": task.name, "next": task.next_run_time.strftime("%Y-%m-%d %H:%M")} + for task in tasks + if task.id.startswith(f"job_{user.uuid}_") + ] + + # Return tasks information as a JSON response + return Response(content=json.dumps(tasks_info), media_type="application/json", status_code=200) + + +@api.delete("/task", response_class=Response) +@requires(["authenticated"]) +def delete_job(request: Request, task_id: str) -> Response: + user: KhojUser = request.user.object + + # Perform validation checks + # Check if user is allowed to delete this task id + if not task_id.startswith(f"job_{user.uuid}_"): + return Response(content="Unauthorized job deletion request", status_code=403) + # Check if task with this task id exist + task: Job = state.scheduler.get_job(job_id=task_id) + if not task: + return Response(content="Invalid job", status_code=403) + + # Collate info about user task to be deleted + task_info = {"id": task.id, "name": task.name, "next": task.next_run_time.strftime("%Y-%m-%d %H:%MS")} + + # Delete job + task.remove() + + # Return delete task information as a JSON response + return Response(content=json.dumps(task_info), media_type="application/json", status_code=200) From 98d0ffecf143469fae817405a2c06e62010687a2 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Thu, 18 Apr 2024 00:34:19 +0530 Subject: [PATCH 06/42] Add section in settings page to view, delete your scheduled tasks --- src/khoj/interface/web/config.html | 79 ++++++++++++++++++++++++++++-- 1 file changed, 75 insertions(+), 4 deletions(-) diff --git a/src/khoj/interface/web/config.html b/src/khoj/interface/web/config.html index 8c48cb7a..2a05ef12 100644 --- a/src/khoj/interface/web/config.html +++ b/src/khoj/interface/web/config.html @@ -272,6 +272,34 @@ {% endif %} {% endif %} +
+

Scheduled Tasks

+
+
+ Scheduled Tasks +

Tasks

+
+
+

Manage your scheduled tasks handled by Khoj

+
+ + + + + + + + + +
NameNext RunActions
+
+ +
+
+
+ {% if billing_enabled %}

Billing

@@ -616,7 +644,6 @@ `; - } function listApiKeys() { @@ -624,7 +651,51 @@ fetch('/auth/token') .then(response => response.json()) .then(tokens => { - apiKeyList.innerHTML = tokens.map(generateTokenRow).join(""); + if (!tokens?.length > 0) return; + apiKeyList.innerHTML = tokens?.map(generateTokenRow).join(""); + }); + } + + // List user's API keys on page load + listApiKeys(); + + function deleteTask(taskId) { + const scheduledTaskList = document.getElementById("scheduled-tasks-list"); + // url encode the task id + taskId = encodeURIComponent(taskId); + fetch(`/api/task?task_id=${taskId}`, { + method: 'DELETE', + }) + .then(response => { + if (response.status == 200) { + const scheduledTaskItem = document.getElementById(`scheduled-task-item-${taskId}`); + scheduledTaskList.removeChild(scheduledTaskItem); + } + }); + } + + function generateTaskRow(taskObj) { + let taskId = taskObj.id; + let taskName = taskObj.name; + let taskNextRun = taskObj.next; + return ` + + ${taskName} + ${taskNextRun} + + Delete Task + + + `; + } + + function listScheduledTasks() { + const scheduledTasksList = document.getElementById("scheduled-tasks-list"); + fetch('/api/tasks') + .then(response => response.json()) + .then(tasks => { + if (!tasks?.length > 0) return; + scheduledTasksList.innerHTML = tasks.map(generateTaskRow).join(""); }); } @@ -637,8 +708,8 @@ }); } - // List user's API keys on page load - listApiKeys(); + // List user's scheduled tasks on page load + listScheduledTasks(); function removeFile(path) { fetch('/api/config/data/file?filename=' + path, { From 648f1a5c71b70da4320378180ecf6507da642056 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Wed, 17 Apr 2024 22:21:51 +0530 Subject: [PATCH 07/42] Suffix chat response element vars with "El" in chat.html of web, desktop apps --- src/interface/desktop/chat.html | 36 ++++++++++++------------ src/khoj/interface/web/chat.html | 48 +++++++++++++++----------------- 2 files changed, 41 insertions(+), 43 deletions(-) diff 
--git a/src/interface/desktop/chat.html b/src/interface/desktop/chat.html index aa73df21..a1002ec9 100644 --- a/src/interface/desktop/chat.html +++ b/src/interface/desktop/chat.html @@ -465,14 +465,14 @@ // Generate backend API URL to execute query let chatApi = `${hostURL}/api/chat?q=${encodeURIComponent(query)}&n=${resultsCount}&client=web&stream=true&conversation_id=${conversationID}&region=${region}&city=${city}&country=${countryName}`; - let new_response = document.createElement("div"); - new_response.classList.add("chat-message", "khoj"); - new_response.attributes["data-meta"] = "🏮 Khoj at " + formatDate(new Date()); - chat_body.appendChild(new_response); + let newResponseEl = document.createElement("div"); + newResponseEl.classList.add("chat-message", "khoj"); + newResponseEl.attributes["data-meta"] = "🏮 Khoj at " + formatDate(new Date()); + chat_body.appendChild(newResponseEl); - let newResponseText = document.createElement("div"); - newResponseText.classList.add("chat-message-text", "khoj"); - new_response.appendChild(newResponseText); + let newResponseTextEl = document.createElement("div"); + newResponseTextEl.classList.add("chat-message-text", "khoj"); + newResponseEl.appendChild(newResponseTextEl); // Temporary status message to indicate that Khoj is thinking let loadingEllipsis = document.createElement("div"); @@ -495,7 +495,7 @@ loadingEllipsis.appendChild(thirdEllipsis); loadingEllipsis.appendChild(fourthEllipsis); - newResponseText.appendChild(loadingEllipsis); + newResponseTextEl.appendChild(loadingEllipsis); document.getElementById("chat-body").scrollTop = document.getElementById("chat-body").scrollHeight; let chatTooltip = document.getElementById("chat-tooltip"); @@ -540,11 +540,11 @@ // If the chunk is not a JSON object, just display it as is rawResponse += chunk; } finally { - newResponseText.innerHTML = ""; - newResponseText.appendChild(formatHTMLMessage(rawResponse)); + newResponseTextEl.innerHTML = ""; +
newResponseTextEl.appendChild(formatHTMLMessage(rawResponse)); if (references != null) { - newResponseText.appendChild(references); + newResponseTextEl.appendChild(references); } document.getElementById("chat-body").scrollTop = document.getElementById("chat-body").scrollHeight; @@ -563,7 +563,7 @@ if (done) { // Append any references after all the data has been streamed if (references != {}) { - newResponseText.appendChild(createReferenceSection(references)); + newResponseTextEl.appendChild(createReferenceSection(references)); } document.getElementById("chat-body").scrollTop = document.getElementById("chat-body").scrollHeight; document.getElementById("chat-input").removeAttribute("disabled"); @@ -576,8 +576,8 @@ if (chunk.includes("### compiled references:")) { const additionalResponse = chunk.split("### compiled references:")[0]; rawResponse += additionalResponse; - newResponseText.innerHTML = ""; - newResponseText.appendChild(formatHTMLMessage(rawResponse)); + newResponseTextEl.innerHTML = ""; + newResponseTextEl.appendChild(formatHTMLMessage(rawResponse)); const rawReference = chunk.split("### compiled references:")[1]; const rawReferenceAsJson = JSON.parse(rawReference); @@ -589,14 +589,14 @@ readStream(); } else { // Display response from Khoj - if (newResponseText.getElementsByClassName("lds-ellipsis").length > 0) { - newResponseText.removeChild(loadingEllipsis); + if (newResponseTextEl.getElementsByClassName("lds-ellipsis").length > 0) { + newResponseTextEl.removeChild(loadingEllipsis); } // If the chunk is not a JSON object, just display it as is rawResponse += chunk; - newResponseText.innerHTML = ""; - newResponseText.appendChild(formatHTMLMessage(rawResponse)); + newResponseTextEl.innerHTML = ""; + newResponseTextEl.appendChild(formatHTMLMessage(rawResponse)); readStream(); } diff --git a/src/khoj/interface/web/chat.html b/src/khoj/interface/web/chat.html index 73b626fa..ef35f35d 100644 --- a/src/khoj/interface/web/chat.html +++ 
b/src/khoj/interface/web/chat.html @@ -61,8 +61,8 @@ To get started, just start typing below. You can also type / to see a list of co let waitingForLocation = true; let websocketState = { - newResponseText: null, - newResponseElement: null, + newResponseTextEl: null, + newResponseEl: null, loadingEllipsis: null, references: {}, rawResponse: "", @@ -459,8 +459,6 @@ To get started, just start typing below. You can also type / to see a list of co } async function chat() { - // Extract required fields for search from form - if (websocket) { sendMessageViaWebSocket(); return; @@ -899,8 +897,8 @@ To get started, just start typing below. You can also type / to see a list of co } websocketState = { - newResponseText: null, - newResponseElement: null, + newResponseTextEl: null, + newResponseEl: null, loadingEllipsis: null, references: {}, rawResponse: "", @@ -920,12 +918,12 @@ To get started, just start typing below. You can also type / to see a list of co } else if(chunk == "end_llm_response") { console.log("Stopped streaming", new Date()); // Append any references after all the data has been streamed - finalizeChatBodyResponse(websocketState.references, websocketState.newResponseText); + finalizeChatBodyResponse(websocketState.references, websocketState.newResponseTextEl); // Reset variables websocketState = { - newResponseText: null, - newResponseElement: null, + newResponseTextEl: null, + newResponseEl: null, loadingEllipsis: null, references: {}, rawResponse: "", @@ -950,9 +948,9 @@ To get started, just start typing below. 
You can also type / to see a list of co websocketState.rawResponse = rawResponse; websocketState.references = references; } else if (chunk.type == "status") { - handleStreamResponse(websocketState.newResponseText, chunk.message, null, false); + handleStreamResponse(websocketState.newResponseTextEl, chunk.message, null, false); } else if (chunk.type == "rate_limit") { - handleStreamResponse(websocketState.newResponseText, chunk.message, websocketState.loadingEllipsis, true); + handleStreamResponse(websocketState.newResponseTextEl, chunk.message, websocketState.loadingEllipsis, true); } else { rawResponse = chunk.response; } @@ -961,21 +959,21 @@ To get started, just start typing below. You can also type / to see a list of co websocketState.rawResponse += chunk; } finally { if (chunk.type != "status" && chunk.type != "rate_limit") { - addMessageToChatBody(websocketState.rawResponse, websocketState.newResponseText, websocketState.references); + addMessageToChatBody(websocketState.rawResponse, websocketState.newResponseTextEl, websocketState.references); } } } else { // Handle streamed response of type text/event-stream or text/plain if (chunk && chunk.includes("### compiled references:")) { - ({ rawResponse, references } = handleCompiledReferences(websocketState.newResponseText, chunk, websocketState.references, websocketState.rawResponse)); + ({ rawResponse, references } = handleCompiledReferences(websocketState.newResponseTextEl, chunk, websocketState.references, websocketState.rawResponse)); websocketState.rawResponse = rawResponse; websocketState.references = references; } else { // If the chunk is not a JSON object, just display it as is websocketState.rawResponse += chunk; - if (websocketState.newResponseText) { - handleStreamResponse(websocketState.newResponseText, websocketState.rawResponse, websocketState.loadingEllipsis); + if (websocketState.newResponseTextEl) { + handleStreamResponse(websocketState.newResponseTextEl, websocketState.rawResponse, 
websocketState.loadingEllipsis); } } @@ -1024,19 +1022,19 @@ To get started, just start typing below. You can also type / to see a list of co autoResize(); document.getElementById("chat-input").setAttribute("disabled", "disabled"); - let newResponseElement = document.createElement("div"); - newResponseElement.classList.add("chat-message", "khoj"); - newResponseElement.attributes["data-meta"] = "🏮 Khoj at " + formatDate(new Date()); - chatBody.appendChild(newResponseElement); + let newResponseEl = document.createElement("div"); + newResponseEl.classList.add("chat-message", "khoj"); + newResponseEl.attributes["data-meta"] = "🏮 Khoj at " + formatDate(new Date()); + chatBody.appendChild(newResponseEl); - let newResponseText = document.createElement("div"); - newResponseText.classList.add("chat-message-text", "khoj"); - newResponseElement.appendChild(newResponseText); + let newResponseTextEl = document.createElement("div"); + newResponseTextEl.classList.add("chat-message-text", "khoj"); + newResponseEl.appendChild(newResponseTextEl); // Temporary status message to indicate that Khoj is thinking let loadingEllipsis = createLoadingEllipse(); - newResponseText.appendChild(loadingEllipsis); + newResponseTextEl.appendChild(loadingEllipsis); document.getElementById("chat-body").scrollTop = document.getElementById("chat-body").scrollHeight; let chatTooltip = document.getElementById("chat-tooltip"); @@ -1051,8 +1049,8 @@ To get started, just start typing below. 
You can also type / to see a list of co let references = {}; websocketState = { - newResponseText, - newResponseElement, + newResponseTextEl, + newResponseEl, loadingEllipsis, references, rawResponse, From 5133b6e73b445dc03cc8f9205d6a46d052fced8e Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Wed, 17 Apr 2024 22:45:44 +0530 Subject: [PATCH 08/42] Minor improvements to styling the config page --- src/khoj/interface/web/base_config.html | 4 ++-- src/khoj/interface/web/config.html | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/khoj/interface/web/base_config.html b/src/khoj/interface/web/base_config.html index 3d552714..31020c05 100644 --- a/src/khoj/interface/web/base_config.html +++ b/src/khoj/interface/web/base_config.html @@ -103,7 +103,7 @@ .section-title { margin: 0; - padding: 12px 0 16px 0; + padding: 0 0 16px 0; font-size: 32; font-weight: normal; } @@ -326,7 +326,7 @@ div.api-settings { - width: 640px; + width: 660px; } img.api-key-action:hover { diff --git a/src/khoj/interface/web/config.html b/src/khoj/interface/web/config.html index 2a05ef12..b87267b8 100644 --- a/src/khoj/interface/web/config.html +++ b/src/khoj/interface/web/config.html @@ -22,7 +22,7 @@
-

Content

+

Content

From a1e5195c8b988a22b0255b67b4d96cc34324852e Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Thu, 18 Apr 2024 01:26:15 +0530 Subject: [PATCH 09/42] Save separate user message time from Khoj response time in chat logs Previously user message time was being stored the same as Khoj response time in conversation logs. --- src/khoj/routers/api_chat.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/khoj/routers/api_chat.py b/src/khoj/routers/api_chat.py index 34bbc5fa..913468ec 100644 --- a/src/khoj/routers/api_chat.py +++ b/src/khoj/routers/api_chat.py @@ -2,6 +2,7 @@ import functools import json import logging import math +from datetime import datetime from typing import Dict, Optional from urllib.parse import unquote @@ -362,6 +363,7 @@ async def websocket_endpoint( await send_rate_limit_message(e.detail) break + user_message_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") conversation_commands = [get_conversation_command(query=q, any_references=True)] await send_status_update(f"**👀 Understanding Query**: {q}") @@ -432,6 +434,7 @@ async def websocket_endpoint( llm_response, user, meta_log, + user_message_time, intent_type="reminder", client_application=websocket.user.client_app, conversation_id=conversation_id, @@ -531,6 +534,7 @@ async def websocket_endpoint( image, user, meta_log, + user_message_time, intent_type=intent_type, inferred_queries=[improved_image_prompt], client_application=websocket.user.client_app, @@ -607,6 +611,7 @@ async def chat( ) -> Response: user: KhojUser = request.user.object q = unquote(q) + user_message_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") logger.info(f"Chat request by {user.username}: {q}") await is_ready_to_chat(user) @@ -689,6 +694,7 @@ async def chat( llm_response, user, meta_log, + user_message_time, intent_type="reminder", client_application=request.user.client_app, conversation_id=conversation_id, @@ -765,6 +771,7 @@ async def chat( image, user, meta_log, + user_message_time, 
intent_type=intent_type, inferred_queries=[improved_image_prompt], client_application=request.user.client_app, From 7e084ef1e0cfde899bb8069a7fdf836325c9143f Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Fri, 26 Apr 2024 18:08:57 +0530 Subject: [PATCH 10/42] Improve job id. Fix refreshing list of jobs on delete from config page --- src/khoj/interface/web/config.html | 2 -- src/khoj/routers/api_chat.py | 9 +++++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/khoj/interface/web/config.html b/src/khoj/interface/web/config.html index b87267b8..882957d9 100644 --- a/src/khoj/interface/web/config.html +++ b/src/khoj/interface/web/config.html @@ -661,8 +661,6 @@ function deleteTask(taskId) { const scheduledTaskList = document.getElementById("scheduled-tasks-list"); - // url encode the task id - taskId = encodeURIComponent(taskId); fetch(`/api/task?task_id=${taskId}`, { method: 'DELETE', }) diff --git a/src/khoj/routers/api_chat.py b/src/khoj/routers/api_chat.py index 913468ec..f210f974 100644 --- a/src/khoj/routers/api_chat.py +++ b/src/khoj/routers/api_chat.py @@ -1,4 +1,5 @@ import functools +import hashlib import json import logging import math @@ -400,6 +401,8 @@ async def websocket_endpoint( except ValueError as e: await send_complete_llm_response(f"Unable to create reminder with crontime schedule: {crontime}") continue + # Generate the job id from the hash of inferred_query and crontime + job_id = hashlib.md5(f"{inferred_query}_{crontime}".encode("utf-8")).hexdigest() partial_scheduled_chat = functools.partial( scheduled_chat, inferred_query, websocket.user.object, websocket.url ) @@ -411,7 +414,7 @@ async def websocket_endpoint( partial_scheduled_chat, f"{ProcessLock.Operation.SCHEDULED_JOB}_{user.uuid}_{inferred_query}", ), - id=f"job_{user.uuid}_{inferred_query}_{crontime}", + id=f"job_{user.uuid}_{job_id}", name=f"{inferred_query}", max_instances=2, # Allow second instance to kill any previous instance with stale lock 
jitter=30, @@ -663,13 +666,15 @@ async def chat( status_code=500, ) + # Generate the job id from the hash of inferred_query and crontime + job_id = hashlib.md5(f"{inferred_query}_{crontime}".encode("utf-8")).hexdigest() partial_scheduled_chat = functools.partial(scheduled_chat, inferred_query, request.user.object, request.url) try: job = state.scheduler.add_job( run_with_process_lock, trigger=trigger, args=(partial_scheduled_chat, f"{ProcessLock.Operation.SCHEDULED_JOB}_{user.uuid}_{inferred_query}"), - id=f"job_{user.uuid}_{inferred_query}_{crontime}", + id=f"job_{user.uuid}_{job_id}", name=f"{inferred_query}", max_instances=2, # Allow second instance to kill any previous instance with stale lock jitter=30, From 7f5981594c7cd169453656895eddabc7dcc9c2a6 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Sun, 21 Apr 2024 20:06:42 +0530 Subject: [PATCH 11/42] Only notify when scheduled task results satisfy user's requirements There's a difference between running a scheduled task and notifying the user about the results of running the scheduled task. Decide to notify the user only when the results of running the scheduled task satisfy the user's requirements. Use sync version of send_message_to_model_wrapper for scheduled tasks --- src/khoj/processor/conversation/prompts.py | 41 ++++++++ src/khoj/routers/api_chat.py | 6 +- src/khoj/routers/helpers.py | 106 +++++++++++++++++++-- tests/test_openai_chat_actors.py | 43 +++++++++ 4 files changed, 183 insertions(+), 13 deletions(-) diff --git a/src/khoj/processor/conversation/prompts.py b/src/khoj/processor/conversation/prompts.py index 04dee1ce..6e4fc761 100644 --- a/src/khoj/processor/conversation/prompts.py +++ b/src/khoj/processor/conversation/prompts.py @@ -543,6 +543,47 @@ Khoj: """.strip() ) +to_notify_or_not = PromptTemplate.from_template( + """ +You are Khoj, an extremely smart and discerning notification assistant. 
+- Decide whether the user should be notified of the AI's response using the Original User Query, Executed User Query and AI Response triplet. +- Notify the user only if the AI's response satisfies the user specified requirements. +- You should only respond with a "Yes" or "No". Do not say anything else. + +# Examples: +Original User Query: Hahah, nice! Show a new one every morning at 9am. My Current Location: Shanghai, China +Executed User Query: Could you share a funny Calvin and Hobbes quote from my notes? +AI Reponse: Here is one I found: "It's not denial. I'm just selective about the reality I accept." +Khoj: Yes + +Original User Query: Every evening check if it's going to rain tomorrow. Notify me only if I'll need an umbrella. My Current Location: Nairobi, Kenya +Executed User Query: Is it going to rain tomorrow in Nairobi, Kenya +AI Response: Tomorrow's forecast is sunny with a high of 28°C and a low of 18°C +Khoj: No + +Original User Query: Tell me when version 2.0.0 is released. My Current Location: Mexico City, Mexico +Executed User Query: Check if version 2.0.0 of the Khoj python package is released +AI Response: The latest released Khoj python package version is 1.5.0. +Khoj: No + +Original User Query: Paint me a sunset every evening. My Current Location: Shanghai, China +Executed User Query: Paint me a sunset in Shanghai, China +AI Response: https://khoj-generated-images.khoj.dev/user110/image78124.webp +Khoj: Yes + +Original User Query: Share a summary of the tasks I've completed at the end of the day. My Current Location: Oslo, Norway +Executed User Query: Share a summary of the tasks I've completed today. +AI Response: I'm sorry, I couldn't find any relevant notes to respond to your message. 
+Khoj: No + +Original User Query: {original_query} +Executed User Query: {executed_query} +AI Response: {response} +Khoj: +""".strip() +) + + # System messages to user # -- help_message = PromptTemplate.from_template( diff --git a/src/khoj/routers/api_chat.py b/src/khoj/routers/api_chat.py index f210f974..bc980620 100644 --- a/src/khoj/routers/api_chat.py +++ b/src/khoj/routers/api_chat.py @@ -8,7 +8,7 @@ from typing import Dict, Optional from urllib.parse import unquote from apscheduler.triggers.cron import CronTrigger -from asgiref.sync import async_to_sync, sync_to_async +from asgiref.sync import sync_to_async from fastapi import APIRouter, Depends, HTTPException, Request, WebSocket from fastapi.requests import Request from fastapi.responses import Response, StreamingResponse @@ -404,7 +404,7 @@ async def websocket_endpoint( # Generate the job id from the hash of inferred_query and crontime job_id = hashlib.md5(f"{inferred_query}_{crontime}".encode("utf-8")).hexdigest() partial_scheduled_chat = functools.partial( - scheduled_chat, inferred_query, websocket.user.object, websocket.url + scheduled_chat, inferred_query, q, websocket.user.object, websocket.url ) try: job = state.scheduler.add_job( @@ -668,7 +668,7 @@ async def chat( # Generate the job id from the hash of inferred_query and crontime job_id = hashlib.md5(f"{inferred_query}_{crontime}".encode("utf-8")).hexdigest() - partial_scheduled_chat = functools.partial(scheduled_chat, inferred_query, request.user.object, request.url) + partial_scheduled_chat = functools.partial(scheduled_chat, inferred_query, q, request.user.object, request.url) try: job = state.scheduler.add_job( run_with_process_lock, diff --git a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py index ce49d3da..9c1f7149 100644 --- a/src/khoj/routers/helpers.py +++ b/src/khoj/routers/helpers.py @@ -475,6 +475,51 @@ async def send_message_to_model_wrapper( raise HTTPException(status_code=500, detail="Invalid conversation config") +def 
send_message_to_model_wrapper_sync( + message: str, + system_message: str = "", + response_type: str = "text", +): + conversation_config: ChatModelOptions = ConversationAdapters.get_default_conversation_config() + + if conversation_config is None: + raise HTTPException(status_code=500, detail="Contact the server administrator to set a default chat model.") + + chat_model = conversation_config.chat_model + max_tokens = conversation_config.max_prompt_size + + if conversation_config.model_type == "offline": + if state.offline_chat_processor_config is None or state.offline_chat_processor_config.loaded_model is None: + state.offline_chat_processor_config = OfflineChatProcessorModel(chat_model, max_tokens) + + loaded_model = state.offline_chat_processor_config.loaded_model + truncated_messages = generate_chatml_messages_with_context( + user_message=message, system_message=system_message, model_name=chat_model, loaded_model=loaded_model + ) + + return send_message_to_model_offline( + messages=truncated_messages, + loaded_model=loaded_model, + model=chat_model, + streaming=False, + ) + + elif conversation_config.model_type == "openai": + openai_chat_config = ConversationAdapters.get_openai_conversation_config() + api_key = openai_chat_config.api_key + truncated_messages = generate_chatml_messages_with_context( + user_message=message, system_message=system_message, model_name=chat_model + ) + + openai_response = send_message_to_model( + messages=truncated_messages, api_key=api_key, model=chat_model, response_type=response_type + ) + + return openai_response + else: + raise HTTPException(status_code=500, detail="Invalid conversation config") + + def generate_chat_response( q: str, meta_log: dict, @@ -790,16 +835,41 @@ class CommonQueryParamsClass: CommonQueryParams = Annotated[CommonQueryParamsClass, Depends()] -def scheduled_chat(query, user: KhojUser, calling_url: URL): - # Construct the URL, header for the chat API - scheme = "http" if calling_url.scheme == "http" or 
calling_url.scheme == "ws" else "https" - # Replace the original scheduling query with the scheduled query - query_dict = parse_qs(calling_url.query) - query_dict["q"] = [query] - # Convert the dictionary back into a query string - scheduled_query = urlencode(query_dict, doseq=True) - url = f"{scheme}://{calling_url.netloc}/api/chat?{scheduled_query}" +def should_notify(original_query: str, executed_query: str, ai_response: str) -> bool: + """ + Decide whether to notify the user of the AI response. + Default to notifying the user for now. + """ + if any(is_none_or_empty(message) for message in [original_query, executed_query, ai_response]): + return False + to_notify_or_not = prompts.to_notify_or_not.format( + original_query=original_query, + executed_query=executed_query, + response=ai_response, + ) + + with timer("Chat actor: Decide to notify user of AI response", logger): + try: + response = send_message_to_model_wrapper_sync(to_notify_or_not) + return "no" not in response.lower() + except: + return True + + +def scheduled_chat(executing_query: str, scheduling_query: str, user: KhojUser, calling_url: URL): + # Extract relevant params from the original URL + scheme = "http" if not calling_url.is_secure else "https" + query_dict = parse_qs(calling_url.query) + + # Replace the original scheduling query with the scheduled query + query_dict["q"] = [executing_query] + + # Construct the URL to call the chat API with the scheduled query string + encoded_query = urlencode(query_dict, doseq=True) + url = f"{scheme}://{calling_url.netloc}/api/chat?{encoded_query}" + + # Construct the Headers for the chat API headers = {"User-Agent": "Khoj"} if not state.anonymous_mode: # Add authorization request header in non-anonymous mode @@ -811,4 +881,20 @@ def scheduled_chat(query, user: KhojUser, calling_url: URL): headers["Authorization"] = f"Bearer {token}" # Call the chat API endpoint with authenticated user token and query - return requests.get(url, headers=headers) + 
raw_response = requests.get(url, headers=headers) + + # Stop if the chat API call was not successful + if raw_response.status_code != 200: + logger.error(f"Failed to run schedule chat: {raw_response.text}") + return None + + # Extract the AI response from the chat API response + if raw_response.headers.get("Content-Type") == "application/json": + response_map = raw_response.json() + ai_response = response_map.get("response") or response_map.get("image") + else: + ai_response = raw_response.text + + # Notify user if the AI response is satisfactory + if should_notify(original_query=scheduling_query, executed_query=executing_query, ai_response=ai_response): + return raw_response diff --git a/tests/test_openai_chat_actors.py b/tests/test_openai_chat_actors.py index 848a6139..c0b101ef 100644 --- a/tests/test_openai_chat_actors.py +++ b/tests/test_openai_chat_actors.py @@ -13,6 +13,7 @@ from khoj.routers.helpers import ( generate_online_subqueries, infer_webpage_urls, schedule_query, + should_notify, ) from khoj.utils.helpers import ConversationCommand from khoj.utils.rawconfig import LocationData @@ -571,6 +572,48 @@ async def test_infer_task_scheduling_request(chat_client, user_query, location, assert query in inferred_query.lower() +# ---------------------------------------------------------------------------------------------------- +@pytest.mark.anyio +@pytest.mark.django_db(transaction=True) +@pytest.mark.parametrize( + "scheduling_query, executing_query, generated_response, expected_should_notify", + [ + ( + "Notify me if it is going to rain tomorrow?", + "What's the weather forecast for tomorrow?", + "It is sunny and warm tomorrow.", + False, + ), + ( + "Summarize the latest news every morning", + "Summarize today's news", + "Today in the news: AI is taking over the world", + True, + ), + ( + "Create a weather wallpaper every morning using the current weather", + "Paint a weather wallpaper using the current weather", + 
"https://khoj-generated-wallpaper.khoj.dev/user110/weathervane.webp", + True, + ), + ( + "Let me know the election results once they are offically declared", + "What are the results of the elections? Has the winner been declared?", + "The election results has not been declared yet.", + False, + ), + ], +) +def test_decision_on_when_to_notify_scheduled_task_results( + chat_client, scheduling_query, executing_query, generated_response, expected_should_notify +): + # Act + generated_should_notify = should_notify(scheduling_query, executing_query, generated_response) + + # Assert + assert generated_should_notify == expected_should_notify + + # Helpers # ---------------------------------------------------------------------------------------------------- def populate_chat_history(message_list): From 22289a00022e7af12f7a13d9901c844ca0f1a435 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Mon, 22 Apr 2024 00:50:59 +0530 Subject: [PATCH 12/42] Improve task scheduling by using json mode and agent scratchpad - The task scheduling actor was having trouble calculating the timezone. 
Giving the actor a scratchpad to improve correctness by thinking step by step - Add more examples to reduce chances of the inferred query looping to create another reminder instead of running the query and sharing results with user - Improve task scheduling chat actor test with more tests and by ensuring unexpected words not present in response --- src/khoj/processor/conversation/prompts.py | 52 ++++++++++++++++++---- src/khoj/routers/helpers.py | 8 ++-- tests/test_openai_chat_actors.py | 45 +++++++++++++++---- 3 files changed, 84 insertions(+), 21 deletions(-) diff --git a/src/khoj/processor/conversation/prompts.py b/src/khoj/processor/conversation/prompts.py index 6e4fc761..46feab91 100644 --- a/src/khoj/processor/conversation/prompts.py +++ b/src/khoj/processor/conversation/prompts.py @@ -513,27 +513,61 @@ crontime_prompt = PromptTemplate.from_template( """ You are Khoj, an extremely smart and helpful task scheduling assistant - Given a user query, you infer the date, time to run the query at as a cronjob time string (converted to UTC time zone) -- Convert the cron job time to run in UTC -- Infer user's time zone from the current location provided in their message +- Convert the cron job time to run in UTC. Use the scratchpad to calculate the cron job time. +- Infer user's time zone from the current location provided in their message. Think step-by-step. - Use an approximate time that makes sense, if it not unspecified. -- Also extract the query to run at the scheduled time. Add any context required from the chat history to improve the query. +- Also extract the search query to run at the scheduled time. Add any context required from the chat history to improve the query. +- Return the scratchpad, cronjob time and the search query to run as a JSON object. # Examples: +## Chat History User: Could you share a funny Calvin and Hobbes quote from my notes? AI: Here is one I found: "It's not denial. I'm just selective about the reality I accept." -User: Hahah, nice! 
Show a new one every morning at 9am. My Current Location: Shanghai, China -Khoj: ["0 1 * * *", "Share a funny Calvin and Hobbes or Bill Watterson quote from my notes."] -User: Share the top weekly posts on Hacker News on Monday evenings. Format it as a newsletter. My Current Location: Nairobi, Kenya -Khoj: ["30 15 * * 1", "Top posts last week on Hacker News"] +User: Hahah, nice! Show a new one every morning at 9:40. My Current Location: Shanghai, China +Khoj: {{ + "Scratchpad": "Shanghai is UTC+8. So, 9:40 in Shanghai is 1:40 UTC. I'll also generalize the search query to get better results.", + "Crontime": "40 1 * * *", + "Query": "Share a funny Calvin and Hobbes or Bill Watterson quote from my notes." +}} +## Chat History + +User: Every Monday evening share the top posts on Hacker News from last week. Format it as a newsletter. My Current Location: Nairobi, Kenya +Khoj: {{ + "Scratchpad": "Nairobi is UTC+3. As evening specified, I'll share at 18:30 your time. Which will be 15:30 UTC.", + "Crontime": "30 15 * * 1", + "Query": "Top posts last week on Hacker News" +}} + +## Chat History User: What is the latest version of the Khoj python package? AI: The latest released Khoj python package version is 1.5.0. + User: Notify me when version 2.0.0 is released. My Current Location: Mexico City, Mexico -Khoj: ["0 16 * * *", "Check if the latest released version of the Khoj python package is >= 2.0.0?"] +Khoj: {{ + "Scratchpad": "Mexico City is UTC-6. No time is specified, so I'll notify at 10:00 your time. Which will be 16:00 in UTC. Also I'll ensure the search query doesn't trigger another reminder.", + "Crontime": "0 16 * * *", + "Query": "Check if the latest released version of the Khoj python package is >= 2.0.0?" +}} + +## Chat History User: Tell me the latest local tech news on the first Sunday of every Month. 
My Current Location: Dublin, Ireland -Khoj: ["0 9 1-7 * 0", "Latest tech, AI and engineering news from around Dublin, Ireland"] +Khoj: {{ + "Scratchpad": "Dublin is UTC+1. So, 10:00 in Dublin is 8:00 UTC. First Sunday of every month is 1-7. Also I'll enhance the search query.", + "Crontime": "0 9 1-7 * 0", + "Query": "Find the latest tech, AI and engineering news from around Dublin, Ireland" +}} + +## Chat History + +User: Inform me when the national election results are officially declared. Run task at 4pm every thursday. My Current Location: Trichy, India +Khoj: {{ + "Scratchpad": "Trichy is UTC+5:30. So, 4pm in Trichy is 10:30 UTC. Also let's add location details to the search query.", + "Crontime": "30 10 * * 4", + "Query": "Check if the Indian national election results are officially declared." +}} # Chat History: {chat_history} diff --git a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py index 9c1f7149..fa4b6d02 100644 --- a/src/khoj/routers/helpers.py +++ b/src/khoj/routers/helpers.py @@ -336,15 +336,15 @@ async def schedule_query(q: str, location_data: LocationData, conversation_histo chat_history=chat_history, ) - raw_response = await send_message_to_model_wrapper(crontime_prompt) + raw_response = await send_message_to_model_wrapper(crontime_prompt, response_type="json_object") # Validate that the response is a non-empty, JSON-serializable list try: raw_response = raw_response.strip() - response: List[str] = json.loads(raw_response) - if not isinstance(response, list) or not response or len(response) != 2: + response: Dict[str, str] = json.loads(raw_response) + if not response or not isinstance(response, Dict) or len(response) != 3: raise AssertionError(f"Invalid response for scheduling query : {response}") - return tuple(response) + return tuple(response.values())[1:] except Exception: raise AssertionError(f"Invalid response for scheduling query: {raw_response}") diff --git a/tests/test_openai_chat_actors.py b/tests/test_openai_chat_actors.py 
index c0b101ef..7c4f5ee3 100644 --- a/tests/test_openai_chat_actors.py +++ b/tests/test_openai_chat_actors.py @@ -549,27 +549,56 @@ async def test_infer_webpage_urls_actor_extracts_correct_links(chat_client): @pytest.mark.anyio @pytest.mark.django_db(transaction=True) @pytest.mark.parametrize( - "user_query, location, expected_crontime, expected_queries", + "user_query, location, expected_crontime, expected_qs, unexpected_qs", [ ( - "Share the weather forecast for the next day at 19:30", - ("Boston", "MA", "USA"), - "30 23 * * *", - ["weather forecast", "boston"], + "Share the weather forecast for the next day daily at 7:30pm", + ("Ubud", "Bali", "Indonesia"), + "30 11 * * *", # ensure correctly converts to utc + ["weather forecast", "ubud"], + ["7:30"], + ), + ( + "Notify me when the new President of Brazil is announced", + ("Sao Paulo", "Sao Paulo", "Brazil"), + "* *", # crontime is variable + ["brazil", "president"], + ["notify"], # ensure reminder isn't re-triggered on scheduled query run + ), + ( + "Let me know whenever Elon leaves Twitter. 
Check this every afternoon at 12", + ("Karachi", "Sindh", "Pakistan"), + "0 7 * * *", # ensure correctly converts to utc + ["elon", "twitter"], + ["12"], + ), + ( + "Draw a wallpaper every morning using the current weather", + ("Bogota", "Cundinamarca", "Colombia"), + "* * *", # daily crontime + ["weather", "wallpaper", "bogota"], + ["every"], ), ], ) -async def test_infer_task_scheduling_request(chat_client, user_query, location, expected_crontime, expected_queries): +async def test_infer_task_scheduling_request( + chat_client, user_query, location, expected_crontime, expected_qs, unexpected_qs +): # Arrange location_data = LocationData(city=location[0], region=location[1], country=location[2]) # Act crontime, inferred_query = await schedule_query(user_query, location_data, {}) + inferred_query = inferred_query.lower() # Assert assert expected_crontime in crontime - for query in expected_queries: - assert query in inferred_query.lower() + for expected_q in expected_qs: + assert expected_q in inferred_query, f"Expected fragment {expected_q} in query: {inferred_query}" + for unexpected_q in unexpected_qs: + assert ( + unexpected_q not in inferred_query + ), f"Did not expect fragment '{unexpected_q}' in query: '{inferred_query}'" # ---------------------------------------------------------------------------------------------------- From 69775b6d6eab96103bb0d9574034e1885c2fa810 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Mon, 22 Apr 2024 04:19:30 +0530 Subject: [PATCH 13/42] Add /task command. Use it to disable scheduling tasks from tasks This takes the load off the task scheduling chat actor / prompt from having to artificially differentiate query to create scheduled task from a scheduled task run. 
--- src/khoj/processor/conversation/prompts.py | 10 +++++----- src/khoj/routers/api_chat.py | 11 +++++++---- src/khoj/routers/helpers.py | 7 ++++++- src/khoj/utils/helpers.py | 2 ++ 4 files changed, 20 insertions(+), 10 deletions(-) diff --git a/src/khoj/processor/conversation/prompts.py b/src/khoj/processor/conversation/prompts.py index 46feab91..2c5bea25 100644 --- a/src/khoj/processor/conversation/prompts.py +++ b/src/khoj/processor/conversation/prompts.py @@ -528,7 +528,7 @@ User: Hahah, nice! Show a new one every morning at 9:40. My Current Location: Sh Khoj: {{ "Scratchpad": "Shanghai is UTC+8. So, 9:40 in Shanghai is 1:40 UTC. I'll also generalize the search query to get better results.", "Crontime": "40 1 * * *", - "Query": "Share a funny Calvin and Hobbes or Bill Watterson quote from my notes." + "Query": "/task Share a funny Calvin and Hobbes or Bill Watterson quote from my notes." }} ## Chat History @@ -537,7 +537,7 @@ User: Every Monday evening share the top posts on Hacker News from last week. Fo Khoj: {{ "Scratchpad": "Nairobi is UTC+3. As evening specified, I'll share at 18:30 your time. Which will be 15:30 UTC.", "Crontime": "30 15 * * 1", - "Query": "Top posts last week on Hacker News" + "Query": "/task Top posts last week on Hacker News" }} ## Chat History @@ -548,7 +548,7 @@ User: Notify me when version 2.0.0 is released. My Current Location: Mexico City Khoj: {{ "Scratchpad": "Mexico City is UTC-6. No time is specified, so I'll notify at 10:00 your time. Which will be 16:00 in UTC. Also I'll ensure the search query doesn't trigger another reminder.", "Crontime": "0 16 * * *", - "Query": "Check if the latest released version of the Khoj python package is >= 2.0.0?" + "Query": "/task Check if the latest released version of the Khoj python package is >= 2.0.0?" }} ## Chat History @@ -557,7 +557,7 @@ User: Tell me the latest local tech news on the first Sunday of every Month. My Khoj: {{ "Scratchpad": "Dublin is UTC+1. 
So, 10:00 in Dublin is 8:00 UTC. First Sunday of every month is 1-7. Also I'll enhance the search query.", "Crontime": "0 9 1-7 * 0", - "Query": "Find the latest tech, AI and engineering news from around Dublin, Ireland" + "Query": "/task Find the latest tech, AI and engineering news from around Dublin, Ireland" }} ## Chat History @@ -566,7 +566,7 @@ User: Inform me when the national election results are officially declared. Run Khoj: {{ "Scratchpad": "Trichy is UTC+5:30. So, 4pm in Trichy is 10:30 UTC. Also let's add location details to the search query.", "Crontime": "30 10 * * 4", - "Query": "Check if the Indian national election results are officially declared." + "Query": "/task Check if the Indian national election results are officially declared." }} # Chat History: diff --git a/src/khoj/routers/api_chat.py b/src/khoj/routers/api_chat.py index bc980620..47bc7c70 100644 --- a/src/khoj/routers/api_chat.py +++ b/src/khoj/routers/api_chat.py @@ -379,13 +379,14 @@ async def websocket_endpoint( continue meta_log = conversation.conversation_log + is_task = conversation_commands == [ConversationCommand.Task] - if conversation_commands == [ConversationCommand.Default]: + if conversation_commands == [ConversationCommand.Default] or is_task: conversation_commands = await aget_relevant_information_sources(q, meta_log) conversation_commands_str = ", ".join([cmd.value for cmd in conversation_commands]) await send_status_update(f"**🗃️ Chose Data Sources to Search:** {conversation_commands_str}") - mode = await aget_relevant_output_modes(q, meta_log) + mode = await aget_relevant_output_modes(q, meta_log, is_task) await send_status_update(f"**🧑🏾‍💻 Decided Response Mode:** {mode.value}") if mode not in conversation_commands: conversation_commands.append(mode) @@ -638,9 +639,11 @@ async def chat( else: meta_log = conversation.conversation_log - if conversation_commands == [ConversationCommand.Default]: + is_task = conversation_commands == [ConversationCommand.Task] + + if 
conversation_commands == [ConversationCommand.Default] or is_task: conversation_commands = await aget_relevant_information_sources(q, meta_log) - mode = await aget_relevant_output_modes(q, meta_log) + mode = await aget_relevant_output_modes(q, meta_log, is_task) if mode not in conversation_commands: conversation_commands.append(mode) diff --git a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py index fa4b6d02..0f1d3728 100644 --- a/src/khoj/routers/helpers.py +++ b/src/khoj/routers/helpers.py @@ -163,6 +163,8 @@ def get_conversation_command(query: str, any_references: bool = False) -> Conver return ConversationCommand.Online elif query.startswith("/image"): return ConversationCommand.Image + elif query.startswith("/task"): + return ConversationCommand.Task # If no relevant notes found for the given query elif not any_references: return ConversationCommand.General @@ -220,7 +222,7 @@ async def aget_relevant_information_sources(query: str, conversation_history: di return [ConversationCommand.Default] -async def aget_relevant_output_modes(query: str, conversation_history: dict): +async def aget_relevant_output_modes(query: str, conversation_history: dict, is_task: bool = False): """ Given a query, determine which of the available tools the agent should use in order to answer appropriately. 
""" @@ -229,6 +231,9 @@ async def aget_relevant_output_modes(query: str, conversation_history: dict): mode_options_str = "" for mode, description in mode_descriptions_for_llm.items(): + # Do not allow tasks to schedule another task + if is_task and mode == ConversationCommand.Reminder: + continue mode_options[mode.value] = description mode_options_str += f'- "{mode.value}": "{description}"\n' diff --git a/src/khoj/utils/helpers.py b/src/khoj/utils/helpers.py index 9ff402ab..1e85b679 100644 --- a/src/khoj/utils/helpers.py +++ b/src/khoj/utils/helpers.py @@ -305,6 +305,7 @@ class ConversationCommand(str, Enum): Webpage = "webpage" Image = "image" Reminder = "reminder" + Task = "task" command_descriptions = { @@ -315,6 +316,7 @@ command_descriptions = { ConversationCommand.Webpage: "Get information from webpage links provided by you.", ConversationCommand.Image: "Generate images by describing your imagination in words.", ConversationCommand.Reminder: "Schedule your query to run at a specified time or interval.", + ConversationCommand.Task: "Scheduled task running at previously specified schedule.", ConversationCommand.Help: "Display a help message with all available commands and other metadata.", } From a5ed4f2af28f6c2bfc083b5e614408c406839bb4 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Mon, 22 Apr 2024 03:40:34 +0530 Subject: [PATCH 14/42] Send email to share results of scheduled task --- src/khoj/interface/email/task.html | 41 ++++++++++++++++++++++++++++++ src/khoj/routers/email.py | 21 ++++++++++++++- src/khoj/routers/helpers.py | 9 +++++-- 3 files changed, 68 insertions(+), 3 deletions(-) create mode 100644 src/khoj/interface/email/task.html diff --git a/src/khoj/interface/email/task.html b/src/khoj/interface/email/task.html new file mode 100644 index 00000000..09035092 --- /dev/null +++ b/src/khoj/interface/email/task.html @@ -0,0 +1,41 @@ + + + + Khoj AI - Task + + + + + +
+
+

Merge AI with your brain

+

Hey {{name}}!

+

I've shared the results you'd requested below:

+ +
+
+ +

{{query}}

+
+

{{result}}

+
+
+

You can view, delete and manage your scheduled tasks on the settings page

+
+
+

- Khoj

+ + + + + + + + +
DocsGitHubTwitterLinkedInDiscord
+ + + diff --git a/src/khoj/routers/email.py b/src/khoj/routers/email.py index 86bf67ee..ba28d029 100644 --- a/src/khoj/routers/email.py +++ b/src/khoj/routers/email.py @@ -30,7 +30,7 @@ def is_resend_enabled(): return bool(RESEND_API_KEY) -async def send_welcome_email(name, email): +def send_welcome_email(name, email): if not is_resend_enabled(): logger.debug("Email sending disabled") return @@ -47,3 +47,22 @@ async def send_welcome_email(name, email): "html": html_content, } ) + + +def send_task_email(name, email, query, result): + if not is_resend_enabled(): + logger.debug("Email sending disabled") + return + + template = env.get_template("task.html") + + html_content = template.render(name=name, query=query, result=result) + + resend.Emails.send( + { + "from": "Khoj ", + "to": email, + "subject": f'✨ Your Task Results for "{query}"', + "html": html_content, + } + ) diff --git a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py index 0f1d3728..8059733d 100644 --- a/src/khoj/routers/helpers.py +++ b/src/khoj/routers/helpers.py @@ -53,6 +53,7 @@ from khoj.processor.conversation.utils import ( generate_chatml_messages_with_context, save_to_conversation_log, ) +from khoj.routers.email import is_resend_enabled, send_task_email from khoj.routers.storage import upload_image from khoj.utils import state from khoj.utils.config import OfflineChatProcessorModel @@ -894,6 +895,7 @@ def scheduled_chat(executing_query: str, scheduling_query: str, user: KhojUser, return None # Extract the AI response from the chat API response + cleaned_query = scheduling_query.replace("/task", "", 1).strip() if raw_response.headers.get("Content-Type") == "application/json": response_map = raw_response.json() ai_response = response_map.get("response") or response_map.get("image") @@ -901,5 +903,8 @@ def scheduled_chat(executing_query: str, scheduling_query: str, user: KhojUser, ai_response = raw_response.text # Notify user if the AI response is satisfactory - if 
should_notify(original_query=scheduling_query, executed_query=executing_query, ai_response=ai_response): - return raw_response + if should_notify(original_query=scheduling_query, executed_query=cleaned_query, ai_response=ai_response): + if is_resend_enabled(): + send_task_email(user.get_short_name(), user.email, scheduling_query, ai_response) + else: + return raw_response From 0e0136246950894bf5c69a7f3182bba693060b6c Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Fri, 26 Apr 2024 23:24:31 +0530 Subject: [PATCH 15/42] Merge DB migrations from master with those from scheduled task feature --- .../database/migrations/0038_merge_20240426_1640.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 src/khoj/database/migrations/0038_merge_20240426_1640.py diff --git a/src/khoj/database/migrations/0038_merge_20240426_1640.py b/src/khoj/database/migrations/0038_merge_20240426_1640.py new file mode 100644 index 00000000..74cabb85 --- /dev/null +++ b/src/khoj/database/migrations/0038_merge_20240426_1640.py @@ -0,0 +1,12 @@ +# Generated by Django 4.2.10 on 2024-04-26 16:40 + +from django.db import migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("database", "0036_alter_processlock_name"), + ("database", "0037_searchmodelconfig_bi_encoder_docs_encode_config_and_more"), + ] + + operations: list = [] From 6736551ba3bc76279715f0c74f8bc8fba698a1ae Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Fri, 26 Apr 2024 23:31:40 +0530 Subject: [PATCH 16/42] Improve scheduled task text rendered in UI --- src/khoj/interface/web/config.html | 2 +- src/khoj/routers/api.py | 7 ++++++- src/khoj/routers/api_chat.py | 11 +++++++---- src/khoj/routers/helpers.py | 12 ++++++++++-- 4 files changed, 24 insertions(+), 8 deletions(-) diff --git a/src/khoj/interface/web/config.html b/src/khoj/interface/web/config.html index 882957d9..96e4fa55 100644 --- a/src/khoj/interface/web/config.html +++ b/src/khoj/interface/web/config.html @@ 
-280,7 +280,7 @@

Tasks

-

Manage your scheduled tasks handled by Khoj

+

Manage your scheduled tasks

diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py index 65a99e79..88078c9b 100644 --- a/src/khoj/routers/api.py +++ b/src/khoj/routers/api.py @@ -3,6 +3,7 @@ import json import logging import math import os +import re import time import uuid from typing import Any, Callable, List, Optional, Union @@ -397,7 +398,11 @@ def get_jobs(request: Request) -> Response: # Collate all tasks assigned by user that are still active tasks_info = [ - {"id": task.id, "name": task.name, "next": task.next_run_time.strftime("%Y-%m-%d %H:%M")} + { + "id": task.id, + "name": re.sub(r"^/task\s*", "", task.name), + "next": task.next_run_time.strftime("%Y-%m-%d %H:%M"), + } for task in tasks if task.id.startswith(f"job_{user.uuid}_") ] diff --git a/src/khoj/routers/api_chat.py b/src/khoj/routers/api_chat.py index 47bc7c70..1f24b3a2 100644 --- a/src/khoj/routers/api_chat.py +++ b/src/khoj/routers/api_chat.py @@ -3,6 +3,7 @@ import hashlib import json import logging import math +import re from datetime import datetime from typing import Dict, Optional from urllib.parse import unquote @@ -425,10 +426,11 @@ async def websocket_endpoint( f"Unable to schedule reminder. Ensure the reminder doesn't already exist." ) continue + unprefixed_inferred_query = re.sub(r"^\/task\s*", "", inferred_query) next_run_time = job.next_run_time.strftime("%Y-%m-%d %H:%M:%S") llm_response = f""" - ### 🕒 Scheduled Job -- Query: **"{inferred_query}"** + ### 🕒 Scheduled Task +- Query: **"{unprefixed_inferred_query}"** - Schedule: `{crontime}` - Next Run At: **{next_run_time}** UTC. """.strip() @@ -689,10 +691,11 @@ async def chat( status_code=500, ) + unprefixed_inferred_query = re.sub(r"^\/task\s*", "", inferred_query) next_run_time = job.next_run_time.strftime("%Y-%m-%d %H:%M:%S") llm_response = f""" - ### 🕒 Scheduled Job -- Query: **"{inferred_query}"** + ### 🕒 Scheduled Task +- Query: **"{unprefixed_inferred_query}"** - Schedule: `{crontime}` - Next Run At: **{next_run_time}** UTC.' 
""".strip() diff --git a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py index 8059733d..e11c1cff 100644 --- a/src/khoj/routers/helpers.py +++ b/src/khoj/routers/helpers.py @@ -3,6 +3,7 @@ import base64 import io import json import logging +import re from concurrent.futures import ThreadPoolExecutor from datetime import datetime, timedelta, timezone from functools import partial @@ -858,8 +859,15 @@ def should_notify(original_query: str, executed_query: str, ai_response: str) -> with timer("Chat actor: Decide to notify user of AI response", logger): try: response = send_message_to_model_wrapper_sync(to_notify_or_not) - return "no" not in response.lower() + should_notify_result = "no" not in response.lower() + logger.info( + f'Decided to {"not " if not should_notify_result else ""}notify user of scheduled task response.' + ) + return should_notify_result except: + logger.warning( + f"Fallback to notify user of scheduled task response as failed to infer should notify or not." + ) return True @@ -895,7 +903,7 @@ def scheduled_chat(executing_query: str, scheduling_query: str, user: KhojUser, return None # Extract the AI response from the chat API response - cleaned_query = scheduling_query.replace("/task", "", 1).strip() + cleaned_query = re.sub(r"^/task\s*", "", scheduling_query).strip() if raw_response.headers.get("Content-Type") == "application/json": response_map = raw_response.json() ai_response = response_map.get("response") or response_map.get("image") From c17dbbeb92a8920e3a458876fa8bd4e7a33461a1 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Sat, 27 Apr 2024 00:56:49 +0530 Subject: [PATCH 17/42] Render next run time in user timezone in config, chat UIs - Pass timezone string from ipapi to khoj via clients - Pass this data from web, desktop and obsidian clients to server - Use user tz to render next run time of scheduled task in user tz --- pyproject.toml | 1 + src/interface/desktop/chat.html | 4 +++- 
src/interface/obsidian/src/chat_modal.ts | 4 +++- src/khoj/interface/web/chat.html | 8 ++++--- src/khoj/routers/api_chat.py | 27 ++++++++++++++++++------ 5 files changed, 33 insertions(+), 11 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 76928771..fa1db686 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -80,6 +80,7 @@ dependencies = [ "psutil >= 5.8.0", "huggingface-hub >= 0.22.2", "apscheduler ~= 3.10.0", + "pytz ~= 2024.1", ] dynamic = ["version"] diff --git a/src/interface/desktop/chat.html b/src/interface/desktop/chat.html index a1002ec9..c26fe4b5 100644 --- a/src/interface/desktop/chat.html +++ b/src/interface/desktop/chat.html @@ -40,6 +40,7 @@ let region = null; let city = null; let countryName = null; + let timezone = null; fetch("https://ipapi.co/json") .then(response => response.json()) @@ -47,6 +48,7 @@ region = data.region; city = data.city; countryName = data.country_name; + timezone = data.timezone; }) .catch(err => { console.log(err); @@ -463,7 +465,7 @@ } // Generate backend API URL to execute query - let chatApi = `${hostURL}/api/chat?q=${encodeURIComponent(query)}&n=${resultsCount}&client=web&stream=true&conversation_id=${conversationID}®ion=${region}&city=${city}&country=${countryName}`; + let chatApi = `${hostURL}/api/chat?q=${encodeURIComponent(query)}&n=${resultsCount}&client=web&stream=true&conversation_id=${conversationID}®ion=${region}&city=${city}&country=${countryName}&timezone=${timezone}`; let newResponseEl = document.createElement("div"); newResponseEl.classList.add("chat-message", "khoj"); diff --git a/src/interface/obsidian/src/chat_modal.ts b/src/interface/obsidian/src/chat_modal.ts index 504ce4db..31b938a1 100644 --- a/src/interface/obsidian/src/chat_modal.ts +++ b/src/interface/obsidian/src/chat_modal.ts @@ -15,6 +15,7 @@ export class KhojChatModal extends Modal { region: string; city: string; countryName: string; + timezone: string; constructor(app: App, setting: KhojSetting) { super(app); @@ -30,6 +31,7 @@ 
export class KhojChatModal extends Modal { this.region = data.region; this.city = data.city; this.countryName = data.country_name; + this.timezone = data.timezone; }) .catch(err => { console.log(err); @@ -393,7 +395,7 @@ export class KhojChatModal extends Modal { // Get chat response from Khoj backend let encodedQuery = encodeURIComponent(query); - let chatUrl = `${this.setting.khojUrl}/api/chat?q=${encodedQuery}&n=${this.setting.resultsCount}&client=obsidian&stream=true&region=${this.region}&city=${this.city}&country=${this.countryName}`; + let chatUrl = `${this.setting.khojUrl}/api/chat?q=${encodedQuery}&n=${this.setting.resultsCount}&client=obsidian&stream=true&region=${this.region}&city=${this.city}&country=${this.countryName}&timezone=${this.timezone}`; let responseElement = this.createKhojResponseDiv(); // Temporary status message to indicate that Khoj is thinking diff --git a/src/khoj/interface/web/chat.html b/src/khoj/interface/web/chat.html index ef35f35d..5302f311 100644 --- a/src/khoj/interface/web/chat.html +++ b/src/khoj/interface/web/chat.html @@ -58,6 +58,7 @@ To get started, just start typing below. You can also type / to see a list of co let region = null; let city = null; let countryName = null; + let timezone = null; let waitingForLocation = true; let websocketState = { @@ -74,13 +75,14 @@ To get started, just start typing below. You can also type / to see a list of co region = data.region; city = data.city; countryName = data.country_name; + timezone = data.timezone; }) .catch(err => { console.log(err); return; }) .finally(() => { - console.debug("Region:", region, "City:", city, "Country:", countryName); + console.debug("Region:", region, "City:", city, "Country:", countryName, "Timezone:", timezone); waitingForLocation = false; setupWebSocket(); }); @@ -511,7 +513,7 @@ To get started, just start typing below.
You can also type / to see a list of co chatInput.classList.remove("option-enabled"); // Generate backend API URL to execute query - let url = `/api/chat?q=${encodeURIComponent(query)}&n=${resultsCount}&client=web&stream=true&conversation_id=${conversationID}&region=${region}&city=${city}&country=${countryName}`; + let url = `/api/chat?q=${encodeURIComponent(query)}&n=${resultsCount}&client=web&stream=true&conversation_id=${conversationID}&region=${region}&city=${city}&country=${countryName}&timezone=${timezone}`; // Call specified Khoj API let response = await fetch(url); @@ -906,7 +908,7 @@ To get started, just start typing below. You can also type / to see a list of co if (chatBody.dataset.conversationId) { webSocketUrl += `?conversation_id=${chatBody.dataset.conversationId}`; - webSocketUrl += (!!region && !!city && !!countryName) ? `&region=${region}&city=${city}&country=${countryName}` : ''; + webSocketUrl += (!!region && !!city && !!countryName) && !!timezone ? `&region=${region}&city=${city}&country=${countryName}&timezone=${timezone}` : ''; websocket = new WebSocket(webSocketUrl); websocket.onmessage = function(event) { diff --git a/src/khoj/routers/api_chat.py b/src/khoj/routers/api_chat.py index 1f24b3a2..58cc72bc 100644 --- a/src/khoj/routers/api_chat.py +++ b/src/khoj/routers/api_chat.py @@ -8,6 +8,7 @@ from datetime import datetime from typing import Dict, Optional from urllib.parse import unquote +import pytz from apscheduler.triggers.cron import CronTrigger from asgiref.sync import sync_to_async from fastapi import APIRouter, Depends, HTTPException, Request, WebSocket @@ -273,6 +274,7 @@ async def websocket_endpoint( city: Optional[str] = None, region: Optional[str] = None, country: Optional[str] = None, + timezone: Optional[str] = None, ): connection_alive = True @@ -426,13 +428,19 @@ async def websocket_endpoint( f"Unable to schedule reminder. Ensure the reminder doesn't already exist."
) continue + # Display next run time in user timezone instead of UTC + user_timezone = pytz.timezone(timezone) + next_run_time_utc = job.next_run_time.replace(tzinfo=pytz.utc) + next_run_time_user_tz = next_run_time_utc.astimezone(user_timezone) + next_run_time = next_run_time_user_tz.strftime("%Y-%m-%d %H:%M %Z (%z)") + # Remove /task prefix from inferred_query unprefixed_inferred_query = re.sub(r"^\/task\s*", "", inferred_query) - next_run_time = job.next_run_time.strftime("%Y-%m-%d %H:%M:%S") + # Create the scheduled task response llm_response = f""" ### 🕒 Scheduled Task - Query: **"{unprefixed_inferred_query}"** -- Schedule: `{crontime}` -- Next Run At: **{next_run_time}** UTC. +- Schedule: `{crontime}` UTC (+0000) +- Next Run At: **{next_run_time}**. """.strip() await sync_to_async(save_to_conversation_log)( @@ -608,6 +616,7 @@ async def chat( city: Optional[str] = None, region: Optional[str] = None, country: Optional[str] = None, + timezone: Optional[str] = None, rate_limiter_per_minute=Depends( ApiUserRateLimiter(requests=5, subscribed_requests=60, window=60, slug="chat_minute") ), @@ -691,13 +700,19 @@ async def chat( status_code=500, ) + # Display next run time in user timezone instead of UTC + user_timezone = pytz.timezone(timezone) + next_run_time_utc = job.next_run_time.replace(tzinfo=pytz.utc) + next_run_time_user_tz = next_run_time_utc.astimezone(user_timezone) + next_run_time = next_run_time_user_tz.strftime("%Y-%m-%d %H:%M %Z (%z)") + # Remove /task prefix from inferred_query unprefixed_inferred_query = re.sub(r"^\/task\s*", "", inferred_query) - next_run_time = job.next_run_time.strftime("%Y-%m-%d %H:%M:%S") + # Create the scheduled task response llm_response = f""" ### 🕒 Scheduled Task - Query: **"{unprefixed_inferred_query}"** -- Schedule: `{crontime}` -- Next Run At: **{next_run_time}** UTC.' +- Schedule: `{crontime}` UTC (+0000) +- Next Run At: **{next_run_time}**.' 
""".strip() await sync_to_async(save_to_conversation_log)( From 3ce06a938c5bda57e846462ba41b742a84dd0afd Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Sat, 27 Apr 2024 01:05:29 +0530 Subject: [PATCH 18/42] Render scheduled task response as html to improve readability in email --- pyproject.toml | 1 + src/khoj/routers/email.py | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index fa1db686..498be35e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -76,6 +76,7 @@ dependencies = [ "django-phonenumber-field == 7.3.0", "phonenumbers == 8.13.27", "markdownify ~= 0.11.6", + "markdown-it-py ~= 3.0.0", "websockets == 12.0", "psutil >= 5.8.0", "huggingface-hub >= 0.22.2", diff --git a/src/khoj/routers/email.py b/src/khoj/routers/email.py index ba28d029..96eb5d85 100644 --- a/src/khoj/routers/email.py +++ b/src/khoj/routers/email.py @@ -6,6 +6,7 @@ try: except ImportError: pass +import markdown_it from django.conf import settings from jinja2 import Environment, FileSystemLoader @@ -56,7 +57,8 @@ def send_task_email(name, email, query, result): template = env.get_template("task.html") - html_content = template.render(name=name, query=query, result=result) + html_result = markdown_it.MarkdownIt().render(result) + html_content = template.render(name=name, query=query, result=html_result) resend.Emails.send( { From 2c563ad28048fc4672bb631f0605f5080761c892 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Sat, 27 Apr 2024 01:55:08 +0530 Subject: [PATCH 19/42] Use hash of query in process lock id to standardize id format - Using inferred_query directly was brittle (like previous job id) - And process lock id had a limited size, so wouldn't work for larger inferred query strings --- src/khoj/routers/api_chat.py | 6 ++++-- src/khoj/routers/email.py | 7 +++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/khoj/routers/api_chat.py b/src/khoj/routers/api_chat.py index 58cc72bc..4e2f38ab 
100644 --- a/src/khoj/routers/api_chat.py +++ b/src/khoj/routers/api_chat.py @@ -407,6 +407,7 @@ async def websocket_endpoint( continue # Generate the job id from the hash of inferred_query and crontime job_id = hashlib.md5(f"{inferred_query}_{crontime}".encode("utf-8")).hexdigest() + query_id = hashlib.md5(f"{inferred_query}".encode("utf-8")).hexdigest() partial_scheduled_chat = functools.partial( scheduled_chat, inferred_query, q, websocket.user.object, websocket.url ) @@ -416,7 +417,7 @@ async def websocket_endpoint( trigger=trigger, args=( partial_scheduled_chat, - f"{ProcessLock.Operation.SCHEDULED_JOB}_{user.uuid}_{inferred_query}", + f"{ProcessLock.Operation.SCHEDULED_JOB}_{user.uuid}_{query_id}", ), id=f"job_{user.uuid}_{job_id}", name=f"{inferred_query}", @@ -682,12 +683,13 @@ async def chat( # Generate the job id from the hash of inferred_query and crontime job_id = hashlib.md5(f"{inferred_query}_{crontime}".encode("utf-8")).hexdigest() + query_id = hashlib.md5(f"{inferred_query}".encode("utf-8")).hexdigest() partial_scheduled_chat = functools.partial(scheduled_chat, inferred_query, q, request.user.object, request.url) try: job = state.scheduler.add_job( run_with_process_lock, trigger=trigger, - args=(partial_scheduled_chat, f"{ProcessLock.Operation.SCHEDULED_JOB}_{user.uuid}_{inferred_query}"), + args=(partial_scheduled_chat, f"{ProcessLock.Operation.SCHEDULED_JOB}_{user.uuid}_{query_id}"), id=f"job_{user.uuid}_{job_id}", name=f"{inferred_query}", max_instances=2, # Allow second instance to kill any previous instance with stale lock diff --git a/src/khoj/routers/email.py b/src/khoj/routers/email.py index 96eb5d85..8e6464d3 100644 --- a/src/khoj/routers/email.py +++ b/src/khoj/routers/email.py @@ -60,11 +60,14 @@ def send_task_email(name, email, query, result): html_result = markdown_it.MarkdownIt().render(result) html_content = template.render(name=name, query=query, result=html_result) - resend.Emails.send( + query_for_subject_line = query.replace("\n", 
" ").replace('"', "").replace("'", "") + + r = resend.Emails.send( { "from": "Khoj ", "to": email, - "subject": f'✨ Your Task Results for "{query}"', + "subject": f'✨ Your Task Results for "{query_for_subject_line}"', "html": html_content, } ) + return r From 8dfa0bf047202c9711b65eecfb8d6b988660db8c Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Mon, 29 Apr 2024 11:44:16 +0530 Subject: [PATCH 20/42] Simplify task scheduler prompt. No timezone conversion. Infer subject - Make timezone aware scheduling programmatic, instead of asking the chat model to do the conversion. This removes the need for scratchpad and may let smaller models handle the task as well - Make chat model infer subject for email. This should make the notification email more readable - Improve email by using subject in email subject, task heading. Move query to email final paragraph, which is where task metadata should go --- src/khoj/interface/email/task.html | 9 ++-- src/khoj/processor/conversation/prompts.py | 50 +++++++++++----------- src/khoj/routers/api_chat.py | 41 +++++++++--------- src/khoj/routers/email.py | 6 +-- src/khoj/routers/helpers.py | 10 ++--- 5 files changed, 54 insertions(+), 62 deletions(-) diff --git a/src/khoj/interface/email/task.html b/src/khoj/interface/email/task.html index 09035092..86a801ac 100644 --- a/src/khoj/interface/email/task.html +++ b/src/khoj/interface/email/task.html @@ -11,19 +11,20 @@
-

Merge AI with your brain

+

Your Open, Personal AI

Hey {{name}}!

-

I've shared the results you'd requested below:

+

I've shared your scheduled task results below:

-

You can view, delete and manage your scheduled tasks on the settings page

+

The scheduled query I ran on your behalf: {{query}}

+

You can view, delete and manage your scheduled tasks via the settings page

- Khoj

diff --git a/src/khoj/processor/conversation/prompts.py b/src/khoj/processor/conversation/prompts.py index 2c5bea25..dd22ecd3 100644 --- a/src/khoj/processor/conversation/prompts.py +++ b/src/khoj/processor/conversation/prompts.py @@ -512,67 +512,65 @@ Khoj: crontime_prompt = PromptTemplate.from_template( """ You are Khoj, an extremely smart and helpful task scheduling assistant -- Given a user query, you infer the date, time to run the query at as a cronjob time string (converted to UTC time zone) -- Convert the cron job time to run in UTC. Use the scratchpad to calculate the cron job time. -- Infer user's time zone from the current location provided in their message. Think step-by-step. +- Given a user query, infer the date, time to run the query at as a cronjob time string - Use an approximate time that makes sense, if it not unspecified. - Also extract the search query to run at the scheduled time. Add any context required from the chat history to improve the query. -- Return the scratchpad, cronjob time and the search query to run as a JSON object. +- Return a JSON object with the cronjob time, the search query to run and the task subject in it. # Examples: ## Chat History User: Could you share a funny Calvin and Hobbes quote from my notes? AI: Here is one I found: "It's not denial. I'm just selective about the reality I accept." -User: Hahah, nice! Show a new one every morning at 9:40. My Current Location: Shanghai, China +User: Hahah, nice! Show a new one every morning. Khoj: {{ - "Scratchpad": "Shanghai is UTC+8. So, 9:40 in Shanghai is 1:40 UTC. I'll also generalize the search query to get better results.", - "Crontime": "40 1 * * *", - "Query": "/task Share a funny Calvin and Hobbes or Bill Watterson quote from my notes." 
+ "crontime": "0 9 * * *", + "query": "/task Share a funny Calvin and Hobbes or Bill Watterson quote from my notes", + "subject": "Your Calvin and Hobbes Quote for the Day" }} ## Chat History -User: Every Monday evening share the top posts on Hacker News from last week. Format it as a newsletter. My Current Location: Nairobi, Kenya +User: Every monday evening at 6 share the top posts on hacker news from last week. Format it as a newsletter Khoj: {{ - "Scratchpad": "Nairobi is UTC+3. As evening specified, I'll share at 18:30 your time. Which will be 15:30 UTC.", - "Crontime": "30 15 * * 1", - "Query": "/task Top posts last week on Hacker News" + "crontime": "0 18 * * 1", + "query": "/task Top posts last week on Hacker News", + "subject": "Your Weekly Top Hacker News Posts Newsletter" }} ## Chat History -User: What is the latest version of the Khoj python package? +User: What is the latest version of the khoj python package? AI: The latest released Khoj python package version is 1.5.0. -User: Notify me when version 2.0.0 is released. My Current Location: Mexico City, Mexico +User: Notify me when version 2.0.0 is released Khoj: {{ - "Scratchpad": "Mexico City is UTC-6. No time is specified, so I'll notify at 10:00 your time. Which will be 16:00 in UTC. Also I'll ensure the search query doesn't trigger another reminder.", - "Crontime": "0 16 * * *", - "Query": "/task Check if the latest released version of the Khoj python package is >= 2.0.0?" + "crontime": "0 10 * * *", + "query": "/task What is the latest released version of the Khoj python package?", + "subject": "Khoj Python Package Version 2.0.0 Release" }} ## Chat History -User: Tell me the latest local tech news on the first Sunday of every Month. My Current Location: Dublin, Ireland +User: Tell me the latest local tech news on the first sunday of every month Khoj: {{ - "Scratchpad": "Dublin is UTC+1. So, 10:00 in Dublin is 8:00 UTC. First Sunday of every month is 1-7. 
Also I'll enhance the search query.", - "Crontime": "0 9 1-7 * 0", - "Query": "/task Find the latest tech, AI and engineering news from around Dublin, Ireland" + "crontime": "0 8 1-7 * 0", + "query": "/task Find the latest local tech, AI and engineering news. Format it as a newsletter.", + "subject": "Your Monthly Dose of Local Tech News" }} ## Chat History -User: Inform me when the national election results are officially declared. Run task at 4pm every thursday. My Current Location: Trichy, India +User: Inform me when the national election results are declared. Run task at 4pm every thursday. Khoj: {{ - "Scratchpad": "Trichy is UTC+5:30. So, 4pm in Trichy is 10:30 UTC. Also let's add location details to the search query.", - "Crontime": "30 10 * * 4", - "Query": "/task Check if the Indian national election results are officially declared." + "crontime": "0 16 * * 4", + "query": "/task Check if the Indian national election results are officially declared", + "subject": "Indian National Election Results Declared" }} # Chat History: {chat_history} -User: {query}. 
My Current Location: {user_location} +User: {query} Khoj: """.strip() ) diff --git a/src/khoj/routers/api_chat.py b/src/khoj/routers/api_chat.py index 4e2f38ab..b6119e12 100644 --- a/src/khoj/routers/api_chat.py +++ b/src/khoj/routers/api_chat.py @@ -399,17 +399,18 @@ async def websocket_endpoint( q = q.replace(f"/{cmd.value}", "").strip() if ConversationCommand.Reminder in conversation_commands: - crontime, inferred_query = await schedule_query(q, location, meta_log) + user_timezone = pytz.timezone(timezone) + crontime, inferred_query, subject = await schedule_query(q, location, meta_log) try: - trigger = CronTrigger.from_crontab(crontime) + trigger = CronTrigger.from_crontab(crontime, user_timezone) except ValueError as e: await send_complete_llm_response(f"Unable to create reminder with crontime schedule: {crontime}") continue # Generate the job id from the hash of inferred_query and crontime - job_id = hashlib.md5(f"{inferred_query}_{crontime}".encode("utf-8")).hexdigest() + job_id = f"job_{user.uuid}_" + hashlib.md5(f"{inferred_query}_{crontime}".encode("utf-8")).hexdigest() query_id = hashlib.md5(f"{inferred_query}".encode("utf-8")).hexdigest() partial_scheduled_chat = functools.partial( - scheduled_chat, inferred_query, q, websocket.user.object, websocket.url + scheduled_chat, inferred_query, q, subject, websocket.user.object, websocket.url ) try: job = state.scheduler.add_job( @@ -419,7 +420,7 @@ async def websocket_endpoint( partial_scheduled_chat, f"{ProcessLock.Operation.SCHEDULED_JOB}_{user.uuid}_{query_id}", ), - id=f"job_{user.uuid}_{job_id}", + id=job_id, name=f"{inferred_query}", max_instances=2, # Allow second instance to kill any previous instance with stale lock jitter=30, @@ -430,17 +431,15 @@ async def websocket_endpoint( ) continue # Display next run time in user timezone instead of UTC - user_timezone = pytz.timezone(timezone) - next_run_time_utc = job.next_run_time.replace(tzinfo=pytz.utc) - next_run_time_user_tz = 
next_run_time_utc.astimezone(user_timezone) - next_run_time = next_run_time_user_tz.strftime("%Y-%m-%d %H:%M %Z (%z)") + next_run_time = job.next_run_time.strftime("%Y-%m-%d %H:%M %Z (%z)") # Remove /task prefix from inferred_query unprefixed_inferred_query = re.sub(r"^\/task\s*", "", inferred_query) # Create the scheduled task response llm_response = f""" ### 🕒 Scheduled Task - Query: **"{unprefixed_inferred_query}"** -- Schedule: `{crontime}` UTC (+0000) +- Subject: **{subject}** +- Schedule: `{crontime}` - Next Run At: **{next_run_time}**. """.strip() @@ -671,9 +670,10 @@ async def chat( user_name = await aget_user_name(user) if ConversationCommand.Reminder in conversation_commands: - crontime, inferred_query = await schedule_query(q, location, meta_log) + user_timezone = pytz.timezone(timezone) + crontime, inferred_query, subject = await schedule_query(q, location, meta_log) try: - trigger = CronTrigger.from_crontab(crontime) + trigger = CronTrigger.from_crontab(crontime, user_timezone) except ValueError as e: return Response( content=f"Unable to create reminder with crontime schedule: {crontime}", @@ -682,15 +682,17 @@ async def chat( ) # Generate the job id from the hash of inferred_query and crontime - job_id = hashlib.md5(f"{inferred_query}_{crontime}".encode("utf-8")).hexdigest() + job_id = f"job_{user.uuid}_" + hashlib.md5(f"{inferred_query}_{crontime}".encode("utf-8")).hexdigest() query_id = hashlib.md5(f"{inferred_query}".encode("utf-8")).hexdigest() - partial_scheduled_chat = functools.partial(scheduled_chat, inferred_query, q, request.user.object, request.url) + partial_scheduled_chat = functools.partial( + scheduled_chat, inferred_query, q, subject, request.user.object, request.url + ) try: job = state.scheduler.add_job( run_with_process_lock, trigger=trigger, args=(partial_scheduled_chat, f"{ProcessLock.Operation.SCHEDULED_JOB}_{user.uuid}_{query_id}"), - id=f"job_{user.uuid}_{job_id}", + id=job_id, name=f"{inferred_query}", max_instances=2, # Allow 
second instance to kill any previous instance with stale lock jitter=30, @@ -701,19 +703,16 @@ async def chat( media_type="text/plain", status_code=500, ) - # Display next run time in user timezone instead of UTC - user_timezone = pytz.timezone(timezone) - next_run_time_utc = job.next_run_time.replace(tzinfo=pytz.utc) - next_run_time_user_tz = next_run_time_utc.astimezone(user_timezone) - next_run_time = next_run_time_user_tz.strftime("%Y-%m-%d %H:%M %Z (%z)") + next_run_time = job.next_run_time.strftime("%Y-%m-%d %H:%M %Z (%z)") # Remove /task prefix from inferred_query unprefixed_inferred_query = re.sub(r"^\/task\s*", "", inferred_query) # Create the scheduled task response llm_response = f""" ### 🕒 Scheduled Task - Query: **"{unprefixed_inferred_query}"** -- Schedule: `{crontime}` UTC (+0000) +- Subject: **{subject}** +- Schedule: `{crontime}` - Next Run At: **{next_run_time}**.' """.strip() diff --git a/src/khoj/routers/email.py b/src/khoj/routers/email.py index 8e6464d3..bb5cdd5c 100644 --- a/src/khoj/routers/email.py +++ b/src/khoj/routers/email.py @@ -50,7 +50,7 @@ def send_welcome_email(name, email): ) -def send_task_email(name, email, query, result): +def send_task_email(name, email, query, result, subject): if not is_resend_enabled(): logger.debug("Email sending disabled") return @@ -60,13 +60,11 @@ def send_task_email(name, email, query, result): html_result = markdown_it.MarkdownIt().render(result) html_content = template.render(name=name, query=query, result=html_result) - query_for_subject_line = query.replace("\n", " ").replace('"', "").replace("'", "") - r = resend.Emails.send( { "from": "Khoj ", "to": email, - "subject": f'✨ Your Task Results for "{query_for_subject_line}"', + "subject": f"✨ {subject}", "html": html_content, } ) diff --git a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py index e11c1cff..c6974ef5 100644 --- a/src/khoj/routers/helpers.py +++ b/src/khoj/routers/helpers.py @@ -332,14 +332,10 @@ async def schedule_query(q: 
str, location_data: LocationData, conversation_histo """ Schedule the date, time to run the query. Assume the server timezone is UTC. """ - user_location = ( - f"{location_data.city}, {location_data.region}, {location_data.country}" if location_data else "Greenwich" - ) chat_history = construct_chat_history(conversation_history) crontime_prompt = prompts.crontime_prompt.format( query=q, - user_location=user_location, chat_history=chat_history, ) @@ -351,7 +347,7 @@ async def schedule_query(q: str, location_data: LocationData, conversation_histo response: Dict[str, str] = json.loads(raw_response) if not response or not isinstance(response, Dict) or len(response) != 3: raise AssertionError(f"Invalid response for scheduling query : {response}") - return tuple(response.values())[1:] + return response.get("crontime"), response.get("query"), response.get("subject") except Exception: raise AssertionError(f"Invalid response for scheduling query: {raw_response}") @@ -871,7 +867,7 @@ def should_notify(original_query: str, executed_query: str, ai_response: str) -> return True -def scheduled_chat(executing_query: str, scheduling_query: str, user: KhojUser, calling_url: URL): +def scheduled_chat(executing_query: str, scheduling_query: str, subject: str, user: KhojUser, calling_url: URL): # Extract relevant params from the original URL scheme = "http" if not calling_url.is_secure else "https" query_dict = parse_qs(calling_url.query) @@ -913,6 +909,6 @@ def scheduled_chat(executing_query: str, scheduling_query: str, user: KhojUser, # Notify user if the AI response is satisfactory if should_notify(original_query=scheduling_query, executed_query=cleaned_query, ai_response=ai_response): if is_resend_enabled(): - send_task_email(user.get_short_name(), user.email, scheduling_query, ai_response) + send_task_email(user.get_short_name(), user.email, scheduling_query, ai_response, subject) else: return raw_response From ae10ff4a5f86fd8c0aaf9bf9635ec1ea339531ca Mon Sep 17 00:00:00 2001 
From: Debanjum Singh Solanky Date: Mon, 29 Apr 2024 15:52:39 +0530 Subject: [PATCH 21/42] Create create_scheduled_task func to dedupe logic across ws, http APIs Previously, both the websocket and http endpoint were implementing the same logic. This was becoming too unwieldy --- src/khoj/database/adapters/__init__.py | 8 +-- src/khoj/routers/api_chat.py | 81 +++++--------------------- src/khoj/routers/helpers.py | 36 ++++++++++++ 3 files changed, 53 insertions(+), 72 deletions(-) diff --git a/src/khoj/database/adapters/__init__.py b/src/khoj/database/adapters/__init__.py index f32e3b8b..d4175704 100644 --- a/src/khoj/database/adapters/__init__.py +++ b/src/khoj/database/adapters/__init__.py @@ -429,7 +429,7 @@ class ProcessLockAdapters: return ProcessLock.objects.filter(name=process_name).delete() @staticmethod - def run_with_lock(func: Callable, operation: ProcessLock.Operation, max_duration_in_seconds: int = 600): + def run_with_lock(func: Callable, operation: ProcessLock.Operation, max_duration_in_seconds: int = 600, **kwargs): # Exit early if process lock is already taken if ProcessLockAdapters.is_process_locked(operation): logger.info(f"🔒 Skip executing {func} as {operation} lock is already taken") @@ -443,7 +443,7 @@ class ProcessLockAdapters: # Execute Function with timer(f"🔒 Run {func} with {operation} process lock", logger): - func() + func(**kwargs) success = True except Exception as e: logger.error(f"🚨 Error executing {func} with {operation} process lock: {e}", exc_info=True) @@ -454,11 +454,11 @@ class ProcessLockAdapters: logger.info(f"🔓 Unlocked {operation} process after executing {func} {'Succeeded' if success else 'Failed'}") -def run_with_process_lock(*args): +def run_with_process_lock(*args, **kwargs): """Wrapper function used for scheduling jobs. Required as APScheduler can't discover the `ProcessLockAdapter.run_with_lock' method on its own. 
""" - return ProcessLockAdapters.run_with_lock(*args) + return ProcessLockAdapters.run_with_lock(*args, **kwargs) class ClientApplicationAdapters: diff --git a/src/khoj/routers/api_chat.py b/src/khoj/routers/api_chat.py index b6119e12..fe628e69 100644 --- a/src/khoj/routers/api_chat.py +++ b/src/khoj/routers/api_chat.py @@ -1,5 +1,3 @@ -import functools -import hashlib import json import logging import math @@ -8,8 +6,6 @@ from datetime import datetime from typing import Dict, Optional from urllib.parse import unquote -import pytz -from apscheduler.triggers.cron import CronTrigger from asgiref.sync import sync_to_async from fastapi import APIRouter, Depends, HTTPException, Request, WebSocket from fastapi.requests import Request @@ -18,13 +14,8 @@ from starlette.authentication import requires from starlette.websockets import WebSocketDisconnect from websockets import ConnectionClosedOK -from khoj.database.adapters import ( - ConversationAdapters, - EntryAdapters, - aget_user_name, - run_with_process_lock, -) -from khoj.database.models import KhojUser, ProcessLock +from khoj.database.adapters import ConversationAdapters, EntryAdapters, aget_user_name +from khoj.database.models import KhojUser from khoj.processor.conversation.prompts import ( help_message, no_entries_found, @@ -45,10 +36,9 @@ from khoj.routers.helpers import ( agenerate_chat_response, aget_relevant_information_sources, aget_relevant_output_modes, + create_scheduled_task, get_conversation_command, is_ready_to_chat, - schedule_query, - scheduled_chat, text_to_image, update_telemetry_state, validate_conversation_config, @@ -399,36 +389,13 @@ async def websocket_endpoint( q = q.replace(f"/{cmd.value}", "").strip() if ConversationCommand.Reminder in conversation_commands: - user_timezone = pytz.timezone(timezone) - crontime, inferred_query, subject = await schedule_query(q, location, meta_log) try: - trigger = CronTrigger.from_crontab(crontime, user_timezone) - except ValueError as e: - await 
send_complete_llm_response(f"Unable to create reminder with crontime schedule: {crontime}") - continue - # Generate the job id from the hash of inferred_query and crontime - job_id = f"job_{user.uuid}_" + hashlib.md5(f"{inferred_query}_{crontime}".encode("utf-8")).hexdigest() - query_id = hashlib.md5(f"{inferred_query}".encode("utf-8")).hexdigest() - partial_scheduled_chat = functools.partial( - scheduled_chat, inferred_query, q, subject, websocket.user.object, websocket.url - ) - try: - job = state.scheduler.add_job( - run_with_process_lock, - trigger=trigger, - args=( - partial_scheduled_chat, - f"{ProcessLock.Operation.SCHEDULED_JOB}_{user.uuid}_{query_id}", - ), - id=job_id, - name=f"{inferred_query}", - max_instances=2, # Allow second instance to kill any previous instance with stale lock - jitter=30, - ) - except: - await send_complete_llm_response( - f"Unable to schedule reminder. Ensure the reminder doesn't already exist." + job, crontime, inferred_query, subject = await create_scheduled_task( + q, location, timezone, user, websocket.url, meta_log ) + except Exception as e: + logger.error(f"Error scheduling task {q} for {user.email}: {e}") + await send_complete_llm_response(f"Unable to schedule task. 
Ensure the task doesn't already exist.") continue # Display next run time in user timezone instead of UTC next_run_time = job.next_run_time.strftime("%Y-%m-%d %H:%M %Z (%z)") @@ -670,36 +637,14 @@ async def chat( user_name = await aget_user_name(user) if ConversationCommand.Reminder in conversation_commands: - user_timezone = pytz.timezone(timezone) - crontime, inferred_query, subject = await schedule_query(q, location, meta_log) try: - trigger = CronTrigger.from_crontab(crontime, user_timezone) - except ValueError as e: - return Response( - content=f"Unable to create reminder with crontime schedule: {crontime}", - media_type="text/plain", - status_code=500, + job, crontime, inferred_query, subject = await create_scheduled_task( + q, location, timezone, user, request.url, meta_log ) - - # Generate the job id from the hash of inferred_query and crontime - job_id = f"job_{user.uuid}_" + hashlib.md5(f"{inferred_query}_{crontime}".encode("utf-8")).hexdigest() - query_id = hashlib.md5(f"{inferred_query}".encode("utf-8")).hexdigest() - partial_scheduled_chat = functools.partial( - scheduled_chat, inferred_query, q, subject, request.user.object, request.url - ) - try: - job = state.scheduler.add_job( - run_with_process_lock, - trigger=trigger, - args=(partial_scheduled_chat, f"{ProcessLock.Operation.SCHEDULED_JOB}_{user.uuid}_{query_id}"), - id=job_id, - name=f"{inferred_query}", - max_instances=2, # Allow second instance to kill any previous instance with stale lock - jitter=30, - ) - except: + except Exception as e: + logger.error(f"Error scheduling task {q} for {user.email}: {e}") return Response( - content=f"Unable to schedule reminder. Ensure the reminder doesn't already exist.", + content=f"Unable to schedule task. 
Ensure the task doesn't already exist.", media_type="text/plain", status_code=500, ) diff --git a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py index c6974ef5..194dae8a 100644 --- a/src/khoj/routers/helpers.py +++ b/src/khoj/routers/helpers.py @@ -1,5 +1,6 @@ import asyncio import base64 +import hashlib import io import json import logging @@ -21,7 +22,9 @@ from typing import ( from urllib.parse import parse_qs, urlencode import openai +import pytz import requests +from apscheduler.triggers.cron import CronTrigger from fastapi import Depends, Header, HTTPException, Request, UploadFile from PIL import Image from starlette.authentication import has_required_scope @@ -33,12 +36,14 @@ from khoj.database.adapters import ( EntryAdapters, create_khoj_token, get_khoj_tokens, + run_with_process_lock, ) from khoj.database.models import ( ChatModelOptions, ClientApplication, Conversation, KhojUser, + ProcessLock, Subscription, TextToImageModelConfig, UserRequests, @@ -912,3 +917,34 @@ def scheduled_chat(executing_query: str, scheduling_query: str, subject: str, us send_task_email(user.get_short_name(), user.email, scheduling_query, ai_response, subject) else: return raw_response + + +async def create_scheduled_task( + q: str, location: LocationData, timezone: str, user: KhojUser, calling_url: URL, meta_log: dict = {} +): + user_timezone = pytz.timezone(timezone) + crontime, inferred_query, subject = await schedule_query(q, location, meta_log) + trigger = CronTrigger.from_crontab(crontime, user_timezone) + # Generate id and metadata used by task scheduler and process locks for the task runs + job_id = f"job_{user.uuid}_" + hashlib.md5(f"{inferred_query}_{crontime}".encode("utf-8")).hexdigest() + query_id = hashlib.md5(f"{inferred_query}".encode("utf-8")).hexdigest() + job = state.scheduler.add_job( + run_with_process_lock, + trigger=trigger, + args=( + scheduled_chat, + f"{ProcessLock.Operation.SCHEDULED_JOB}_{user.uuid}_{query_id}", + ), + kwargs={ + 
"executing_query": inferred_query, + "scheduling_query": q, + "subject": subject, + "user": user, + "calling_url": calling_url, + }, + id=job_id, + name=f"{inferred_query}", + max_instances=2, # Allow second instance to kill any previous instance with stale lock + jitter=30, + ) + return job, crontime, inferred_query, subject From d341b1efe8d9a4136990ca459f4bc99bdffa424a Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Mon, 29 Apr 2024 17:27:11 +0530 Subject: [PATCH 22/42] Store, retrieve task metadata from the job name field --- src/khoj/routers/api.py | 27 +++++++++++++++++---------- src/khoj/routers/helpers.py | 7 +++++-- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py index 88078c9b..60042815 100644 --- a/src/khoj/routers/api.py +++ b/src/khoj/routers/api.py @@ -397,15 +397,17 @@ def get_jobs(request: Request) -> Response: tasks: list[Job] = state.scheduler.get_jobs() # Collate all tasks assigned by user that are still active - tasks_info = [ - { - "id": task.id, - "name": re.sub(r"^/task\s*", "", task.name), - "next": task.next_run_time.strftime("%Y-%m-%d %H:%M"), - } - for task in tasks - if task.id.startswith(f"job_{user.uuid}_") - ] + tasks_info = [] + for task in tasks: + if task.id.startswith(f"job_{user.uuid}_"): + task_metadata = json.loads(task.name) + tasks_info.append( + { + "id": task.id, + "name": re.sub(r"^/task\s*", "", task_metadata["inferred_query"]), + "next": task.next_run_time.strftime("%Y-%m-%d %H:%M"), + } + ) # Return tasks information as a JSON response return Response(content=json.dumps(tasks_info), media_type="application/json", status_code=200) @@ -426,7 +428,12 @@ def delete_job(request: Request, task_id: str) -> Response: return Response(content="Invalid job", status_code=403) # Collate info about user task to be deleted - task_info = {"id": task.id, "name": task.name, "next": task.next_run_time.strftime("%Y-%m-%d %H:%MS")} + task_metadata = 
json.loads(task.name) + task_info = { + "id": task.id, + "name": task_metadata["inferred_query"], + "next": task.next_run_time.strftime("%Y-%m-%d %H:%MS"), + } # Delete job task.remove() diff --git a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py index 194dae8a..10067216 100644 --- a/src/khoj/routers/helpers.py +++ b/src/khoj/routers/helpers.py @@ -926,8 +926,11 @@ async def create_scheduled_task( crontime, inferred_query, subject = await schedule_query(q, location, meta_log) trigger = CronTrigger.from_crontab(crontime, user_timezone) # Generate id and metadata used by task scheduler and process locks for the task runs - job_id = f"job_{user.uuid}_" + hashlib.md5(f"{inferred_query}_{crontime}".encode("utf-8")).hexdigest() + job_metadata = json.dumps( + {"inferred_query": inferred_query, "original_query": q, "subject": subject, "crontime": crontime} + ) query_id = hashlib.md5(f"{inferred_query}".encode("utf-8")).hexdigest() + job_id = f"job_{user.uuid}_{crontime}_{query_id}" job = state.scheduler.add_job( run_with_process_lock, trigger=trigger, @@ -943,7 +946,7 @@ async def create_scheduled_task( "calling_url": calling_url, }, id=job_id, - name=f"{inferred_query}", + name=job_metadata, max_instances=2, # Allow second instance to kill any previous instance with stale lock jitter=30, ) From 230d160602957db27a9e069ed0b2655701576ed1 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Mon, 29 Apr 2024 18:54:54 +0530 Subject: [PATCH 23/42] Improve rendering task scheduled settings view and message - Render crontime string in natural language in message & settings UI - Show more fields in tasks web config UI - Add link to the tasks settings page in task scheduled chat response - Improve task variables names Rename executing_query to query_to_run. 
scheduling_query to scheduling_request --- pyproject.toml | 1 + src/khoj/interface/web/config.html | 17 ++++++++++++----- src/khoj/routers/api.py | 11 +++++++++-- src/khoj/routers/api_chat.py | 23 +++++++++++++++-------- src/khoj/routers/helpers.py | 26 +++++++++++++------------- 5 files changed, 50 insertions(+), 28 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 498be35e..4c32a1d2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -82,6 +82,7 @@ dependencies = [ "huggingface-hub >= 0.22.2", "apscheduler ~= 3.10.0", "pytz ~= 2024.1", + "cron-descriptor == 1.4.3", ] dynamic = ["version"] diff --git a/src/khoj/interface/web/config.html b/src/khoj/interface/web/config.html index 96e4fa55..a17cd0f9 100644 --- a/src/khoj/interface/web/config.html +++ b/src/khoj/interface/web/config.html @@ -286,7 +286,9 @@
- + + + @@ -674,12 +676,17 @@ function generateTaskRow(taskObj) { let taskId = taskObj.id; - let taskName = taskObj.name; - let taskNextRun = taskObj.next; + let taskSchedulingRequest = taskObj.scheduling_request; + let taskQuery = taskObj.query_to_run; + let taskSubject = taskObj.subject; + let taskNextRun = `Next run at ${taskObj.next}`; + let taskSchedule = taskObj.schedule; return ` - - + + + + diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py index 60042815..114e2f11 100644 --- a/src/khoj/routers/api.py +++ b/src/khoj/routers/api.py @@ -8,6 +8,7 @@ import time import uuid from typing import Any, Callable, List, Optional, Union +import cron_descriptor from apscheduler.job import Job from asgiref.sync import sync_to_async from fastapi import APIRouter, Depends, File, HTTPException, Request, UploadFile @@ -401,11 +402,17 @@ def get_jobs(request: Request) -> Response: for task in tasks: if task.id.startswith(f"job_{user.uuid}_"): task_metadata = json.loads(task.name) + schedule = ( + f'{cron_descriptor.get_description(task_metadata["crontime"])} {task.next_run_time.strftime("%Z")}' + ) tasks_info.append( { "id": task.id, - "name": re.sub(r"^/task\s*", "", task_metadata["inferred_query"]), - "next": task.next_run_time.strftime("%Y-%m-%d %H:%M"), + "subject": task_metadata["subject"], + "query_to_run": re.sub(r"^/task\s*", "", task_metadata["query_to_run"]), + "scheduling_request": task_metadata["scheduling_request"], + "schedule": schedule, + "next": task.next_run_time.strftime("%Y-%m-%d %I:%M %p %Z"), } ) diff --git a/src/khoj/routers/api_chat.py b/src/khoj/routers/api_chat.py index fe628e69..5aee7ac0 100644 --- a/src/khoj/routers/api_chat.py +++ b/src/khoj/routers/api_chat.py @@ -6,6 +6,7 @@ from datetime import datetime from typing import Dict, Optional from urllib.parse import unquote +import cron_descriptor from asgiref.sync import sync_to_async from fastapi import APIRouter, Depends, HTTPException, Request, WebSocket from fastapi.requests import 
Request @@ -398,16 +399,19 @@ async def websocket_endpoint( await send_complete_llm_response(f"Unable to schedule task. Ensure the task doesn't already exist.") continue # Display next run time in user timezone instead of UTC - next_run_time = job.next_run_time.strftime("%Y-%m-%d %H:%M %Z (%z)") + schedule = f'{cron_descriptor.get_description(crontime)} {job.next_run_time.strftime("%Z")}' + next_run_time = job.next_run_time.strftime("%Y-%m-%d %H:%M %Z") # Remove /task prefix from inferred_query unprefixed_inferred_query = re.sub(r"^\/task\s*", "", inferred_query) # Create the scheduled task response llm_response = f""" ### 🕒 Scheduled Task -- Query: **"{unprefixed_inferred_query}"** - Subject: **{subject}** -- Schedule: `{crontime}` -- Next Run At: **{next_run_time}**. +- Query: "{unprefixed_inferred_query}" +- Schedule: `{schedule}` +- Next Run At: {next_run_time} + +Manage your tasks [here](/config#tasks). """.strip() await sync_to_async(save_to_conversation_log)( @@ -649,16 +653,19 @@ async def chat( status_code=500, ) # Display next run time in user timezone instead of UTC - next_run_time = job.next_run_time.strftime("%Y-%m-%d %H:%M %Z (%z)") + schedule = f'{cron_descriptor.get_description(crontime)} {job.next_run_time.strftime("%Z")}' + next_run_time = job.next_run_time.strftime("%Y-%m-%d %H:%M %Z") # Remove /task prefix from inferred_query unprefixed_inferred_query = re.sub(r"^\/task\s*", "", inferred_query) # Create the scheduled task response llm_response = f""" ### 🕒 Scheduled Task -- Query: **"{unprefixed_inferred_query}"** - Subject: **{subject}** -- Schedule: `{crontime}` -- Next Run At: **{next_run_time}**.' +- Query: "{unprefixed_inferred_query}" +- Schedule: `{schedule}` +- Next Run At: {next_run_time} + +Manage your tasks [here](/config#tasks). 
""".strip() await sync_to_async(save_to_conversation_log)( diff --git a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py index 10067216..358975b5 100644 --- a/src/khoj/routers/helpers.py +++ b/src/khoj/routers/helpers.py @@ -872,13 +872,13 @@ def should_notify(original_query: str, executed_query: str, ai_response: str) -> return True -def scheduled_chat(executing_query: str, scheduling_query: str, subject: str, user: KhojUser, calling_url: URL): +def scheduled_chat(query_to_run: str, scheduling_request: str, subject: str, user: KhojUser, calling_url: URL): # Extract relevant params from the original URL scheme = "http" if not calling_url.is_secure else "https" query_dict = parse_qs(calling_url.query) # Replace the original scheduling query with the scheduled query - query_dict["q"] = [executing_query] + query_dict["q"] = [query_to_run] # Construct the URL to call the chat API with the scheduled query string encoded_query = urlencode(query_dict, doseq=True) @@ -904,7 +904,7 @@ def scheduled_chat(executing_query: str, scheduling_query: str, subject: str, us return None # Extract the AI response from the chat API response - cleaned_query = re.sub(r"^/task\s*", "", scheduling_query).strip() + cleaned_query = re.sub(r"^/task\s*", "", query_to_run).strip() if raw_response.headers.get("Content-Type") == "application/json": response_map = raw_response.json() ai_response = response_map.get("response") or response_map.get("image") @@ -912,9 +912,9 @@ def scheduled_chat(executing_query: str, scheduling_query: str, subject: str, us ai_response = raw_response.text # Notify user if the AI response is satisfactory - if should_notify(original_query=scheduling_query, executed_query=cleaned_query, ai_response=ai_response): + if should_notify(original_query=scheduling_request, executed_query=cleaned_query, ai_response=ai_response): if is_resend_enabled(): - send_task_email(user.get_short_name(), user.email, scheduling_query, ai_response, subject) + 
send_task_email(user.get_short_name(), user.email, scheduling_request, ai_response, subject) else: return raw_response @@ -923,14 +923,14 @@ async def create_scheduled_task( q: str, location: LocationData, timezone: str, user: KhojUser, calling_url: URL, meta_log: dict = {} ): user_timezone = pytz.timezone(timezone) - crontime, inferred_query, subject = await schedule_query(q, location, meta_log) - trigger = CronTrigger.from_crontab(crontime, user_timezone) + crontime_string, query_to_run, subject = await schedule_query(q, location, meta_log) + trigger = CronTrigger.from_crontab(crontime_string, user_timezone) # Generate id and metadata used by task scheduler and process locks for the task runs job_metadata = json.dumps( - {"inferred_query": inferred_query, "original_query": q, "subject": subject, "crontime": crontime} + {"query_to_run": query_to_run, "scheduling_request": q, "subject": subject, "crontime": crontime_string} ) - query_id = hashlib.md5(f"{inferred_query}".encode("utf-8")).hexdigest() - job_id = f"job_{user.uuid}_{crontime}_{query_id}" + query_id = hashlib.md5(f"{query_to_run}".encode("utf-8")).hexdigest() + job_id = f"job_{user.uuid}_{crontime_string}_{query_id}" job = state.scheduler.add_job( run_with_process_lock, trigger=trigger, @@ -939,8 +939,8 @@ async def create_scheduled_task( f"{ProcessLock.Operation.SCHEDULED_JOB}_{user.uuid}_{query_id}", ), kwargs={ - "executing_query": inferred_query, - "scheduling_query": q, + "query_to_run": query_to_run, + "scheduling_request": q, "subject": subject, "user": user, "calling_url": calling_url, @@ -950,4 +950,4 @@ async def create_scheduled_task( max_instances=2, # Allow second instance to kill any previous instance with stale lock jitter=30, ) - return job, crontime, inferred_query, subject + return job, crontime_string, query_to_run, subject From 2f9241b5a343e67d4770be39fdd1b9c472fee999 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Mon, 29 Apr 2024 20:41:07 +0530 Subject: [PATCH 24/42] 
Rename scheduled task to automations across code and UX - Fix query, subject parameters passed to email template - Show 12 hour scheduled time in automation created chat message --- src/khoj/interface/email/task.html | 8 +-- src/khoj/interface/web/config.html | 66 +++++++++---------- src/khoj/processor/conversation/prompts.py | 12 ++-- src/khoj/processor/conversation/utils.py | 4 +- src/khoj/routers/api.py | 66 +++++++++---------- src/khoj/routers/api_chat.py | 73 +++++++++++----------- src/khoj/routers/email.py | 2 +- src/khoj/routers/helpers.py | 22 +++---- src/khoj/utils/helpers.py | 9 ++- 9 files changed, 128 insertions(+), 134 deletions(-) diff --git a/src/khoj/interface/email/task.html b/src/khoj/interface/email/task.html index 86a801ac..1e78ce34 100644 --- a/src/khoj/interface/email/task.html +++ b/src/khoj/interface/email/task.html @@ -1,7 +1,7 @@ - Khoj AI - Task + Khoj AI - Automation @@ -13,7 +13,7 @@

Your Open, Personal AI

Hey {{name}}!

-

I've shared your scheduled task results below:

+

I've shared your automation results below:

@@ -23,8 +23,8 @@

{{result}}

-

The scheduled query I ran on your behalf: {query}

-

You can view, delete and manage your scheduled tasks via the settings page

+

The automation query I ran on your behalf: {{query}}

+

You can view, delete your automations via the settings page

- Khoj

diff --git a/src/khoj/interface/web/config.html b/src/khoj/interface/web/config.html index a17cd0f9..e9c24d06 100644 --- a/src/khoj/interface/web/config.html +++ b/src/khoj/interface/web/config.html @@ -272,17 +272,17 @@ {% endif %} {% endif %} -
-

Scheduled Tasks

-
+
+

Automations

+
- Scheduled Tasks -

Tasks

+ Automations +

Automations

-

Manage your scheduled tasks

+

Manage your automations

-
NameNext RunScheduling RequestQuery to RunSchedule Actions
${taskName}${taskNextRun}${taskSubject}${taskSchedulingRequest}${taskQuery}${taskSchedule} Delete Task
+
@@ -292,10 +292,10 @@ - +
NameActions
-
@@ -661,46 +661,42 @@ // List user's API keys on page load listApiKeys(); - function deleteTask(taskId) { - const scheduledTaskList = document.getElementById("scheduled-tasks-list"); - fetch(`/api/task?task_id=${taskId}`, { + function deleteAutomation(automationId) { + const AutomationList = document.getElementById("automations-list"); + fetch(`/api/automation?automation_id=${automationId}`, { method: 'DELETE', }) .then(response => { if (response.status == 200) { - const scheduledTaskItem = document.getElementById(`scheduled-task-item-${taskId}`); - scheduledTaskList.removeChild(scheduledTaskItem); + const AutomationItem = document.getElementById(`automation-item-${automationId}`); + AutomationList.removeChild(AutomationItem); } }); } - function generateTaskRow(taskObj) { - let taskId = taskObj.id; - let taskSchedulingRequest = taskObj.scheduling_request; - let taskQuery = taskObj.query_to_run; - let taskSubject = taskObj.subject; - let taskNextRun = `Next run at ${taskObj.next}`; - let taskSchedule = taskObj.schedule; + function generateAutomationRow(automationObj) { + let automationId = automationObj.id; + let automationNextRun = `Next run at ${automationObj.next}`; return ` - - ${taskSubject} - ${taskSchedulingRequest} - ${taskQuery} - ${taskSchedule} + + ${automationObj.subject} + ${automationObj.scheduling_request} + ${automationObj.query_to_run} + ${automationObj.schedule} - Delete Task + Delete Automation `; } - function listScheduledTasks() { - const scheduledTasksList = document.getElementById("scheduled-tasks-list"); - fetch('/api/tasks') + function listAutomations() { + const AutomationsList = document.getElementById("automations-list"); + fetch('/api/automations') .then(response => response.json()) - .then(tasks => { - if (!tasks?.length > 0) return; - scheduledTasksList.innerHTML = tasks.map(generateTaskRow).join(""); + .then(automations => { + if (!automations?.length > 0) return; + AutomationsList.innerHTML = 
automations.map(generateAutomationRow).join(""); }); } @@ -713,8 +709,8 @@ }); } - // List user's scheduled tasks on page load - listScheduledTasks(); + // List user's automations on page load + listAutomations(); function removeFile(path) { fetch('/api/config/data/file?filename=' + path, { diff --git a/src/khoj/processor/conversation/prompts.py b/src/khoj/processor/conversation/prompts.py index dd22ecd3..a95bbe73 100644 --- a/src/khoj/processor/conversation/prompts.py +++ b/src/khoj/processor/conversation/prompts.py @@ -507,7 +507,7 @@ Khoj: """.strip() ) -# Schedule task +# Automations # -- crontime_prompt = PromptTemplate.from_template( """ @@ -525,7 +525,7 @@ AI: Here is one I found: "It's not denial. I'm just selective about the reality User: Hahah, nice! Show a new one every morning. Khoj: {{ "crontime": "0 9 * * *", - "query": "/task Share a funny Calvin and Hobbes or Bill Watterson quote from my notes", + "query": "/automated_task Share a funny Calvin and Hobbes or Bill Watterson quote from my notes", "subject": "Your Calvin and Hobbes Quote for the Day" }} @@ -534,7 +534,7 @@ Khoj: {{ User: Every monday evening at 6 share the top posts on hacker news from last week. Format it as a newsletter Khoj: {{ "crontime": "0 18 * * 1", - "query": "/task Top posts last week on Hacker News", + "query": "/automated_task Top posts last week on Hacker News", "subject": "Your Weekly Top Hacker News Posts Newsletter" }} @@ -545,7 +545,7 @@ AI: The latest released Khoj python package version is 1.5.0. 
User: Notify me when version 2.0.0 is released Khoj: {{ "crontime": "0 10 * * *", - "query": "/task What is the latest released version of the Khoj python package?", + "query": "/automated_task What is the latest released version of the Khoj python package?", "subject": "Khoj Python Package Version 2.0.0 Release" }} @@ -554,7 +554,7 @@ Khoj: {{ User: Tell me the latest local tech news on the first sunday of every month Khoj: {{ "crontime": "0 8 1-7 * 0", - "query": "/task Find the latest local tech, AI and engineering news. Format it as a newsletter.", + "query": "/automated_task Find the latest local tech, AI and engineering news. Format it as a newsletter.", "subject": "Your Monthly Dose of Local Tech News" }} @@ -563,7 +563,7 @@ Khoj: {{ User: Inform me when the national election results are declared. Run task at 4pm every thursday. Khoj: {{ "crontime": "0 16 * * 4", - "query": "/task Check if the Indian national election results are officially declared", + "query": "/automated_task Check if the Indian national election results are officially declared", "subject": "Indian National Election Results Declared" }} diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py index 6ef7016d..775848c8 100644 --- a/src/khoj/processor/conversation/utils.py +++ b/src/khoj/processor/conversation/utils.py @@ -102,7 +102,7 @@ def save_to_conversation_log( intent_type: str = "remember", client_application: ClientApplication = None, conversation_id: int = None, - job_id: str = None, + automation_id: str = None, ): user_message_time = user_message_time or datetime.now().strftime("%Y-%m-%d %H:%M:%S") updated_conversation = message_to_log( @@ -113,7 +113,7 @@ def save_to_conversation_log( "context": compiled_references, "intent": {"inferred-queries": inferred_queries, "type": intent_type}, "onlineContext": online_results, - "jobId": job_id, + "automationId": automation_id, }, conversation_log=meta_log.get("chat", []), ) diff --git 
a/src/khoj/routers/api.py b/src/khoj/routers/api.py index 114e2f11..88148d78 100644 --- a/src/khoj/routers/api.py +++ b/src/khoj/routers/api.py @@ -391,59 +391,59 @@ def user_info(request: Request) -> Response: return Response(content=json.dumps(user_info), media_type="application/json", status_code=200) -@api.get("/tasks", response_class=Response) +@api.get("/automations", response_class=Response) @requires(["authenticated"]) -def get_jobs(request: Request) -> Response: +def get_automations(request: Request) -> Response: user: KhojUser = request.user.object - tasks: list[Job] = state.scheduler.get_jobs() + automations: list[Job] = state.scheduler.get_jobs() - # Collate all tasks assigned by user that are still active - tasks_info = [] - for task in tasks: - if task.id.startswith(f"job_{user.uuid}_"): - task_metadata = json.loads(task.name) - schedule = ( - f'{cron_descriptor.get_description(task_metadata["crontime"])} {task.next_run_time.strftime("%Z")}' - ) - tasks_info.append( + # Collate all automations created by user that are still active + automations_info = [] + for automation in automations: + if automation.id.startswith(f"automation_{user.uuid}_"): + automation_metadata = json.loads(automation.name) + crontime = automation_metadata["crontime"] + timezone = automation.next_run_time.strftime("%Z") + schedule = f"{cron_descriptor.get_description(crontime)} {timezone}" + automations_info.append( { - "id": task.id, - "subject": task_metadata["subject"], - "query_to_run": re.sub(r"^/task\s*", "", task_metadata["query_to_run"]), - "scheduling_request": task_metadata["scheduling_request"], + "id": automation.id, + "subject": automation_metadata["subject"], + "query_to_run": re.sub(r"^/automated_task\s*", "", automation_metadata["query_to_run"]), + "scheduling_request": automation_metadata["scheduling_request"], "schedule": schedule, - "next": task.next_run_time.strftime("%Y-%m-%d %I:%M %p %Z"), + "next": automation.next_run_time.strftime("%Y-%m-%d %I:%M %p %Z"), 
} ) # Return tasks information as a JSON response - return Response(content=json.dumps(tasks_info), media_type="application/json", status_code=200) + return Response(content=json.dumps(automations_info), media_type="application/json", status_code=200) -@api.delete("/task", response_class=Response) +@api.delete("/automation", response_class=Response) @requires(["authenticated"]) -def delete_job(request: Request, task_id: str) -> Response: +def delete_automation(request: Request, automation_id: str) -> Response: user: KhojUser = request.user.object # Perform validation checks - # Check if user is allowed to delete this task id - if not task_id.startswith(f"job_{user.uuid}_"): + # Check if user is allowed to delete this automation id + if not automation_id.startswith(f"automation_{user.uuid}_"): return Response(content="Unauthorized job deletion request", status_code=403) - # Check if task with this task id exist - task: Job = state.scheduler.get_job(job_id=task_id) - if not task: + # Check if automation with this id exist + automation: Job = state.scheduler.get_job(job_id=automation_id) + if not automation: return Response(content="Invalid job", status_code=403) # Collate info about user task to be deleted - task_metadata = json.loads(task.name) - task_info = { - "id": task.id, - "name": task_metadata["inferred_query"], - "next": task.next_run_time.strftime("%Y-%m-%d %H:%MS"), + automation_metadata = json.loads(automation.name) + automation_info = { + "id": automation.id, + "name": automation_metadata["query_to_run"], + "next": automation.next_run_time.strftime("%Y-%m-%d %I:%M %p %Z"), } # Delete job - task.remove() + automation.remove() - # Return delete task information as a JSON response - return Response(content=json.dumps(task_info), media_type="application/json", status_code=200) + # Return deleted automation information as a JSON response + return Response(content=json.dumps(automation_info), media_type="application/json", status_code=200) diff --git 
a/src/khoj/routers/api_chat.py b/src/khoj/routers/api_chat.py index 5aee7ac0..e4b1ad2c 100644 --- a/src/khoj/routers/api_chat.py +++ b/src/khoj/routers/api_chat.py @@ -37,7 +37,7 @@ from khoj.routers.helpers import ( agenerate_chat_response, aget_relevant_information_sources, aget_relevant_output_modes, - create_scheduled_task, + create_automation, get_conversation_command, is_ready_to_chat, text_to_image, @@ -217,7 +217,8 @@ async def chat_options( ) -> Response: cmd_options = {} for cmd in ConversationCommand: - cmd_options[cmd.value] = command_descriptions[cmd] + if cmd in command_descriptions: + cmd_options[cmd.value] = command_descriptions[cmd] update_telemetry_state( request=request, @@ -373,14 +374,14 @@ async def websocket_endpoint( continue meta_log = conversation.conversation_log - is_task = conversation_commands == [ConversationCommand.Task] + is_automated_task = conversation_commands == [ConversationCommand.AutomatedTask] - if conversation_commands == [ConversationCommand.Default] or is_task: + if conversation_commands == [ConversationCommand.Default] or is_automated_task: conversation_commands = await aget_relevant_information_sources(q, meta_log) conversation_commands_str = ", ".join([cmd.value for cmd in conversation_commands]) await send_status_update(f"**🗃️ Chose Data Sources to Search:** {conversation_commands_str}") - mode = await aget_relevant_output_modes(q, meta_log, is_task) + mode = await aget_relevant_output_modes(q, meta_log, is_automated_task) await send_status_update(f"**🧑🏾‍💻 Decided Response Mode:** {mode.value}") if mode not in conversation_commands: conversation_commands.append(mode) @@ -389,29 +390,31 @@ async def websocket_endpoint( await conversation_command_rate_limiter.update_and_check_if_valid(websocket, cmd) q = q.replace(f"/{cmd.value}", "").strip() - if ConversationCommand.Reminder in conversation_commands: + if ConversationCommand.Automation in conversation_commands: try: - job, crontime, inferred_query, subject = await 
create_scheduled_task( + automation, crontime, query_to_run, subject = await create_automation( q, location, timezone, user, websocket.url, meta_log ) except Exception as e: logger.error(f"Error scheduling task {q} for {user.email}: {e}") - await send_complete_llm_response(f"Unable to schedule task. Ensure the task doesn't already exist.") + await send_complete_llm_response( + f"Unable to create automation. Ensure the automation doesn't already exist." + ) continue # Display next run time in user timezone instead of UTC - schedule = f'{cron_descriptor.get_description(crontime)} {job.next_run_time.strftime("%Z")}' - next_run_time = job.next_run_time.strftime("%Y-%m-%d %H:%M %Z") - # Remove /task prefix from inferred_query - unprefixed_inferred_query = re.sub(r"^\/task\s*", "", inferred_query) - # Create the scheduled task response + schedule = f'{cron_descriptor.get_description(crontime)} {automation.next_run_time.strftime("%Z")}' + next_run_time = automation.next_run_time.strftime("%Y-%m-%d %I:%M %p %Z") + # Remove /automated_task prefix from inferred_query + unprefixed_query_to_run = re.sub(r"^\/automated_task\s*", "", query_to_run) + # Create the automation response llm_response = f""" - ### 🕒 Scheduled Task + ### 🕒 Automation - Subject: **{subject}** -- Query: "{unprefixed_inferred_query}" +- Query to Run: "{unprefixed_query_to_run}" - Schedule: `{schedule}` - Next Run At: {next_run_time} -Manage your tasks [here](/config#tasks). +Manage your tasks [here](/config#automations). """.strip() await sync_to_async(save_to_conversation_log)( @@ -420,11 +423,11 @@ Manage your tasks [here](/config#tasks). 
user, meta_log, user_message_time, - intent_type="reminder", + intent_type="automation", client_application=websocket.user.client_app, conversation_id=conversation_id, - inferred_queries=[inferred_query], - job_id=job.id, + inferred_queries=[query_to_run], + automation_id=automation.id, ) common = CommonQueryParamsClass( client=websocket.user.client_app, @@ -621,7 +624,7 @@ async def chat( else: meta_log = conversation.conversation_log - is_task = conversation_commands == [ConversationCommand.Task] + is_task = conversation_commands == [ConversationCommand.AutomatedTask] if conversation_commands == [ConversationCommand.Default] or is_task: conversation_commands = await aget_relevant_information_sources(q, meta_log) @@ -640,32 +643,32 @@ async def chat( user_name = await aget_user_name(user) - if ConversationCommand.Reminder in conversation_commands: + if ConversationCommand.Automation in conversation_commands: try: - job, crontime, inferred_query, subject = await create_scheduled_task( + automation, crontime, query_to_run, subject = await create_automation( q, location, timezone, user, request.url, meta_log ) except Exception as e: - logger.error(f"Error scheduling task {q} for {user.email}: {e}") + logger.error(f"Error creating automation {q} for {user.email}: {e}") return Response( - content=f"Unable to schedule task. Ensure the task doesn't already exist.", + content=f"Unable to create automation. 
Ensure the automation doesn't already exist.", media_type="text/plain", status_code=500, ) # Display next run time in user timezone instead of UTC - schedule = f'{cron_descriptor.get_description(crontime)} {job.next_run_time.strftime("%Z")}' - next_run_time = job.next_run_time.strftime("%Y-%m-%d %H:%M %Z") - # Remove /task prefix from inferred_query - unprefixed_inferred_query = re.sub(r"^\/task\s*", "", inferred_query) - # Create the scheduled task response + schedule = f'{cron_descriptor.get_description(crontime)} {automation.next_run_time.strftime("%Z")}' + next_run_time = automation.next_run_time.strftime("%Y-%m-%d %I:%M %p %Z") + # Remove /automated_task prefix from inferred_query + unprefixed_query_to_run = re.sub(r"^\/automated_task\s*", "", query_to_run) + # Create the Automation response llm_response = f""" - ### 🕒 Scheduled Task + ### 🕒 Automation - Subject: **{subject}** -- Query: "{unprefixed_inferred_query}" +- Query to Run: "{unprefixed_query_to_run}" - Schedule: `{schedule}` - Next Run At: {next_run_time} -Manage your tasks [here](/config#tasks). +Manage your automations [here](/config#automations). """.strip() await sync_to_async(save_to_conversation_log)( @@ -674,11 +677,11 @@ Manage your tasks [here](/config#tasks). 
user, meta_log, user_message_time, - intent_type="reminder", + intent_type="automation", client_application=request.user.client_app, conversation_id=conversation_id, - inferred_queries=[inferred_query], - job_id=job.id, + inferred_queries=[query_to_run], + automation_id=automation.id, ) if stream: diff --git a/src/khoj/routers/email.py b/src/khoj/routers/email.py index bb5cdd5c..cb0c39c7 100644 --- a/src/khoj/routers/email.py +++ b/src/khoj/routers/email.py @@ -58,7 +58,7 @@ def send_task_email(name, email, query, result, subject): template = env.get_template("task.html") html_result = markdown_it.MarkdownIt().render(result) - html_content = template.render(name=name, query=query, result=html_result) + html_content = template.render(name=name, subject=subject, query=query, result=html_result) r = resend.Emails.send( { diff --git a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py index 358975b5..29cf95e6 100644 --- a/src/khoj/routers/helpers.py +++ b/src/khoj/routers/helpers.py @@ -170,8 +170,8 @@ def get_conversation_command(query: str, any_references: bool = False) -> Conver return ConversationCommand.Online elif query.startswith("/image"): return ConversationCommand.Image - elif query.startswith("/task"): - return ConversationCommand.Task + elif query.startswith("/automated_task"): + return ConversationCommand.AutomatedTask # If no relevant notes found for the given query elif not any_references: return ConversationCommand.General @@ -239,7 +239,7 @@ async def aget_relevant_output_modes(query: str, conversation_history: dict, is_ for mode, description in mode_descriptions_for_llm.items(): # Do not allow tasks to schedule another task - if is_task and mode == ConversationCommand.Reminder: + if is_task and mode == ConversationCommand.Automation: continue mode_options[mode.value] = description mode_options_str += f'- "{mode.value}": "{description}"\n' @@ -857,18 +857,14 @@ def should_notify(original_query: str, executed_query: str, ai_response: str) -> 
response=ai_response, ) - with timer("Chat actor: Decide to notify user of AI response", logger): + with timer("Chat actor: Decide to notify user of automation response", logger): try: response = send_message_to_model_wrapper_sync(to_notify_or_not) should_notify_result = "no" not in response.lower() - logger.info( - f'Decided to {"not " if not should_notify_result else ""}notify user of scheduled task response.' - ) + logger.info(f'Decided to {"not " if not should_notify_result else ""}notify user of automation response.') return should_notify_result except: - logger.warning( - f"Fallback to notify user of scheduled task response as failed to infer should notify or not." - ) + logger.warning(f"Fallback to notify user of automation response as failed to infer should notify or not.") return True @@ -904,7 +900,7 @@ def scheduled_chat(query_to_run: str, scheduling_request: str, subject: str, use return None # Extract the AI response from the chat API response - cleaned_query = re.sub(r"^/task\s*", "", query_to_run).strip() + cleaned_query = re.sub(r"^/automated_task\s*", "", query_to_run).strip() if raw_response.headers.get("Content-Type") == "application/json": response_map = raw_response.json() ai_response = response_map.get("response") or response_map.get("image") @@ -919,7 +915,7 @@ def scheduled_chat(query_to_run: str, scheduling_request: str, subject: str, use return raw_response -async def create_scheduled_task( +async def create_automation( q: str, location: LocationData, timezone: str, user: KhojUser, calling_url: URL, meta_log: dict = {} ): user_timezone = pytz.timezone(timezone) @@ -930,7 +926,7 @@ async def create_scheduled_task( {"query_to_run": query_to_run, "scheduling_request": q, "subject": subject, "crontime": crontime_string} ) query_id = hashlib.md5(f"{query_to_run}".encode("utf-8")).hexdigest() - job_id = f"job_{user.uuid}_{crontime_string}_{query_id}" + job_id = f"automation_{user.uuid}_{crontime_string}_{query_id}" job = state.scheduler.add_job( 
run_with_process_lock, trigger=trigger, diff --git a/src/khoj/utils/helpers.py b/src/khoj/utils/helpers.py index 1e85b679..4b24b828 100644 --- a/src/khoj/utils/helpers.py +++ b/src/khoj/utils/helpers.py @@ -304,8 +304,8 @@ class ConversationCommand(str, Enum): Online = "online" Webpage = "webpage" Image = "image" - Reminder = "reminder" - Task = "task" + Automation = "automation" + AutomatedTask = "automated_task" command_descriptions = { @@ -315,8 +315,7 @@ command_descriptions = { ConversationCommand.Online: "Search for information on the internet.", ConversationCommand.Webpage: "Get information from webpage links provided by you.", ConversationCommand.Image: "Generate images by describing your imagination in words.", - ConversationCommand.Reminder: "Schedule your query to run at a specified time or interval.", - ConversationCommand.Task: "Scheduled task running at previously specified schedule.", + ConversationCommand.Automation: "Automatically run your query at a specified time or interval.", ConversationCommand.Help: "Display a help message with all available commands and other metadata.", } @@ -330,7 +329,7 @@ tool_descriptions_for_llm = { mode_descriptions_for_llm = { ConversationCommand.Image: "Use this if the user is requesting an image or visual response to their query.", - ConversationCommand.Reminder: "Use this if the user is requesting a response at a scheduled date or time.", + ConversationCommand.Automation: "Use this if the user is requesting a response at a scheduled date or time.", ConversationCommand.Default: "Use this if the other response modes don't seem to fit the query.", } From 23f2057868e3a1b48a4eb6d8cdfaed17b8ad79ea Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Mon, 29 Apr 2024 23:10:40 +0530 Subject: [PATCH 25/42] Allow creating automations from automation settings section in web ui - Create new POST API endpoint to create automations - Use it in the settings page on the web interface to create new automations This 
simplified managing automations from the setting page by allowing both delete and create from the same page --- src/khoj/interface/web/config.html | 26 +++++++++++++++++-- src/khoj/routers/api.py | 40 ++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 2 deletions(-) diff --git a/src/khoj/interface/web/config.html b/src/khoj/interface/web/config.html index e9c24d06..ceb09ff1 100644 --- a/src/khoj/interface/web/config.html +++ b/src/khoj/interface/web/config.html @@ -295,8 +295,8 @@
-
@@ -700,6 +700,28 @@ }); } + async function createAutomation() { + const scheduling_request = window.prompt("Describe the automation you want to create"); + if (!scheduling_request) return; + + const ip_response = await fetch("https://ipapi.co/json"); + const ip_data = await ip_response.json(); + + const query_string = `q=${scheduling_request}&city=${ip_data.city}®ion=${ip_data.region}&country=${ip_data.country_name}&timezone=${ip_data.timezone}`; + const automation_response = await fetch(`/api/automation?${query_string}`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + }); + if (!automation_response.ok) { + throw new Error(`Failed to create automation: ${automation_response.status}`); + } + + listAutomations(); + } + document.getElementById("create-automation").addEventListener("click", async () => { await createAutomation(); }); + function getIndexedDataSize() { document.getElementById("indexed-data-size").innerHTML = "Calculating..."; fetch('/api/config/index/size') diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py index 88148d78..663dc99a 100644 --- a/src/khoj/routers/api.py +++ b/src/khoj/routers/api.py @@ -32,6 +32,7 @@ from khoj.routers.helpers import ( ApiUserRateLimiter, CommonQueryParams, ConversationCommandRateLimiter, + create_automation, update_telemetry_state, ) from khoj.search_filter.date_filter import DateFilter @@ -447,3 +448,42 @@ def delete_automation(request: Request, automation_id: str) -> Response: # Return deleted automation information as a JSON response return Response(content=json.dumps(automation_info), media_type="application/json", status_code=200) + + +@api.post("/automation", response_class=Response) +@requires(["authenticated"]) +async def make_automation( + request: Request, + q: str, + city: Optional[str] = None, + region: Optional[str] = None, + country: Optional[str] = None, + timezone: Optional[str] = None, +) -> Response: + user: KhojUser = request.user.object + if city or region or 
country: + location = LocationData(city=city, region=region, country=country) + + # Create automation with scheduling query and location data + try: + automation, crontime, query_to_run, subject = await create_automation(q, location, timezone, user, request.url) + except Exception as e: + logger.error(f"Error creating automation {q} for {user.email}: {e}") + return Response( + content=f"Unable to create automation. Ensure the automation doesn't already exist.", + media_type="text/plain", + status_code=500, + ) + + # Collate info about the created user automation + schedule = f'{cron_descriptor.get_description(crontime)} {automation.next_run_time.strftime("%Z")}' + automation_info = { + "id": automation.id, + "subject": subject, + "query_to_run": query_to_run, + "scheduling_request": crontime, + "schedule": schedule, + "next": automation.next_run_time.strftime("%Y-%m-%d %I:%M %p %Z"), + } + # Return information about the created automation as a JSON response + return Response(content=json.dumps(automation_info), media_type="application/json", status_code=200) From cb2b1dccc54752ec7e8629ae7c8091d8ce26b36a Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Tue, 30 Apr 2024 02:18:36 +0530 Subject: [PATCH 26/42] Add icon for Automation feature. 
Replace old icons for delete, new --- .../interface/web/assets/icons/automation.svg | 37 +++++++++++++++++++ .../interface/web/assets/icons/copy-solid.svg | 1 - .../interface/web/assets/icons/delete.svg | 26 +++++++++++++ .../web/assets/icons/microphone-solid.svg | 1 - src/khoj/interface/web/assets/icons/new.svg | 23 ++++++++++++ .../web/assets/icons/trash-solid.svg | 1 - src/khoj/interface/web/base_config.html | 8 ++++ src/khoj/interface/web/chat.html | 4 ++ src/khoj/interface/web/config.html | 28 +++++++++----- src/khoj/routers/api_chat.py | 8 +++- 10 files changed, 122 insertions(+), 15 deletions(-) create mode 100644 src/khoj/interface/web/assets/icons/automation.svg delete mode 100644 src/khoj/interface/web/assets/icons/copy-solid.svg create mode 100644 src/khoj/interface/web/assets/icons/delete.svg delete mode 100644 src/khoj/interface/web/assets/icons/microphone-solid.svg create mode 100644 src/khoj/interface/web/assets/icons/new.svg delete mode 100644 src/khoj/interface/web/assets/icons/trash-solid.svg diff --git a/src/khoj/interface/web/assets/icons/automation.svg b/src/khoj/interface/web/assets/icons/automation.svg new file mode 100644 index 00000000..162dd9ba --- /dev/null +++ b/src/khoj/interface/web/assets/icons/automation.svg @@ -0,0 +1,37 @@ + + + + + + + diff --git a/src/khoj/interface/web/assets/icons/copy-solid.svg b/src/khoj/interface/web/assets/icons/copy-solid.svg deleted file mode 100644 index da7020be..00000000 --- a/src/khoj/interface/web/assets/icons/copy-solid.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/src/khoj/interface/web/assets/icons/delete.svg b/src/khoj/interface/web/assets/icons/delete.svg new file mode 100644 index 00000000..8e078275 --- /dev/null +++ b/src/khoj/interface/web/assets/icons/delete.svg @@ -0,0 +1,26 @@ + + + + + diff --git a/src/khoj/interface/web/assets/icons/microphone-solid.svg b/src/khoj/interface/web/assets/icons/microphone-solid.svg deleted file mode 100644 index 3fc4b91d..00000000 --- 
a/src/khoj/interface/web/assets/icons/microphone-solid.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/src/khoj/interface/web/assets/icons/new.svg b/src/khoj/interface/web/assets/icons/new.svg new file mode 100644 index 00000000..f27d95f6 --- /dev/null +++ b/src/khoj/interface/web/assets/icons/new.svg @@ -0,0 +1,23 @@ + + + + + diff --git a/src/khoj/interface/web/assets/icons/trash-solid.svg b/src/khoj/interface/web/assets/icons/trash-solid.svg deleted file mode 100644 index 768d80f8..00000000 --- a/src/khoj/interface/web/assets/icons/trash-solid.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/src/khoj/interface/web/base_config.html b/src/khoj/interface/web/base_config.html index 31020c05..ccd301c7 100644 --- a/src/khoj/interface/web/base_config.html +++ b/src/khoj/interface/web/base_config.html @@ -257,6 +257,14 @@ color: var(--leaf); } + img.automation-action-icon { + width: 16px; + padding-bottom: 2px; + } + img.automation-row-icon { + max-width: 24px; + } + img.configured-icon { max-width: 16px; } diff --git a/src/khoj/interface/web/chat.html b/src/khoj/interface/web/chat.html index 5302f311..cb18ba0a 100644 --- a/src/khoj/interface/web/chat.html +++ b/src/khoj/interface/web/chat.html @@ -2137,6 +2137,10 @@ To get started, just start typing below. You can also type / to see a list of co img.text-to-image { max-width: 60%; } + h3 > img.text-to-image { + height: 24px; + vertical-align: sub; + } #chat-footer { padding: 0; diff --git a/src/khoj/interface/web/config.html b/src/khoj/interface/web/config.html index ceb09ff1..50858f26 100644 --- a/src/khoj/interface/web/config.html +++ b/src/khoj/interface/web/config.html @@ -276,8 +276,10 @@

Automations

- Automations -

Automations

+ Automations +

+ Automations +

Manage your automations

@@ -296,6 +298,7 @@
@@ -607,13 +610,18 @@ function copyAPIKey(token) { // Copy API key to clipboard navigator.clipboard.writeText(token); - // Flash the API key copied message - const copyApiKeyButton = document.getElementById(`api-key-${token}`); - original_html = copyApiKeyButton.innerHTML + // Flash the API key copied icon + const apiKeyColumn = document.getElementById(`api-key-${token}`); + const original_html = apiKeyColumn.innerHTML; + const copyApiKeyButton = document.getElementById(`api-key-copy-${token}`); setTimeout(function() { - copyApiKeyButton.innerHTML = "✅ Copied!"; + copyApiKeyButton.src = "/static/assets/icons/copy-button-success.svg"; + setTimeout(() => { + copyApiKeyButton.src = "/static/assets/icons/copy-button.svg"; + }, 1000); + apiKeyColumn.innerHTML = "✅ Copied!"; setTimeout(function() { - copyApiKeyButton.innerHTML = original_html; + apiKeyColumn.innerHTML = original_html; }, 1000); }, 100); } @@ -641,8 +649,8 @@ ${tokenName} ${truncatedToken} - Copy API Key - Delete API Key + Copy API Key + Delete API Key `; @@ -684,7 +692,7 @@ ${automationObj.query_to_run} ${automationObj.schedule} - Delete Automation + Delete Automation `; diff --git a/src/khoj/routers/api_chat.py b/src/khoj/routers/api_chat.py index e4b1ad2c..882937b9 100644 --- a/src/khoj/routers/api_chat.py +++ b/src/khoj/routers/api_chat.py @@ -407,8 +407,10 @@ async def websocket_endpoint( # Remove /automated_task prefix from inferred_query unprefixed_query_to_run = re.sub(r"^\/automated_task\s*", "", query_to_run) # Create the automation response + scheme = "http" if not websocket.url.is_secure else "https" + automation_icon_url = f"{scheme}://{websocket.url.netloc}/static/assets/icons/automation.svg" llm_response = f""" - ### 🕒 Automation + ### ![]({automation_icon_url}) Created Automation - Subject: **{subject}** - Query to Run: "{unprefixed_query_to_run}" - Schedule: `{schedule}` @@ -661,8 +663,10 @@ async def chat( # Remove /automated_task prefix from inferred_query unprefixed_query_to_run = 
re.sub(r"^\/automated_task\s*", "", query_to_run) # Create the Automation response + scheme = "http" if not request.url.is_secure else "https" + automation_icon_url = f"{scheme}://{request.url.netloc}/static/assets/icons/automation.svg" llm_response = f""" - ### 🕒 Automation + ### ![]({automation_icon_url}) Created Automation - Subject: **{subject}** - Query to Run: "{unprefixed_query_to_run}" - Schedule: `{schedule}` From 1238cadd31ee313227a1e909b6f5afefe1864e4e Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Tue, 30 Apr 2024 02:40:02 +0530 Subject: [PATCH 27/42] Allow editting query-to-run from the automation config section --- src/khoj/interface/web/assets/icons/edit.svg | 4 +++ src/khoj/interface/web/config.html | 20 ++++++++++- src/khoj/routers/api.py | 37 ++++++++++++++++++++ 3 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 src/khoj/interface/web/assets/icons/edit.svg diff --git a/src/khoj/interface/web/assets/icons/edit.svg b/src/khoj/interface/web/assets/icons/edit.svg new file mode 100644 index 00000000..9dd66854 --- /dev/null +++ b/src/khoj/interface/web/assets/icons/edit.svg @@ -0,0 +1,4 @@ + + + + diff --git a/src/khoj/interface/web/config.html b/src/khoj/interface/web/config.html index 50858f26..37a68e57 100644 --- a/src/khoj/interface/web/config.html +++ b/src/khoj/interface/web/config.html @@ -689,10 +689,11 @@ ${automationObj.subject} ${automationObj.scheduling_request} - ${automationObj.query_to_run} + ${automationObj.query_to_run} ${automationObj.schedule} Delete Automation + Edit Automation `; @@ -730,6 +731,23 @@ } document.getElementById("create-automation").addEventListener("click", async () => { await createAutomation(); }); + function editAutomation(automationId) { + const query_to_run = window.prompt("What is the query you want to run on this automation's schedule?"); + if (!query_to_run) return; + + fetch(`/api/automation?automation_id=${automationId}&query_to_run=${query_to_run}`, { + method: 'PATCH', + 
headers: { + 'Content-Type': 'application/json', + }, + }).then(response => { + if (response.ok) { + const automationQueryToRunColumn = document.getElementById(`automation-query-to-run-${automationId}`); + automationQueryToRunColumn.innerHTML = `${query_to_run}`; + } + }); + } + function getIndexedDataSize() { document.getElementById("indexed-data-size").innerHTML = "Calculating..."; fetch('/api/config/index/size') diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py index 663dc99a..7f1d82e4 100644 --- a/src/khoj/routers/api.py +++ b/src/khoj/routers/api.py @@ -487,3 +487,40 @@ async def make_automation( } # Return information about the created automation as a JSON response return Response(content=json.dumps(automation_info), media_type="application/json", status_code=200) + + +@api.patch("/automation", response_class=Response) +@requires(["authenticated"]) +def edit_job( + request: Request, automation_id: str, query_to_run: Optional[str] = None, crontime: Optional[str] = None +) -> Response: + user: KhojUser = request.user.object + + # Perform validation checks + # Check at least one of query or crontime is provided + if not query_to_run and not crontime: + return Response(content="A query or crontime is required", status_code=400) + # Check if user is allowed to edit this automation id + if not automation_id.startswith(f"automation_{user.uuid}_"): + return Response(content="Unauthorized automation deletion request", status_code=403) + # Check if automation with this id exist + automation: Job = state.scheduler.get_job(job_id=automation_id) + if not automation: + return Response(content="Invalid automation", status_code=403) + if not query_to_run.startswith("/automated_task"): + query_to_run = f"/automated_task {query_to_run}" + + # Update automation with new query + automation_metadata = json.loads(automation.name) + automation_metadata["query_to_run"] = query_to_run + automation.modify(kwargs={"query_to_run": query_to_run}, 
name=json.dumps(automation_metadata)) + + # Collate info about the modified user automation + automation_info = { + "id": automation.id, + "name": automation.name, + "next": automation.next_run_time.strftime("%Y-%m-%d %H:%MS"), + } + + # Return modified automation information as a JSON response + return Response(content=json.dumps(automation_info), media_type="application/json", status_code=200) From 6936875a82e3ac5912f06bbd1b10ae83b7aeec6e Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Tue, 30 Apr 2024 04:00:48 +0530 Subject: [PATCH 28/42] Use DB adapter to unify logic to get, delete automation by auth user To use place with logic to get, view, delete (and edit soon) automations by (authenticated) user, instead of scattered across code --- src/khoj/database/adapters/__init__.py | 60 +++++++++++++++++++++++++- src/khoj/routers/api.py | 59 ++++++------------------- 2 files changed, 73 insertions(+), 46 deletions(-) diff --git a/src/khoj/database/adapters/__init__.py b/src/khoj/database/adapters/__init__.py index d4175704..f9f2193a 100644 --- a/src/khoj/database/adapters/__init__.py +++ b/src/khoj/database/adapters/__init__.py @@ -1,12 +1,16 @@ +import json import logging import math import random +import re import secrets import sys from datetime import date, datetime, timedelta, timezone from enum import Enum -from typing import Callable, List, Optional, Type +from typing import Callable, Iterable, List, Optional, Type +import cron_descriptor +from apscheduler.job import Job from asgiref.sync import sync_to_async from django.contrib.sessions.backends.db import SessionStore from django.db import models @@ -908,3 +912,57 @@ class EntryAdapters: @staticmethod def get_unique_file_sources(user: KhojUser): return Entry.objects.filter(user=user).values_list("file_source", flat=True).distinct().all() + + +class AutomationAdapters: + @staticmethod + def get_automations(user: KhojUser) -> Iterable[Job]: + all_automations: Iterable[Job] = 
state.scheduler.get_jobs() + for automation in all_automations: + if automation.id.startswith(f"automation_{user.uuid}_"): + yield automation + + @staticmethod + def get_automations_metadata(user: KhojUser): + for automation in AutomationAdapters.get_automations(user): + automation_metadata = json.loads(automation.name) + crontime = automation_metadata["crontime"] + timezone = automation.next_run_time.strftime("%Z") + schedule = f"{cron_descriptor.get_description(crontime)} {timezone}" + yield { + "id": automation.id, + "subject": automation_metadata["subject"], + "query_to_run": re.sub(r"^/automated_task\s*", "", automation_metadata["query_to_run"]), + "scheduling_request": automation_metadata["scheduling_request"], + "schedule": schedule, + "next": automation.next_run_time.strftime("%Y-%m-%d %I:%M %p %Z"), + } + + @staticmethod + def get_automation(user: KhojUser, automation_id: str) -> Job: + # Perform validation checks + # Check if user is allowed to delete this automation id + if not automation_id.startswith(f"automation_{user.uuid}_"): + raise ValueError("Invalid automation id") + # Check if automation with this id exist + automation: Job = state.scheduler.get_job(job_id=automation_id) + if not automation: + raise ValueError("Invalid automation id") + + return automation + + @staticmethod + def delete_automation(user: KhojUser, automation_id: str): + # Get valid, user-owned automation + automation: Job = AutomationAdapters.get_automation(user, automation_id) + + # Collate info about user automation to be deleted + automation_metadata = json.loads(automation.name) + automation_info = { + "id": automation.id, + "name": automation_metadata["query_to_run"], + "next": automation.next_run_time.strftime("%Y-%m-%d %I:%M %p %Z"), + } + + automation.remove() + return automation_info diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py index 7f1d82e4..0c22e5fe 100644 --- a/src/khoj/routers/api.py +++ b/src/khoj/routers/api.py @@ -3,7 +3,6 @@ import json import 
logging import math import os -import re import time import uuid from typing import Any, Callable, List, Optional, Union @@ -18,6 +17,7 @@ from starlette.authentication import has_required_scope, requires from khoj.configure import initialize_content from khoj.database.adapters import ( + AutomationAdapters, ConversationAdapters, EntryAdapters, get_user_photo, @@ -39,7 +39,7 @@ from khoj.search_filter.date_filter import DateFilter from khoj.search_filter.file_filter import FileFilter from khoj.search_filter.word_filter import WordFilter from khoj.search_type import text_search -from khoj.utils import constants, state +from khoj.utils import state from khoj.utils.config import OfflineChatProcessorModel from khoj.utils.helpers import ConversationCommand, timer from khoj.utils.rawconfig import LocationData, SearchResponse @@ -396,26 +396,9 @@ def user_info(request: Request) -> Response: @requires(["authenticated"]) def get_automations(request: Request) -> Response: user: KhojUser = request.user.object - automations: list[Job] = state.scheduler.get_jobs() # Collate all automations created by user that are still active - automations_info = [] - for automation in automations: - if automation.id.startswith(f"automation_{user.uuid}_"): - automation_metadata = json.loads(automation.name) - crontime = automation_metadata["crontime"] - timezone = automation.next_run_time.strftime("%Z") - schedule = f"{cron_descriptor.get_description(crontime)} {timezone}" - automations_info.append( - { - "id": automation.id, - "subject": automation_metadata["subject"], - "query_to_run": re.sub(r"^/automated_task\s*", "", automation_metadata["query_to_run"]), - "scheduling_request": automation_metadata["scheduling_request"], - "schedule": schedule, - "next": automation.next_run_time.strftime("%Y-%m-%d %I:%M %p %Z"), - } - ) + automations_info = [automation_info for automation_info in AutomationAdapters.get_automations_metadata(user)] # Return tasks information as a JSON response return 
Response(content=json.dumps(automations_info), media_type="application/json", status_code=200) @@ -426,25 +409,10 @@ def get_automations(request: Request) -> Response: def delete_automation(request: Request, automation_id: str) -> Response: user: KhojUser = request.user.object - # Perform validation checks - # Check if user is allowed to delete this automation id - if not automation_id.startswith(f"automation_{user.uuid}_"): - return Response(content="Unauthorized job deletion request", status_code=403) - # Check if automation with this id exist - automation: Job = state.scheduler.get_job(job_id=automation_id) - if not automation: - return Response(content="Invalid job", status_code=403) - - # Collate info about user task to be deleted - automation_metadata = json.loads(automation.name) - automation_info = { - "id": automation.id, - "name": automation_metadata["query_to_run"], - "next": automation.next_run_time.strftime("%Y-%m-%d %I:%M %p %Z"), - } - - # Delete job - automation.remove() + try: + automation_info = AutomationAdapters.delete_automation(user, automation_id) + except ValueError as e: + return Response(content="Could not find automation", status_code=403) # Return deleted automation information as a JSON response return Response(content=json.dumps(automation_info), media_type="application/json", status_code=200) @@ -500,13 +468,14 @@ def edit_job( # Check at least one of query or crontime is provided if not query_to_run and not crontime: return Response(content="A query or crontime is required", status_code=400) - # Check if user is allowed to edit this automation id - if not automation_id.startswith(f"automation_{user.uuid}_"): - return Response(content="Unauthorized automation deletion request", status_code=403) - # Check if automation with this id exist - automation: Job = state.scheduler.get_job(job_id=automation_id) - if not automation: + + # Check, get automation to edit + try: + automation: Job = AutomationAdapters.get_automation(user, 
automation_id) + except ValueError as e: return Response(content="Invalid automation", status_code=403) + + # Add /automated_task prefix to query if not present if not query_to_run.startswith("/automated_task"): query_to_run = f"/automated_task {query_to_run}" From ca8a7d8368edbb6f20bfa8f72b45db381223b6c8 Mon Sep 17 00:00:00 2001 From: sabaimran Date: Tue, 30 Apr 2024 13:02:13 +0530 Subject: [PATCH 29/42] Revert sync -> aync in send welcome email method --- src/khoj/routers/email.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/khoj/routers/email.py b/src/khoj/routers/email.py index cb0c39c7..1569eed5 100644 --- a/src/khoj/routers/email.py +++ b/src/khoj/routers/email.py @@ -31,7 +31,7 @@ def is_resend_enabled(): return bool(RESEND_API_KEY) -def send_welcome_email(name, email): +async def send_welcome_email(name, email): if not is_resend_enabled(): logger.debug("Email sending disabled") return From 06213ea814a556d8a0e9500a0cfa550b111bc872 Mon Sep 17 00:00:00 2001 From: sabaimran Date: Tue, 30 Apr 2024 17:41:17 +0530 Subject: [PATCH 30/42] Fix token retrieval when executing the job and name async job approriately --- src/khoj/database/adapters/__init__.py | 9 ++++++++- src/khoj/routers/auth.py | 6 +++--- src/khoj/routers/helpers.py | 4 ++-- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/khoj/database/adapters/__init__.py b/src/khoj/database/adapters/__init__.py index f9f2193a..ee72a76a 100644 --- a/src/khoj/database/adapters/__init__.py +++ b/src/khoj/database/adapters/__init__.py @@ -72,7 +72,14 @@ async def set_notion_config(token: str, user: KhojUser): return notion_config -async def create_khoj_token(user: KhojUser, name=None): +def create_khoj_token(user: KhojUser, name=None): + "Create Khoj API key for user" + token = f"kk-{secrets.token_urlsafe(32)}" + name = name or f"{generate_random_name().title()}" + return KhojApiUser.objects.create(token=token, user=user, name=name) + + +async def acreate_khoj_token(user: 
KhojUser, name=None): "Create Khoj API key for user" token = f"kk-{secrets.token_urlsafe(32)}" name = name or f"{generate_random_name().title()}" diff --git a/src/khoj/routers/auth.py b/src/khoj/routers/auth.py index 199ccd2b..6e0e30c1 100644 --- a/src/khoj/routers/auth.py +++ b/src/khoj/routers/auth.py @@ -12,7 +12,7 @@ from starlette.responses import HTMLResponse, RedirectResponse, Response from starlette.status import HTTP_302_FOUND from khoj.database.adapters import ( - create_khoj_token, + acreate_khoj_token, delete_khoj_token, get_khoj_tokens, get_or_create_user, @@ -67,9 +67,9 @@ async def login(request: Request): async def generate_token(request: Request, token_name: Optional[str] = None): "Generate API token for given user" if token_name: - token = await create_khoj_token(user=request.user.object, name=token_name) + token = await acreate_khoj_token(user=request.user.object, name=token_name) else: - token = await create_khoj_token(user=request.user.object) + token = await acreate_khoj_token(user=request.user.object) return { "token": token.token, "name": token.name, diff --git a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py index 29cf95e6..2840a5cb 100644 --- a/src/khoj/routers/helpers.py +++ b/src/khoj/routers/helpers.py @@ -886,9 +886,9 @@ def scheduled_chat(query_to_run: str, scheduling_request: str, subject: str, use # Add authorization request header in non-anonymous mode token = get_khoj_tokens(user) if is_none_or_empty(token): - token = create_khoj_token(user) + token = create_khoj_token(user).token else: - token = token[0] + token = token[0].token headers["Authorization"] = f"Bearer {token}" # Call the chat API endpoint with authenticated user token and query From eb65532386d2d18b74f9983ad8e5d389af9c8e23 Mon Sep 17 00:00:00 2001 From: sabaimran Date: Tue, 30 Apr 2024 17:43:27 +0530 Subject: [PATCH 31/42] Use Django ap scheduler in place of the sqlalchemy one --- pyproject.toml | 1 + src/khoj/app/settings.py | 18 ++++++++++++++++++ 
src/khoj/main.py | 8 ++------ src/khoj/routers/helpers.py | 3 ++- 4 files changed, 23 insertions(+), 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 4c32a1d2..06b2da55 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -83,6 +83,7 @@ dependencies = [ "apscheduler ~= 3.10.0", "pytz ~= 2024.1", "cron-descriptor == 1.4.3", + "django_apscheduler == 0.6.2", ] dynamic = ["version"] diff --git a/src/khoj/app/settings.py b/src/khoj/app/settings.py index 27be968e..2672f98d 100644 --- a/src/khoj/app/settings.py +++ b/src/khoj/app/settings.py @@ -77,6 +77,7 @@ INSTALLED_APPS = [ "django.contrib.messages", "django.contrib.staticfiles", "phonenumber_field", + "django_apscheduler", ] MIDDLEWARE = [ @@ -169,3 +170,20 @@ STATIC_URL = "/static/" # https://docs.djangoproject.com/en/4.2/ref/settings/#default-auto-field DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField" + + +# Format string for displaying run time timestamps in the Django admin site. The default +# just adds seconds to the standard Django format, which is useful for displaying the timestamps +# for jobs that are scheduled to run on intervals of less than one minute. +# +# See https://docs.djangoproject.com/en/dev/ref/settings/#datetime-format for format string +# syntax details. +APSCHEDULER_DATETIME_FORMAT = "N j, Y, f:s a" + +# Maximum run time allowed for jobs that are triggered manually via the Django admin site, which +# prevents admin site HTTP requests from timing out. +# +# Longer running jobs should probably be handed over to a background task processing library +# that supports multiple background worker processes instead (e.g. Dramatiq, Celery, Django-RQ, +# etc. See: https://djangopackages.org/grids/g/workers-queues-tasks/ for popular options). 
+APSCHEDULER_RUN_NOW_TIMEOUT = 240 # Seconds diff --git a/src/khoj/main.py b/src/khoj/main.py index 6ce30c7a..4a9593af 100644 --- a/src/khoj/main.py +++ b/src/khoj/main.py @@ -128,20 +128,16 @@ def run(should_start_server=True): poll_task_scheduler() # Setup Background Scheduler - from django.conf import settings as django_settings + from django_apscheduler.jobstores import DjangoJobStore - django_db = django_settings.DATABASES["default"] state.scheduler = BackgroundScheduler( { - "apscheduler.jobstores.default": { - "type": "sqlalchemy", - "url": f'postgresql://{django_db["USER"]}:{django_db["PASSWORD"]}@{django_db["HOST"]}:{django_db["PORT"]}/{django_db["NAME"]}', - }, "apscheduler.timezone": "UTC", "apscheduler.job_defaults.misfire_grace_time": "60", # Useful to run scheduled jobs even when worker delayed because it was busy or down "apscheduler.job_defaults.coalesce": "true", # Combine multiple jobs into one if they are scheduled at the same time } ) + state.scheduler.add_jobstore(DjangoJobStore(), "default") state.scheduler.start() # Start Server diff --git a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py index 2840a5cb..60fda057 100644 --- a/src/khoj/routers/helpers.py +++ b/src/khoj/routers/helpers.py @@ -25,6 +25,7 @@ import openai import pytz import requests from apscheduler.triggers.cron import CronTrigger +from asgiref.sync import sync_to_async from fastapi import Depends, Header, HTTPException, Request, UploadFile from PIL import Image from starlette.authentication import has_required_scope @@ -927,7 +928,7 @@ async def create_automation( ) query_id = hashlib.md5(f"{query_to_run}".encode("utf-8")).hexdigest() job_id = f"automation_{user.uuid}_{crontime_string}_{query_id}" - job = state.scheduler.add_job( + job = await sync_to_async(state.scheduler.add_job)( run_with_process_lock, trigger=trigger, args=( From 311d58e1ed432a0d5ed4dfefc484ef77bfe6eb6f Mon Sep 17 00:00:00 2001 From: sabaimran Date: Tue, 30 Apr 2024 20:07:33 +0530 Subject: [PATCH 
32/42] Ensure the automated_task command is removed from the prepended query --- src/khoj/routers/api_chat.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/khoj/routers/api_chat.py b/src/khoj/routers/api_chat.py index 882937b9..ae3f7cd9 100644 --- a/src/khoj/routers/api_chat.py +++ b/src/khoj/routers/api_chat.py @@ -377,7 +377,7 @@ async def websocket_endpoint( is_automated_task = conversation_commands == [ConversationCommand.AutomatedTask] if conversation_commands == [ConversationCommand.Default] or is_automated_task: - conversation_commands = await aget_relevant_information_sources(q, meta_log) + conversation_commands = await aget_relevant_information_sources(q, meta_log, is_automated_task) conversation_commands_str = ", ".join([cmd.value for cmd in conversation_commands]) await send_status_update(f"**🗃️ Chose Data Sources to Search:** {conversation_commands_str}") @@ -626,11 +626,11 @@ async def chat( else: meta_log = conversation.conversation_log - is_task = conversation_commands == [ConversationCommand.AutomatedTask] + is_automated_task = conversation_commands == [ConversationCommand.AutomatedTask] - if conversation_commands == [ConversationCommand.Default] or is_task: - conversation_commands = await aget_relevant_information_sources(q, meta_log) - mode = await aget_relevant_output_modes(q, meta_log, is_task) + if conversation_commands == [ConversationCommand.Default] or is_automated_task: + conversation_commands = await aget_relevant_information_sources(q, meta_log, is_automated_task) + mode = await aget_relevant_output_modes(q, meta_log, is_automated_task) if mode not in conversation_commands: conversation_commands.append(mode) From ad4145e48c63f40a305efbc862c704a141705f2b Mon Sep 17 00:00:00 2001 From: sabaimran Date: Tue, 30 Apr 2024 20:08:05 +0530 Subject: [PATCH 33/42] Fix unique hash for job id --- src/khoj/routers/helpers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git
a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py index 60fda057..53808c21 100644 --- a/src/khoj/routers/helpers.py +++ b/src/khoj/routers/helpers.py @@ -185,7 +185,7 @@ async def agenerate_chat_response(*args): return await loop.run_in_executor(executor, generate_chat_response, *args) -async def aget_relevant_information_sources(query: str, conversation_history: dict): +async def aget_relevant_information_sources(query: str, conversation_history: dict, is_task: bool): """ Given a query, determine which of the available tools the agent should use in order to answer appropriately. """ @@ -216,7 +216,7 @@ async def aget_relevant_information_sources(query: str, conversation_history: di logger.error(f"Invalid response for determining relevant tools: {response}") return tool_options - final_response = [] + final_response = [] if not is_task else [ConversationCommand.AutomatedTask] for llm_suggested_tool in response: if llm_suggested_tool in tool_options.keys(): # Check whether the tool exists as a valid ConversationCommand @@ -926,7 +926,7 @@ async def create_automation( job_metadata = json.dumps( {"query_to_run": query_to_run, "scheduling_request": q, "subject": subject, "crontime": crontime_string} ) - query_id = hashlib.md5(f"{query_to_run}".encode("utf-8")).hexdigest() + query_id = hashlib.md5(f"{query_to_run}{crontime_string}".encode("utf-8")).hexdigest() job_id = f"automation_{user.uuid}_{crontime_string}_{query_id}" job = await sync_to_async(state.scheduler.add_job)( run_with_process_lock, From c52ed333fa3aa7cf7fe9bc8b8f1f962316dc770b Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Tue, 30 Apr 2024 18:08:20 +0530 Subject: [PATCH 34/42] Make content, cards on config pages occupy the whole middle column - Make the config page content use the same top level 3-column layout as the khoj-header-wrapper This ensures the content is aligned with heading pane width - Let cards and other settings sections scale to the width of their grid element. 
This utilizes more of the screen space and does it consistently across the different settings pages --- src/khoj/interface/web/base_config.html | 10 ++++++---- src/khoj/interface/web/config.html | 1 - 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/khoj/interface/web/base_config.html b/src/khoj/interface/web/base_config.html index ccd301c7..5636eca1 100644 --- a/src/khoj/interface/web/base_config.html +++ b/src/khoj/interface/web/base_config.html @@ -24,9 +24,11 @@
-
+
+
{% block content %} {% endblock %} +
+ + +{% endblock %} diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py index 0c22e5fe..d3a0ef1d 100644 --- a/src/khoj/routers/api.py +++ b/src/khoj/routers/api.py @@ -8,7 +8,9 @@ import uuid from typing import Any, Callable, List, Optional, Union import cron_descriptor +import pytz from apscheduler.job import Job +from apscheduler.triggers.cron import CronTrigger from asgiref.sync import sync_to_async from fastapi import APIRouter, Depends, File, HTTPException, Request, UploadFile from fastapi.requests import Request @@ -33,6 +35,7 @@ from khoj.routers.helpers import ( CommonQueryParams, ConversationCommandRateLimiter, create_automation, + schedule_automation, update_telemetry_state, ) from khoj.search_filter.date_filter import DateFilter @@ -41,7 +44,7 @@ from khoj.search_filter.word_filter import WordFilter from khoj.search_type import text_search from khoj.utils import state from khoj.utils.config import OfflineChatProcessorModel -from khoj.utils.helpers import ConversationCommand, timer +from khoj.utils.helpers import ConversationCommand, is_none_or_empty, timer from khoj.utils.rawconfig import LocationData, SearchResponse from khoj.utils.state import SearchType @@ -411,8 +414,8 @@ def delete_automation(request: Request, automation_id: str) -> Response: try: automation_info = AutomationAdapters.delete_automation(user, automation_id) - except ValueError as e: - return Response(content="Could not find automation", status_code=403) + except ValueError: + return Response(status_code=204) # Return deleted automation information as a JSON response return Response(content=json.dumps(automation_info), media_type="application/json", status_code=200) @@ -420,21 +423,44 @@ def delete_automation(request: Request, automation_id: str) -> Response: @api.post("/automation", response_class=Response) @requires(["authenticated"]) -async def make_automation( +async def post_automation( request: Request, q: str, + subject: str, + crontime: str, city: Optional[str] = 
None, region: Optional[str] = None, country: Optional[str] = None, timezone: Optional[str] = None, ) -> Response: user: KhojUser = request.user.object - if city or region or country: - location = LocationData(city=city, region=region, country=country) - # Create automation with scheduling query and location data + # Perform validation checks + if is_none_or_empty(q) or is_none_or_empty(subject) or is_none_or_empty(crontime): + return Response(content="A query, subject and crontime is required", status_code=400) + if not cron_descriptor.get_description(crontime): + return Response(content="Invalid crontime", status_code=400) + + # Normalize query parameters + # Add /automated_task prefix to query if not present + q = q.strip() + if not q.startswith("/automated_task"): + query_to_run = f"/automated_task {q}" + # Normalize crontime for AP Scheduler CronTrigger + crontime = crontime.strip() + if len(crontime.split(" ")) > 5: + # Truncate crontime to 5 fields + crontime = " ".join(crontime.split(" ")[:5]) + # Convert crontime to standard unix crontime + crontime = crontime.replace("?", "*") + subject = subject.strip() + + # Schedule automation with query_to_run, timezone, subject directly provided by user try: - automation, crontime, query_to_run, subject = await create_automation(q, location, timezone, user, request.url) + # Get user timezone + user_timezone = pytz.timezone(timezone) + # Use the query to run as the scheduling request if the scheduling request is unset + automation = await schedule_automation(query_to_run, subject, crontime, user_timezone, q, user, request.url) except Exception as e: logger.error(f"Error creating automation {q} for {user.email}: {e}") return Response( @@ -449,25 +475,36 @@ async def make_automation( "id": automation.id, "subject": subject, "query_to_run": query_to_run, - "scheduling_request": crontime, + "scheduling_request": query_to_run, "schedule": schedule, + "crontime": crontime, "next": automation.next_run_time.strftime("%Y-%m-%d 
%I:%M %p %Z"), } + # Return information about the created automation as a JSON response return Response(content=json.dumps(automation_info), media_type="application/json", status_code=200) -@api.patch("/automation", response_class=Response) +@api.put("/automation", response_class=Response) @requires(["authenticated"]) def edit_job( - request: Request, automation_id: str, query_to_run: Optional[str] = None, crontime: Optional[str] = None + request: Request, + automation_id: str, + q: Optional[str], + subject: Optional[str], + crontime: Optional[str], + city: Optional[str] = None, + region: Optional[str] = None, + country: Optional[str] = None, + timezone: Optional[str] = None, ) -> Response: user: KhojUser = request.user.object # Perform validation checks - # Check at least one of query or crontime is provided - if not query_to_run and not crontime: - return Response(content="A query or crontime is required", status_code=400) + if is_none_or_empty(q) or is_none_or_empty(subject) or is_none_or_empty(crontime): + return Response(content="A query, subject and crontime is required", status_code=400) + if not cron_descriptor.get_description(crontime): + return Response(content="Invalid crontime", status_code=400) # Check, get automation to edit try: @@ -475,14 +512,31 @@ def edit_job( except ValueError as e: return Response(content="Invalid automation", status_code=403) + # Normalize query parameters # Add /automated_task prefix to query if not present - if not query_to_run.startswith("/automated_task"): - query_to_run = f"/automated_task {query_to_run}" + q = q.strip() + if not q.startswith("/automated_task"): + query_to_run = f"/automated_task {q}" + # Normalize crontime for AP Scheduler CronTrigger + crontime = crontime.strip() + if len(crontime.split(" ")) > 5: + # Truncate crontime to 5 fields + crontime = " ".join(crontime.split(" ")[:5]) + # Convert crontime to standard unix crontime + crontime = crontime.replace("?", "*") - # Update automation with new query + # 
Construct updated automation metadata automation_metadata = json.loads(automation.name) automation_metadata["query_to_run"] = query_to_run - automation.modify(kwargs={"query_to_run": query_to_run}, name=json.dumps(automation_metadata)) + automation_metadata["subject"] = subject.strip() + + # Modify automation with updated query, subject, crontime + automation.modify(kwargs={"query_to_run": query_to_run, "subject": subject}, name=json.dumps(automation_metadata)) + + # Reschedule automation if crontime updated + trigger = CronTrigger.from_crontab(crontime) + if automation.trigger != trigger: + automation.reschedule(trigger=trigger) # Collate info about the modified user automation automation_info = { diff --git a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py index fda45469..7a8d869c 100644 --- a/src/khoj/routers/helpers.py +++ b/src/khoj/routers/helpers.py @@ -922,14 +922,32 @@ async def create_automation( q: str, location: LocationData, timezone: str, user: KhojUser, calling_url: URL, meta_log: dict = {} ): user_timezone = pytz.timezone(timezone) - crontime_string, query_to_run, subject = await schedule_query(q, location, meta_log) - trigger = CronTrigger.from_crontab(crontime_string, user_timezone) + crontime, query_to_run, subject = await schedule_query(q, location, meta_log) + job = await schedule_automation(query_to_run, subject, crontime, user_timezone, q, user, calling_url) + return job, crontime, query_to_run, subject + + +async def schedule_automation( + query_to_run: str, + subject: str, + crontime: str, + user_timezone, + scheduling_request: str, + user: KhojUser, + calling_url: URL, +): + trigger = CronTrigger.from_crontab(crontime, user_timezone) # Generate id and metadata used by task scheduler and process locks for the task runs job_metadata = json.dumps( - {"query_to_run": query_to_run, "scheduling_request": q, "subject": subject, "crontime": crontime_string} + { + "query_to_run": query_to_run, + "scheduling_request": scheduling_request, + 
"subject": subject, + "crontime": crontime, + } ) - query_id = hashlib.md5(f"{query_to_run}{crontime_string}".encode("utf-8")).hexdigest() - job_id = f"automation_{user.uuid}_{crontime_string}_{query_id}" + query_id = hashlib.md5(f"{query_to_run}_{crontime}".encode("utf-8")).hexdigest() + job_id = f"automation_{user.uuid}_{query_id}" job = await sync_to_async(state.scheduler.add_job)( run_with_process_lock, trigger=trigger, @@ -939,7 +957,7 @@ async def create_automation( ), kwargs={ "query_to_run": query_to_run, - "scheduling_request": q, + "scheduling_request": scheduling_request, "subject": subject, "user": user, "calling_url": calling_url, @@ -949,7 +967,7 @@ async def create_automation( max_instances=2, # Allow second instance to kill any previous instance with stale lock jitter=30, ) - return job, crontime_string, query_to_run, subject + return job def construct_automation_created_message(automation: Job, crontime: str, query_to_run: str, subject: str, url: URL): @@ -968,5 +986,5 @@ def construct_automation_created_message(automation: Job, crontime: str, query_t - Schedule: `{schedule}` - Next Run At: {next_run_time} -Manage your tasks [here](/config#automations). +Manage your automations [here](/automations). 
""".strip() diff --git a/src/khoj/routers/web_client.py b/src/khoj/routers/web_client.py index 9e3a39b5..047273e9 100644 --- a/src/khoj/routers/web_client.py +++ b/src/khoj/routers/web_client.py @@ -11,6 +11,7 @@ from starlette.authentication import has_required_scope, requires from khoj.database import adapters from khoj.database.adapters import ( AgentAdapters, + AutomationAdapters, ConversationAdapters, EntryAdapters, get_user_github_config, @@ -364,3 +365,23 @@ def computer_config_page(request: Request): "khoj_version": state.khoj_version, }, ) + + +@web_client.get("/automations", response_class=HTMLResponse) +@requires(["authenticated"], redirect="login_page") +def automations_config_page(request: Request): + user = request.user.object + user_picture = request.session.get("user", {}).get("picture") + has_documents = EntryAdapters.user_has_entries(user=user) + + return templates.TemplateResponse( + "config_automation.html", + context={ + "request": request, + "username": user.username, + "user_photo": user_picture, + "is_active": has_required_scope(request, ["premium"]), + "has_documents": has_documents, + "khoj_version": state.khoj_version, + }, + ) From 21bdf45d6f71c9629135fa54750b663241e9016b Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Wed, 1 May 2024 03:16:38 +0530 Subject: [PATCH 37/42] Add link to Automate page in nav pane of the web app --- src/khoj/interface/web/assets/khoj.css | 4 ++-- src/khoj/interface/web/utils.html | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/khoj/interface/web/assets/khoj.css b/src/khoj/interface/web/assets/khoj.css index 3c3536a7..1c57fbce 100644 --- a/src/khoj/interface/web/assets/khoj.css +++ b/src/khoj/interface/web/assets/khoj.css @@ -199,7 +199,7 @@ img.khoj-logo { border: 3px solid var(--primary-hover); } -@media screen and (max-width: 700px) { +@media screen and (max-width: 1000px) { .khoj-nav-dropdown-content { display: block; grid-auto-flow: row; @@ -215,7 +215,7 @@ 
img.khoj-logo { } } -@media only screen and (max-width: 700px) { +@media only screen and (max-width: 1000px) { div.khoj-header { display: grid; grid-auto-flow: column; diff --git a/src/khoj/interface/web/utils.html b/src/khoj/interface/web/utils.html index f9372482..b2d719cb 100644 --- a/src/khoj/interface/web/utils.html +++ b/src/khoj/interface/web/utils.html @@ -10,6 +10,9 @@ Agents Agents + + Automation + Automate {% if has_documents %} Search From 815966cb2500231c8e114f2cd92aad420883aeeb Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Wed, 1 May 2024 04:39:45 +0530 Subject: [PATCH 38/42] Unify, modularize DB adapters to get automation metadata by user further --- src/khoj/database/adapters/__init__.py | 43 ++++++++++++++------------ src/khoj/routers/api.py | 20 +++--------- 2 files changed, 28 insertions(+), 35 deletions(-) diff --git a/src/khoj/database/adapters/__init__.py b/src/khoj/database/adapters/__init__.py index ee72a76a..048df839 100644 --- a/src/khoj/database/adapters/__init__.py +++ b/src/khoj/database/adapters/__init__.py @@ -929,21 +929,31 @@ class AutomationAdapters: if automation.id.startswith(f"automation_{user.uuid}_"): yield automation + @staticmethod + def get_automation_metadata(user: KhojUser, automation: Job): + # Perform validation checks + # Check if user is allowed to delete this automation id + if not automation.id.startswith(f"automation_{user.uuid}_"): + raise ValueError("Invalid automation id") + + automation_metadata = json.loads(automation.name) + crontime = automation_metadata["crontime"] + timezone = automation.next_run_time.strftime("%Z") + schedule = f"{cron_descriptor.get_description(crontime)} {timezone}" + return { + "id": automation.id, + "subject": automation_metadata["subject"], + "query_to_run": re.sub(r"^/automated_task\s*", "", automation_metadata["query_to_run"]), + "scheduling_request": automation_metadata["scheduling_request"], + "schedule": schedule, + "crontime": crontime, + "next": 
automation.next_run_time.strftime("%Y-%m-%d %I:%M %p %Z"), + } + @staticmethod def get_automations_metadata(user: KhojUser): for automation in AutomationAdapters.get_automations(user): - automation_metadata = json.loads(automation.name) - crontime = automation_metadata["crontime"] - timezone = automation.next_run_time.strftime("%Z") - schedule = f"{cron_descriptor.get_description(crontime)} {timezone}" - yield { - "id": automation.id, - "subject": automation_metadata["subject"], - "query_to_run": re.sub(r"^/automated_task\s*", "", automation_metadata["query_to_run"]), - "scheduling_request": automation_metadata["scheduling_request"], - "schedule": schedule, - "next": automation.next_run_time.strftime("%Y-%m-%d %I:%M %p %Z"), - } + yield AutomationAdapters.get_automation_metadata(user, automation) @staticmethod def get_automation(user: KhojUser, automation_id: str) -> Job: @@ -964,12 +974,7 @@ class AutomationAdapters: automation: Job = AutomationAdapters.get_automation(user, automation_id) # Collate info about user automation to be deleted - automation_metadata = json.loads(automation.name) - automation_info = { - "id": automation.id, - "name": automation_metadata["query_to_run"], - "next": automation.next_run_time.strftime("%Y-%m-%d %I:%M %p %Z"), - } + automation_metadata = AutomationAdapters.get_automation_metadata(user, automation) automation.remove() - return automation_info + return automation_metadata diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py index d3a0ef1d..b7c5d5ab 100644 --- a/src/khoj/routers/api.py +++ b/src/khoj/routers/api.py @@ -470,16 +470,7 @@ async def post_automation( ) # Collate info about the created user automation - schedule = f'{cron_descriptor.get_description(crontime)} {automation.next_run_time.strftime("%Z")}' - automation_info = { - "id": automation.id, - "subject": subject, - "query_to_run": query_to_run, - "scheduling_request": query_to_run, - "schedule": schedule, - "crontime": crontime, - "next": 
automation.next_run_time.strftime("%Y-%m-%d %I:%M %p %Z"), - } + automation_info = AutomationAdapters.get_automation_metadata(user, automation) # Return information about the created automation as a JSON response return Response(content=json.dumps(automation_info), media_type="application/json", status_code=200) @@ -538,12 +529,9 @@ def edit_job( if automation.trigger != trigger: automation.reschedule(trigger=trigger) - # Collate info about the modified user automation - automation_info = { - "id": automation.id, - "name": automation.name, - "next": automation.next_run_time.strftime("%Y-%m-%d %H:%MS"), - } + # Collate info about the updated user automation + automation = AutomationAdapters.get_automation(user, automation.id) + automation_info = AutomationAdapters.get_automation_metadata(user, automation) # Return modified automation information as a JSON response return Response(content=json.dumps(automation_info), media_type="application/json", status_code=200) From 8f28f6cc1ed3be7be22b15fcee144a919e99d709 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Wed, 1 May 2024 05:32:10 +0530 Subject: [PATCH 39/42] Remove now unused location data from being passed to automation funcs --- src/khoj/routers/api_chat.py | 4 ++-- src/khoj/routers/helpers.py | 8 +++----- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/khoj/routers/api_chat.py b/src/khoj/routers/api_chat.py index ce9d4c01..79a68969 100644 --- a/src/khoj/routers/api_chat.py +++ b/src/khoj/routers/api_chat.py @@ -392,7 +392,7 @@ async def websocket_endpoint( if ConversationCommand.Automation in conversation_commands: try: automation, crontime, query_to_run, subject = await create_automation( - q, location, timezone, user, websocket.url, meta_log + q, timezone, user, websocket.url, meta_log ) except Exception as e: logger.error(f"Error scheduling task {q} for {user.email}: {e}") @@ -633,7 +633,7 @@ async def chat( if ConversationCommand.Automation in conversation_commands: try: 
automation, crontime, query_to_run, subject = await create_automation( - q, location, timezone, user, request.url, meta_log + q, timezone, user, request.url, meta_log ) except Exception as e: logger.error(f"Error creating automation {q} for {user.email}: {e}") diff --git a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py index 7a8d869c..559aaf38 100644 --- a/src/khoj/routers/helpers.py +++ b/src/khoj/routers/helpers.py @@ -336,7 +336,7 @@ async def generate_online_subqueries(q: str, conversation_history: dict, locatio return [q] -async def schedule_query(q: str, location_data: LocationData, conversation_history: dict) -> Tuple[str, ...]: +async def schedule_query(q: str, conversation_history: dict) -> Tuple[str, ...]: """ Schedule the date, time to run the query. Assume the server timezone is UTC. """ @@ -918,11 +918,9 @@ def scheduled_chat(query_to_run: str, scheduling_request: str, subject: str, use return raw_response -async def create_automation( - q: str, location: LocationData, timezone: str, user: KhojUser, calling_url: URL, meta_log: dict = {} -): +async def create_automation(q: str, timezone: str, user: KhojUser, calling_url: URL, meta_log: dict = {}): user_timezone = pytz.timezone(timezone) - crontime, query_to_run, subject = await schedule_query(q, location, meta_log) + crontime, query_to_run, subject = await schedule_query(q, meta_log) job = await schedule_automation(query_to_run, subject, crontime, user_timezone, q, user, calling_url) return job, crontime, query_to_run, subject From 70ee9ddf91522b4c5d04024879d69c568640a735 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Wed, 1 May 2024 08:32:07 +0530 Subject: [PATCH 40/42] Merge migrations from main with feature branch --- .../database/migrations/0039_merge_20240501_0301.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 src/khoj/database/migrations/0039_merge_20240501_0301.py diff --git a/src/khoj/database/migrations/0039_merge_20240501_0301.py 
b/src/khoj/database/migrations/0039_merge_20240501_0301.py new file mode 100644 index 00000000..c2bb1a87 --- /dev/null +++ b/src/khoj/database/migrations/0039_merge_20240501_0301.py @@ -0,0 +1,12 @@ +# Generated by Django 4.2.10 on 2024-05-01 03:01 + +from django.db import migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("database", "0038_merge_20240425_0857"), + ("database", "0038_merge_20240426_1640"), + ] + + operations: list = [] From 19c5af3ebc8e9c2bd25e71e78502f6df4d291916 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Wed, 1 May 2024 09:06:59 +0530 Subject: [PATCH 41/42] Handle natural language to cron translation error on web client --- src/khoj/interface/web/config_automation.html | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/khoj/interface/web/config_automation.html b/src/khoj/interface/web/config_automation.html index 89074796..1b067996 100644 --- a/src/khoj/interface/web/config_automation.html +++ b/src/khoj/interface/web/config_automation.html @@ -172,6 +172,17 @@ // Get cron string from natural language user schedule, if changed const crontime = scheduleEl.getAttribute('data-original') !== scheduleEl.value ? getCronString(scheduleEl.value) : scheduleEl.getAttribute('data-cron'); + if (crontime.startsWith("ERROR:")) { + notificationEl.textContent = `⚠️ Failed to automate. Fix or simplify Schedule input field.`; + notificationEl.style.display = "block"; + let originalScheduleElBorder = scheduleEl.style.border; + scheduleEl.style.border = "2px solid red"; + setTimeout(function() { + scheduleEl.style.border = originalScheduleElBorder; + }, 2000); + + return; + } const encodedCrontime = encodeURIComponent(crontime); // Construct query string and select method for API call From 89a8dbb81ab76fc2d0ec97b61c48db250403562d Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Wed, 1 May 2024 09:32:15 +0530 Subject: [PATCH 42/42] Fix edit job API. Use user timezone, pass all reqd. 
params to automation - Pass user and calling_url to the scheduled chat too when modifying params of automation - Update to use user timezone even when update job via API - Move timezone string to timezone object calculation into the schedule automation method --- src/khoj/routers/api.py | 23 ++++++++++++++++------- src/khoj/routers/helpers.py | 6 +++--- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py index b7c5d5ab..625238c1 100644 --- a/src/khoj/routers/api.py +++ b/src/khoj/routers/api.py @@ -34,7 +34,6 @@ from khoj.routers.helpers import ( ApiUserRateLimiter, CommonQueryParams, ConversationCommandRateLimiter, - create_automation, schedule_automation, update_telemetry_state, ) @@ -457,10 +456,8 @@ async def post_automation( # Schedule automation with query_to_run, timezone, subject directly provided by user try: - # Get user timezone - user_timezone = pytz.timezone(timezone) # Use the query to run as the scheduling request if the scheduling request is unset - automation = await schedule_automation(query_to_run, subject, crontime, user_timezone, q, user, request.url) + automation = await schedule_automation(query_to_run, subject, crontime, timezone, q, user, request.url) except Exception as e: logger.error(f"Error creating automation {q} for {user.email}: {e}") return Response( @@ -518,14 +515,26 @@ def edit_job( # Construct updated automation metadata automation_metadata = json.loads(automation.name) + automation_metadata["scheduling_request"] = q automation_metadata["query_to_run"] = query_to_run automation_metadata["subject"] = subject.strip() + automation_metadata["crontime"] = crontime - # Modify automation with updated query, subject, crontime - automation.modify(kwargs={"query_to_run": query_to_run, "subject": subject}, name=json.dumps(automation_metadata)) + # Modify automation with updated query, subject + automation.modify( + name=json.dumps(automation_metadata), + kwargs={ + 
"query_to_run": query_to_run, + "subject": subject, + "scheduling_request": q, + "user": user, + "calling_url": request.url, + }, + ) # Reschedule automation if crontime updated - trigger = CronTrigger.from_crontab(crontime) + user_timezone = pytz.timezone(timezone) + trigger = CronTrigger.from_crontab(crontime, user_timezone) if automation.trigger != trigger: automation.reschedule(trigger=trigger) diff --git a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py index 559aaf38..0736e68f 100644 --- a/src/khoj/routers/helpers.py +++ b/src/khoj/routers/helpers.py @@ -919,9 +919,8 @@ def scheduled_chat(query_to_run: str, scheduling_request: str, subject: str, use async def create_automation(q: str, timezone: str, user: KhojUser, calling_url: URL, meta_log: dict = {}): - user_timezone = pytz.timezone(timezone) crontime, query_to_run, subject = await schedule_query(q, meta_log) - job = await schedule_automation(query_to_run, subject, crontime, user_timezone, q, user, calling_url) + job = await schedule_automation(query_to_run, subject, crontime, timezone, q, user, calling_url) return job, crontime, query_to_run, subject @@ -929,11 +928,12 @@ async def schedule_automation( query_to_run: str, subject: str, crontime: str, - user_timezone, + timezone: str, scheduling_request: str, user: KhojUser, calling_url: URL, ): + user_timezone = pytz.timezone(timezone) trigger = CronTrigger.from_crontab(crontime, user_timezone) # Generate id and metadata used by task scheduler and process locks for the task runs job_metadata = json.dumps(