Switch to let Khoj infer chat query based on user automation query

This tries to decouple the automation query from the chat query, so
the chat model doesn't have to know it is running in an automation
context or figure out how to notify the user or send the automation
response. It just has to respond to the AI-generated `query_to_run`
corresponding to the user's `scheduling_request` for the automation.

For example, a `scheduling_request` of `notify me when X happens`
results in the automation calling the chat API with a `query_to_run`
like `tell me about X` and deciding whether to notify based on the
information gathered about X from the scheduled run. If these two are
not decoupled, the chat model may respond with how it can notify about
X instead of just asking about it.

Swap query_to_run with scheduling_request on the automation web page
This commit is contained in:
Debanjum
2024-12-20 16:11:25 -08:00
parent 3600a9a4f3
commit 6d219dcc1d
4 changed files with 71 additions and 52 deletions

View File

@@ -167,68 +167,68 @@ const timestamp = Date.now();
const suggestedAutomationsMetadata: AutomationsData[] = [ const suggestedAutomationsMetadata: AutomationsData[] = [
{ {
subject: "Weekly Newsletter", subject: "Weekly Newsletter",
query_to_run: scheduling_request:
"/research Compile a message including: 1. A recap of news from last week 2. An at-home workout I can do before work 3. A quote to inspire me for the week ahead", "/research Compile a message including: 1. A recap of news from last week 2. An at-home workout I can do before work 3. A quote to inspire me for the week ahead",
schedule: "9AM every Monday", schedule: "9AM every Monday",
next: "Next run at 9AM on Monday", next: "Next run at 9AM on Monday",
crontime: "0 9 * * 1", crontime: "0 9 * * 1",
id: timestamp, id: timestamp,
scheduling_request: "", query_to_run: "",
},
{
subject: "Daily Bedtime Story",
query_to_run:
"Compose a bedtime story that a five-year-old might enjoy. It should not exceed five paragraphs. Appeal to the imagination, but weave in learnings.",
schedule: "9PM every night",
next: "Next run at 9PM today",
crontime: "0 21 * * *",
id: timestamp + 1,
scheduling_request: "",
}, },
{ {
subject: "Front Page of Hacker News", subject: "Front Page of Hacker News",
query_to_run: scheduling_request:
"/research Summarize the top 5 posts from https://news.ycombinator.com/best and share them with me, including links", "/research Summarize the top 5 posts from https://news.ycombinator.com/best and share them with me, including links",
schedule: "9PM on every Wednesday", schedule: "9PM on every Wednesday",
next: "Next run at 9PM on Wednesday", next: "Next run at 9PM on Wednesday",
crontime: "0 21 * * 3", crontime: "0 21 * * 3",
id: timestamp + 2, id: timestamp + 2,
scheduling_request: "", query_to_run: "",
}, },
{ {
subject: "Market Summary", subject: "Market Summary",
query_to_run: scheduling_request:
"/research Get the market summary for today and share it with me. Focus on tech stocks and the S&P 500.", "/research Get the market summary for today and share it with me. Focus on tech stocks and the S&P 500.",
schedule: "9AM on every weekday", schedule: "9AM on every weekday",
next: "Next run at 9AM on Monday", next: "Next run at 9AM on Monday",
crontime: "0 9 * * *", crontime: "0 9 * * *",
id: timestamp + 3, id: timestamp + 3,
scheduling_request: "", query_to_run: "",
}, },
{ {
subject: "Market Crash Notification", subject: "Market Crash Notification",
query_to_run: "Notify me if the stock market fell by more than 5% today.", scheduling_request: "Notify me if the stock market fell by more than 5% today.",
schedule: "5PM every evening", schedule: "5PM every evening",
next: "Next run at 5PM today", next: "Next run at 5PM today",
crontime: "0 17 * * *", crontime: "0 17 * * *",
id: timestamp + 5, id: timestamp + 5,
scheduling_request: "", query_to_run: "",
}, },
{ {
subject: "Round-up of research papers about AI in healthcare", subject: "Round-up of research papers about AI in healthcare",
query_to_run: scheduling_request:
"/research Summarize the top 3 research papers about AI in healthcare that were published in the last week. Include links to the full papers.", "/research Summarize the top 3 research papers about AI in healthcare that were published in the last week. Include links to the full papers.",
schedule: "9AM every Friday", schedule: "9AM every Friday",
next: "Next run at 9AM on Friday", next: "Next run at 9AM on Friday",
crontime: "0 9 * * 5", crontime: "0 9 * * 5",
id: timestamp + 4, id: timestamp + 4,
scheduling_request: "", query_to_run: "",
},
{
subject: "Daily Bedtime Story",
scheduling_request:
"Compose a bedtime story that a five-year-old might enjoy. It should not exceed five paragraphs. Appeal to the imagination, but weave in learnings.",
schedule: "9PM every night",
next: "Next run at 9PM today",
crontime: "0 21 * * *",
id: timestamp + 1,
query_to_run: "",
}, },
]; ];
function createShareLink(automation: AutomationsData) { function createShareLink(automation: AutomationsData) {
const encodedSubject = encodeURIComponent(automation.subject); const encodedSubject = encodeURIComponent(automation.subject);
const encodedQuery = encodeURIComponent(automation.query_to_run); const encodedQuery = encodeURIComponent(automation.scheduling_request);
const encodedCrontime = encodeURIComponent(automation.crontime); const encodedCrontime = encodeURIComponent(automation.crontime);
const shareLink = `${window.location.origin}/automations?subject=${encodedSubject}&query=${encodedQuery}&crontime=${encodedCrontime}`; const shareLink = `${window.location.origin}/automations?subject=${encodedSubject}&query=${encodedQuery}&crontime=${encodedCrontime}`;
@@ -391,7 +391,7 @@ function AutomationsCard(props: AutomationsCardProps) {
</CardTitle> </CardTitle>
</CardHeader> </CardHeader>
<CardContent className="text-secondary-foreground break-all"> <CardContent className="text-secondary-foreground break-all">
{updatedAutomationData?.query_to_run || automation.query_to_run} {updatedAutomationData?.scheduling_request || automation.scheduling_request}
</CardContent> </CardContent>
<CardFooter className="flex flex-col items-start md:flex-row md:justify-between md:items-center gap-2"> <CardFooter className="flex flex-col items-start md:flex-row md:justify-between md:items-center gap-2">
<div className="flex gap-2"> <div className="flex gap-2">
@@ -451,8 +451,8 @@ function SharedAutomationCard(props: SharedAutomationCardProps) {
const automation: AutomationsData = { const automation: AutomationsData = {
id: 0, id: 0,
subject: decodeURIComponent(subject), subject: decodeURIComponent(subject),
query_to_run: decodeURIComponent(query), scheduling_request: decodeURIComponent(query),
scheduling_request: "", query_to_run: "",
schedule: cronToHumanReadableString(decodeURIComponent(crontime)), schedule: cronToHumanReadableString(decodeURIComponent(crontime)),
crontime: decodeURIComponent(crontime), crontime: decodeURIComponent(crontime),
next: "", next: "",
@@ -480,7 +480,7 @@ const EditAutomationSchema = z.object({
dayOfWeek: z.optional(z.number()), dayOfWeek: z.optional(z.number()),
dayOfMonth: z.optional(z.string()), dayOfMonth: z.optional(z.string()),
timeRecurrence: z.string({ required_error: "Time Recurrence is required" }), timeRecurrence: z.string({ required_error: "Time Recurrence is required" }),
queryToRun: z.string({ required_error: "Query to Run is required" }), schedulingRequest: z.string({ required_error: "Query to Run is required" }),
}); });
interface EditCardProps { interface EditCardProps {
@@ -507,7 +507,7 @@ function EditCard(props: EditCardProps) {
? getTimeRecurrenceFromCron(automation.crontime) ? getTimeRecurrenceFromCron(automation.crontime)
: "12:00 PM", : "12:00 PM",
dayOfMonth: automation?.crontime ? getDayOfMonthFromCron(automation.crontime) : "1", dayOfMonth: automation?.crontime ? getDayOfMonthFromCron(automation.crontime) : "1",
queryToRun: automation?.query_to_run, schedulingRequest: automation?.scheduling_request,
}, },
}); });
@@ -520,7 +520,7 @@ function EditCard(props: EditCardProps) {
); );
let updateQueryUrl = `/api/automation?`; let updateQueryUrl = `/api/automation?`;
updateQueryUrl += `q=${encodeURIComponent(values.queryToRun)}`; updateQueryUrl += `q=${encodeURIComponent(values.schedulingRequest)}`;
if (automation?.id && !props.createNew) { if (automation?.id && !props.createNew) {
updateQueryUrl += `&automation_id=${encodeURIComponent(automation.id)}`; updateQueryUrl += `&automation_id=${encodeURIComponent(automation.id)}`;
} }
@@ -829,7 +829,7 @@ function AutomationModificationForm(props: AutomationModificationFormProps) {
)} )}
<FormField <FormField
control={props.form.control} control={props.form.control}
name="queryToRun" name="schedulingRequest"
render={({ field }) => ( render={({ field }) => (
<FormItem className="space-y-1"> <FormItem className="space-y-1">
<FormLabel>Instructions</FormLabel> <FormLabel>Instructions</FormLabel>
@@ -850,7 +850,7 @@ function AutomationModificationForm(props: AutomationModificationFormProps) {
</FormControl> </FormControl>
<FormMessage /> <FormMessage />
{errors.subject && ( {errors.subject && (
<FormMessage>{errors.queryToRun?.message}</FormMessage> <FormMessage>{errors.schedulingRequest?.message}</FormMessage>
)} )}
</FormItem> </FormItem>
)} )}

View File

@@ -1783,6 +1783,19 @@ class AutomationAdapters:
return automation return automation
# Async lookup of a scheduled automation job by id.
# Raises ValueError("Invalid automation id") when the id does not carry this
# user's prefix or when the scheduler returns no job for it.
@staticmethod
async def aget_automation(user: KhojUser, automation_id: str) -> Job:
# Perform validation checks
# Check if user is allowed to access this automation id:
# the id must start with the "automation_<user.uuid>_" prefix
if not automation_id.startswith(f"automation_{user.uuid}_"):
raise ValueError("Invalid automation id")
# Check if an automation with this id exists.
# scheduler.get_job is wrapped with sync_to_async so it can be awaited here
automation: Job = await sync_to_async(state.scheduler.get_job)(job_id=automation_id)
if not automation:
raise ValueError("Invalid automation id")
return automation
@staticmethod @staticmethod
def delete_automation(user: KhojUser, automation_id: str): def delete_automation(user: KhojUser, automation_id: str):
# Get valid, user-owned automation # Get valid, user-owned automation

View File

@@ -935,7 +935,7 @@ AI: Here is one I found: "It's not denial. I'm just selective about the reality
User: Hahah, nice! Show a new one every morning. User: Hahah, nice! Show a new one every morning.
Khoj: {{ Khoj: {{
"crontime": "0 9 * * *", "crontime": "0 9 * * *",
"query": "/automated_task Share a funny Calvin and Hobbes or Bill Watterson quote from my notes", "query": "Share a funny Calvin and Hobbes or Bill Watterson quote from my notes",
"subject": "Your Calvin and Hobbes Quote for the Day" "subject": "Your Calvin and Hobbes Quote for the Day"
}} }}
@@ -955,7 +955,7 @@ AI: The latest released Khoj python package version is 1.5.0.
User: Notify me when version 2.0.0 is released User: Notify me when version 2.0.0 is released
Khoj: {{ Khoj: {{
"crontime": "0 10 * * *", "crontime": "0 10 * * *",
"query": "/automated_task What is the latest released version of the Khoj python package?", "query": "/automated_task /research What is the latest released version of the Khoj python package?",
"subject": "Khoj Python Package Version 2.0.0 Release" "subject": "Khoj Python Package Version 2.0.0 Release"
}} }}

View File

@@ -47,6 +47,7 @@ from khoj.routers.helpers import (
acreate_title_from_query, acreate_title_from_query,
get_user_config, get_user_config,
schedule_automation, schedule_automation,
schedule_query,
update_telemetry_state, update_telemetry_state,
) )
from khoj.search_filter.date_filter import DateFilter from khoj.search_filter.date_filter import DateFilter
@@ -584,11 +585,15 @@ async def post_automation(
if not cron_descriptor.get_description(crontime): if not cron_descriptor.get_description(crontime):
return Response(content="Invalid crontime", status_code=400) return Response(content="Invalid crontime", status_code=400)
# Infer subject, query to run
_, query_to_run, generated_subject = await schedule_query(q, conversation_history={}, user=user)
subject = subject or generated_subject
# Normalize query parameters # Normalize query parameters
# Add /automated_task prefix to query if not present # Add /automated_task prefix to query if not present
q = q.strip() query_to_run = query_to_run.strip()
if not q.startswith("/automated_task"): if not query_to_run.startswith("/automated_task"):
query_to_run = f"/automated_task {q}" query_to_run = f"/automated_task {query_to_run}"
# Normalize crontime for AP Scheduler CronTrigger # Normalize crontime for AP Scheduler CronTrigger
crontime = crontime.strip() crontime = crontime.strip()
@@ -603,23 +608,18 @@ async def post_automation(
minute_value = crontime.split(" ")[0] minute_value = crontime.split(" ")[0]
if not minute_value.isdigit(): if not minute_value.isdigit():
return Response( return Response(
content="Recurrence of every X minutes is unsupported. Please create a less frequent schedule.", content="Minute level recurrence is unsupported. Please create a less frequent schedule.",
status_code=400, status_code=400,
) )
if not subject:
subject = await acreate_title_from_query(q, user)
title = f"Automation: {subject}"
# Create new Conversation Session associated with this new task # Create new Conversation Session associated with this new task
title = f"Automation: {subject}"
conversation = await ConversationAdapters.acreate_conversation_session(user, request.user.client_app, title=title) conversation = await ConversationAdapters.acreate_conversation_session(user, request.user.client_app, title=title)
calling_url = request.url.replace(query=f"{request.url.query}")
# Schedule automation with query_to_run, timezone, subject directly provided by user # Schedule automation with query_to_run, timezone, subject directly provided by user
try: try:
# Use the query to run as the scheduling request if the scheduling request is unset # Use the query to run as the scheduling request if the scheduling request is unset
calling_url = request.url.replace(query=f"{request.url.query}")
automation = await schedule_automation( automation = await schedule_automation(
query_to_run, subject, crontime, timezone, q, user, calling_url, str(conversation.id) query_to_run, subject, crontime, timezone, q, user, calling_url, str(conversation.id)
) )
@@ -665,7 +665,7 @@ def trigger_manual_job(
@api.put("/automation", response_class=Response) @api.put("/automation", response_class=Response)
@requires(["authenticated"]) @requires(["authenticated"])
def edit_job( async def edit_job(
request: Request, request: Request,
automation_id: str, automation_id: str,
q: Optional[str], q: Optional[str],
@@ -686,16 +686,20 @@ def edit_job(
# Check, get automation to edit # Check, get automation to edit
try: try:
automation: Job = AutomationAdapters.get_automation(user, automation_id) automation: Job = await AutomationAdapters.aget_automation(user, automation_id)
except ValueError as e: except ValueError as e:
logger.error(f"Error editing automation {automation_id} for {user.email}: {e}", exc_info=True) logger.error(f"Error editing automation {automation_id} for {user.email}: {e}", exc_info=True)
return Response(content="Invalid automation", status_code=403) return Response(content="Invalid automation", status_code=403)
# Infer subject, query to run
_, query_to_run, _ = await schedule_query(q, conversation_history={}, user=user)
subject = subject
# Normalize query parameters # Normalize query parameters
# Add /automated_task prefix to query if not present # Add /automated_task prefix to query if not present
q = q.strip() query_to_run = query_to_run.strip()
if not q.startswith("/automated_task"): if not query_to_run.startswith("/automated_task"):
query_to_run = f"/automated_task {q}" query_to_run = f"/automated_task {query_to_run}"
# Normalize crontime for AP Scheduler CronTrigger # Normalize crontime for AP Scheduler CronTrigger
crontime = crontime.strip() crontime = crontime.strip()
if len(crontime.split(" ")) > 5: if len(crontime.split(" ")) > 5:
@@ -724,13 +728,15 @@ def edit_job(
title = f"Automation: {subject}" title = f"Automation: {subject}"
# Create new Conversation Session associated with this new task # Create new Conversation Session associated with this new task
conversation = ConversationAdapters.create_conversation_session(user, request.user.client_app, title=title) conversation = await ConversationAdapters.acreate_conversation_session(
user, request.user.client_app, title=title
)
conversation_id = str(conversation.id) conversation_id = str(conversation.id)
automation_metadata["conversation_id"] = conversation_id automation_metadata["conversation_id"] = conversation_id
# Modify automation with updated query, subject # Modify automation with updated query, subject
automation.modify( await sync_to_async(automation.modify)(
name=json.dumps(automation_metadata), name=json.dumps(automation_metadata),
kwargs={ kwargs={
"query_to_run": query_to_run, "query_to_run": query_to_run,
@@ -746,11 +752,11 @@ def edit_job(
user_timezone = pytz.timezone(timezone) user_timezone = pytz.timezone(timezone)
trigger = CronTrigger.from_crontab(crontime, user_timezone) trigger = CronTrigger.from_crontab(crontime, user_timezone)
if automation.trigger != trigger: if automation.trigger != trigger:
automation.reschedule(trigger=trigger) await sync_to_async(automation.reschedule)(trigger=trigger)
# Collate info about the updated user automation # Collate info about the updated user automation
automation = AutomationAdapters.get_automation(user, automation.id) automation = await AutomationAdapters.aget_automation(user, automation.id)
automation_info = AutomationAdapters.get_automation_metadata(user, automation) automation_info = await sync_to_async(AutomationAdapters.get_automation_metadata)(user, automation)
# Return modified automation information as a JSON response # Return modified automation information as a JSON response
return Response(content=json.dumps(automation_info), media_type="application/json", status_code=200) return Response(content=json.dumps(automation_info), media_type="application/json", status_code=200)