diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 609dd601..63812331 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -22,7 +22,7 @@ on: jobs: test: - name: Run Tests + name: Setup Application and Lint runs-on: ubuntu-latest strategy: fail-fast: false @@ -42,7 +42,7 @@ jobs: python -m pip install --upgrade pip - name: ⬇️ Install Application - run: pip install --upgrade .[dev] + run: pip install --no-cache-dir --upgrade .[dev] - name: 🌡️ Validate Application run: pre-commit run --hook-stage manual --all diff --git a/documentation/docs/online-data-sources/_category_.json b/documentation/docs/data-sources/_category_.json similarity index 100% rename from documentation/docs/online-data-sources/_category_.json rename to documentation/docs/data-sources/_category_.json diff --git a/documentation/docs/online-data-sources/github_integration.md b/documentation/docs/data-sources/github_integration.md similarity index 100% rename from documentation/docs/online-data-sources/github_integration.md rename to documentation/docs/data-sources/github_integration.md diff --git a/documentation/docs/online-data-sources/notion_integration.md b/documentation/docs/data-sources/notion_integration.md similarity index 100% rename from documentation/docs/online-data-sources/notion_integration.md rename to documentation/docs/data-sources/notion_integration.md diff --git a/documentation/docs/data-sources/share_your_data.md b/documentation/docs/data-sources/share_your_data.md new file mode 100644 index 00000000..2299b7d0 --- /dev/null +++ b/documentation/docs/data-sources/share_your_data.md @@ -0,0 +1,16 @@ +--- +sidebar_position: 0 +keywords: ["upload data", "upload files", "share data", "share files", "pdf ai", "ai for pdf", "ai for documents", "ai for files", "local ai pdf", "local ai documents", "local ai files"] + +--- + +# Upload your data + +There are several ways you can get started with sharing your data with the 
Khoj AI. + +- Drag and drop your documents via [the web UI](/clients/web/#upload-documents). This is best if you have a one-off document you need to interact with. +- Use the desktop app to [upload and sync your documents](/clients/desktop). This is best if you have a lot of documents on your computer or you need the docs to stay in sync. +- Setup the sync options for either [Obsidian](/clients/obsidian) or [Emacs](/clients/emacs) to automatically sync your documents with Khoj. This is best if you are already using these tools and want to leverage Khoj's AI capabilities. +- Configure your [Notion](/data-sources/notion_integration) or [Github](/data-sources/github_integration) to sync with Khoj. By providing your credentials, you can keep the data synced in the background. + +![demo of dragging and dropping a file](https://khoj-web-bucket.s3.amazonaws.com/drag_drop_file.gif) diff --git a/documentation/docs/features/all_features.md b/documentation/docs/features/all_features.md index 3d3b8941..28c49f9e 100644 --- a/documentation/docs/features/all_features.md +++ b/documentation/docs/features/all_features.md @@ -29,6 +29,6 @@ Khoj is available as a [Desktop app](/clients/desktop), [Emacs package](/clients ![](/img/khoj_clients.svg ':size=400px') ### Supported Data Sources -Khoj can understand your org-mode, markdown, PDF, plaintext files, [Github projects](/online-data-sources/github_integration) and [Notion pages](/online-data-sources/notion_integration). +Khoj can understand your org-mode, markdown, PDF, plaintext files, [Github projects](/data-sources/github_integration) and [Notion pages](/data-sources/notion_integration). 
![](/img/khoj_datasources.svg ':size=200px') diff --git a/documentation/docs/get-started/overview.md b/documentation/docs/get-started/overview.md index b0d2a51c..57f93804 100644 --- a/documentation/docs/get-started/overview.md +++ b/documentation/docs/get-started/overview.md @@ -1,6 +1,7 @@ --- sidebar_position: 0 slug: / +keywords: ["khoj", "khoj ai", "khoj docs", "khoj documentation", "khoj features", "khoj overview", "khoj quickstart", "khoj chat", "khoj search", "khoj cloud", "khoj self-host", "khoj setup", "open source ai", "local llm", "ai copilot", "second brain ai", "ai search engine"] --- # Overview @@ -28,7 +29,7 @@ Welcome to the Khoj Docs! This is the best place to get setup and explore Khoj's - Khoj is an open source, personal AI - You can [chat](/features/chat) with it about anything. It'll use files you shared with it to respond, when relevant - Quickly [find](/features/search) relevant notes and documents using natural language -- It understands pdf, plaintext, markdown, org-mode files, [notion pages](/online-data-sources/notion_integration) and [github repositories](/online-data-sources/github_integration) +- It understands pdf, plaintext, markdown, org-mode files, [notion pages](/data-sources/notion_integration) and [github repositories](/data-sources/github_integration) - Access it from your [Emacs](/clients/emacs), [Obsidian](/clients/obsidian), [Web browser](/clients/web) or the [Khoj Desktop app](/clients/desktop) - Use [cloud](https://app.khoj.dev/login) to access your Khoj anytime from anywhere, [self-host](/get-started/setup) on consumer hardware for privacy diff --git a/documentation/docs/get-started/setup.mdx b/documentation/docs/get-started/setup.mdx index 36aefd08..b9f32048 100644 --- a/documentation/docs/get-started/setup.mdx +++ b/documentation/docs/get-started/setup.mdx @@ -199,15 +199,24 @@ To disable HTTPS, set the `KHOJ_NO_HTTPS` environment variable to `True`. This c ### 2. Configure 1. 
Go to http://localhost:42110/server/admin and login with your admin credentials. - 1. Go to [OpenAI settings](http://localhost:42110/server/admin/database/openaiprocessorconversationconfig/) in the server admin settings to add an OpenAI processor conversation config. This is where you set your API key. Alternatively, you can go to the [offline chat settings](http://localhost:42110/server/admin/database/offlinechatprocessorconversationconfig/) and simply create a new setting with `Enabled` set to `True`. - 2. Go to the ChatModelOptions if you want to add additional models for chat. - - Set the `chat-model` field to a supported chat model[^1] of your choice. For example, you can specify `gpt-4-turbo-preview` if you're using OpenAI or `NousResearch/Hermes-2-Pro-Mistral-7B-GGUF` if you're using offline chat. - - Make sure to set the `model-type` field to `OpenAI` or `Offline` respectively. - - The `tokenizer` and `max-prompt-size` fields are optional. Set them only when using a non-standard model (i.e not mistral, gpt or llama2 model). +#### Configure Chat Model +##### Configure OpenAI or a custom OpenAI-compatible proxy server +1. Go to the [OpenAI settings](http://localhost:42110/server/admin/database/openaiprocessorconversationconfig/) in the server admin settings to add an OpenAI processor conversation config. This is where you set your API key and server API base URL. The API base URL is optional - it's only relevant if you're using another OpenAI-compatible proxy server. +2. Go over to configure your [chat model options](http://localhost:42110/server/admin/database/chatmodeloptions/). Set the `chat-model` field to a supported chat model[^1] of your choice. For example, you can specify `gpt-4-turbo-preview` if you're using OpenAI. + - Make sure to set the `model-type` field to `OpenAI`. + - The `tokenizer` and `max-prompt-size` fields are optional. Set them only if you're sure of the tokenizer or token limit for the model you're using. 
Contact us if you're unsure what to do here. + +##### Configure Offline Chat +1. No need to setup a conversation processor config! +2. Go over to configure your [chat model options](http://localhost:42110/server/admin/database/chatmodeloptions/). Set the `chat-model` field to a supported chat model[^1] of your choice. For example, we recommend `NousResearch/Hermes-2-Pro-Mistral-7B-GGUF`, but [any gguf model on huggingface](https://huggingface.co/models?library=gguf) should work. + - Make sure to set the `model-type` to `Offline`. Do not set `openai config`. + - The `tokenizer` and `max-prompt-size` fields are optional. Set them only when using a non-standard model (i.e not mistral, gpt or llama2 model) when you know the token limit. + +#### Share your data 1. Select files and folders to index [using the desktop client](/get-started/setup#2-download-the-desktop-client). When you click 'Save', the files will be sent to your server for indexing. - Select Notion workspaces and Github repositories to index using the web interface. -[^1]: Khoj, by default, can use [OpenAI GPT3.5+ chat models](https://platform.openai.com/docs/models/overview) or [GGUF chat models](https://huggingface.co/models?library=gguf). See [this section](/miscellaneous/advanced#use-openai-compatible-llm-api-server-self-hosting) to use non-standard chat models +[^1]: Khoj, by default, can use [OpenAI GPT3.5+ chat models](https://platform.openai.com/docs/models/overview) or [GGUF chat models](https://huggingface.co/models?library=gguf). See [this section](/miscellaneous/advanced#use-openai-compatible-llm-api-server-self-hosting) on how to locally use OpenAI-format compatible proxy servers. :::tip[Note] Using Safari on Mac? You might not be able to login to the admin panel. Try using Chrome or Firefox instead. 
diff --git a/manifest.json b/manifest.json index 0d0fef0e..d9cbf4bd 100644 --- a/manifest.json +++ b/manifest.json @@ -1,7 +1,7 @@ { "id": "khoj", "name": "Khoj", - "version": "1.11.0", + "version": "1.12.0", "minAppVersion": "0.15.0", "description": "An AI copilot for your Second Brain", "author": "Khoj Inc.", diff --git a/pyproject.toml b/pyproject.toml index 304b3886..06b2da55 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -76,9 +76,14 @@ dependencies = [ "django-phonenumber-field == 7.3.0", "phonenumbers == 8.13.27", "markdownify ~= 0.11.6", + "markdown-it-py ~= 3.0.0", "websockets == 12.0", "psutil >= 5.8.0", "huggingface-hub >= 0.22.2", + "apscheduler ~= 3.10.0", + "pytz ~= 2024.1", + "cron-descriptor == 1.4.3", + "django_apscheduler == 0.6.2", ] dynamic = ["version"] diff --git a/src/interface/desktop/chat.html b/src/interface/desktop/chat.html index 9dbcf908..3e719fe4 100644 --- a/src/interface/desktop/chat.html +++ b/src/interface/desktop/chat.html @@ -40,6 +40,7 @@ let region = null; let city = null; let countryName = null; + let timezone = null; fetch("https://ipapi.co/json") .then(response => response.json()) @@ -47,6 +48,7 @@ region = data.region; city = data.city; countryName = data.country_name; + timezone = data.timezone; }) .catch(err => { console.log(err); @@ -463,16 +465,16 @@ } // Generate backend API URL to execute query - let chatApi = `${hostURL}/api/chat?q=${encodeURIComponent(query)}&n=${resultsCount}&client=web&stream=true&conversation_id=${conversationID}®ion=${region}&city=${city}&country=${countryName}`; + let chatApi = `${hostURL}/api/chat?q=${encodeURIComponent(query)}&n=${resultsCount}&client=web&stream=true&conversation_id=${conversationID}®ion=${region}&city=${city}&country=${countryName}&timezone=${timezone}`; - let new_response = document.createElement("div"); - new_response.classList.add("chat-message", "khoj"); - new_response.attributes["data-meta"] = "🏮 Khoj at " + formatDate(new Date()); - 
chat_body.appendChild(new_response); + let newResponseEl = document.createElement("div"); + newResponseEl.classList.add("chat-message", "khoj"); + newResponseEl.attributes["data-meta"] = "🏮 Khoj at " + formatDate(new Date()); + chat_body.appendChild(newResponseEl); - let newResponseText = document.createElement("div"); - newResponseText.classList.add("chat-message-text", "khoj"); - new_response.appendChild(newResponseText); + let newResponseTextEl = document.createElement("div"); + newResponseTextEl.classList.add("chat-message-text", "khoj"); + newResponseEl.appendChild(newResponseTextEl); // Temporary status message to indicate that Khoj is thinking let loadingEllipsis = document.createElement("div"); @@ -495,7 +497,7 @@ loadingEllipsis.appendChild(thirdEllipsis); loadingEllipsis.appendChild(fourthEllipsis); - newResponseText.appendChild(loadingEllipsis); + newResponseTextEl.appendChild(loadingEllipsis); document.getElementById("chat-body").scrollTop = document.getElementById("chat-body").scrollHeight; let chatTooltip = document.getElementById("chat-tooltip"); @@ -540,11 +542,11 @@ // If the chunk is not a JSON object, just display it as is rawResponse += chunk; } finally { - newResponseText.innerHTML = ""; - newResponseText.appendChild(formatHTMLMessage(rawResponse)); + newResponseTextEl.innerHTML = ""; + newResponseTextEl.appendChild(formatHTMLMessage(rawResponse)); if (references != null) { - newResponseText.appendChild(references); + newResponseTextEl.appendChild(references); } document.getElementById("chat-body").scrollTop = document.getElementById("chat-body").scrollHeight; @@ -563,7 +565,7 @@ if (done) { // Append any references after all the data has been streamed if (references != {}) { - newResponseText.appendChild(createReferenceSection(references)); + newResponseTextEl.appendChild(createReferenceSection(references)); } document.getElementById("chat-body").scrollTop = document.getElementById("chat-body").scrollHeight; 
document.getElementById("chat-input").removeAttribute("disabled"); @@ -576,8 +578,8 @@ if (chunk.includes("### compiled references:")) { const additionalResponse = chunk.split("### compiled references:")[0]; rawResponse += additionalResponse; - newResponseText.innerHTML = ""; - newResponseText.appendChild(formatHTMLMessage(rawResponse)); + newResponseTextEl.innerHTML = ""; + newResponseTextEl.appendChild(formatHTMLMessage(rawResponse)); const rawReference = chunk.split("### compiled references:")[1]; const rawReferenceAsJson = JSON.parse(rawReference); @@ -589,14 +591,14 @@ readStream(); } else { // Display response from Khoj - if (newResponseText.getElementsByClassName("lds-ellipsis").length > 0) { - newResponseText.removeChild(loadingEllipsis); + if (newResponseTextEl.getElementsByClassName("lds-ellipsis").length > 0) { + newResponseTextEl.removeChild(loadingEllipsis); } // If the chunk is not a JSON object, just display it as is rawResponse += chunk; - newResponseText.innerHTML = ""; - newResponseText.appendChild(formatHTMLMessage(rawResponse)); + newResponseTextEl.innerHTML = ""; + newResponseTextEl.appendChild(formatHTMLMessage(rawResponse)); readStream(); } @@ -1073,11 +1075,12 @@ threeDotMenu.appendChild(conversationMenu); let deleteButton = document.createElement('button'); + deleteButton.type = "button"; deleteButton.innerHTML = "Delete"; deleteButton.classList.add("delete-conversation-button"); deleteButton.classList.add("three-dot-menu-button-item"); - deleteButton.addEventListener('click', function() { - // Ask for confirmation before deleting chat session + deleteButton.addEventListener('click', function(event) { + event.preventDefault(); let confirmation = confirm('Are you sure you want to delete this chat session?'); if (!confirmation) return; let deleteURL = `/api/chat/history?client=web&conversation_id=${incomingConversationId}`; @@ -1507,7 +1510,7 @@ #chat-input { font-family: var(--font-family); font-size: small; - height: 36px; + height: 48px; 
border-radius: 16px; resize: none; overflow-y: hidden; @@ -1725,52 +1728,43 @@ } .first-run-message-heading { - font-size: 20px; - font-weight: 300; - line-height: 1.5em; - color: var(--main-text-color); - margin: 0; - padding: 10px; + font-size: 20px; + font-weight: 300; + line-height: 1.5em; + color: var(--main-text-color); + margin: 0; + padding: 10px; } .first-run-message-text { - font-size: 18px; - font-weight: 300; - line-height: 1.5em; - color: var(--main-text-color); - margin: 0; - padding-bottom: 25px; + font-size: 18px; + font-weight: 300; + line-height: 1.5em; + color: var(--main-text-color); + margin: 0; + padding-bottom: 25px; } a.inline-chat-link { - display: block; - text-align: center; - font-size: 14px; - color: #fff; - padding: 6px 15px; - border-radius: 999px; - text-decoration: none; - background-color: rgba(71, 85, 105, 0.6); - transition: background-color 0.3s ease-in-out; + color: #475569; + text-decoration: none; + border-bottom: 1px dotted #475569; } - a.inline-chat-link:hover { - background-color: #475569; - } a.first-run-message-link { - display: block; - text-align: center; - font-size: 14px; - color: #fff; - padding: 6px 15px; - border-radius: 999px; - text-decoration: none; - background-color: rgba(71, 85, 105, 0.6); - transition: background-color 0.3s ease-in-out; + display: block; + text-align: center; + font-size: 14px; + color: #fff; + padding: 6px 15px; + border-radius: 999px; + text-decoration: none; + background-color: rgba(71, 85, 105, 0.6); + transition: background-color 0.3s ease-in-out; } a.first-run-message-link:hover { - background-color: #475569; + background-color: #475569; } a.reference-link { @@ -1934,6 +1928,7 @@ text-align: left; display: flex; position: relative; + margin: 0 8px; } .three-dot-menu { diff --git a/src/interface/desktop/package.json b/src/interface/desktop/package.json index 0260f47a..91c76e28 100644 --- a/src/interface/desktop/package.json +++ b/src/interface/desktop/package.json @@ -1,6 +1,6 @@ { 
"name": "Khoj", - "version": "1.11.0", + "version": "1.12.0", "description": "An AI copilot for your Second Brain", "author": "Saba Imran, Debanjum Singh Solanky ", "license": "GPL-3.0-or-later", diff --git a/src/interface/desktop/search.html b/src/interface/desktop/search.html index 9a3e0e0b..36a81d9b 100644 --- a/src/interface/desktop/search.html +++ b/src/interface/desktop/search.html @@ -2,6 +2,7 @@ + Khoj - Search diff --git a/src/interface/emacs/khoj.el b/src/interface/emacs/khoj.el index f3083a76..392d581e 100644 --- a/src/interface/emacs/khoj.el +++ b/src/interface/emacs/khoj.el @@ -6,7 +6,7 @@ ;; Saba Imran ;; Description: An AI copilot for your Second Brain ;; Keywords: search, chat, org-mode, outlines, markdown, pdf, image -;; Version: 1.11.0 +;; Version: 1.12.0 ;; Package-Requires: ((emacs "27.1") (transient "0.3.0") (dash "2.19.1")) ;; URL: https://github.com/khoj-ai/khoj/tree/master/src/interface/emacs diff --git a/src/interface/obsidian/manifest.json b/src/interface/obsidian/manifest.json index 0d0fef0e..d9cbf4bd 100644 --- a/src/interface/obsidian/manifest.json +++ b/src/interface/obsidian/manifest.json @@ -1,7 +1,7 @@ { "id": "khoj", "name": "Khoj", - "version": "1.11.0", + "version": "1.12.0", "minAppVersion": "0.15.0", "description": "An AI copilot for your Second Brain", "author": "Khoj Inc.", diff --git a/src/interface/obsidian/package.json b/src/interface/obsidian/package.json index 2887b069..47518d94 100644 --- a/src/interface/obsidian/package.json +++ b/src/interface/obsidian/package.json @@ -1,6 +1,6 @@ { "name": "Khoj", - "version": "1.11.0", + "version": "1.12.0", "description": "An AI copilot for your Second Brain", "author": "Debanjum Singh Solanky, Saba Imran ", "license": "GPL-3.0-or-later", diff --git a/src/interface/obsidian/src/chat_modal.ts b/src/interface/obsidian/src/chat_modal.ts index 504ce4db..31b938a1 100644 --- a/src/interface/obsidian/src/chat_modal.ts +++ b/src/interface/obsidian/src/chat_modal.ts @@ -15,6 +15,7 @@ export 
class KhojChatModal extends Modal { region: string; city: string; countryName: string; + timezone: string; constructor(app: App, setting: KhojSetting) { super(app); @@ -30,6 +31,7 @@ export class KhojChatModal extends Modal { this.region = data.region; this.city = data.city; this.countryName = data.country_name; + this.timezone = data.timezone; }) .catch(err => { console.log(err); @@ -393,7 +395,7 @@ export class KhojChatModal extends Modal { // Get chat response from Khoj backend let encodedQuery = encodeURIComponent(query); - let chatUrl = `${this.setting.khojUrl}/api/chat?q=${encodedQuery}&n=${this.setting.resultsCount}&client=obsidian&stream=true®ion=${this.region}&city=${this.city}&country=${this.countryName}`; + let chatUrl = `${this.setting.khojUrl}/api/chat?q=${encodedQuery}&n=${this.setting.resultsCount}&client=obsidian&stream=true®ion=${this.region}&city=${this.city}&country=${this.countryName}&timezone=${this.timezone}`; let responseElement = this.createKhojResponseDiv(); // Temporary status message to indicate that Khoj is thinking diff --git a/src/interface/obsidian/versions.json b/src/interface/obsidian/versions.json index c71fd3f2..8aaaaeb1 100644 --- a/src/interface/obsidian/versions.json +++ b/src/interface/obsidian/versions.json @@ -45,5 +45,8 @@ "1.10.0": "0.15.0", "1.10.1": "0.15.0", "1.10.2": "0.15.0", - "1.11.0": "0.15.0" + "1.11.0": "0.15.0", + "1.11.1": "0.15.0", + "1.11.2": "0.15.0", + "1.12.0": "0.15.0" } diff --git a/src/khoj/app/settings.py b/src/khoj/app/settings.py index c49cdf6d..2672f98d 100644 --- a/src/khoj/app/settings.py +++ b/src/khoj/app/settings.py @@ -40,6 +40,8 @@ CSRF_TRUSTED_ORIGINS = [ f"https://app.{KHOJ_DOMAIN}", ] +DISABLE_HTTPS = is_env_var_true("KHOJ_NO_HTTPS") + COOKIE_SAMESITE = "None" if DEBUG or os.getenv("KHOJ_DOMAIN") == None: SESSION_COOKIE_DOMAIN = "localhost" @@ -48,12 +50,21 @@ else: # Production Settings SESSION_COOKIE_DOMAIN = KHOJ_DOMAIN CSRF_COOKIE_DOMAIN = KHOJ_DOMAIN - SECURE_PROXY_SSL_HEADER = 
("HTTP_X_FORWARDED_PROTO", "https") + if not DISABLE_HTTPS: + SECURE_PROXY_SSL_HEADER = ("HTTP_X_FORWARDED_PROTO", "https") -SESSION_COOKIE_SECURE = not is_env_var_true("KHOJ_NO_HTTPS") -CSRF_COOKIE_SECURE = not is_env_var_true("KHOJ_NO_HTTPS") -COOKIE_SAMESITE = "None" -SESSION_COOKIE_SAMESITE = "None" +if DISABLE_HTTPS: + SESSION_COOKIE_SECURE = False + CSRF_COOKIE_SECURE = False + + # These need to be set to Lax in order to work with http in some browsers. See reference: https://docs.djangoproject.com/en/5.0/ref/settings/#std-setting-SESSION_COOKIE_SECURE + COOKIE_SAMESITE = "Lax" + SESSION_COOKIE_SAMESITE = "Lax" +else: + SESSION_COOKIE_SECURE = True + CSRF_COOKIE_SECURE = True + COOKIE_SAMESITE = "None" + SESSION_COOKIE_SAMESITE = "None" # Application definition @@ -66,6 +77,7 @@ INSTALLED_APPS = [ "django.contrib.messages", "django.contrib.staticfiles", "phonenumber_field", + "django_apscheduler", ] MIDDLEWARE = [ @@ -158,3 +170,20 @@ STATIC_URL = "/static/" # https://docs.djangoproject.com/en/4.2/ref/settings/#default-auto-field DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField" + + +# Format string for displaying run time timestamps in the Django admin site. The default +# just adds seconds to the standard Django format, which is useful for displaying the timestamps +# for jobs that are scheduled to run on intervals of less than one minute. +# +# See https://docs.djangoproject.com/en/dev/ref/settings/#datetime-format for format string +# syntax details. +APSCHEDULER_DATETIME_FORMAT = "N j, Y, f:s a" + +# Maximum run time allowed for jobs that are triggered manually via the Django admin site, which +# prevents admin site HTTP requests from timing out. +# +# Longer running jobs should probably be handed over to a background task processing library +# that supports multiple background worker processes instead (e.g. Dramatiq, Celery, Django-RQ, +# etc. See: https://djangopackages.org/grids/g/workers-queues-tasks/ for popular options). 
+APSCHEDULER_RUN_NOW_TIMEOUT = 240 # Seconds diff --git a/src/khoj/configure.py b/src/khoj/configure.py index 38b8223f..81e653d4 100644 --- a/src/khoj/configure.py +++ b/src/khoj/configure.py @@ -324,7 +324,7 @@ def update_content_index(): @schedule.repeat(schedule.every(22).to(25).hours) def update_content_index_regularly(): ProcessLockAdapters.run_with_lock( - update_content_index, ProcessLock.Operation.UPDATE_EMBEDDINGS, max_duration_in_seconds=60 * 60 * 2 + update_content_index, ProcessLock.Operation.INDEX_CONTENT, max_duration_in_seconds=60 * 60 * 2 ) diff --git a/src/khoj/database/adapters/__init__.py b/src/khoj/database/adapters/__init__.py index 96d8e51d..1a217414 100644 --- a/src/khoj/database/adapters/__init__.py +++ b/src/khoj/database/adapters/__init__.py @@ -1,17 +1,23 @@ +import json import logging import math import random +import re import secrets import sys from datetime import date, datetime, timedelta, timezone from enum import Enum -from typing import Callable, List, Optional, Type +from typing import Callable, Iterable, List, Optional, Type +import cron_descriptor +from apscheduler.job import Job from asgiref.sync import sync_to_async from django.contrib.sessions.backends.db import SessionStore from django.db import models from django.db.models import Q from django.db.models.manager import BaseManager +from django.db.utils import IntegrityError +from django_apscheduler.models import DjangoJob, DjangoJobExecution from fastapi import HTTPException from pgvector.django import CosineDistance from torch import Tensor @@ -30,6 +36,7 @@ from khoj.database.models import ( NotionConfig, OpenAIProcessorConversationConfig, ProcessLock, + PublicConversation, ReflectiveQuestion, SearchModelConfig, SpeechToTextModelOptions, @@ -68,7 +75,14 @@ async def set_notion_config(token: str, user: KhojUser): return notion_config -async def create_khoj_token(user: KhojUser, name=None): +def create_khoj_token(user: KhojUser, name=None): + "Create Khoj API key for user" 
+ token = f"kk-{secrets.token_urlsafe(32)}" + name = name or f"{generate_random_name().title()}" + return KhojApiUser.objects.create(token=token, user=user, name=name) + + +async def acreate_khoj_token(user: KhojUser, name=None): "Create Khoj API key for user" token = f"kk-{secrets.token_urlsafe(32)}" name = name or f"{generate_random_name().title()}" @@ -429,7 +443,7 @@ class ProcessLockAdapters: return ProcessLock.objects.filter(name=process_name).delete() @staticmethod - def run_with_lock(func: Callable, operation: ProcessLock.Operation, max_duration_in_seconds: int = 600): + def run_with_lock(func: Callable, operation: ProcessLock.Operation, max_duration_in_seconds: int = 600, **kwargs): # Exit early if process lock is already taken if ProcessLockAdapters.is_process_locked(operation): logger.info(f"🔒 Skip executing {func} as {operation} lock is already taken") @@ -443,8 +457,11 @@ class ProcessLockAdapters: # Execute Function with timer(f"🔒 Run {func} with {operation} process lock", logger): - func() + func(**kwargs) success = True + except IntegrityError as e: + logger.error(f"⚠️ Unable to create the process lock for {func} with {operation}: {e}") + success = False except Exception as e: logger.error(f"🚨 Error executing {func} with {operation} process lock: {e}", exc_info=True) success = False @@ -454,6 +471,13 @@ class ProcessLockAdapters: logger.info(f"🔓 Unlocked {operation} process after executing {func} {'Succeeded' if success else 'Failed'}") +def run_with_process_lock(*args, **kwargs): + """Wrapper function used for scheduling jobs. + Required as APScheduler can't discover the `ProcessLockAdapter.run_with_lock' method on its own. 
+ """ + return ProcessLockAdapters.run_with_lock(*args, **kwargs) + + class ClientApplicationAdapters: @staticmethod async def aget_client_application_by_id(client_id: str, client_secret: str): @@ -537,7 +561,28 @@ class AgentAdapters: return await Agent.objects.filter(name=AgentAdapters.DEFAULT_AGENT_NAME).afirst() +class PublicConversationAdapters: + @staticmethod + def get_public_conversation_by_slug(slug: str): + return PublicConversation.objects.filter(slug=slug).first() + + @staticmethod + def get_public_conversation_url(public_conversation: PublicConversation): + # Public conversations are viewable by anyone, but not editable. + return f"/share/chat/{public_conversation.slug}/" + + class ConversationAdapters: + @staticmethod + def make_public_conversation_copy(conversation: Conversation): + return PublicConversation.objects.create( + source_owner=conversation.user, + agent=conversation.agent, + conversation_log=conversation.conversation_log, + slug=conversation.slug, + title=conversation.title, + ) + @staticmethod def get_conversation_by_user( user: KhojUser, client_application: ClientApplication = None, conversation_id: int = None @@ -623,10 +668,6 @@ class ConversationAdapters: def get_openai_conversation_config(): return OpenAIProcessorConversationConfig.objects.filter().first() - @staticmethod - async def aget_openai_conversation_config(): - return await OpenAIProcessorConversationConfig.objects.filter().afirst() - @staticmethod def has_valid_openai_conversation_config(): return OpenAIProcessorConversationConfig.objects.filter().exists() @@ -659,7 +700,20 @@ class ConversationAdapters: @staticmethod async def aget_default_conversation_config(): - return await ChatModelOptions.objects.filter().afirst() + return await ChatModelOptions.objects.filter().prefetch_related("openai_config").afirst() + + @staticmethod + def create_conversation_from_public_conversation( + user: KhojUser, public_conversation: PublicConversation, client_app: ClientApplication + ): + 
return Conversation.objects.create( + user=user, + conversation_log=public_conversation.conversation_log, + client=client_app, + slug=public_conversation.slug, + title=public_conversation.title, + agent=public_conversation.agent, + ) @staticmethod def save_conversation( @@ -697,29 +751,15 @@ class ConversationAdapters: user_conversation_config.setting = new_config user_conversation_config.save() - @staticmethod - async def get_default_offline_llm(): - return await ChatModelOptions.objects.filter(model_type="offline").afirst() - @staticmethod async def aget_user_conversation_config(user: KhojUser): - config = await UserConversationConfig.objects.filter(user=user).prefetch_related("setting").afirst() + config = ( + await UserConversationConfig.objects.filter(user=user).prefetch_related("setting__openai_config").afirst() + ) if not config: return None return config.setting - @staticmethod - async def has_openai_chat(): - return await OpenAIProcessorConversationConfig.objects.filter().aexists() - - @staticmethod - async def aget_default_openai_llm(): - return await ChatModelOptions.objects.filter(model_type="openai").afirst() - - @staticmethod - async def get_openai_chat_config(): - return await OpenAIProcessorConversationConfig.objects.filter().afirst() - @staticmethod async def get_speech_to_text_config(): return await SpeechToTextModelOptions.objects.filter().afirst() @@ -744,7 +784,8 @@ class ConversationAdapters: @staticmethod def get_valid_conversation_config(user: KhojUser, conversation: Conversation): - if conversation.agent and conversation.agent.chat_model: + agent: Agent = conversation.agent if AgentAdapters.get_default_agent() != conversation.agent else None + if agent and agent.chat_model: conversation_config = conversation.agent.chat_model else: conversation_config = ConversationAdapters.get_conversation_config(user) @@ -760,8 +801,7 @@ class ConversationAdapters: return conversation_config - openai_chat_config = 
ConversationAdapters.get_openai_conversation_config() - if openai_chat_config and conversation_config.model_type == "openai": + if conversation_config.model_type == "openai" and conversation_config.openai_config: return conversation_config else: @@ -919,3 +959,79 @@ class EntryAdapters: @staticmethod def get_unique_file_sources(user: KhojUser): return Entry.objects.filter(user=user).values_list("file_source", flat=True).distinct().all() + + +class AutomationAdapters: + @staticmethod + def get_automations(user: KhojUser) -> Iterable[Job]: + all_automations: Iterable[Job] = state.scheduler.get_jobs() + for automation in all_automations: + if automation.id.startswith(f"automation_{user.uuid}_"): + yield automation + + @staticmethod + def get_automation_metadata(user: KhojUser, automation: Job): + # Perform validation checks + # Check if user is allowed to delete this automation id + if not automation.id.startswith(f"automation_{user.uuid}_"): + raise ValueError("Invalid automation id") + + automation_metadata = json.loads(automation.name) + crontime = automation_metadata["crontime"] + timezone = automation.next_run_time.strftime("%Z") + schedule = f"{cron_descriptor.get_description(crontime)} {timezone}" + return { + "id": automation.id, + "subject": automation_metadata["subject"], + "query_to_run": re.sub(r"^/automated_task\s*", "", automation_metadata["query_to_run"]), + "scheduling_request": automation_metadata["scheduling_request"], + "schedule": schedule, + "crontime": crontime, + "next": automation.next_run_time.strftime("%Y-%m-%d %I:%M %p %Z"), + } + + @staticmethod + def get_job_last_run(user: KhojUser, automation: Job): + # Perform validation checks + # Check if user is allowed to delete this automation id + if not automation.id.startswith(f"automation_{user.uuid}_"): + raise ValueError("Invalid automation id") + + django_job = DjangoJob.objects.filter(id=automation.id).first() + execution = DjangoJobExecution.objects.filter(job=django_job, status="Executed") 
+ + last_run_time = None + + if execution.exists(): + last_run_time = execution.latest("run_time").run_time + + return last_run_time.strftime("%Y-%m-%d %I:%M %p %Z") if last_run_time else None + + @staticmethod + def get_automations_metadata(user: KhojUser): + for automation in AutomationAdapters.get_automations(user): + yield AutomationAdapters.get_automation_metadata(user, automation) + + @staticmethod + def get_automation(user: KhojUser, automation_id: str) -> Job: + # Perform validation checks + # Check if user is allowed to delete this automation id + if not automation_id.startswith(f"automation_{user.uuid}_"): + raise ValueError("Invalid automation id") + # Check if automation with this id exist + automation: Job = state.scheduler.get_job(job_id=automation_id) + if not automation: + raise ValueError("Invalid automation id") + + return automation + + @staticmethod + def delete_automation(user: KhojUser, automation_id: str): + # Get valid, user-owned automation + automation: Job = AutomationAdapters.get_automation(user, automation_id) + + # Collate info about user automation to be deleted + automation_metadata = AutomationAdapters.get_automation_metadata(user, automation) + + automation.remove() + return automation_metadata diff --git a/src/khoj/database/admin.py b/src/khoj/database/admin.py index b76b0fce..2fc5a1cd 100644 --- a/src/khoj/database/admin.py +++ b/src/khoj/database/admin.py @@ -15,6 +15,7 @@ from khoj.database.models import ( KhojUser, NotionConfig, OpenAIProcessorConversationConfig, + ProcessLock, ReflectiveQuestion, SearchModelConfig, SpeechToTextModelOptions, @@ -44,10 +45,10 @@ class KhojUserAdmin(UserAdmin): admin.site.register(KhojUser, KhojUserAdmin) admin.site.register(ChatModelOptions) +admin.site.register(ProcessLock) admin.site.register(SpeechToTextModelOptions) admin.site.register(OpenAIProcessorConversationConfig) admin.site.register(SearchModelConfig) -admin.site.register(Subscription) admin.site.register(ReflectiveQuestion) 
admin.site.register(UserSearchModelConfig) admin.site.register(TextToImageModelConfig) @@ -83,6 +84,18 @@ class EntryAdmin(admin.ModelAdmin): ordering = ("-created_at",) +@admin.register(Subscription) +class KhojUserSubscription(admin.ModelAdmin): + list_display = ( + "id", + "user", + "type", + ) + + search_fields = ("id", "user__email", "user__username", "type") + list_filter = ("type",) + + @admin.register(Conversation) class ConversationAdmin(admin.ModelAdmin): list_display = ( diff --git a/src/khoj/database/migrations/0036_alter_processlock_name.py b/src/khoj/database/migrations/0036_alter_processlock_name.py new file mode 100644 index 00000000..87c9a0d4 --- /dev/null +++ b/src/khoj/database/migrations/0036_alter_processlock_name.py @@ -0,0 +1,19 @@ +# Generated by Django 4.2.10 on 2024-04-16 18:39 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("database", "0035_processlock"), + ] + + operations = [ + migrations.AlterField( + model_name="processlock", + name="name", + field=models.CharField( + choices=[("index_content", "Index Content"), ("scheduled_job", "Scheduled Job")], max_length=200 + ), + ), + ] diff --git a/src/khoj/database/migrations/0036_publicconversation.py b/src/khoj/database/migrations/0036_publicconversation.py new file mode 100644 index 00000000..54a98fa1 --- /dev/null +++ b/src/khoj/database/migrations/0036_publicconversation.py @@ -0,0 +1,42 @@ +# Generated by Django 4.2.10 on 2024-04-17 13:27 + +import django.db.models.deletion +from django.conf import settings +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("database", "0035_processlock"), + ] + + operations = [ + migrations.CreateModel( + name="PublicConversation", + fields=[ + ("id", models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")), + ("created_at", models.DateTimeField(auto_now_add=True)), + ("updated_at", 
models.DateTimeField(auto_now=True)), + ("conversation_log", models.JSONField(default=dict)), + ("slug", models.CharField(blank=True, default=None, max_length=200, null=True)), + ("title", models.CharField(blank=True, default=None, max_length=200, null=True)), + ( + "agent", + models.ForeignKey( + blank=True, + default=None, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + to="database.agent", + ), + ), + ( + "source_owner", + models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL), + ), + ], + options={ + "abstract": False, + }, + ), + ] diff --git a/src/khoj/database/migrations/0037_chatmodeloptions_openai_config_and_more.py b/src/khoj/database/migrations/0037_chatmodeloptions_openai_config_and_more.py new file mode 100644 index 00000000..a7487e8c --- /dev/null +++ b/src/khoj/database/migrations/0037_chatmodeloptions_openai_config_and_more.py @@ -0,0 +1,51 @@ +# Generated by Django 4.2.10 on 2024-04-24 05:46 + +import django.db.models.deletion +from django.db import migrations, models + + +def attach_openai_config(apps, schema_editor): + OpenAIProcessorConversationConfig = apps.get_model("database", "OpenAIProcessorConversationConfig") + openai_processor_conversation_config = OpenAIProcessorConversationConfig.objects.first() + if openai_processor_conversation_config: + ChatModelOptions = apps.get_model("database", "ChatModelOptions") + for chat_model_option in ChatModelOptions.objects.all(): + if chat_model_option.model_type == "openai": + chat_model_option.openai_config = openai_processor_conversation_config + chat_model_option.save() + + +def separate_openai_config(apps, schema_editor): + pass + + +class Migration(migrations.Migration): + dependencies = [ + ("database", "0036_delete_offlinechatprocessorconversationconfig"), + ] + + operations = [ + migrations.AddField( + model_name="chatmodeloptions", + name="openai_config", + field=models.ForeignKey( + blank=True, + default=None, + null=True, + 
on_delete=django.db.models.deletion.CASCADE, + to="database.openaiprocessorconversationconfig", + ), + ), + migrations.AddField( + model_name="openaiprocessorconversationconfig", + name="api_base_url", + field=models.URLField(blank=True, default=None, null=True), + ), + migrations.AddField( + model_name="openaiprocessorconversationconfig", + name="name", + field=models.CharField(default="default", max_length=200), + preserve_default=False, + ), + migrations.RunPython(attach_openai_config, reverse_code=separate_openai_config), + ] diff --git a/src/khoj/database/migrations/0038_merge_20240425_0857.py b/src/khoj/database/migrations/0038_merge_20240425_0857.py new file mode 100644 index 00000000..3bb20914 --- /dev/null +++ b/src/khoj/database/migrations/0038_merge_20240425_0857.py @@ -0,0 +1,14 @@ +# Generated by Django 4.2.10 on 2024-04-25 08:57 + +from typing import List + +from django.db import migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("database", "0037_chatmodeloptions_openai_config_and_more"), + ("database", "0037_searchmodelconfig_bi_encoder_docs_encode_config_and_more"), + ] + + operations: List[str] = [] diff --git a/src/khoj/database/migrations/0038_merge_20240426_1640.py b/src/khoj/database/migrations/0038_merge_20240426_1640.py new file mode 100644 index 00000000..74cabb85 --- /dev/null +++ b/src/khoj/database/migrations/0038_merge_20240426_1640.py @@ -0,0 +1,12 @@ +# Generated by Django 4.2.10 on 2024-04-26 16:40 + +from django.db import migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("database", "0036_alter_processlock_name"), + ("database", "0037_searchmodelconfig_bi_encoder_docs_encode_config_and_more"), + ] + + operations: list = [] diff --git a/src/khoj/database/migrations/0039_merge_20240501_0301.py b/src/khoj/database/migrations/0039_merge_20240501_0301.py new file mode 100644 index 00000000..c2bb1a87 --- /dev/null +++ b/src/khoj/database/migrations/0039_merge_20240501_0301.py @@ -0,0 
+1,12 @@ +# Generated by Django 4.2.10 on 2024-05-01 03:01 + +from django.db import migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("database", "0038_merge_20240425_0857"), + ("database", "0038_merge_20240426_1640"), + ] + + operations: list = [] diff --git a/src/khoj/database/migrations/0040_alter_processlock_name.py b/src/khoj/database/migrations/0040_alter_processlock_name.py new file mode 100644 index 00000000..8c43aedb --- /dev/null +++ b/src/khoj/database/migrations/0040_alter_processlock_name.py @@ -0,0 +1,26 @@ +# Generated by Django 4.2.10 on 2024-05-04 13:12 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + def delete_all_existing_process_locks(apps, schema_editor): + ProcessLock = apps.get_model("database", "ProcessLock") + ProcessLock.objects.all().delete() + + dependencies = [ + ("database", "0039_merge_20240501_0301"), + ] + + operations = [ + migrations.AlterField( + model_name="processlock", + name="name", + field=models.CharField( + choices=[("index_content", "Index Content"), ("scheduled_job", "Scheduled Job")], + max_length=200, + unique=True, + ), + ), + migrations.RunPython(delete_all_existing_process_locks, reverse_code=migrations.RunPython.noop), + ] diff --git a/src/khoj/database/migrations/0040_merge_20240504_1010.py b/src/khoj/database/migrations/0040_merge_20240504_1010.py new file mode 100644 index 00000000..3abedb3c --- /dev/null +++ b/src/khoj/database/migrations/0040_merge_20240504_1010.py @@ -0,0 +1,14 @@ +# Generated by Django 4.2.10 on 2024-05-04 10:10 + +from typing import List + +from django.db import migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("database", "0036_publicconversation"), + ("database", "0039_merge_20240501_0301"), + ] + + operations: List[str] = [] diff --git a/src/khoj/database/migrations/0041_merge_20240505_1234.py b/src/khoj/database/migrations/0041_merge_20240505_1234.py new file mode 100644 index 
00000000..b3cea861 --- /dev/null +++ b/src/khoj/database/migrations/0041_merge_20240505_1234.py @@ -0,0 +1,14 @@ +# Generated by Django 4.2.10 on 2024-05-05 12:34 + +from typing import List + +from django.db import migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("database", "0040_alter_processlock_name"), + ("database", "0040_merge_20240504_1010"), + ] + + operations: List[str] = [] diff --git a/src/khoj/database/models/__init__.py b/src/khoj/database/models/__init__.py index ae13e980..6921fcae 100644 --- a/src/khoj/database/models/__init__.py +++ b/src/khoj/database/models/__init__.py @@ -1,3 +1,4 @@ +import re import uuid from random import choice @@ -73,6 +74,12 @@ class Subscription(BaseModel): renewal_date = models.DateTimeField(null=True, default=None, blank=True) +class OpenAIProcessorConversationConfig(BaseModel): + name = models.CharField(max_length=200) + api_key = models.CharField(max_length=200) + api_base_url = models.URLField(max_length=200, default=None, blank=True, null=True) + + class ChatModelOptions(BaseModel): class ModelType(models.TextChoices): OPENAI = "openai" @@ -82,6 +89,9 @@ class ChatModelOptions(BaseModel): tokenizer = models.CharField(max_length=200, default=None, null=True, blank=True) chat_model = models.CharField(max_length=200, default="NousResearch/Hermes-2-Pro-Mistral-7B-GGUF") model_type = models.CharField(max_length=200, choices=ModelType.choices, default=ModelType.OFFLINE) + openai_config = models.ForeignKey( + OpenAIProcessorConversationConfig, on_delete=models.CASCADE, default=None, null=True, blank=True + ) class Agent(BaseModel): @@ -100,11 +110,12 @@ class Agent(BaseModel): class ProcessLock(BaseModel): class Operation(models.TextChoices): - UPDATE_EMBEDDINGS = "update_embeddings" + INDEX_CONTENT = "index_content" + SCHEDULED_JOB = "scheduled_job" # We need to make sure that some operations are thread-safe. To do so, add locks for potentially shared operations. 
# For example, we need to make sure that only one process is updating the embeddings at a time. - name = models.CharField(max_length=200, choices=Operation.choices) + name = models.CharField(max_length=200, choices=Operation.choices, unique=True) started_at = models.DateTimeField(auto_now_add=True) max_duration_in_seconds = models.IntegerField(default=60 * 60 * 12) # 12 hours @@ -211,10 +222,6 @@ class TextToImageModelConfig(BaseModel): model_type = models.CharField(max_length=200, choices=ModelType.choices, default=ModelType.OPENAI) -class OpenAIProcessorConversationConfig(BaseModel): - api_key = models.CharField(max_length=200) - - class SpeechToTextModelOptions(BaseModel): class ModelType(models.TextChoices): OPENAI = "openai" @@ -243,6 +250,36 @@ class Conversation(BaseModel): agent = models.ForeignKey(Agent, on_delete=models.SET_NULL, default=None, null=True, blank=True) +class PublicConversation(BaseModel): + source_owner = models.ForeignKey(KhojUser, on_delete=models.CASCADE) + conversation_log = models.JSONField(default=dict) + slug = models.CharField(max_length=200, default=None, null=True, blank=True) + title = models.CharField(max_length=200, default=None, null=True, blank=True) + agent = models.ForeignKey(Agent, on_delete=models.SET_NULL, default=None, null=True, blank=True) + + +@receiver(pre_save, sender=PublicConversation) +def verify_public_conversation(sender, instance, **kwargs): + def generate_random_alphanumeric(length): + characters = "0123456789abcdefghijklmnopqrstuvwxyz" + return "".join(choice(characters) for _ in range(length)) + + # check if this is a new instance + if instance._state.adding: + slug = re.sub(r"\W+", "-", instance.slug.lower())[:50] + observed_random_id = set() + while PublicConversation.objects.filter(slug=slug).exists(): + try: + random_id = generate_random_alphanumeric(7) + except IndexError: + raise ValidationError( + "Unable to generate a unique slug for the Public Conversation. Please try again later." 
+ ) + observed_random_id.add(random_id) + slug = f"{slug}-{random_id}" + instance.slug = slug + + class ReflectiveQuestion(BaseModel): question = models.CharField(max_length=500) user = models.ForeignKey(KhojUser, on_delete=models.CASCADE, default=None, null=True, blank=True) diff --git a/src/khoj/interface/email/task.html b/src/khoj/interface/email/task.html new file mode 100644 index 00000000..d6782a49 --- /dev/null +++ b/src/khoj/interface/email/task.html @@ -0,0 +1,41 @@ + + + + Khoj AI - Automation + + + + + +
+
+

Your Automation, From Your Personal AI (Preview)

+ +
+
+ +

{{subject}}

+
+

{{result}}

+
+
+

The automation I ran on your behalf: {{query}}

+

You can manage your automations via the settings page.

+

This is an experimental feature. Please share any feedback with team@khoj.dev.

+
+
+

- Khoj

+ + + + + + + + +
DocsGitHubTwitterLinkedInDiscord
+ + + diff --git a/src/khoj/interface/web/assets/icons/automation.svg b/src/khoj/interface/web/assets/icons/automation.svg new file mode 100644 index 00000000..162dd9ba --- /dev/null +++ b/src/khoj/interface/web/assets/icons/automation.svg @@ -0,0 +1,37 @@ + + + + + + + diff --git a/src/khoj/interface/web/assets/icons/collapse.svg b/src/khoj/interface/web/assets/icons/collapse.svg new file mode 100644 index 00000000..8b3f04cf --- /dev/null +++ b/src/khoj/interface/web/assets/icons/collapse.svg @@ -0,0 +1,17 @@ + + + + + collapse + Created with Sketch Beta. + + + + + + + + + + + diff --git a/src/khoj/interface/web/assets/icons/copy-solid.svg b/src/khoj/interface/web/assets/icons/copy-solid.svg deleted file mode 100644 index da7020be..00000000 --- a/src/khoj/interface/web/assets/icons/copy-solid.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/src/khoj/interface/web/assets/icons/delete.svg b/src/khoj/interface/web/assets/icons/delete.svg new file mode 100644 index 00000000..8e078275 --- /dev/null +++ b/src/khoj/interface/web/assets/icons/delete.svg @@ -0,0 +1,26 @@ + + + + + diff --git a/src/khoj/interface/web/assets/icons/edit.svg b/src/khoj/interface/web/assets/icons/edit.svg new file mode 100644 index 00000000..9dd66854 --- /dev/null +++ b/src/khoj/interface/web/assets/icons/edit.svg @@ -0,0 +1,4 @@ + + + + diff --git a/src/khoj/interface/web/assets/icons/microphone-solid.svg b/src/khoj/interface/web/assets/icons/microphone-solid.svg deleted file mode 100644 index 3fc4b91d..00000000 --- a/src/khoj/interface/web/assets/icons/microphone-solid.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/src/khoj/interface/web/assets/icons/new.svg b/src/khoj/interface/web/assets/icons/new.svg new file mode 100644 index 00000000..f27d95f6 --- /dev/null +++ b/src/khoj/interface/web/assets/icons/new.svg @@ -0,0 +1,23 @@ + + + + + diff --git a/src/khoj/interface/web/assets/icons/trash-solid.svg b/src/khoj/interface/web/assets/icons/trash-solid.svg deleted file mode 100644 index 
768d80f8..00000000 --- a/src/khoj/interface/web/assets/icons/trash-solid.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/src/khoj/interface/web/assets/khoj.css b/src/khoj/interface/web/assets/khoj.css index 3c3536a7..1c57fbce 100644 --- a/src/khoj/interface/web/assets/khoj.css +++ b/src/khoj/interface/web/assets/khoj.css @@ -199,7 +199,7 @@ img.khoj-logo { border: 3px solid var(--primary-hover); } -@media screen and (max-width: 700px) { +@media screen and (max-width: 1000px) { .khoj-nav-dropdown-content { display: block; grid-auto-flow: row; @@ -215,7 +215,7 @@ img.khoj-logo { } } -@media only screen and (max-width: 700px) { +@media only screen and (max-width: 1000px) { div.khoj-header { display: grid; grid-auto-flow: column; diff --git a/src/khoj/interface/web/assets/natural-cron.min.js b/src/khoj/interface/web/assets/natural-cron.min.js new file mode 100644 index 00000000..d974dc4f --- /dev/null +++ b/src/khoj/interface/web/assets/natural-cron.min.js @@ -0,0 +1 @@ +!function(e){"object"==typeof exports&&"undefined"!=typeof module?module.exports=e():"function"==typeof define&&define.amd?define([],e):("undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof self?self:this).getCronString=e()}(function(){return function r(a,o,s){function u(n,e){if(!o[n]){if(!a[n]){var t="function"==typeof require&&require;if(!e&&t)return t(n,!0);if(i)return i(n,!0);throw(t=new Error("Cannot find module '"+n+"'")).code="MODULE_NOT_FOUND",t}t=o[n]={exports:{}},a[n][0].call(t.exports,function(e){return u(a[n][1][e]||e)},t,t.exports,r,a,o,s)}return o[n].exports}for(var i="function"==typeof require&&require,e=0;e - Khoj - Settings + Khoj + + + @@ -57,11 +66,12 @@ To get started, just start typing below. 
You can also type / to see a list of co let region = null; let city = null; let countryName = null; + let timezone = null; let waitingForLocation = true; let websocketState = { - newResponseText: null, - newResponseElement: null, + newResponseTextEl: null, + newResponseEl: null, loadingEllipsis: null, references: {}, rawResponse: "", @@ -73,13 +83,14 @@ To get started, just start typing below. You can also type / to see a list of co region = data.region; city = data.city; countryName = data.country_name; + timezone = data.timezone; }) .catch(err => { console.log(err); return; }) .finally(() => { - console.debug("Region:", region, "City:", city, "Country:", countryName); + console.debug("Region:", region, "City:", city, "Country:", countryName, "Timezone:", timezone); waitingForLocation = false; setupWebSocket(); }); @@ -341,6 +352,10 @@ To get started, just start typing below. You can also type / to see a list of co var md = window.markdownit(); let newHTML = message; + // Replace LaTeX delimiters with placeholders + newHTML = newHTML.replace(/\\\(/g, 'LEFTPAREN').replace(/\\\)/g, 'RIGHTPAREN') + .replace(/\\\[/g, 'LEFTBRACKET').replace(/\\\]/g, 'RIGHTBRACKET'); + // Remove any text between [INST] and tags. These are spurious instructions for the AI chat model. newHTML = newHTML.replace(/\[INST\].+(<\/s>)?/g, ''); @@ -357,6 +372,11 @@ To get started, just start typing below. You can also type / to see a list of co // Render markdown newHTML = raw ? newHTML : md.render(newHTML); + + // Replace placeholders with LaTeX delimiters + newHTML = newHTML.replace(/LEFTPAREN/g, '\\(').replace(/RIGHTPAREN/g, '\\)') + .replace(/LEFTBRACKET/g, '\\[').replace(/RIGHTBRACKET/g, '\\]'); + // Set rendered markdown to HTML DOM element let element = document.createElement('div'); element.innerHTML = newHTML; @@ -375,6 +395,19 @@ To get started, just start typing below. 
You can also type / to see a list of co element.append(copyButton); } + renderMathInElement(element, { + // customised options + // • auto-render specific keys, e.g.: + delimiters: [ + {left: '$$', right: '$$', display: true}, + {left: '$', right: '$', display: false}, + {left: '\\(', right: '\\)', display: false}, + {left: '\\[', right: '\\]', display: true} + ], + // • rendering keys, e.g.: + throwOnError : false + }); + // Get any elements with a class that starts with "language" let codeBlockElements = element.querySelectorAll('[class^="language-"]'); // For each element, add a parent div with the class "programmatic-output" @@ -458,8 +491,6 @@ To get started, just start typing below. You can also type / to see a list of co } async function chat() { - // Extract required fields for search from form - if (websocket) { sendMessageViaWebSocket(); return; @@ -512,7 +543,7 @@ To get started, just start typing below. You can also type / to see a list of co chatInput.classList.remove("option-enabled"); // Generate backend API URL to execute query - let url = `/api/chat?q=${encodeURIComponent(query)}&n=${resultsCount}&client=web&stream=true&conversation_id=${conversationID}®ion=${region}&city=${city}&country=${countryName}`; + let url = `/api/chat?q=${encodeURIComponent(query)}&n=${resultsCount}&client=web&stream=true&conversation_id=${conversationID}®ion=${region}&city=${city}&country=${countryName}&timezone=${timezone}`; // Call specified Khoj API let response = await fetch(url); @@ -898,8 +929,8 @@ To get started, just start typing below. You can also type / to see a list of co } websocketState = { - newResponseText: null, - newResponseElement: null, + newResponseTextEl: null, + newResponseEl: null, loadingEllipsis: null, references: {}, rawResponse: "", @@ -907,7 +938,7 @@ To get started, just start typing below. 
You can also type / to see a list of co if (chatBody.dataset.conversationId) { webSocketUrl += `?conversation_id=${chatBody.dataset.conversationId}`; - webSocketUrl += (!!region && !!city && !!countryName) ? `®ion=${region}&city=${city}&country=${countryName}` : ''; + webSocketUrl += (!!region && !!city && !!countryName) && !!timezone ? `®ion=${region}&city=${city}&country=${countryName}&timezone=${timezone}` : ''; websocket = new WebSocket(webSocketUrl); websocket.onmessage = function(event) { @@ -919,12 +950,12 @@ To get started, just start typing below. You can also type / to see a list of co } else if(chunk == "end_llm_response") { console.log("Stopped streaming", new Date()); // Append any references after all the data has been streamed - finalizeChatBodyResponse(websocketState.references, websocketState.newResponseText); + finalizeChatBodyResponse(websocketState.references, websocketState.newResponseTextEl); // Reset variables websocketState = { - newResponseText: null, - newResponseElement: null, + newResponseTextEl: null, + newResponseEl: null, loadingEllipsis: null, references: {}, rawResponse: "", @@ -949,9 +980,9 @@ To get started, just start typing below. You can also type / to see a list of co websocketState.rawResponse = rawResponse; websocketState.references = references; } else if (chunk.type == "status") { - handleStreamResponse(websocketState.newResponseText, chunk.message, null, false); + handleStreamResponse(websocketState.newResponseTextEl, chunk.message, null, false); } else if (chunk.type == "rate_limit") { - handleStreamResponse(websocketState.newResponseText, chunk.message, websocketState.loadingEllipsis, true); + handleStreamResponse(websocketState.newResponseTextEl, chunk.message, websocketState.loadingEllipsis, true); } else { rawResponse = chunk.response; } @@ -960,21 +991,21 @@ To get started, just start typing below. 
You can also type / to see a list of co websocketState.rawResponse += chunk; } finally { if (chunk.type != "status" && chunk.type != "rate_limit") { - addMessageToChatBody(websocketState.rawResponse, websocketState.newResponseText, websocketState.references); + addMessageToChatBody(websocketState.rawResponse, websocketState.newResponseTextEl, websocketState.references); } } } else { // Handle streamed response of type text/event-stream or text/plain if (chunk && chunk.includes("### compiled references:")) { - ({ rawResponse, references } = handleCompiledReferences(websocketState.newResponseText, chunk, websocketState.references, websocketState.rawResponse)); + ({ rawResponse, references } = handleCompiledReferences(websocketState.newResponseTextEl, chunk, websocketState.references, websocketState.rawResponse)); websocketState.rawResponse = rawResponse; websocketState.references = references; } else { // If the chunk is not a JSON object, just display it as is websocketState.rawResponse += chunk; - if (websocketState.newResponseText) { - handleStreamResponse(websocketState.newResponseText, websocketState.rawResponse, websocketState.loadingEllipsis); + if (websocketState.newResponseTextEl) { + handleStreamResponse(websocketState.newResponseTextEl, websocketState.rawResponse, websocketState.loadingEllipsis); } } @@ -1023,19 +1054,19 @@ To get started, just start typing below. 
You can also type / to see a list of co autoResize(); document.getElementById("chat-input").setAttribute("disabled", "disabled"); - let newResponseElement = document.createElement("div"); - newResponseElement.classList.add("chat-message", "khoj"); - newResponseElement.attributes["data-meta"] = "🏮 Khoj at " + formatDate(new Date()); - chatBody.appendChild(newResponseElement); + let newResponseEl = document.createElement("div"); + newResponseEl.classList.add("chat-message", "khoj"); + newResponseEl.attributes["data-meta"] = "🏮 Khoj at " + formatDate(new Date()); + chatBody.appendChild(newResponseEl); - let newResponseText = document.createElement("div"); - newResponseText.classList.add("chat-message-text", "khoj"); - newResponseElement.appendChild(newResponseText); + let newResponseTextEl = document.createElement("div"); + newResponseTextEl.classList.add("chat-message-text", "khoj"); + newResponseEl.appendChild(newResponseTextEl); // Temporary status message to indicate that Khoj is thinking let loadingEllipsis = createLoadingEllipse(); - newResponseText.appendChild(loadingEllipsis); + newResponseTextEl.appendChild(loadingEllipsis); document.getElementById("chat-body").scrollTop = document.getElementById("chat-body").scrollHeight; let chatTooltip = document.getElementById("chat-tooltip"); @@ -1050,8 +1081,8 @@ To get started, just start typing below. You can also type / to see a list of co let references = {}; websocketState = { - newResponseText, - newResponseElement, + newResponseTextEl, + newResponseEl, loadingEllipsis, references, rawResponse, @@ -1169,7 +1200,7 @@ To get started, just start typing below. You can also type / to see a list of co chat_log.message, chat_log.by, chat_log.context, - new Date(chat_log.created), + new Date(chat_log.created + "Z"), chat_log.onlineContext, chat_log.intent?.type, chat_log.intent?.["inferred-queries"]); @@ -1264,7 +1295,7 @@ To get started, just start typing below. 
You can also type / to see a list of co chat_log.message, chat_log.by, chat_log.context, - new Date(chat_log.created), + new Date(chat_log.created + "Z"), chat_log.onlineContext, chat_log.intent?.type, chat_log.intent?.["inferred-queries"] @@ -1530,11 +1561,79 @@ To get started, just start typing below. You can also type / to see a list of co conversationMenu.appendChild(editTitleButton); threeDotMenu.appendChild(conversationMenu); + let shareButton = document.createElement('button'); + shareButton.innerHTML = "Share"; + shareButton.type = "button"; + shareButton.classList.add("share-conversation-button"); + shareButton.classList.add("three-dot-menu-button-item"); + shareButton.addEventListener('click', function(event) { + event.preventDefault(); + let confirmation = confirm('Are you sure you want to share this chat session? This will make the conversation public.'); + if (!confirmation) return; + let duplicateURL = `/api/chat/share?client=web&conversation_id=${incomingConversationId}`; + fetch(duplicateURL , { method: "POST" }) + .then(response => response.ok ? 
response.json() : Promise.reject(response)) + .then(data => { + if (data.status == "ok") { + flashStatusInChatInput("✅ Conversation shared successfully"); + } + // Make a pop-up that shows data.url to share the conversation + let shareURL = data.url; + let shareModal = document.createElement('div'); + shareModal.classList.add("modal"); + shareModal.id = "share-conversation-modal"; + let shareModalContent = document.createElement('div'); + shareModalContent.classList.add("modal-content"); + let shareModalHeader = document.createElement('div'); + shareModalHeader.classList.add("modal-header"); + let shareModalTitle = document.createElement('h2'); + shareModalTitle.textContent = "Share Conversation"; + let shareModalCloseButton = document.createElement('button'); + shareModalCloseButton.classList.add("modal-close-button"); + shareModalCloseButton.innerHTML = "×"; + shareModalCloseButton.addEventListener('click', function() { + shareModal.remove(); + }); + shareModalHeader.appendChild(shareModalTitle); + shareModalHeader.appendChild(shareModalCloseButton); + shareModalContent.appendChild(shareModalHeader); + let shareModalBody = document.createElement('div'); + shareModalBody.classList.add("modal-body"); + let shareModalText = document.createElement('p'); + shareModalText.textContent = "The link has been copied to your clipboard. 
Use it to share your conversation with others!"; + let shareModalLink = document.createElement('input'); + shareModalLink.setAttribute("value", shareURL); + shareModalLink.setAttribute("readonly", ""); + shareModalLink.classList.add("share-link"); + let copyButton = document.createElement('button'); + copyButton.textContent = "Copy"; + copyButton.addEventListener('click', function() { + shareModalLink.select(); + document.execCommand('copy'); + }); + copyButton.id = "copy-share-url-button"; + shareModalBody.appendChild(shareModalText); + shareModalBody.appendChild(shareModalLink); + shareModalBody.appendChild(copyButton); + shareModalContent.appendChild(shareModalBody); + shareModal.appendChild(shareModalContent); + document.body.appendChild(shareModal); + shareModalLink.select(); + document.execCommand('copy'); + }) + .catch(err => { + return; + }); + }); + conversationMenu.appendChild(shareButton); + let deleteButton = document.createElement('button'); + deleteButton.type = "button"; deleteButton.innerHTML = "Delete"; deleteButton.classList.add("delete-conversation-button"); deleteButton.classList.add("three-dot-menu-button-item"); - deleteButton.addEventListener('click', function() { + deleteButton.addEventListener('click', function(event) { + event.preventDefault(); // Ask for confirmation before deleting chat session let confirmation = confirm('Are you sure you want to delete this chat session?'); if (!confirmation) return; @@ -1986,7 +2085,7 @@ To get started, just start typing below. You can also type / to see a list of co } div#conversation-list { - height: 1; + height: 1px; } div#side-panel-wrapper { @@ -2136,6 +2235,10 @@ To get started, just start typing below. You can also type / to see a list of co img.text-to-image { max-width: 60%; } + h3 > img.text-to-image { + height: 24px; + vertical-align: sub; + } #chat-footer { padding: 0; @@ -2159,7 +2262,7 @@ To get started, just start typing below. 
You can also type / to see a list of co #chat-input { font-family: var(--font-family); font-size: medium; - height: 36px; + height: 48px; border-radius: 16px; resize: none; overflow-y: hidden; @@ -2189,7 +2292,7 @@ To get started, just start typing below. You can also type / to see a list of co } .side-panel-button { - background: var(--background-color); + background: none; border: none; box-shadow: none; font-size: 14px; @@ -2408,6 +2511,7 @@ To get started, just start typing below. You can also type / to see a list of co margin-bottom: 8px; } + button#copy-share-url-button, button#new-conversation-button { display: inline-flex; align-items: center; @@ -2428,17 +2532,15 @@ To get started, just start typing below. You can also type / to see a list of co text-align: left; display: flex; position: relative; + margin: 0 8px; } .three-dot-menu { - display: none; - /* background: var(--background-color); */ - /* border: 1px solid var(--main-text-color); */ + display: block; border-radius: 5px; - /* position: relative; */ position: absolute; - right: 4; - top: 4; + right: 4px; + top: 4px; } button.three-dot-menu-button-item { @@ -2617,13 +2719,6 @@ To get started, just start typing below. You can also type / to see a list of co color: #333; } - #agent-instructions { - font-size: 14px; - color: #666; - height: 50px; - overflow: auto; - } - #agent-owned-by-user { font-size: 12px; color: #007BFF; @@ -2645,7 +2740,7 @@ To get started, just start typing below. You can also type / to see a list of co margin: 15% auto; /* 15% from the top and centered */ padding: 20px; border: 1px solid #888; - width: 250px; + width: 300px; text-align: left; background: var(--background-color); border-radius: 5px; @@ -2719,6 +2814,28 @@ To get started, just start typing below. 
You can also type / to see a list of co border: 1px solid var(--main-text-color); } + .share-link { + display: block; + width: 100%; + padding: 10px; + margin-top: 10px; + border: 1px solid #ccc; + border-radius: 4px; + background-color: #f9f9f9; + font-family: 'Courier New', monospace; + color: #333; + font-size: 16px; + box-sizing: border-box; + transition: all 0.3s ease; + } + + .share-link:focus { + outline: none; + border-color: #007BFF; + box-shadow: 0 0 0 0.2rem rgba(0,123,255,.25); + } + + button#copy-share-url-button, button#new-conversation-submit-button { background: var(--summer-sun); transition: background 0.2s ease-in-out; @@ -2729,6 +2846,7 @@ To get started, just start typing below. You can also type / to see a list of co transition: background 0.2s ease-in-out; } + button#copy-share-url-button:hover, button#new-conversation-submit-button:hover { background: var(--primary); } diff --git a/src/khoj/interface/web/config.html b/src/khoj/interface/web/config.html index f0537a2a..b808ef33 100644 --- a/src/khoj/interface/web/config.html +++ b/src/khoj/interface/web/config.html @@ -22,7 +22,7 @@ -

Content

+

Content

@@ -32,7 +32,7 @@
Computer

- Files + Files Github

- Github + Github Notion

- Notion + Notion Language

- Language + Language

@@ -180,7 +180,7 @@
Chat

- Chat + Chat

@@ -191,9 +191,15 @@
+ {% if (not billing_enabled) or (subscription_state != 'unsubscribed' and subscription_state != 'expired') %} + {% else %} + + {% endif %}
@@ -205,7 +211,9 @@
API Key -

API Keys

+

+ API Keys +

Manage access from your client apps to Khoj

@@ -231,7 +239,9 @@
WhatsApp icon -

WhatsApp

+

+ WhatsApp +

Your number is connected. You can now chat with Khoj on WhatsApp at +1-848-800-4242. Learn more about the integration here.

@@ -326,7 +336,6 @@
{% endif %}
-
@@ -567,13 +576,18 @@ function copyAPIKey(token) { // Copy API key to clipboard navigator.clipboard.writeText(token); - // Flash the API key copied message - const copyApiKeyButton = document.getElementById(`api-key-${token}`); - original_html = copyApiKeyButton.innerHTML + // Flash the API key copied icon + const apiKeyColumn = document.getElementById(`api-key-${token}`); + const original_html = apiKeyColumn.innerHTML; + const copyApiKeyButton = document.getElementById(`api-key-copy-${token}`); setTimeout(function() { - copyApiKeyButton.innerHTML = "✅ Copied!"; + copyApiKeyButton.src = "/static/assets/icons/copy-button-success.svg"; + setTimeout(() => { + copyApiKeyButton.src = "/static/assets/icons/copy-button.svg"; + }, 1000); + apiKeyColumn.innerHTML = "✅ Copied!"; setTimeout(function() { - copyApiKeyButton.innerHTML = original_html; + apiKeyColumn.innerHTML = original_html; }, 1000); }, 100); } @@ -601,12 +615,11 @@ ${tokenName} ${truncatedToken} - Copy API Key - Delete API Key + Copy API Key + Delete API Key `; - } function listApiKeys() { @@ -614,10 +627,93 @@ fetch('/auth/token') .then(response => response.json()) .then(tokens => { - apiKeyList.innerHTML = tokens.map(generateTokenRow).join(""); + if (!tokens?.length > 0) return; + apiKeyList.innerHTML = tokens?.map(generateTokenRow).join(""); }); } + // List user's API keys on page load + listApiKeys(); + + function deleteAutomation(automationId) { + const AutomationList = document.getElementById("automations-list"); + fetch(`/api/automation?automation_id=${automationId}`, { + method: 'DELETE', + }) + .then(response => { + if (response.status == 200) { + const AutomationItem = document.getElementById(`automation-item-${automationId}`); + AutomationList.removeChild(AutomationItem); + } + }); + } + + function generateAutomationRow(automationObj) { + let automationId = automationObj.id; + let automationNextRun = `Next run at ${automationObj.next}`; + return ` + + ${automationObj.subject} + 
${automationObj.scheduling_request} + ${automationObj.query_to_run} + ${automationObj.schedule} + + Delete Automation + Edit Automation + + + `; + } + + function listAutomations() { + const AutomationsList = document.getElementById("automations-list"); + fetch('/api/automations') + .then(response => response.json()) + .then(automations => { + if (!automations?.length > 0) return; + AutomationsList.innerHTML = automations.map(generateAutomationRow).join(""); + }); + } + + async function createAutomation() { + const scheduling_request = window.prompt("Describe the automation you want to create"); + if (!scheduling_request) return; + + const ip_response = await fetch("https://ipapi.co/json"); + const ip_data = await ip_response.json(); + + const query_string = `q=${scheduling_request}&city=${ip_data.city}®ion=${ip_data.region}&country=${ip_data.country_name}&timezone=${ip_data.timezone}`; + const automation_response = await fetch(`/api/automation?${query_string}`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + }); + if (!automation_response.ok) { + throw new Error(`Failed to create automation: ${automation_response.status}`); + } + + listAutomations(); + } + document.getElementById("create-automation").addEventListener("click", async () => { await createAutomation(); }); + + function editAutomation(automationId) { + const query_to_run = window.prompt("What is the query you want to run on this automation's schedule?"); + if (!query_to_run) return; + + fetch(`/api/automation?automation_id=${automationId}&query_to_run=${query_to_run}`, { + method: 'PATCH', + headers: { + 'Content-Type': 'application/json', + }, + }).then(response => { + if (response.ok) { + const automationQueryToRunColumn = document.getElementById(`automation-query-to-run-${automationId}`); + automationQueryToRunColumn.innerHTML = `${query_to_run}`; + } + }); + } + function getIndexedDataSize() { document.getElementById("indexed-data-size").innerHTML = "Calculating..."; 
fetch('/api/config/index/size') @@ -627,8 +723,8 @@ }); } - // List user's API keys on page load - listApiKeys(); + // List user's automations on page load + listAutomations(); function removeFile(path) { fetch('/api/config/data/file?filename=' + path, { diff --git a/src/khoj/interface/web/config_automation.html b/src/khoj/interface/web/config_automation.html new file mode 100644 index 00000000..d307a463 --- /dev/null +++ b/src/khoj/interface/web/config_automation.html @@ -0,0 +1,355 @@ +{% extends "base_config.html" %} +{% block content %} +
+
+

+ Automate + Automate (Preview) +
+ You can automate queries to run on a schedule using Khoj's automations for smart reminders. Results will be sent straight to your inbox. This is an experimental feature, so your results may vary. Report any issues to team@khoj.dev. +
+

+
+ +
+
+
+
+ + + +{% endblock %} diff --git a/src/khoj/interface/web/content_source_github_input.html b/src/khoj/interface/web/content_source_github_input.html index d298d520..a7d6ccdf 100644 --- a/src/khoj/interface/web/content_source_github_input.html +++ b/src/khoj/interface/web/content_source_github_input.html @@ -6,7 +6,7 @@ Github Github
diff --git a/src/khoj/interface/web/content_source_notion_input.html b/src/khoj/interface/web/content_source_notion_input.html index 1303730c..176da713 100644 --- a/src/khoj/interface/web/content_source_notion_input.html +++ b/src/khoj/interface/web/content_source_notion_input.html @@ -5,6 +5,9 @@

Notion Notion +
+ ⓘ Help +
diff --git a/src/khoj/interface/web/login.html b/src/khoj/interface/web/login.html index bc8304c0..6443e6a4 100644 --- a/src/khoj/interface/web/login.html +++ b/src/khoj/interface/web/login.html @@ -7,6 +7,7 @@ + @@ -15,7 +16,7 @@
- + - -{% endblock %} diff --git a/src/khoj/interface/web/public_conversation.html b/src/khoj/interface/web/public_conversation.html new file mode 100644 index 00000000..e23bdb5e --- /dev/null +++ b/src/khoj/interface/web/public_conversation.html @@ -0,0 +1,1917 @@ + + + + + + Khoj: {{ public_conversation_slug | replace("-", " ")}} + + + + + + + + + + + + + + + + + +
+
+ + + + + + {% import 'utils.html' as utils %} + {{ utils.heading_pane(user_photo, username, is_active, has_documents) }} +
+
+
+
+
Agents
+ +
+
+
+ {% for agent in agents %} + + + + {% endfor %} +
+
+ + + +
+
+ +
+
+
+ +
+ + + +
+
+ + + + + diff --git a/src/khoj/interface/web/utils.html b/src/khoj/interface/web/utils.html index f9372482..b2d719cb 100644 --- a/src/khoj/interface/web/utils.html +++ b/src/khoj/interface/web/utils.html @@ -10,6 +10,9 @@ Agents Agents + + Automation + Automate {% if has_documents %} Search diff --git a/src/khoj/main.py b/src/khoj/main.py index 745b77fb..40b449d7 100644 --- a/src/khoj/main.py +++ b/src/khoj/main.py @@ -6,6 +6,7 @@ from contextlib import redirect_stdout import logging import io import os +import atexit import sys import locale @@ -23,6 +24,7 @@ warnings.filterwarnings("ignore", message=r"legacy way to download files from th import uvicorn import django +from apscheduler.schedulers.background import BackgroundScheduler from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware from fastapi.staticfiles import StaticFiles @@ -92,6 +94,11 @@ from khoj.utils.cli import cli from khoj.utils.initialization import initialization +def shutdown_scheduler(): + logger.info("🌑 Shutting down Khoj") + state.scheduler.shutdown() + + def run(should_start_server=True): # Turn Tokenizers Parallelism Off. App does not support it. 
os.environ["TOKENIZERS_PARALLELISM"] = "false" @@ -126,6 +133,19 @@ def run(should_start_server=True): # Setup task scheduler poll_task_scheduler() + # Setup Background Scheduler + from django_apscheduler.jobstores import DjangoJobStore + + state.scheduler = BackgroundScheduler( + { + "apscheduler.timezone": "UTC", + "apscheduler.job_defaults.misfire_grace_time": "60", # Useful to run scheduled jobs even when worker delayed because it was busy or down + "apscheduler.job_defaults.coalesce": "true", # Combine multiple jobs into one if they are scheduled at the same time + } + ) + state.scheduler.add_jobstore(DjangoJobStore(), "default") + state.scheduler.start() + # Start Server configure_routes(app) @@ -144,6 +164,8 @@ def run(should_start_server=True): # If the server is started through gunicorn (external to the script), don't start the server if should_start_server: start_server(app, host=args.host, port=args.port, socket=args.socket) + # Teardown + shutdown_scheduler() def set_state(args): @@ -185,3 +207,4 @@ if __name__ == "__main__": run() else: run(should_start_server=False) + atexit.register(shutdown_scheduler) diff --git a/src/khoj/migrations/migrate_server_pg.py b/src/khoj/migrations/migrate_server_pg.py index 0ab3522b..a770a38d 100644 --- a/src/khoj/migrations/migrate_server_pg.py +++ b/src/khoj/migrations/migrate_server_pg.py @@ -121,14 +121,16 @@ def migrate_server_pg(args): if openai.get("chat-model") is None: openai["chat-model"] = "gpt-3.5-turbo" - OpenAIProcessorConversationConfig.objects.create( - api_key=openai.get("api-key"), + openai_config = OpenAIProcessorConversationConfig.objects.create( + api_key=openai.get("api-key"), name="default" ) + ChatModelOptions.objects.create( chat_model=openai.get("chat-model"), tokenizer=processor_conversation.get("tokenizer"), max_prompt_size=processor_conversation.get("max-prompt-size"), model_type=ChatModelOptions.ModelType.OPENAI, + openai_config=openai_config, ) save_config_to_file(raw_config, 
args.config_file) diff --git a/src/khoj/processor/conversation/openai/gpt.py b/src/khoj/processor/conversation/openai/gpt.py index 6fb0ca0f..c25f05fd 100644 --- a/src/khoj/processor/conversation/openai/gpt.py +++ b/src/khoj/processor/conversation/openai/gpt.py @@ -23,6 +23,7 @@ def extract_questions( model: Optional[str] = "gpt-4-turbo-preview", conversation_log={}, api_key=None, + api_base_url=None, temperature=0, max_tokens=100, location_data: LocationData = None, @@ -64,12 +65,12 @@ def extract_questions( # Get Response from GPT response = completion_with_backoff( messages=messages, - completion_kwargs={"temperature": temperature, "max_tokens": max_tokens}, - model_kwargs={ - "model_name": model, - "openai_api_key": api_key, - "model_kwargs": {"response_format": {"type": "json_object"}}, - }, + model=model, + temperature=temperature, + max_tokens=max_tokens, + api_base_url=api_base_url, + model_kwargs={"response_format": {"type": "json_object"}}, + openai_api_key=api_key, ) # Extract, Clean Message from GPT's Response @@ -89,7 +90,7 @@ def extract_questions( return questions -def send_message_to_model(messages, api_key, model, response_type="text"): +def send_message_to_model(messages, api_key, model, response_type="text", api_base_url=None): """ Send message to model """ @@ -97,11 +98,10 @@ def send_message_to_model(messages, api_key, model, response_type="text"): # Get Response from GPT return completion_with_backoff( messages=messages, - model_kwargs={ - "model_name": model, - "openai_api_key": api_key, - "model_kwargs": {"response_format": {"type": response_type}}, - }, + model=model, + openai_api_key=api_key, + api_base_url=api_base_url, + model_kwargs={"response_format": {"type": response_type}}, ) @@ -112,6 +112,7 @@ def converse( conversation_log={}, model: str = "gpt-3.5-turbo", api_key: Optional[str] = None, + api_base_url: Optional[str] = None, temperature: float = 0.2, completion_func=None, conversation_commands=[ConversationCommand.Default], @@ 
-181,6 +182,7 @@ def converse( model_name=model, temperature=temperature, openai_api_key=api_key, + api_base_url=api_base_url, completion_func=completion_func, model_kwargs={"stop": ["Notes:\n["]}, ) diff --git a/src/khoj/processor/conversation/openai/utils.py b/src/khoj/processor/conversation/openai/utils.py index 908a035d..0c37ba53 100644 --- a/src/khoj/processor/conversation/openai/utils.py +++ b/src/khoj/processor/conversation/openai/utils.py @@ -1,12 +1,9 @@ import logging import os from threading import Thread -from typing import Any +from typing import Dict import openai -from langchain.callbacks.base import BaseCallbackManager -from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler -from langchain_openai import ChatOpenAI from tenacity import ( before_sleep_log, retry, @@ -20,14 +17,7 @@ from khoj.processor.conversation.utils import ThreadedGenerator logger = logging.getLogger(__name__) - -class StreamingChatCallbackHandler(StreamingStdOutCallbackHandler): - def __init__(self, gen: ThreadedGenerator): - super().__init__() - self.gen = gen - - def on_llm_new_token(self, token: str, **kwargs) -> Any: - self.gen.send(token) +openai_clients: Dict[str, openai.OpenAI] = {} @retry( @@ -43,13 +33,37 @@ class StreamingChatCallbackHandler(StreamingStdOutCallbackHandler): before_sleep=before_sleep_log(logger, logging.DEBUG), reraise=True, ) -def completion_with_backoff(messages, model_kwargs={}, completion_kwargs={}) -> str: - if not "openai_api_key" in model_kwargs: - model_kwargs["openai_api_key"] = os.getenv("OPENAI_API_KEY") - llm = ChatOpenAI(**model_kwargs, request_timeout=20, max_retries=1) +def completion_with_backoff( + messages, model, temperature=0, openai_api_key=None, api_base_url=None, model_kwargs=None, max_tokens=None +) -> str: + client_key = f"{openai_api_key}--{api_base_url}" + client: openai.OpenAI = openai_clients.get(client_key) + if not client: + client = openai.OpenAI( + api_key=openai_api_key or 
os.getenv("OPENAI_API_KEY"), + base_url=api_base_url, + ) + openai_clients[client_key] = client + + formatted_messages = [{"role": message.role, "content": message.content} for message in messages] + + chat = client.chat.completions.create( + stream=True, + messages=formatted_messages, # type: ignore + model=model, # type: ignore + temperature=temperature, + timeout=20, + max_tokens=max_tokens, + **(model_kwargs or dict()), + ) aggregated_response = "" - for chunk in llm.stream(messages, **completion_kwargs): - aggregated_response += chunk.content + for chunk in chat: + delta_chunk = chunk.choices[0].delta # type: ignore + if isinstance(delta_chunk, str): + aggregated_response += delta_chunk + elif delta_chunk.content: + aggregated_response += delta_chunk.content + return aggregated_response @@ -73,30 +87,45 @@ def chat_completion_with_backoff( model_name, temperature, openai_api_key=None, + api_base_url=None, completion_func=None, model_kwargs=None, ): g = ThreadedGenerator(compiled_references, online_results, completion_func=completion_func) - t = Thread(target=llm_thread, args=(g, messages, model_name, temperature, openai_api_key, model_kwargs)) + t = Thread( + target=llm_thread, args=(g, messages, model_name, temperature, openai_api_key, api_base_url, model_kwargs) + ) t.start() return g -def llm_thread(g, messages, model_name, temperature, openai_api_key=None, model_kwargs=None): - callback_handler = StreamingChatCallbackHandler(g) - chat = ChatOpenAI( - streaming=True, - verbose=True, - callback_manager=BaseCallbackManager([callback_handler]), - model_name=model_name, # type: ignore +def llm_thread(g, messages, model_name, temperature, openai_api_key=None, api_base_url=None, model_kwargs=None): + client_key = f"{openai_api_key}--{api_base_url}" + if client_key not in openai_clients: + client: openai.OpenAI = openai.OpenAI( + api_key=openai_api_key or os.getenv("OPENAI_API_KEY"), + base_url=api_base_url, + ) + openai_clients[client_key] = client + else: + 
client: openai.OpenAI = openai_clients[client_key] + + formatted_messages = [{"role": message.role, "content": message.content} for message in messages] + + chat = client.chat.completions.create( + stream=True, + messages=formatted_messages, + model=model_name, # type: ignore temperature=temperature, - openai_api_key=openai_api_key or os.getenv("OPENAI_API_KEY"), - model_kwargs=model_kwargs, - request_timeout=20, - max_retries=1, - client=None, + timeout=20, + **(model_kwargs or dict()), ) - chat(messages=messages) + for chunk in chat: + delta_chunk = chunk.choices[0].delta + if isinstance(delta_chunk, str): + g.send(delta_chunk) + elif delta_chunk.content: + g.send(delta_chunk.content) g.close() diff --git a/src/khoj/processor/conversation/prompts.py b/src/khoj/processor/conversation/prompts.py index f5700167..f3b65e15 100644 --- a/src/khoj/processor/conversation/prompts.py +++ b/src/khoj/processor/conversation/prompts.py @@ -10,9 +10,11 @@ You were created by Khoj Inc. with the following capabilities: - You *CAN REMEMBER ALL NOTES and PERSONAL INFORMATION FOREVER* that the user ever shares with you. - Users can share files and other information with you using the Khoj Desktop, Obsidian or Emacs app. They can also drag and drop their files into the chat window. -- You *CAN* generate images, look-up real-time information from the internet, and answer questions based on the user's notes. -- You cannot set reminders. +- You *CAN* generate images, look-up real-time information from the internet, set reminders and answer questions based on the user's notes. - Say "I don't know" or "I don't understand" if you don't know what to say or if you don't know the answer to a question. +- Make sure to use the specific LaTeX math mode delimiters for your response. 
LaTeX math mode specific delimiters as follows + - inline math mode : `\\(` and `\\)` + - display math mode: insert linebreak after opening `$$`, `\\[` and before closing `$$`, `\\]` - Ask crisp follow-up questions to get additional context, when the answer cannot be inferred from the provided notes or past conversations. - Sometimes the user will share personal information that needs to be remembered, like an account ID or a residential address. These can be acknowledged with a simple "Got it" or "Okay". - Provide inline references to quotes from the user's notes or any web pages you refer to in your responses in markdown format. For example, "The farmer had ten sheep. [1](https://example.com)". *ALWAYS CITE YOUR SOURCES AND PROVIDE REFERENCES*. Add them inline to directly support your claim. @@ -31,6 +33,9 @@ You were created by Khoj Inc. with the following capabilities: - You *CAN REMEMBER ALL NOTES and PERSONAL INFORMATION FOREVER* that the user ever shares with you. - Users can share files and other information with you using the Khoj Desktop, Obsidian or Emacs app. They can also drag and drop their files into the chat window. - Say "I don't know" or "I don't understand" if you don't know what to say or if you don't know the answer to a question. +- Make sure to use the specific LaTeX math mode delimiters for your response. LaTeX math mode specific delimiters as follows + - inline math mode : `\\(` and `\\)` + - display math mode: insert linebreak after opening `$$`, `\\[` and before closing `$$`, `\\]` - Ask crisp follow-up questions to get additional context, when the answer cannot be inferred from the provided notes or past conversations. - Sometimes the user will share personal information that needs to be remembered, like an account ID or a residential address. These can be acknowledged with a simple "Got it" or "Okay". @@ -301,6 +306,22 @@ AI: I can help with that. 
I see online that there is a new model of the Dell XPS Q: What are the specs of the new Dell XPS 15? Khoj: default +Example: +Chat History: +User: Where did I go on my last vacation? +AI: You went to Jordan and visited Petra, the Dead Sea, and Wadi Rum. + +Q: Remind me who did I go with on that trip? +Khoj: default + +Example: +Chat History: +User: How's the weather outside? Current Location: Bali, Indonesia +AI: It's currently 28°C and partly cloudy in Bali. + +Q: Share a painting using the weather for Bali every morning. +Khoj: reminder + Now it's your turn to pick the mode you would like to use to answer the user's question. Provide your response as a string. Chat History: @@ -399,8 +420,8 @@ History: User: I like to use Hacker News to get my tech news. AI: Hacker News is an online forum for sharing and discussing the latest tech news. It is a great place to learn about new technologies and startups. -Q: Summarize this post about vector database on Hacker News, https://news.ycombinator.com/item?id=12345 -Khoj: {{"links": ["https://news.ycombinator.com/item?id=12345"]}} +Q: Summarize top posts on Hacker News today +Khoj: {{"links": ["https://news.ycombinator.com/best"]}} History: User: I'm currently living in New York but I'm thinking about moving to San Francisco. @@ -445,8 +466,13 @@ History: User: I like to use Hacker News to get my tech news. AI: Hacker News is an online forum for sharing and discussing the latest tech news. It is a great place to learn about new technologies and startups. 
-Q: Summarize posts about vector databases on Hacker News since Feb 2024 -Khoj: {{"queries": ["site:news.ycombinator.com vector database since 1 February 2024"]}} +Q: Summarize the top posts on HackerNews +Khoj: {{"queries": ["top posts on HackerNews"]}} + +History: + +Q: Tell me the latest news about the farmers protest in Colombia and China on Reuters +Khoj: {{"queries": ["site:reuters.com farmers protest Colombia", "site:reuters.com farmers protest China"]}} History: User: I'm currently living in New York but I'm thinking about moving to San Francisco. @@ -492,6 +518,135 @@ Khoj: """.strip() ) +# Automations +# -- +crontime_prompt = PromptTemplate.from_template( + """ +You are Khoj, an extremely smart and helpful task scheduling assistant +- Given a user query, infer the date, time to run the query at as a cronjob time string +- Use an approximate time that makes sense, if it is unspecified. +- Also extract the search query to run at the scheduled time. Add any context required from the chat history to improve the query. +- Return a JSON object with the cronjob time, the search query to run and the task subject in it. + +# Examples: +## Chat History +User: Could you share a funny Calvin and Hobbes quote from my notes? +AI: Here is one I found: "It's not denial. I'm just selective about the reality I accept." + +User: Hahah, nice! Show a new one every morning. +Khoj: {{ + "crontime": "0 9 * * *", + "query": "/automated_task Share a funny Calvin and Hobbes or Bill Watterson quote from my notes", + "subject": "Your Calvin and Hobbes Quote for the Day" +}} + +## Chat History + +User: Every monday evening at 6 share the top posts on hacker news from last week. Format it as a newsletter +Khoj: {{ + "crontime": "0 18 * * 1", + "query": "/automated_task Top posts last week on Hacker News", + "subject": "Your Weekly Top Hacker News Posts Newsletter" +}} + +## Chat History +User: What is the latest version of the khoj python package? 
+AI: The latest released Khoj python package version is 1.5.0. + +User: Notify me when version 2.0.0 is released +Khoj: {{ + "crontime": "0 10 * * *", + "query": "/automated_task What is the latest released version of the Khoj python package?", + "subject": "Khoj Python Package Version 2.0.0 Release" +}} + +## Chat History + +User: Tell me the latest local tech news on the first sunday of every month +Khoj: {{ + "crontime": "0 8 1-7 * 0", + "query": "/automated_task Find the latest local tech, AI and engineering news. Format it as a newsletter.", + "subject": "Your Monthly Dose of Local Tech News" +}} + +## Chat History + +User: Inform me when the national election results are declared. Run task at 4pm every thursday. +Khoj: {{ + "crontime": "0 16 * * 4", + "query": "/automated_task Check if the Indian national election results are officially declared", + "subject": "Indian National Election Results Declared" +}} + +# Chat History: +{chat_history} + +User: {query} +Khoj: +""".strip() +) + +subject_generation = PromptTemplate.from_template( + """ +You are an extremely smart and helpful title generator assistant. Given a user query, extract the subject or title of the task to be performed. +- Use the user query to infer the subject or title of the task. + +# Examples: +User: Show a new Calvin and Hobbes quote every morning at 9am. My Current Location: Shanghai, China +Khoj: Your daily Calvin and Hobbes Quote + +User: Notify me when version 2.0.0 of the sentence transformers python package is released. My Current Location: Mexico City, Mexico +Khoj: Sentence Transformers Python Package Version 2.0.0 Release + +User: Gather the latest tech news on the first sunday of every month. +Khoj: Your Monthly Dose of Tech News + +User Query: {query} +Khoj: +""".strip() +) + +to_notify_or_not = PromptTemplate.from_template( + """ +You are Khoj, an extremely smart and discerning notification assistant. 
+- Decide whether the user should be notified of the AI's response using the Original User Query, Executed User Query and AI Response triplet. +- Notify the user only if the AI's response satisfies the user specified requirements. +- You should only respond with a "Yes" or "No". Do not say anything else. + +# Examples: +Original User Query: Hahah, nice! Show a new one every morning at 9am. My Current Location: Shanghai, China +Executed User Query: Could you share a funny Calvin and Hobbes quote from my notes? +AI Response: Here is one I found: "It's not denial. I'm just selective about the reality I accept." +Khoj: Yes + +Original User Query: Every evening check if it's going to rain tomorrow. Notify me only if I'll need an umbrella. My Current Location: Nairobi, Kenya +Executed User Query: Is it going to rain tomorrow in Nairobi, Kenya +AI Response: Tomorrow's forecast is sunny with a high of 28°C and a low of 18°C +Khoj: No + +Original User Query: Tell me when version 2.0.0 is released. My Current Location: Mexico City, Mexico +Executed User Query: Check if version 2.0.0 of the Khoj python package is released +AI Response: The latest released Khoj python package version is 1.5.0. +Khoj: No + +Original User Query: Paint me a sunset every evening. My Current Location: Shanghai, China +Executed User Query: Paint me a sunset in Shanghai, China +AI Response: https://khoj-generated-images.khoj.dev/user110/image78124.webp +Khoj: Yes + +Original User Query: Share a summary of the tasks I've completed at the end of the day. My Current Location: Oslo, Norway +Executed User Query: Share a summary of the tasks I've completed today. +AI Response: I'm sorry, I couldn't find any relevant notes to respond to your message. 
+Khoj: No + +Original User Query: {original_query} +Executed User Query: {executed_query} +AI Response: {response} +Khoj: +""".strip() +) + + # System messages to user # -- help_message = PromptTemplate.from_template( diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py index 877e5a43..775848c8 100644 --- a/src/khoj/processor/conversation/utils.py +++ b/src/khoj/processor/conversation/utils.py @@ -14,6 +14,7 @@ from transformers import AutoTokenizer from khoj.database.adapters import ConversationAdapters from khoj.database.models import ClientApplication, KhojUser from khoj.processor.conversation.offline.utils import download_model, infer_max_tokens +from khoj.utils import state from khoj.utils.helpers import is_none_or_empty, merge_dicts logger = logging.getLogger(__name__) @@ -101,6 +102,7 @@ def save_to_conversation_log( intent_type: str = "remember", client_application: ClientApplication = None, conversation_id: int = None, + automation_id: str = None, ): user_message_time = user_message_time or datetime.now().strftime("%Y-%m-%d %H:%M:%S") updated_conversation = message_to_log( @@ -111,6 +113,7 @@ def save_to_conversation_log( "context": compiled_references, "intent": {"inferred-queries": inferred_queries, "type": intent_type}, "onlineContext": online_results, + "automationId": automation_id, }, conversation_log=meta_log.get("chat", []), ) @@ -186,19 +189,31 @@ def truncate_messages( max_prompt_size, model_name: str, loaded_model: Optional[Llama] = None, - tokenizer_name="hf-internal-testing/llama-tokenizer", + tokenizer_name=None, ) -> list[ChatMessage]: """Truncate messages to fit within max prompt size supported by model""" + default_tokenizer = "hf-internal-testing/llama-tokenizer" + try: if loaded_model: encoder = loaded_model.tokenizer() elif model_name.startswith("gpt-"): encoder = tiktoken.encoding_for_model(model_name) + elif tokenizer_name: + if tokenizer_name in state.pretrained_tokenizers: + encoder = 
state.pretrained_tokenizers[tokenizer_name] + else: + encoder = AutoTokenizer.from_pretrained(tokenizer_name) + state.pretrained_tokenizers[tokenizer_name] = encoder else: encoder = download_model(model_name).tokenizer() except: - encoder = AutoTokenizer.from_pretrained(tokenizer_name) + if default_tokenizer in state.pretrained_tokenizers: + encoder = state.pretrained_tokenizers[default_tokenizer] + else: + encoder = AutoTokenizer.from_pretrained(default_tokenizer) + state.pretrained_tokenizers[default_tokenizer] = encoder logger.warning( f"Fallback to default chat model tokenizer: {tokenizer_name}.\nConfigure tokenizer for unsupported model: {model_name} in Khoj settings to improve context stuffing." ) diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py index 362038b7..efda9841 100644 --- a/src/khoj/routers/api.py +++ b/src/khoj/routers/api.py @@ -7,6 +7,10 @@ import time import uuid from typing import Any, Callable, List, Optional, Union +import cron_descriptor +import pytz +from apscheduler.job import Job +from apscheduler.triggers.cron import CronTrigger from asgiref.sync import sync_to_async from fastapi import APIRouter, Depends, File, HTTPException, Request, UploadFile from fastapi.requests import Request @@ -15,6 +19,7 @@ from starlette.authentication import has_required_scope, requires from khoj.configure import initialize_content from khoj.database.adapters import ( + AutomationAdapters, ConversationAdapters, EntryAdapters, get_user_photo, @@ -29,15 +34,17 @@ from khoj.routers.helpers import ( ApiUserRateLimiter, CommonQueryParams, ConversationCommandRateLimiter, + acreate_title_from_query, + schedule_automation, update_telemetry_state, ) from khoj.search_filter.date_filter import DateFilter from khoj.search_filter.file_filter import FileFilter from khoj.search_filter.word_filter import WordFilter from khoj.search_type import text_search -from khoj.utils import constants, state +from khoj.utils import state from khoj.utils.config import 
OfflineChatProcessorModel -from khoj.utils.helpers import ConversationCommand, timer +from khoj.utils.helpers import ConversationCommand, is_none_or_empty, timer from khoj.utils.rawconfig import LocationData, SearchResponse from khoj.utils.state import SearchType @@ -267,7 +274,6 @@ async def transcribe( async def extract_references_and_questions( request: Request, - common: CommonQueryParams, meta_log: dict, q: str, n: int, @@ -303,14 +309,12 @@ async def extract_references_and_questions( # Infer search queries from user message with timer("Extracting search queries took", logger): # If we've reached here, either the user has enabled offline chat or the openai model is enabled. - conversation_config = await ConversationAdapters.aget_conversation_config(user) - if conversation_config is None: - conversation_config = await ConversationAdapters.aget_default_conversation_config() + conversation_config = await ConversationAdapters.aget_default_conversation_config() + if conversation_config.model_type == ChatModelOptions.ModelType.OFFLINE: using_offline_chat = True - default_offline_llm = await ConversationAdapters.get_default_offline_llm() - chat_model = default_offline_llm.chat_model - max_tokens = default_offline_llm.max_prompt_size + chat_model = conversation_config.chat_model + max_tokens = conversation_config.max_prompt_size if state.offline_chat_processor_config is None: state.offline_chat_processor_config = OfflineChatProcessorModel(chat_model, max_tokens) @@ -324,11 +328,10 @@ async def extract_references_and_questions( location_data=location_data, max_prompt_size=conversation_config.max_prompt_size, ) - elif conversation_config and conversation_config.model_type == ChatModelOptions.ModelType.OPENAI: - openai_chat_config = await ConversationAdapters.get_openai_chat_config() - default_openai_llm = await ConversationAdapters.aget_default_openai_llm() + elif conversation_config.model_type == ChatModelOptions.ModelType.OPENAI: + openai_chat_config = 
conversation_config.openai_config api_key = openai_chat_config.api_key - chat_model = default_openai_llm.chat_model + chat_model = conversation_config.chat_model inferred_queries = extract_questions( defiltered_query, model=chat_model, @@ -390,3 +393,154 @@ def user_info(request: Request) -> Response: # Return user information as a JSON response return Response(content=json.dumps(user_info), media_type="application/json", status_code=200) + + +@api.get("/automations", response_class=Response) +@requires(["authenticated"]) +def get_automations(request: Request) -> Response: + user: KhojUser = request.user.object + + # Collate all automations created by user that are still active + automations_info = [automation_info for automation_info in AutomationAdapters.get_automations_metadata(user)] + + # Return tasks information as a JSON response + return Response(content=json.dumps(automations_info), media_type="application/json", status_code=200) + + +@api.delete("/automation", response_class=Response) +@requires(["authenticated"]) +def delete_automation(request: Request, automation_id: str) -> Response: + user: KhojUser = request.user.object + + try: + automation_info = AutomationAdapters.delete_automation(user, automation_id) + except ValueError: + return Response(status_code=204) + + # Return deleted automation information as a JSON response + return Response(content=json.dumps(automation_info), media_type="application/json", status_code=200) + + +@api.post("/automation", response_class=Response) +@requires(["authenticated"]) +async def post_automation( + request: Request, + q: str, + crontime: str, + city: Optional[str] = None, + region: Optional[str] = None, + country: Optional[str] = None, + timezone: Optional[str] = None, +) -> Response: + user: KhojUser = request.user.object + + # Perform validation checks + if is_none_or_empty(q) or is_none_or_empty(crontime): + return Response(content="A query and crontime is required", status_code=400) + if not 
cron_descriptor.get_description(crontime): + return Response(content="Invalid crontime", status_code=400) + + # Normalize query parameters + # Add /automated_task prefix to query if not present + q = q.strip() + if not q.startswith("/automated_task"): + query_to_run = f"/automated_task {q}" + # Normalize crontime for AP Scheduler CronTrigger + crontime = crontime.strip() + if len(crontime.split(" ")) > 5: + # Truncate crontime to 5 fields + crontime = " ".join(crontime.split(" ")[:5]) + # Convert crontime to standard unix crontime + crontime = crontime.replace("?", "*") + subject = await acreate_title_from_query(q) + + # Schedule automation with query_to_run, timezone, subject directly provided by user + try: + # Use the query to run as the scheduling request if the scheduling request is unset + automation = await schedule_automation(query_to_run, subject, crontime, timezone, q, user, request.url) + except Exception as e: + logger.error(f"Error creating automation {q} for {user.email}: {e}", exc_info=True) + return Response( + content=f"Unable to create automation. 
Ensure the automation doesn't already exist.", + media_type="text/plain", + status_code=500, + ) + + # Collate info about the created user automation + automation_info = AutomationAdapters.get_automation_metadata(user, automation) + + # Return information about the created automation as a JSON response + return Response(content=json.dumps(automation_info), media_type="application/json", status_code=200) + + +@api.put("/automation", response_class=Response) +@requires(["authenticated"]) +def edit_job( + request: Request, + automation_id: str, + q: Optional[str], + subject: Optional[str], + crontime: Optional[str], + city: Optional[str] = None, + region: Optional[str] = None, + country: Optional[str] = None, + timezone: Optional[str] = None, +) -> Response: + user: KhojUser = request.user.object + + # Perform validation checks + if is_none_or_empty(q) or is_none_or_empty(subject) or is_none_or_empty(crontime): + return Response(content="A query, subject and crontime is required", status_code=400) + if not cron_descriptor.get_description(crontime): + return Response(content="Invalid crontime", status_code=400) + + # Check, get automation to edit + try: + automation: Job = AutomationAdapters.get_automation(user, automation_id) + except ValueError as e: + return Response(content="Invalid automation", status_code=403) + + # Normalize query parameters + # Add /automated_task prefix to query if not present + q = q.strip() + if not q.startswith("/automated_task"): + query_to_run = f"/automated_task {q}" + # Normalize crontime for AP Scheduler CronTrigger + crontime = crontime.strip() + if len(crontime.split(" ")) > 5: + # Truncate crontime to 5 fields + crontime = " ".join(crontime.split(" ")[:5]) + # Convert crontime to standard unix crontime + crontime = crontime.replace("?", "*") + + # Construct updated automation metadata + automation_metadata = json.loads(automation.name) + automation_metadata["scheduling_request"] = q + automation_metadata["query_to_run"] = 
query_to_run + automation_metadata["subject"] = subject.strip() + automation_metadata["crontime"] = crontime + + # Modify automation with updated query, subject + automation.modify( + name=json.dumps(automation_metadata), + kwargs={ + "query_to_run": query_to_run, + "subject": subject, + "scheduling_request": q, + "user": user, + "calling_url": request.url, + }, + ) + + # Reschedule automation if crontime updated + user_timezone = pytz.timezone(timezone) + trigger = CronTrigger.from_crontab(crontime, user_timezone) + if automation.trigger != trigger: + automation.reschedule(trigger=trigger) + + # Collate info about the updated user automation + automation = AutomationAdapters.get_automation(user, automation.id) + automation_info = AutomationAdapters.get_automation_metadata(user, automation) + + # Return modified automation information as a JSON response + return Response(content=json.dumps(automation_info), media_type="application/json", status_code=200) diff --git a/src/khoj/routers/api_chat.py b/src/khoj/routers/api_chat.py index b4ec2454..415b5530 100644 --- a/src/khoj/routers/api_chat.py +++ b/src/khoj/routers/api_chat.py @@ -1,6 +1,7 @@ import json import logging import math +from datetime import datetime from typing import Dict, Optional from urllib.parse import unquote @@ -12,7 +13,12 @@ from starlette.authentication import requires from starlette.websockets import WebSocketDisconnect from websockets import ConnectionClosedOK -from khoj.database.adapters import ConversationAdapters, EntryAdapters, aget_user_name +from khoj.database.adapters import ( + ConversationAdapters, + EntryAdapters, + PublicConversationAdapters, + aget_user_name, +) from khoj.database.models import KhojUser from khoj.processor.conversation.prompts import ( help_message, @@ -29,11 +35,15 @@ from khoj.routers.api import extract_references_and_questions from khoj.routers.helpers import ( ApiUserRateLimiter, CommonQueryParams, + CommonQueryParamsClass, ConversationCommandRateLimiter, 
agenerate_chat_response, aget_relevant_information_sources, aget_relevant_output_modes, + construct_automation_created_message, + create_automation, get_conversation_command, + is_query_empty, is_ready_to_chat, text_to_image, update_telemetry_state, @@ -128,6 +138,60 @@ def chat_history( return {"status": "ok", "response": meta_log} +@api_chat.get("/share/history") +def get_shared_chat( + request: Request, + common: CommonQueryParams, + public_conversation_slug: str, + n: Optional[int] = None, +): + user = request.user.object if request.user.is_authenticated else None + + # Load Conversation History + conversation = PublicConversationAdapters.get_public_conversation_by_slug(public_conversation_slug) + + if conversation is None: + return Response( + content=json.dumps({"status": "error", "message": f"Conversation: {public_conversation_slug} not found"}), + status_code=404, + ) + + agent_metadata = None + if conversation.agent: + agent_metadata = { + "slug": conversation.agent.slug, + "name": conversation.agent.name, + "avatar": conversation.agent.avatar, + "isCreator": conversation.agent.creator == user, + } + + meta_log = conversation.conversation_log + meta_log.update( + { + "conversation_id": conversation.id, + "slug": conversation.title if conversation.title else conversation.slug, + "agent": agent_metadata, + } + ) + + if n: + # Get latest N messages if N > 0 + if n > 0 and meta_log.get("chat"): + meta_log["chat"] = meta_log["chat"][-n:] + # Else return all messages except latest N + elif n < 0 and meta_log.get("chat"): + meta_log["chat"] = meta_log["chat"][:n] + + update_telemetry_state( + request=request, + telemetry_type="api", + api="public_conversation_history", + **common.__dict__, + ) + + return {"status": "ok", "response": meta_log} + + @api_chat.delete("/history") @requires(["authenticated"]) async def clear_chat_history( @@ -150,6 +214,69 @@ async def clear_chat_history( return {"status": "ok", "message": "Conversation history cleared"} 
+@api_chat.post("/share/fork") +@requires(["authenticated"]) +def fork_public_conversation( + request: Request, + common: CommonQueryParams, + public_conversation_slug: str, +): + user = request.user.object + + # Load Conversation History + public_conversation = PublicConversationAdapters.get_public_conversation_by_slug(public_conversation_slug) + + # Duplicate Public Conversation to User's Private Conversation + ConversationAdapters.create_conversation_from_public_conversation( + user, public_conversation, request.user.client_app + ) + + chat_metadata = {"forked_conversation": public_conversation.slug} + + update_telemetry_state( + request=request, + telemetry_type="api", + api="fork_public_conversation", + **common.__dict__, + metadata=chat_metadata, + ) + + redirect_uri = str(request.app.url_path_for("chat_page")) + + return Response(status_code=200, content=json.dumps({"status": "ok", "next_url": redirect_uri})) + + +@api_chat.post("/share") +@requires(["authenticated"]) +def duplicate_chat_history_public_conversation( + request: Request, + common: CommonQueryParams, + conversation_id: int, +): + user = request.user.object + + # Duplicate Conversation History to Public Conversation + conversation = ConversationAdapters.get_conversation_by_user(user, request.user.client_app, conversation_id) + + public_conversation = ConversationAdapters.make_public_conversation_copy(conversation) + + public_conversation_url = PublicConversationAdapters.get_public_conversation_url(public_conversation) + + domain = request.headers.get("host") + scheme = request.url.scheme + + update_telemetry_state( + request=request, + telemetry_type="api", + api="post_chat_share", + **common.__dict__, + ) + + return Response( + status_code=200, content=json.dumps({"status": "ok", "url": f"{scheme}://{domain}{public_conversation_url}"}) + ) + + @api_chat.get("/sessions") @requires(["authenticated"]) def chat_sessions( @@ -212,7 +339,8 @@ async def chat_options( ) -> Response: cmd_options = {} 
for cmd in ConversationCommand: - cmd_options[cmd.value] = command_descriptions[cmd] + if cmd in command_descriptions: + cmd_options[cmd.value] = command_descriptions[cmd] update_telemetry_state( request=request, @@ -260,6 +388,7 @@ async def websocket_endpoint( city: Optional[str] = None, region: Optional[str] = None, country: Optional[str] = None, + timezone: Optional[str] = None, ): connection_alive = True @@ -338,6 +467,8 @@ async def websocket_endpoint( await websocket.accept() while connection_alive: try: + if conversation: + await sync_to_async(conversation.refresh_from_db)(fields=["conversation_log"]) q = await websocket.receive_text() except WebSocketDisconnect: logger.debug(f"User {user} disconnected web socket") @@ -350,6 +481,15 @@ async def websocket_endpoint( await send_rate_limit_message(e.detail) break + if is_query_empty(q): + await send_message("start_llm_response") + await send_message( + "It seems like your query is incomplete. Could you please provide more details or specify what you need help with?" 
+ ) + await send_message("end_llm_response") + continue + + user_message_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") conversation_commands = [get_conversation_command(query=q, any_references=True)] await send_status_update(f"**👀 Understanding Query**: {q}") @@ -364,13 +504,14 @@ async def websocket_endpoint( continue meta_log = conversation.conversation_log + is_automated_task = conversation_commands == [ConversationCommand.AutomatedTask] - if conversation_commands == [ConversationCommand.Default]: - conversation_commands = await aget_relevant_information_sources(q, meta_log) + if conversation_commands == [ConversationCommand.Default] or is_automated_task: + conversation_commands = await aget_relevant_information_sources(q, meta_log, is_automated_task) conversation_commands_str = ", ".join([cmd.value for cmd in conversation_commands]) await send_status_update(f"**🗃️ Chose Data Sources to Search:** {conversation_commands_str}") - mode = await aget_relevant_output_modes(q, meta_log) + mode = await aget_relevant_output_modes(q, meta_log, is_automated_task) await send_status_update(f"**🧑🏾‍💻 Decided Response Mode:** {mode.value}") if mode not in conversation_commands: conversation_commands.append(mode) @@ -379,8 +520,47 @@ async def websocket_endpoint( await conversation_command_rate_limiter.update_and_check_if_valid(websocket, cmd) q = q.replace(f"/{cmd.value}", "").strip() + if ConversationCommand.Automation in conversation_commands: + try: + automation, crontime, query_to_run, subject = await create_automation( + q, timezone, user, websocket.url, meta_log + ) + except Exception as e: + logger.error(f"Error scheduling task {q} for {user.email}: {e}") + await send_complete_llm_response( + f"Unable to create automation. Ensure the automation doesn't already exist." 
+ ) + continue + + llm_response = construct_automation_created_message(automation, crontime, query_to_run, subject) + await sync_to_async(save_to_conversation_log)( + q, + llm_response, + user, + meta_log, + user_message_time, + intent_type="automation", + client_application=websocket.user.client_app, + conversation_id=conversation_id, + inferred_queries=[query_to_run], + automation_id=automation.id, + ) + common = CommonQueryParamsClass( + client=websocket.user.client_app, + user_agent=websocket.headers.get("user-agent"), + host=websocket.headers.get("host"), + ) + update_telemetry_state( + request=websocket, + telemetry_type="api", + api="chat", + **common.__dict__, + ) + await send_complete_llm_response(llm_response) + continue + compiled_references, inferred_queries, defiltered_query = await extract_references_and_questions( - websocket, None, meta_log, q, 7, 0.18, conversation_commands, location, send_status_update + websocket, meta_log, q, 7, 0.18, conversation_commands, location, send_status_update ) if compiled_references: @@ -458,6 +638,7 @@ async def websocket_endpoint( image, user, meta_log, + user_message_time, intent_type=intent_type, inferred_queries=[improved_image_prompt], client_application=websocket.user.client_app, @@ -525,6 +706,7 @@ async def chat( city: Optional[str] = None, region: Optional[str] = None, country: Optional[str] = None, + timezone: Optional[str] = None, rate_limiter_per_minute=Depends( ApiUserRateLimiter(requests=5, subscribed_requests=60, window=60, slug="chat_minute") ), @@ -534,6 +716,13 @@ async def chat( ) -> Response: user: KhojUser = request.user.object q = unquote(q) + if is_query_empty(q): + return Response( + content="It seems like your query is incomplete. 
Could you please provide more details or specify what you need help with?", + media_type="text/plain", + status_code=400, + ) + user_message_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") logger.info(f"Chat request by {user.username}: {q}") await is_ready_to_chat(user) @@ -557,9 +746,11 @@ async def chat( else: meta_log = conversation.conversation_log - if conversation_commands == [ConversationCommand.Default]: - conversation_commands = await aget_relevant_information_sources(q, meta_log) - mode = await aget_relevant_output_modes(q, meta_log) + is_automated_task = conversation_commands == [ConversationCommand.AutomatedTask] + + if conversation_commands == [ConversationCommand.Default] or is_automated_task: + conversation_commands = await aget_relevant_information_sources(q, meta_log, is_automated_task) + mode = await aget_relevant_output_modes(q, meta_log, is_automated_task) if mode not in conversation_commands: conversation_commands.append(mode) @@ -574,8 +765,40 @@ async def chat( user_name = await aget_user_name(user) + if ConversationCommand.Automation in conversation_commands: + try: + automation, crontime, query_to_run, subject = await create_automation( + q, timezone, user, request.url, meta_log + ) + except Exception as e: + logger.error(f"Error creating automation {q} for {user.email}: {e}", exc_info=True) + return Response( + content=f"Unable to create automation. 
Ensure the automation doesn't already exist.", + media_type="text/plain", + status_code=500, + ) + + llm_response = construct_automation_created_message(automation, crontime, query_to_run, subject) + await sync_to_async(save_to_conversation_log)( + q, + llm_response, + user, + meta_log, + user_message_time, + intent_type="automation", + client_application=request.user.client_app, + conversation_id=conversation_id, + inferred_queries=[query_to_run], + automation_id=automation.id, + ) + + if stream: + return StreamingResponse(llm_response, media_type="text/event-stream", status_code=200) + else: + return Response(content=llm_response, media_type="text/plain", status_code=200) + compiled_references, inferred_queries, defiltered_query = await extract_references_and_questions( - request, common, meta_log, q, (n or 5), (d or math.inf), conversation_commands, location + request, meta_log, q, (n or 5), (d or math.inf), conversation_commands, location ) online_results: Dict[str, Dict] = {} @@ -638,6 +861,7 @@ async def chat( image, user, meta_log, + user_message_time, intent_type=intent_type, inferred_queries=[improved_image_prompt], client_application=request.user.client_app, diff --git a/src/khoj/routers/api_config.py b/src/khoj/routers/api_config.py index 64345e3d..dd84e317 100644 --- a/src/khoj/routers/api_config.py +++ b/src/khoj/routers/api_config.py @@ -7,7 +7,7 @@ from asgiref.sync import sync_to_async from fastapi import APIRouter, HTTPException, Request from fastapi.requests import Request from fastapi.responses import Response -from starlette.authentication import requires +from starlette.authentication import has_required_scope, requires from khoj.database import adapters from khoj.database.adapters import ConversationAdapters, EntryAdapters @@ -20,6 +20,7 @@ from khoj.database.models import ( LocalPdfConfig, LocalPlaintextConfig, NotionConfig, + Subscription, ) from khoj.routers.helpers import CommonQueryParams, update_telemetry_state from khoj.utils import 
constants, state @@ -236,6 +237,10 @@ async def update_chat_model( client: Optional[str] = None, ): user = request.user.object + subscribed = has_required_scope(request, ["premium"]) + + if not subscribed: + raise HTTPException(status_code=403, detail="User is not subscribed to premium") new_config = await ConversationAdapters.aset_user_conversation_processor(user, int(id)) diff --git a/src/khoj/routers/auth.py b/src/khoj/routers/auth.py index 32a3f845..6e0e30c1 100644 --- a/src/khoj/routers/auth.py +++ b/src/khoj/routers/auth.py @@ -12,7 +12,7 @@ from starlette.responses import HTMLResponse, RedirectResponse, Response from starlette.status import HTTP_302_FOUND from khoj.database.adapters import ( - create_khoj_token, + acreate_khoj_token, delete_khoj_token, get_khoj_tokens, get_or_create_user, @@ -67,9 +67,9 @@ async def login(request: Request): async def generate_token(request: Request, token_name: Optional[str] = None): "Generate API token for given user" if token_name: - token = await create_khoj_token(user=request.user.object, name=token_name) + token = await acreate_khoj_token(user=request.user.object, name=token_name) else: - token = await create_khoj_token(user=request.user.object) + token = await acreate_khoj_token(user=request.user.object) return { "token": token.token, "name": token.name, @@ -86,7 +86,7 @@ def get_tokens(request: Request): @auth_router.delete("/token") @requires(["authenticated"], redirect="login_page") -async def delete_token(request: Request, token: str) -> str: +async def delete_token(request: Request, token: str): "Delete API token for given user" return await delete_khoj_token(user=request.user.object, token=token) diff --git a/src/khoj/routers/email.py b/src/khoj/routers/email.py index 86bf67ee..b1eb418e 100644 --- a/src/khoj/routers/email.py +++ b/src/khoj/routers/email.py @@ -6,6 +6,7 @@ try: except ImportError: pass +import markdown_it from django.conf import settings from jinja2 import Environment, FileSystemLoader @@ -43,7 
+44,30 @@ async def send_welcome_email(name, email): { "from": "team@khoj.dev", "to": email, - "subject": f"Welcome to Khoj, {name}!" if name else "Welcome to Khoj!", + "subject": f"{name}, four ways to use Khoj" if name else "Four ways to use Khoj", "html": html_content, } ) + + +def send_task_email(name, email, query, result, subject): + if not is_resend_enabled(): + logger.debug("Email sending disabled") + return + + logger.info(f"Sending email to {email} for task {subject}") + + template = env.get_template("task.html") + + html_result = markdown_it.MarkdownIt().render(result) + html_content = template.render(name=name, subject=subject, query=query, result=html_result) + + r = resend.Emails.send( + { + "from": "Khoj ", + "to": email, + "subject": f"✨ {subject}", + "html": html_content, + } + ) + return r diff --git a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py index 96c713e0..968eb040 100644 --- a/src/khoj/routers/helpers.py +++ b/src/khoj/routers/helpers.py @@ -1,8 +1,10 @@ import asyncio import base64 +import hashlib import io import json import logging +import re from concurrent.futures import ThreadPoolExecutor from datetime import datetime, timedelta, timezone from functools import partial @@ -17,18 +19,35 @@ from typing import ( Tuple, Union, ) +from urllib.parse import parse_qs, urlencode +import cron_descriptor import openai +import pytz +import requests +from apscheduler.job import Job +from apscheduler.triggers.cron import CronTrigger +from asgiref.sync import sync_to_async from fastapi import Depends, Header, HTTPException, Request, UploadFile from PIL import Image from starlette.authentication import has_required_scope +from starlette.requests import URL -from khoj.database.adapters import AgentAdapters, ConversationAdapters, EntryAdapters +from khoj.database.adapters import ( + AgentAdapters, + AutomationAdapters, + ConversationAdapters, + EntryAdapters, + create_khoj_token, + get_khoj_tokens, + run_with_process_lock, +) from 
khoj.database.models import ( ChatModelOptions, ClientApplication, Conversation, KhojUser, + ProcessLock, Subscription, TextToImageModelConfig, UserRequests, @@ -44,6 +63,7 @@ from khoj.processor.conversation.utils import ( generate_chatml_messages_with_context, save_to_conversation_log, ) +from khoj.routers.email import is_resend_enabled, send_task_email from khoj.routers.storage import upload_image from khoj.utils import state from khoj.utils.config import OfflineChatProcessorModel @@ -64,19 +84,24 @@ logger = logging.getLogger(__name__) executor = ThreadPoolExecutor(max_workers=1) +def is_query_empty(query: str) -> bool: + return is_none_or_empty(query.strip()) + + def validate_conversation_config(): default_config = ConversationAdapters.get_default_conversation_config() if default_config is None: raise HTTPException(status_code=500, detail="Contact the server administrator to set a default chat model.") - if default_config.model_type == "openai" and not ConversationAdapters.has_valid_openai_conversation_config(): + if default_config.model_type == "openai" and not default_config.openai_config: raise HTTPException(status_code=500, detail="Contact the server administrator to set a default chat model.") async def is_ready_to_chat(user: KhojUser): - has_openai_config = await ConversationAdapters.has_openai_chat() - user_conversation_config = await ConversationAdapters.aget_user_conversation_config(user) + user_conversation_config = (await ConversationAdapters.aget_user_conversation_config(user)) or ( + await ConversationAdapters.aget_default_conversation_config() + ) if user_conversation_config and user_conversation_config.model_type == "offline": chat_model = user_conversation_config.chat_model @@ -86,8 +111,14 @@ async def is_ready_to_chat(user: KhojUser): state.offline_chat_processor_config = OfflineChatProcessorModel(chat_model, max_tokens) return True - if not has_openai_config: - raise HTTPException(status_code=500, detail="Set your OpenAI API key or enable 
Local LLM via Khoj settings.") + if ( + user_conversation_config + and user_conversation_config.model_type == "openai" + and user_conversation_config.openai_config + ): + return True + + raise HTTPException(status_code=500, detail="Set your OpenAI API key or enable Local LLM via Khoj settings.") def update_telemetry_state( @@ -127,7 +158,7 @@ def update_telemetry_state( def construct_chat_history(conversation_history: dict, n: int = 4, agent_name="AI") -> str: chat_history = "" for chat in conversation_history.get("chat", [])[-n:]: - if chat["by"] == "khoj" and chat["intent"].get("type") == "remember": + if chat["by"] == "khoj" and chat["intent"].get("type") in ["remember", "reminder"]: chat_history += f"User: {chat['intent']['query']}\n" chat_history += f"{agent_name}: {chat['message']}\n" elif chat["by"] == "khoj" and ("text-to-image" in chat["intent"].get("type")): @@ -145,8 +176,12 @@ def get_conversation_command(query: str, any_references: bool = False) -> Conver return ConversationCommand.General elif query.startswith("/online"): return ConversationCommand.Online + elif query.startswith("/webpage"): + return ConversationCommand.Webpage elif query.startswith("/image"): return ConversationCommand.Image + elif query.startswith("/automated_task"): + return ConversationCommand.AutomatedTask # If no relevant notes found for the given query elif not any_references: return ConversationCommand.General @@ -159,7 +194,19 @@ async def agenerate_chat_response(*args): return await loop.run_in_executor(executor, generate_chat_response, *args) -async def aget_relevant_information_sources(query: str, conversation_history: dict): +async def acreate_title_from_query(query: str) -> str: + """ + Create a title from the given query + """ + title_generation_prompt = prompts.subject_generation.format(query=query) + + with timer("Chat actor: Generate title from query", logger): + response = await send_message_to_model_wrapper(title_generation_prompt) + + return response.strip() + + 
+async def aget_relevant_information_sources(query: str, conversation_history: dict, is_task: bool): """ Given a query, determine which of the available tools the agent should use in order to answer appropriately. """ @@ -190,7 +237,7 @@ async def aget_relevant_information_sources(query: str, conversation_history: di logger.error(f"Invalid response for determining relevant tools: {response}") return tool_options - final_response = [] + final_response = [] if not is_task else [ConversationCommand.AutomatedTask] for llm_suggested_tool in response: if llm_suggested_tool in tool_options.keys(): # Check whether the tool exists as a valid ConversationCommand @@ -204,7 +251,7 @@ async def aget_relevant_information_sources(query: str, conversation_history: di return [ConversationCommand.Default] -async def aget_relevant_output_modes(query: str, conversation_history: dict): +async def aget_relevant_output_modes(query: str, conversation_history: dict, is_task: bool = False): """ Given a query, determine which of the available tools the agent should use in order to answer appropriately. """ @@ -213,6 +260,9 @@ async def aget_relevant_output_modes(query: str, conversation_history: dict): mode_options_str = "" for mode, description in mode_descriptions_for_llm.items(): + # Do not allow tasks to schedule another task + if is_task and mode == ConversationCommand.Automation: + continue mode_options[mode.value] = description mode_options_str += f'- "{mode.value}": "{description}"\n' @@ -305,6 +355,30 @@ async def generate_online_subqueries(q: str, conversation_history: dict, locatio return [q] +async def schedule_query(q: str, conversation_history: dict) -> Tuple[str, ...]: + """ + Schedule the date, time to run the query. Assume the server timezone is UTC. 
+ """ + chat_history = construct_chat_history(conversation_history) + + crontime_prompt = prompts.crontime_prompt.format( + query=q, + chat_history=chat_history, + ) + + raw_response = await send_message_to_model_wrapper(crontime_prompt, response_type="json_object") + + # Validate that the response is a non-empty, JSON-serializable list + try: + raw_response = raw_response.strip() + response: Dict[str, str] = json.loads(raw_response) + if not response or not isinstance(response, Dict) or len(response) != 3: + raise AssertionError(f"Invalid response for scheduling query : {response}") + return response.get("crontime"), response.get("query"), response.get("subject") + except Exception: + raise AssertionError(f"Invalid response for scheduling query: {raw_response}") + + async def extract_relevant_info(q: str, corpus: str) -> Union[str, None]: """ Extract relevant information for a given query from the target corpus @@ -407,8 +481,9 @@ async def send_message_to_model_wrapper( ) elif conversation_config.model_type == "openai": - openai_chat_config = await ConversationAdapters.aget_openai_conversation_config() + openai_chat_config = conversation_config.openai_config api_key = openai_chat_config.api_key + api_base_url = openai_chat_config.api_base_url truncated_messages = generate_chatml_messages_with_context( user_message=message, system_message=system_message, @@ -417,6 +492,55 @@ async def send_message_to_model_wrapper( tokenizer_name=tokenizer, ) + openai_response = send_message_to_model( + messages=truncated_messages, + api_key=api_key, + model=chat_model, + response_type=response_type, + api_base_url=api_base_url, + ) + + return openai_response + else: + raise HTTPException(status_code=500, detail="Invalid conversation config") + + +def send_message_to_model_wrapper_sync( + message: str, + system_message: str = "", + response_type: str = "text", +): + conversation_config: ChatModelOptions = ConversationAdapters.get_default_conversation_config() + + if 
conversation_config is None: + raise HTTPException(status_code=500, detail="Contact the server administrator to set a default chat model.") + + chat_model = conversation_config.chat_model + max_tokens = conversation_config.max_prompt_size + + if conversation_config.model_type == "offline": + if state.offline_chat_processor_config is None or state.offline_chat_processor_config.loaded_model is None: + state.offline_chat_processor_config = OfflineChatProcessorModel(chat_model, max_tokens) + + loaded_model = state.offline_chat_processor_config.loaded_model + truncated_messages = generate_chatml_messages_with_context( + user_message=message, system_message=system_message, model_name=chat_model, loaded_model=loaded_model + ) + + return send_message_to_model_offline( + messages=truncated_messages, + loaded_model=loaded_model, + model=chat_model, + streaming=False, + ) + + elif conversation_config.model_type == "openai": + openai_chat_config = ConversationAdapters.get_openai_conversation_config() + api_key = openai_chat_config.api_key + truncated_messages = generate_chatml_messages_with_context( + user_message=message, system_message=system_message, model_name=chat_model + ) + openai_response = send_message_to_model( messages=truncated_messages, api_key=api_key, model=chat_model, response_type=response_type ) @@ -480,7 +604,7 @@ def generate_chat_response( ) elif conversation_config.model_type == "openai": - openai_chat_config = ConversationAdapters.get_openai_conversation_config() + openai_chat_config = conversation_config.openai_config api_key = openai_chat_config.api_key chat_model = conversation_config.chat_model chat_response = converse( @@ -490,6 +614,7 @@ def generate_chat_response( conversation_log=meta_log, model=chat_model, api_key=api_key, + api_base_url=openai_chat_config.api_base_url, completion_func=partial_completion, conversation_commands=conversation_commands, max_prompt_size=conversation_config.max_prompt_size, @@ -534,7 +659,7 @@ async def text_to_image( 
text2image_model = text_to_image_config.model_name chat_history = "" for chat in conversation_log.get("chat", [])[-4:]: - if chat["by"] == "khoj" and chat["intent"].get("type") == "remember": + if chat["by"] == "khoj" and chat["intent"].get("type") in ["remember", "reminder"]: chat_history += f"Q: {chat['intent']['query']}\n" chat_history += f"A: {chat['message']}\n" elif chat["by"] == "khoj" and "text-to-image" in chat["intent"].get("type"): @@ -738,3 +863,162 @@ class CommonQueryParamsClass: CommonQueryParams = Annotated[CommonQueryParamsClass, Depends()] + + +def should_notify(original_query: str, executed_query: str, ai_response: str) -> bool: + """ + Decide whether to notify the user of the AI response. + Default to notifying the user for now. + """ + if any(is_none_or_empty(message) for message in [original_query, executed_query, ai_response]): + return False + + to_notify_or_not = prompts.to_notify_or_not.format( + original_query=original_query, + executed_query=executed_query, + response=ai_response, + ) + + with timer("Chat actor: Decide to notify user of automation response", logger): + try: + response = send_message_to_model_wrapper_sync(to_notify_or_not) + should_notify_result = "no" not in response.lower() + logger.info(f'Decided to {"not " if not should_notify_result else ""}notify user of automation response.') + return should_notify_result + except: + logger.warning(f"Fallback to notify user of automation response as failed to infer should notify or not.") + return True + + +def scheduled_chat( + query_to_run: str, scheduling_request: str, subject: str, user: KhojUser, calling_url: URL, job_id: str = None +): + logger.info(f"Processing scheduled_chat: {query_to_run}") + if job_id: + # Get the job object and check whether the time is valid for it to run. This helps avoid race conditions that cause the same job to be run multiple times. 
+ job = AutomationAdapters.get_automation(user, job_id) + last_run_time = AutomationAdapters.get_job_last_run(user, job) + + # Convert last_run_time from %Y-%m-%d %I:%M %p %Z to datetime object + if last_run_time: + last_run_time = datetime.strptime(last_run_time, "%Y-%m-%d %I:%M %p %Z").replace(tzinfo=timezone.utc) + + # If the last run time was within the last 6 hours, don't run it again. This helps avoid multithreading issues and rate limits. + if (datetime.now(timezone.utc) - last_run_time).total_seconds() < 21600: + logger.info(f"Skipping scheduled chat {job_id} as the next run time is in the future.") + return + + # Extract relevant params from the original URL + scheme = "http" if not calling_url.is_secure else "https" + query_dict = parse_qs(calling_url.query) + + # Replace the original scheduling query with the scheduled query + query_dict["q"] = [query_to_run] + + # Construct the URL to call the chat API with the scheduled query string + encoded_query = urlencode(query_dict, doseq=True) + url = f"{scheme}://{calling_url.netloc}/api/chat?{encoded_query}" + + # Construct the Headers for the chat API + headers = {"User-Agent": "Khoj"} + if not state.anonymous_mode: + # Add authorization request header in non-anonymous mode + token = get_khoj_tokens(user) + if is_none_or_empty(token): + token = create_khoj_token(user).token + else: + token = token[0].token + headers["Authorization"] = f"Bearer {token}" + + # Call the chat API endpoint with authenticated user token and query + raw_response = requests.get(url, headers=headers) + + # Stop if the chat API call was not successful + if raw_response.status_code != 200: + logger.error(f"Failed to run schedule chat: {raw_response.text}") + return None + + # Extract the AI response from the chat API response + cleaned_query = re.sub(r"^/automated_task\s*", "", query_to_run).strip() + if raw_response.headers.get("Content-Type") == "application/json": + response_map = raw_response.json() + ai_response = 
response_map.get("response") or response_map.get("image") + else: + ai_response = raw_response.text + + # Notify user if the AI response is satisfactory + if should_notify(original_query=scheduling_request, executed_query=cleaned_query, ai_response=ai_response): + if is_resend_enabled(): + send_task_email(user.get_short_name(), user.email, cleaned_query, ai_response, subject) + else: + return raw_response + + +async def create_automation(q: str, timezone: str, user: KhojUser, calling_url: URL, meta_log: dict = {}): + crontime, query_to_run, subject = await schedule_query(q, meta_log) + job = await schedule_automation(query_to_run, subject, crontime, timezone, q, user, calling_url) + return job, crontime, query_to_run, subject + + +async def schedule_automation( + query_to_run: str, + subject: str, + crontime: str, + timezone: str, + scheduling_request: str, + user: KhojUser, + calling_url: URL, +): + user_timezone = pytz.timezone(timezone) + trigger = CronTrigger.from_crontab(crontime, user_timezone) + trigger.jitter = 60 + # Generate id and metadata used by task scheduler and process locks for the task runs + job_metadata = json.dumps( + { + "query_to_run": query_to_run, + "scheduling_request": scheduling_request, + "subject": subject, + "crontime": crontime, + } + ) + query_id = hashlib.md5(f"{query_to_run}_{crontime}".encode("utf-8")).hexdigest() + job_id = f"automation_{user.uuid}_{query_id}" + job = await sync_to_async(state.scheduler.add_job)( + run_with_process_lock, + trigger=trigger, + args=( + scheduled_chat, + f"{ProcessLock.Operation.SCHEDULED_JOB}_{user.uuid}_{query_id}", + ), + kwargs={ + "query_to_run": query_to_run, + "scheduling_request": scheduling_request, + "subject": subject, + "user": user, + "calling_url": calling_url, + "job_id": job_id, + }, + id=job_id, + name=job_metadata, + max_instances=2, # Allow second instance to kill any previous instance with stale lock + ) + return job + + +def construct_automation_created_message(automation: Job, 
crontime: str, query_to_run: str, subject: str): + # Display next run time in user timezone instead of UTC + schedule = f'{cron_descriptor.get_description(crontime)} {automation.next_run_time.strftime("%Z")}' + next_run_time = automation.next_run_time.strftime("%Y-%m-%d %I:%M %p %Z") + # Remove /automated_task prefix from inferred_query + unprefixed_query_to_run = re.sub(r"^\/automated_task\s*", "", query_to_run) + # Create the automation response + automation_icon_url = f"/static/assets/icons/automation.svg" + return f""" + ### ![]({automation_icon_url}) Created Automation +- Subject: **{subject}** +- Query to Run: "{unprefixed_query_to_run}" +- Schedule: `{schedule}` +- Next Run At: {next_run_time} + +Manage your automations [here](/automations). + """.strip() diff --git a/src/khoj/routers/web_client.py b/src/khoj/routers/web_client.py index 9e3a39b5..8c3fcd2a 100644 --- a/src/khoj/routers/web_client.py +++ b/src/khoj/routers/web_client.py @@ -11,8 +11,10 @@ from starlette.authentication import has_required_scope, requires from khoj.database import adapters from khoj.database.adapters import ( AgentAdapters, + AutomationAdapters, ConversationAdapters, EntryAdapters, + PublicConversationAdapters, get_user_github_config, get_user_name, get_user_notion_config, @@ -349,9 +351,9 @@ def notion_config_page(request: Request): @web_client.get("/config/content-source/computer", response_class=HTMLResponse) @requires(["authenticated"], redirect="login_page") def computer_config_page(request: Request): - user = request.user.object - user_picture = request.session.get("user", {}).get("picture") - has_documents = EntryAdapters.user_has_entries(user=user) + user = request.user.object if request.user.is_authenticated else None + user_picture = request.session.get("user", {}).get("picture") if user else None + has_documents = EntryAdapters.user_has_entries(user=user) if user else False return templates.TemplateResponse( "content_source_computer_input.html", @@ -364,3 +366,76 @@ 
def computer_config_page(request: Request): "khoj_version": state.khoj_version, }, ) + + +@web_client.get("/share/chat/{public_conversation_slug}", response_class=HTMLResponse) +def view_public_conversation(request: Request): + public_conversation_slug = request.path_params.get("public_conversation_slug") + public_conversation = PublicConversationAdapters.get_public_conversation_by_slug(public_conversation_slug) + if not public_conversation: + return templates.TemplateResponse( + "404.html", + context={ + "request": request, + "khoj_version": state.khoj_version, + }, + ) + user = request.user.object if request.user.is_authenticated else None + user_picture = request.session.get("user", {}).get("picture") if user else None + has_documents = EntryAdapters.user_has_entries(user=user) if user else False + + all_agents = AgentAdapters.get_all_accessible_agents(request.user.object if request.user.is_authenticated else None) + + # Filter out the current agent + all_agents = [agent for agent in all_agents if agent != public_conversation.agent] + agents_packet = [] + for agent in all_agents: + agents_packet.append( + { + "slug": agent.slug, + "avatar": agent.avatar, + "name": agent.name, + } + ) + + google_client_id = os.environ.get("GOOGLE_CLIENT_ID") + redirect_uri = str(request.app.url_path_for("auth")) + next_url = str( + request.app.url_path_for("view_public_conversation", public_conversation_slug=public_conversation_slug) + ) + + return templates.TemplateResponse( + "public_conversation.html", + context={ + "request": request, + "username": user.username if user else None, + "user_photo": user_picture, + "is_active": has_required_scope(request, ["premium"]), + "has_documents": has_documents, + "khoj_version": state.khoj_version, + "public_conversation_slug": public_conversation_slug, + "agents": agents_packet, + "google_client_id": google_client_id, + "redirect_uri": f"{redirect_uri}?next={next_url}", + }, + ) + + +@web_client.get("/automations", 
response_class=HTMLResponse) +@requires(["authenticated"], redirect="login_page") +def automations_config_page(request: Request): + user = request.user.object + user_picture = request.session.get("user", {}).get("picture") + has_documents = EntryAdapters.user_has_entries(user=user) + + return templates.TemplateResponse( + "config_automation.html", + context={ + "request": request, + "username": user.username, + "user_photo": user_picture, + "is_active": has_required_scope(request, ["premium"]), + "has_documents": has_documents, + "khoj_version": state.khoj_version, + }, + ) diff --git a/src/khoj/utils/helpers.py b/src/khoj/utils/helpers.py index 3cb5bfac..4b24b828 100644 --- a/src/khoj/utils/helpers.py +++ b/src/khoj/utils/helpers.py @@ -304,6 +304,8 @@ class ConversationCommand(str, Enum): Online = "online" Webpage = "webpage" Image = "image" + Automation = "automation" + AutomatedTask = "automated_task" command_descriptions = { @@ -313,6 +315,7 @@ command_descriptions = { ConversationCommand.Online: "Search for information on the internet.", ConversationCommand.Webpage: "Get information from webpage links provided by you.", ConversationCommand.Image: "Generate images by describing your imagination in words.", + ConversationCommand.Automation: "Automatically run your query at a specified time or interval.", ConversationCommand.Help: "Display a help message with all available commands and other metadata.", } @@ -325,7 +328,8 @@ tool_descriptions_for_llm = { } mode_descriptions_for_llm = { - ConversationCommand.Image: "Use this if you think the user is requesting an image or visual response to their query.", + ConversationCommand.Image: "Use this if the user is requesting an image or visual response to their query.", + ConversationCommand.Automation: "Use this if the user is requesting a response at a scheduled date or time.", ConversationCommand.Default: "Use this if the other response modes don't seem to fit the query.", } diff --git a/src/khoj/utils/state.py 
b/src/khoj/utils/state.py index b431225d..7439929f 100644 --- a/src/khoj/utils/state.py +++ b/src/khoj/utils/state.py @@ -2,8 +2,9 @@ import os import threading from collections import defaultdict from pathlib import Path -from typing import Dict, List +from typing import Any, Dict, List +from apscheduler.schedulers.background import BackgroundScheduler from openai import OpenAI from whisper import Whisper @@ -29,11 +30,13 @@ cli_args: List[str] = None query_cache: Dict[str, LRU] = defaultdict(LRU) chat_lock = threading.Lock() SearchType = utils_config.SearchType +scheduler: BackgroundScheduler = None telemetry: List[Dict[str, str]] = [] khoj_version: str = None device = get_device() chat_on_gpu: bool = True anonymous_mode: bool = False +pretrained_tokenizers: Dict[str, Any] = dict() billing_enabled: bool = ( os.getenv("STRIPE_API_KEY") is not None and os.getenv("STRIPE_SIGNING_SECRET") is not None diff --git a/tests/test_openai_chat_actors.py b/tests/test_openai_chat_actors.py index df9d8f07..7c4f5ee3 100644 --- a/tests/test_openai_chat_actors.py +++ b/tests/test_openai_chat_actors.py @@ -12,8 +12,11 @@ from khoj.routers.helpers import ( aget_relevant_output_modes, generate_online_subqueries, infer_webpage_urls, + schedule_query, + should_notify, ) from khoj.utils.helpers import ConversationCommand +from khoj.utils.rawconfig import LocationData # Initialize variables for tests api_key = os.getenv("OPENAI_API_KEY") @@ -490,71 +493,42 @@ async def test_websearch_khoj_website_for_info_about_khoj(chat_client): # ---------------------------------------------------------------------------------------------------- @pytest.mark.anyio @pytest.mark.django_db(transaction=True) -async def test_use_default_response_mode(chat_client): - # Arrange - user_query = "What's the latest in the Israel/Palestine conflict?" 
-
+@pytest.mark.parametrize(
+    "user_query, expected_mode",
+    [
+        ("What's the latest in the Israel/Palestine conflict?", "default"),
+        ("Summarize the latest tech news every Monday evening", "automation"),
+        ("Paint a scenery in Timbuktu in the winter", "image"),
+        ("Remind me, when did I last visit the Serengeti?", "default"),
+    ],
+)
+async def test_use_default_response_mode(chat_client, user_query, expected_mode):
     # Act
     mode = await aget_relevant_output_modes(user_query, {})
 
     # Assert
-    assert mode.value == "default"
+    assert mode.value == expected_mode
 
 
 # ----------------------------------------------------------------------------------------------------
 @pytest.mark.anyio
 @pytest.mark.django_db(transaction=True)
-async def test_use_image_response_mode(chat_client):
-    # Arrange
-    user_query = "Paint a scenery in Timbuktu in the winter"
-
-    # Act
-    mode = await aget_relevant_output_modes(user_query, {})
-
-    # Assert
-    assert mode.value == "image"
-
-
-# ----------------------------------------------------------------------------------------------------
-@pytest.mark.anyio
-@pytest.mark.django_db(transaction=True)
-async def test_select_data_sources_actor_chooses_to_search_notes(chat_client):
-    # Arrange
-    user_query = "Where did I learn to swim?"
-
+@pytest.mark.parametrize(
+    "user_query, expected_conversation_commands",
+    [
+        ("Where did I learn to swim?", [ConversationCommand.Notes]),
+        ("Where is the nearest hospital?", [ConversationCommand.Online]),
+        ("Summarize the wikipedia page on the history of the internet", [ConversationCommand.Webpage]),
+    ],
+)
+async def test_select_data_sources_actor_chooses_to_search_notes(
+    chat_client, user_query, expected_conversation_commands
+):
     # Act
     conversation_commands = await aget_relevant_information_sources(user_query, {})
 
     # Assert
-    assert ConversationCommand.Notes in conversation_commands
-
-
-# ----------------------------------------------------------------------------------------------------
-@pytest.mark.anyio
-@pytest.mark.django_db(transaction=True)
-async def test_select_data_sources_actor_chooses_to_search_online(chat_client):
-    # Arrange
-    user_query = "Where is the nearest hospital?"
-
-    # Act
-    conversation_commands = await aget_relevant_information_sources(user_query, {})
-
-    # Assert
-    assert ConversationCommand.Online in conversation_commands
-
-
-# ----------------------------------------------------------------------------------------------------
-@pytest.mark.anyio
-@pytest.mark.django_db(transaction=True)
-async def test_select_data_sources_actor_chooses_to_read_webpage(chat_client):
-    # Arrange
-    user_query = "Summarize the wikipedia page on the history of the internet"
-
-    # Act
-    conversation_commands = await aget_relevant_information_sources(user_query, {})
-
-    # Assert
-    assert ConversationCommand.Webpage in conversation_commands
+    assert all(command in conversation_commands for command in expected_conversation_commands)
 
 
 # ----------------------------------------------------------------------------------------------------
@@ -571,6 +545,104 @@ async def test_infer_webpage_urls_actor_extracts_correct_links(chat_client):
     assert "https://en.wikipedia.org/wiki/History_of_the_Internet" in urls
 
 
+
+#
---------------------------------------------------------------------------------------------------- +@pytest.mark.anyio +@pytest.mark.django_db(transaction=True) +@pytest.mark.parametrize( + "user_query, location, expected_crontime, expected_qs, unexpected_qs", + [ + ( + "Share the weather forecast for the next day daily at 7:30pm", + ("Ubud", "Bali", "Indonesia"), + "30 11 * * *", # ensure correctly converts to utc + ["weather forecast", "ubud"], + ["7:30"], + ), + ( + "Notify me when the new President of Brazil is announced", + ("Sao Paulo", "Sao Paulo", "Brazil"), + "* *", # crontime is variable + ["brazil", "president"], + ["notify"], # ensure reminder isn't re-triggered on scheduled query run + ), + ( + "Let me know whenever Elon leaves Twitter. Check this every afternoon at 12", + ("Karachi", "Sindh", "Pakistan"), + "0 7 * * *", # ensure correctly converts to utc + ["elon", "twitter"], + ["12"], + ), + ( + "Draw a wallpaper every morning using the current weather", + ("Bogota", "Cundinamarca", "Colombia"), + "* * *", # daily crontime + ["weather", "wallpaper", "bogota"], + ["every"], + ), + ], +) +async def test_infer_task_scheduling_request( + chat_client, user_query, location, expected_crontime, expected_qs, unexpected_qs +): + # Arrange + location_data = LocationData(city=location[0], region=location[1], country=location[2]) + + # Act + crontime, inferred_query = await schedule_query(user_query, location_data, {}) + inferred_query = inferred_query.lower() + + # Assert + assert expected_crontime in crontime + for expected_q in expected_qs: + assert expected_q in inferred_query, f"Expected fragment {expected_q} in query: {inferred_query}" + for unexpected_q in unexpected_qs: + assert ( + unexpected_q not in inferred_query + ), f"Did not expect fragment '{unexpected_q}' in query: '{inferred_query}'" + + +# ---------------------------------------------------------------------------------------------------- +@pytest.mark.anyio 
+@pytest.mark.django_db(transaction=True)
+@pytest.mark.parametrize(
+    "scheduling_query, executing_query, generated_response, expected_should_notify",
+    [
+        (
+            "Notify me if it is going to rain tomorrow?",
+            "What's the weather forecast for tomorrow?",
+            "It is sunny and warm tomorrow.",
+            False,
+        ),
+        (
+            "Summarize the latest news every morning",
+            "Summarize today's news",
+            "Today in the news: AI is taking over the world",
+            True,
+        ),
+        (
+            "Create a weather wallpaper every morning using the current weather",
+            "Paint a weather wallpaper using the current weather",
+            "https://khoj-generated-wallpaper.khoj.dev/user110/weathervane.webp",
+            True,
+        ),
+        (
+            "Let me know the election results once they are officially declared",
+            "What are the results of the elections? Has the winner been declared?",
+            "The election results have not been declared yet.",
+            False,
+        ),
+    ],
+)
+def test_decision_on_when_to_notify_scheduled_task_results(
+    chat_client, scheduling_query, executing_query, generated_response, expected_should_notify
+):
+    # Act
+    generated_should_notify = should_notify(scheduling_query, executing_query, generated_response)
+
+    # Assert
+    assert generated_should_notify == expected_should_notify
+
+
 # Helpers
 # ----------------------------------------------------------------------------------------------------
 def populate_chat_history(message_list):
diff --git a/versions.json b/versions.json
index c71fd3f2..8aaaaeb1 100644
--- a/versions.json
+++ b/versions.json
@@ -45,5 +45,8 @@
     "1.10.0": "0.15.0",
     "1.10.1": "0.15.0",
     "1.10.2": "0.15.0",
-    "1.11.0": "0.15.0"
+    "1.11.0": "0.15.0",
+    "1.11.1": "0.15.0",
+    "1.11.2": "0.15.0",
+    "1.12.0": "0.15.0"
 }