Merge with features/internet-enabled-search

This commit is contained in:
sabaimran
2023-11-22 07:25:24 -08:00
88 changed files with 365 additions and 316 deletions

View File

@@ -48,7 +48,7 @@ jobs:
- name: 🌡️ Validate Python Package - name: 🌡️ Validate Python Package
run: | run: |
# Validate PyPi Package # Validate PyPi Package
pipx run check-wheel-contents dist/*.whl pipx run check-wheel-contents dist/*.whl --ignore W004
pipx run twine check dist/* pipx run twine check dist/*
- name: ⏫ Upload Python Package Artifacts - name: ⏫ Upload Python Package Artifacts

View File

@@ -3,7 +3,7 @@ name: test
on: on:
pull_request: pull_request:
paths: paths:
- src/** - src/khoj/**
- tests/** - tests/**
- config/** - config/**
- pyproject.toml - pyproject.toml

View File

@@ -17,7 +17,7 @@ RUN sed -i 's/dynamic = \["version"\]/version = "0.0.0"/' pyproject.toml && \
COPY . . COPY . .
# Set the PYTHONPATH environment variable in order for it to find the Django app. # Set the PYTHONPATH environment variable in order for it to find the Django app.
ENV PYTHONPATH=/app/src:$PYTHONPATH ENV PYTHONPATH=/app/src/khoj:$PYTHONPATH
# Run the Application # Run the Application
# There are more arguments required for the application to run, # There are more arguments required for the application to run,

View File

@@ -6,5 +6,5 @@
"description": "An AI copilot for your Second Brain", "description": "An AI copilot for your Second Brain",
"author": "Khoj Inc.", "author": "Khoj Inc.",
"authorUrl": "https://github.com/khoj-ai", "authorUrl": "https://github.com/khoj-ai",
"isDesktopOnly": true "isDesktopOnly": false
} }

View File

@@ -46,7 +46,7 @@ dependencies = [
"tiktoken >= 0.3.2", "tiktoken >= 0.3.2",
"tenacity >= 8.2.2", "tenacity >= 8.2.2",
"pillow == 9.3.0", "pillow == 9.3.0",
"pydantic >= 1.10.10", "pydantic >= 2.0.0",
"pyyaml == 6.0", "pyyaml == 6.0",
"rich >= 13.3.1", "rich >= 13.3.1",
"schedule == 1.1.0", "schedule == 1.1.0",
@@ -128,6 +128,9 @@ warn_unused_ignores = false
[tool.black] [tool.black]
line-length = 120 line-length = 120
[tool.isort]
profile = "black"
[tool.pytest.ini_options] [tool.pytest.ini_options]
addopts = "--strict-markers" addopts = "--strict-markers"
markers = [ markers = [

View File

@@ -1,5 +1,5 @@
[pytest] [pytest]
DJANGO_SETTINGS_MODULE = app.settings DJANGO_SETTINGS_MODULE = khoj.app.settings
pythonpath = . src pythonpath = . src
testpaths = tests testpaths = tests
markers = markers =

View File

@@ -120,7 +120,8 @@
// Create a new div for the chat message text and append it to the chat message // Create a new div for the chat message text and append it to the chat message
let chatMessageText = document.createElement('div'); let chatMessageText = document.createElement('div');
chatMessageText.className = `chat-message-text ${by}`; chatMessageText.className = `chat-message-text ${by}`;
chatMessageText.innerHTML = formattedMessage; let textNode = document.createTextNode(formattedMessage);
chatMessageText.appendChild(textNode);
chatMessage.appendChild(chatMessageText); chatMessage.appendChild(chatMessageText);
// Append annotations div to the chat message // Append annotations div to the chat message

View File

@@ -112,14 +112,14 @@
} else if ( } else if (
item.additional.file.endsWith(".md") || item.additional.file.endsWith(".md") ||
item.additional.file.endsWith(".markdown") || item.additional.file.endsWith(".markdown") ||
(item.additional.file.includes("issues") && item.additional.file.includes("github.com")) || (item.additional.file.includes("issues") && item.additional.source === "github") ||
(item.additional.file.includes("commit") && item.additional.file.includes("github.com")) (item.additional.file.includes("commit") && item.additional.source === "github")
) )
{ {
html += render_markdown(query, [item]); html += render_markdown(query, [item]);
} else if (item.additional.file.endsWith(".pdf")) { } else if (item.additional.file.endsWith(".pdf")) {
html += render_pdf(query, [item]); html += render_pdf(query, [item]);
} else if (item.additional.file.includes("notion.so")) { } else if (item.additional.source == "notion") {
html += `<div class="results-notion">` + `<b><a href="${item.additional.file}">${item.additional.heading}</a></b>` + `<p>${item.entry}</p>` + `</div>`; html += `<div class="results-notion">` + `<b><a href="${item.additional.file}">${item.additional.heading}</a></b>` + `<p>${item.entry}</p>` + `</div>`;
} else if (item.additional.file.endsWith(".html")) { } else if (item.additional.file.endsWith(".html")) {
html += render_html(query, [item]); html += render_html(query, [item]);

View File

@@ -6,5 +6,5 @@
"description": "An AI copilot for your Second Brain", "description": "An AI copilot for your Second Brain",
"author": "Khoj Inc.", "author": "Khoj Inc.",
"authorUrl": "https://github.com/khoj-ai", "authorUrl": "https://github.com/khoj-ai",
"isDesktopOnly": true "isDesktopOnly": false
} }

View File

@@ -73,21 +73,19 @@ export default class Khoj extends Plugin {
// Check if khoj backend is configured, note if cannot connect to backend // Check if khoj backend is configured, note if cannot connect to backend
let headers = { "Authorization": `Bearer ${this.settings.khojApiKey}` }; let headers = { "Authorization": `Bearer ${this.settings.khojApiKey}` };
if (this.settings.khojUrl === "https://app.khoj.dev") { if (this.settings.khojApiKey === "" && this.settings.khojUrl === "https://app.khoj.dev") {
if (this.settings.khojApiKey === "") { new Notice(`Khoj API key is not configured. Please visit https://app.khoj.dev/config#clients to get an API key.`);
new Notice(`Khoj API key is not configured. Please visit https://app.khoj.dev/config#clients to get an API key.`); return;
return;
}
await request({ url: this.settings.khojUrl ,method: "GET", headers: headers })
.then(response => {
this.settings.connectedToBackend = true;
})
.catch(error => {
this.settings.connectedToBackend = false;
new Notice(`Ensure Khoj backend is running and Khoj URL is pointing to it in the plugin settings.\n\n${error}`);
});
} }
await request({ url: this.settings.khojUrl ,method: "GET", headers: headers })
.then(response => {
this.settings.connectedToBackend = true;
})
.catch(error => {
this.settings.connectedToBackend = false;
new Notice(`Ensure Khoj backend is running and Khoj URL is pointing to it in the plugin settings.\n\n${error}`);
});
} }
async saveSettings() { async saveSettings() {

View File

@@ -87,27 +87,18 @@ export class KhojSearchModal extends SuggestModal<SearchResult> {
} }
async getSuggestions(query: string): Promise<SearchResult[]> { async getSuggestions(query: string): Promise<SearchResult[]> {
// Query Khoj backend for search results // Set up query to Khoj backend for search results
let encodedQuery = encodeURIComponent(query); let encodedQuery = encodeURIComponent(query);
let searchUrl = `${this.setting.khojUrl}/api/search?q=${encodedQuery}&n=${this.setting.resultsCount}&r=${this.rerank}&client=obsidian`; let searchUrl = `${this.setting.khojUrl}/api/search?q=${encodedQuery}&n=${this.setting.resultsCount}&r=${this.rerank}&client=obsidian`;
let headers = { 'Authorization': `Bearer ${this.setting.khojApiKey}` } let headers = { 'Authorization': `Bearer ${this.setting.khojApiKey}` }
// Get search results for markdown and pdf files // Get search results from Khoj backend
let mdResponse = await request({ url: `${searchUrl}&t=markdown`, headers: headers }); let response = await request({ url: `${searchUrl}`, headers: headers });
let pdfResponse = await request({ url: `${searchUrl}&t=pdf`, headers: headers });
// Parse search results // Parse search results
let mdData = JSON.parse(mdResponse) let results = JSON.parse(response)
.filter((result: any) => !this.find_similar_notes || !result.additional.file.endsWith(this.app.workspace.getActiveFile()?.path)) .filter((result: any) => !this.find_similar_notes || !result.additional.file.endsWith(this.app.workspace.getActiveFile()?.path))
.map((result: any) => { return { entry: result.entry, score: result.score, file: result.additional.file }; }); .map((result: any) => { return { entry: result.entry, file: result.additional.file } as SearchResult; });
let pdfData = JSON.parse(pdfResponse)
.filter((result: any) => !this.find_similar_notes || !result.additional.file.endsWith(this.app.workspace.getActiveFile()?.path))
.map((result: any) => { return { entry: `## ${result.additional.compiled}`, score: result.score, file: result.additional.file } as SearchResult; })
// Combine markdown and PDF results and sort them by score
let results = mdData.concat(pdfData)
.sort((a: any, b: any) => b.score - a.score)
.map((result: any) => { return { entry: result.entry, file: result.file } as SearchResult; })
this.query = query; this.query = query;
return results; return results;

View File

@@ -14,7 +14,7 @@ from pathlib import Path
import os import os
# Build paths inside the project like this: BASE_DIR / 'subdir'. # Build paths inside the project like this: BASE_DIR / 'subdir'.
BASE_DIR = Path(__file__).resolve().parent.parent.parent BASE_DIR = Path(__file__).resolve().parent.parent
# Quick-start development settings - unsuitable for production # Quick-start development settings - unsuitable for production
@@ -24,15 +24,15 @@ BASE_DIR = Path(__file__).resolve().parent.parent.parent
SECRET_KEY = os.getenv("KHOJ_DJANGO_SECRET_KEY") SECRET_KEY = os.getenv("KHOJ_DJANGO_SECRET_KEY")
# SECURITY WARNING: don't run with debug turned on in production! # SECURITY WARNING: don't run with debug turned on in production!
DEBUG = os.getenv("KHOJ_DEBUG", "False") == "True" DEBUG = os.getenv("KHOJ_DEBUG") == "True"
ALLOWED_HOSTS = [".khoj.dev", "localhost", "127.0.0.1", "[::1]", "beta.khoj.dev"] # All Subdomains of KHOJ_DOMAIN are trusted
KHOJ_DOMAIN = os.getenv("KHOJ_DOMAIN", "khoj.dev")
ALLOWED_HOSTS = [f".{KHOJ_DOMAIN}", "localhost", "127.0.0.1", "[::1]"]
CSRF_TRUSTED_ORIGINS = [ CSRF_TRUSTED_ORIGINS = [
"https://app.khoj.dev", f"https://*.{KHOJ_DOMAIN}",
"https://beta.khoj.dev", f"https://{KHOJ_DOMAIN}",
"https://khoj.dev",
"https://*.khoj.dev",
] ]
COOKIE_SAMESITE = "None" COOKIE_SAMESITE = "None"
@@ -40,8 +40,8 @@ if DEBUG:
SESSION_COOKIE_DOMAIN = "localhost" SESSION_COOKIE_DOMAIN = "localhost"
CSRF_COOKIE_DOMAIN = "localhost" CSRF_COOKIE_DOMAIN = "localhost"
else: else:
SESSION_COOKIE_DOMAIN = "khoj.dev" SESSION_COOKIE_DOMAIN = KHOJ_DOMAIN
CSRF_COOKIE_DOMAIN = "khoj.dev" CSRF_COOKIE_DOMAIN = KHOJ_DOMAIN
SESSION_COOKIE_SECURE = True SESSION_COOKIE_SECURE = True
CSRF_COOKIE_SECURE = True CSRF_COOKIE_SECURE = True
@@ -53,7 +53,7 @@ SESSION_COOKIE_SAMESITE = "None"
INSTALLED_APPS = [ INSTALLED_APPS = [
"django.contrib.auth", "django.contrib.auth",
"django.contrib.contenttypes", "django.contrib.contenttypes",
"database.apps.DatabaseConfig", "khoj.database.apps.DatabaseConfig",
"django.contrib.admin", "django.contrib.admin",
"django.contrib.sessions", "django.contrib.sessions",
"django.contrib.messages", "django.contrib.messages",
@@ -143,7 +143,7 @@ USE_TZ = True
# https://docs.djangoproject.com/en/4.2/howto/static-files/ # https://docs.djangoproject.com/en/4.2/howto/static-files/
STATIC_ROOT = BASE_DIR / "static" STATIC_ROOT = BASE_DIR / "static"
STATICFILES_DIRS = [BASE_DIR / "src/khoj/interface/web"] STATICFILES_DIRS = [BASE_DIR / "interface/web"]
STATIC_URL = "/static/" STATIC_URL = "/static/"
# Default primary key field type # Default primary key field type

View File

@@ -20,8 +20,8 @@ from starlette.authentication import (
) )
# Internal Packages # Internal Packages
from database.models import KhojUser, Subscription from khoj.database.models import KhojUser, Subscription
from database.adapters import get_all_users, get_or_create_search_model from khoj.database.adapters import get_all_users, get_or_create_search_model
from khoj.processor.embeddings import CrossEncoderModel, EmbeddingsModel from khoj.processor.embeddings import CrossEncoderModel, EmbeddingsModel
from khoj.routers.indexer import configure_content, load_content, configure_search from khoj.routers.indexer import configure_content, load_content, configure_search
from khoj.utils import constants, state from khoj.utils import constants, state
@@ -45,7 +45,7 @@ class UserAuthenticationBackend(AuthenticationBackend):
def __init__( def __init__(
self, self,
): ):
from database.models import KhojUser, KhojApiUser from khoj.database.models import KhojUser, KhojApiUser
self.khojuser_manager = KhojUser.objects self.khojuser_manager = KhojUser.objects
self.khojapiuser_manager = KhojApiUser.objects self.khojapiuser_manager = KhojApiUser.objects

View File

@@ -1,32 +1,29 @@
import math import math
from typing import Optional, Type, List
from datetime import date, datetime
import secrets import secrets
from typing import Type, List from datetime import date, datetime, timezone
from datetime import date, timezone from typing import List, Optional, Type
from django.db import models from asgiref.sync import sync_to_async
from django.contrib.sessions.backends.db import SessionStore from django.contrib.sessions.backends.db import SessionStore
from pgvector.django import CosineDistance from django.db import models
from django.db.models.manager import BaseManager
from django.db.models import Q from django.db.models import Q
from django.db.models.manager import BaseManager
from fastapi import HTTPException
from pgvector.django import CosineDistance
from torch import Tensor from torch import Tensor
# Import sync_to_async from Django Channels from khoj.database.models import (
from asgiref.sync import sync_to_async ChatModelOptions,
Conversation,
from fastapi import HTTPException Entry,
GithubConfig,
from database.models import ( GithubRepoConfig,
KhojUser,
GoogleUser, GoogleUser,
KhojApiUser, KhojApiUser,
KhojUser,
NotionConfig, NotionConfig,
GithubConfig, OfflineChatProcessorConversationConfig,
Entry, OpenAIProcessorConversationConfig,
GithubRepoConfig,
Conversation,
ChatModelOptions,
SearchModelConfig, SearchModelConfig,
Subscription, Subscription,
UserConversationConfig, UserConversationConfig,
@@ -34,12 +31,12 @@ from database.models import (
OfflineChatProcessorConversationConfig, OfflineChatProcessorConversationConfig,
ReflectiveQuestion, ReflectiveQuestion,
) )
from khoj.utils.helpers import generate_random_name from khoj.search_filter.date_filter import DateFilter
from khoj.search_filter.file_filter import FileFilter
from khoj.search_filter.word_filter import WordFilter
from khoj.utils import state from khoj.utils import state
from khoj.utils.config import GPT4AllProcessorModel from khoj.utils.config import GPT4AllProcessorModel
from khoj.search_filter.word_filter import WordFilter from khoj.utils.helpers import generate_random_name
from khoj.search_filter.file_filter import FileFilter
from khoj.search_filter.date_filter import DateFilter
async def set_notion_config(token: str, user: KhojUser): async def set_notion_config(token: str, user: KhojUser):

View File

@@ -3,7 +3,7 @@ from django.contrib.auth.admin import UserAdmin
# Register your models here. # Register your models here.
from database.models import ( from khoj.database.models import (
KhojUser, KhojUser,
ChatModelOptions, ChatModelOptions,
OpenAIProcessorConversationConfig, OpenAIProcessorConversationConfig,

View File

@@ -3,4 +3,4 @@ from django.apps import AppConfig
class DatabaseConfig(AppConfig): class DatabaseConfig(AppConfig):
default_auto_field = "django.db.models.BigAutoField" default_auto_field = "django.db.models.BigAutoField"
name = "database" name = "khoj.database"

View File

@@ -2,6 +2,8 @@
from django.db import migrations from django.db import migrations
from typing import List, Any
class Migration(migrations.Migration): class Migration(migrations.Migration):
dependencies = [ dependencies = [
@@ -9,4 +11,4 @@ class Migration(migrations.Migration):
("database", "0010_rename_embeddings_entry_and_more"), ("database", "0010_rename_embeddings_entry_and_more"),
] ]
operations = [] operations: List[Any] = []

View File

@@ -112,14 +112,14 @@
} else if ( } else if (
item.additional.file.endsWith(".md") || item.additional.file.endsWith(".md") ||
item.additional.file.endsWith(".markdown") || item.additional.file.endsWith(".markdown") ||
(item.additional.file.includes("issues") && item.additional.file.includes("github.com")) || (item.additional.file.includes("issues") && item.additional.source === "github") ||
(item.additional.file.includes("commit") && item.additional.file.includes("github.com")) (item.additional.file.includes("commit") && item.additional.source === "github")
) )
{ {
html += render_markdown(query, [item]); html += render_markdown(query, [item]);
} else if (item.additional.file.endsWith(".pdf")) { } else if (item.additional.file.endsWith(".pdf")) {
html += render_pdf(query, [item]); html += render_pdf(query, [item]);
} else if (item.additional.file.includes("notion.so")) { } else if (item.additional.source === "notion") {
html += `<div class="results-notion">` + `<b><a href="${item.additional.file}">${item.additional.heading}</a></b>` + `<p>${item.entry}</p>` + `</div>`; html += `<div class="results-notion">` + `<b><a href="${item.additional.file}">${item.additional.heading}</a></b>` + `<p>${item.entry}</p>` + `</div>`;
} else if (item.additional.file.endsWith(".html")) { } else if (item.additional.file.endsWith(".html")) {
html += render_html(query, [item]); html += render_html(query, [item]);

View File

@@ -1,4 +1,10 @@
""" Main module for Khoj Assistant
isort:skip_file
"""
# Standard Packages # Standard Packages
from contextlib import redirect_stdout
import io
import os import os
import sys import sys
import locale import locale
@@ -25,14 +31,18 @@ from django.core.asgi import get_asgi_application
from django.core.management import call_command from django.core.management import call_command
# Initialize Django # Initialize Django
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "app.settings") os.environ.setdefault("DJANGO_SETTINGS_MODULE", "khoj.app.settings")
django.setup() django.setup()
# Initialize Django Database # Initialize Django Database
call_command("migrate", "--noinput") db_migrate_output = io.StringIO()
with redirect_stdout(db_migrate_output):
call_command("migrate", "--noinput")
# Initialize Django Static Files # Initialize Django Static Files
call_command("collectstatic", "--noinput") collectstatic_output = io.StringIO()
with redirect_stdout(collectstatic_output):
call_command("collectstatic", "--noinput")
# Initialize the Application Server # Initialize the Application Server
app = FastAPI() app = FastAPI()
@@ -41,9 +51,16 @@ app = FastAPI()
django_app = get_asgi_application() django_app = get_asgi_application()
# Add CORS middleware # Add CORS middleware
KHOJ_DOMAIN = os.getenv("KHOJ_DOMAIN", "app.khoj.dev")
app.add_middleware( app.add_middleware(
CORSMiddleware, CORSMiddleware,
allow_origins=["app://obsidian.md", "http://localhost:*", "https://app.khoj.dev/*", "app://khoj.dev"], allow_origins=[
"app://obsidian.md",
"http://localhost:*",
"http://127.0.0.1:*",
f"https://{KHOJ_DOMAIN}",
"app://khoj.dev",
],
allow_credentials=True, allow_credentials=True,
allow_methods=["*"], allow_methods=["*"],
allow_headers=["*"], allow_headers=["*"],
@@ -75,14 +92,16 @@ def run(should_start_server=True):
args = cli(state.cli_args) args = cli(state.cli_args)
set_state(args) set_state(args)
logger.info(f"🚒 Initializing Khoj v{state.khoj_version}")
# Set Logging Level # Set Logging Level
if args.verbose == 0: if args.verbose == 0:
logger.setLevel(logging.INFO) logger.setLevel(logging.INFO)
elif args.verbose >= 1: elif args.verbose >= 1:
logger.setLevel(logging.DEBUG) logger.setLevel(logging.DEBUG)
logger.info(f"🚒 Initializing Khoj v{state.khoj_version}")
logger.info(f"📦 Initializing DB:\n{db_migrate_output.getvalue().strip()}")
logger.debug(f"🌍 Initializing Web Client:\n{collectstatic_output.getvalue().strip()}")
initialization() initialization()
# Create app directory, if it doesn't exist # Create app directory, if it doesn't exist
@@ -103,10 +122,10 @@ def run(should_start_server=True):
# Mount Django and Static Files # Mount Django and Static Files
app.mount("/server", django_app, name="server") app.mount("/server", django_app, name="server")
static_dir = "static" static_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "static")
if not os.path.exists(static_dir): if not os.path.exists(static_dir):
os.mkdir(static_dir) os.mkdir(static_dir)
app.mount(f"/{static_dir}", StaticFiles(directory=static_dir), name=static_dir) app.mount(f"/static", StaticFiles(directory=static_dir), name=static_dir)
# Configure Middleware # Configure Middleware
configure_middleware(app) configure_middleware(app)

View File

@@ -6,7 +6,7 @@ import sys
def main(): def main():
"""Run administrative tasks.""" """Run administrative tasks."""
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "app.settings") os.environ.setdefault("DJANGO_SETTINGS_MODULE", "khoj.app.settings")
try: try:
from django.core.management import execute_from_command_line from django.core.management import execute_from_command_line
except ImportError as exc: except ImportError as exc:

View File

@@ -60,7 +60,7 @@ import logging
from packaging import version from packaging import version
from khoj.utils.yaml import load_config_from_file, save_config_to_file from khoj.utils.yaml import load_config_from_file, save_config_to_file
from database.models import ( from khoj.database.models import (
OpenAIProcessorConversationConfig, OpenAIProcessorConversationConfig,
OfflineChatProcessorConversationConfig, OfflineChatProcessorConversationConfig,
ChatModelOptions, ChatModelOptions,

View File

@@ -2,19 +2,20 @@
import logging import logging
import time import time
from datetime import datetime from datetime import datetime
from typing import Dict, List, Union, Tuple from typing import Dict, List, Tuple, Union
# External Packages # External Packages
import requests import requests
from khoj.database.models import Entry as DbEntry
from khoj.database.models import GithubConfig, KhojUser
from khoj.processor.content.markdown.markdown_to_entries import MarkdownToEntries
from khoj.processor.content.org_mode.org_to_entries import OrgToEntries
from khoj.processor.content.text_to_entries import TextToEntries
# Internal Packages # Internal Packages
from khoj.utils.helpers import timer from khoj.utils.helpers import timer
from khoj.utils.rawconfig import Entry, GithubContentConfig, GithubRepoConfig from khoj.utils.rawconfig import Entry, GithubContentConfig, GithubRepoConfig
from khoj.processor.data_sources.markdown.markdown_to_entries import MarkdownToEntries
from khoj.processor.data_sources.org_mode.org_to_entries import OrgToEntries
from khoj.processor.text_to_entries import TextToEntries
from database.models import Entry as DbEntry, GithubConfig, KhojUser
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)

View File

@@ -1,17 +1,19 @@
# Standard Packages # Standard Packages
import logging import logging
import re import re
import urllib3
from pathlib import Path from pathlib import Path
from typing import Tuple, List from typing import List, Tuple
import urllib3
from khoj.database.models import Entry as DbEntry
from khoj.database.models import KhojUser
# Internal Packages # Internal Packages
from khoj.processor.text_to_entries import TextToEntries from khoj.processor.content.text_to_entries import TextToEntries
from khoj.utils.helpers import timer
from khoj.utils.constants import empty_escape_sequences from khoj.utils.constants import empty_escape_sequences
from khoj.utils.helpers import timer
from khoj.utils.rawconfig import Entry from khoj.utils.rawconfig import Entry
from database.models import Entry as DbEntry, KhojUser
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)

View File

@@ -1,19 +1,18 @@
# Standard Packages # Standard Packages
import logging import logging
from enum import Enum
from typing import Tuple from typing import Tuple
# External Packages # External Packages
import requests import requests
from khoj.database.models import Entry as DbEntry
from khoj.database.models import KhojUser, NotionConfig
from khoj.processor.content.text_to_entries import TextToEntries
# Internal Packages # Internal Packages
from khoj.utils.helpers import timer from khoj.utils.helpers import timer
from khoj.utils.rawconfig import Entry, NotionContentConfig from khoj.utils.rawconfig import Entry, NotionContentConfig
from khoj.processor.text_to_entries import TextToEntries
from khoj.utils.rawconfig import Entry
from database.models import Entry as DbEntry, KhojUser, NotionConfig
from enum import Enum
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)

View File

@@ -3,14 +3,15 @@ import logging
from pathlib import Path from pathlib import Path
from typing import Iterable, List, Tuple from typing import Iterable, List, Tuple
from khoj.database.models import Entry as DbEntry
from khoj.database.models import KhojUser
# Internal Packages # Internal Packages
from khoj.processor.data_sources.org_mode import orgnode from khoj.processor.content.org_mode import orgnode
from khoj.processor.text_to_entries import TextToEntries from khoj.processor.content.text_to_entries import TextToEntries
from khoj.utils import state
from khoj.utils.helpers import timer from khoj.utils.helpers import timer
from khoj.utils.rawconfig import Entry from khoj.utils.rawconfig import Entry
from khoj.utils import state
from database.models import Entry as DbEntry, KhojUser
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)

View File

@@ -1,18 +1,19 @@
# Standard Packages # Standard Packages
import os
import logging
from typing import List, Tuple
import base64 import base64
import logging
import os
from typing import List, Tuple
# External Packages # External Packages
from langchain.document_loaders import PyMuPDFLoader from langchain.document_loaders import PyMuPDFLoader
from khoj.database.models import Entry as DbEntry
from khoj.database.models import KhojUser
# Internal Packages # Internal Packages
from khoj.processor.text_to_entries import TextToEntries from khoj.processor.content.text_to_entries import TextToEntries
from khoj.utils.helpers import timer from khoj.utils.helpers import timer
from khoj.utils.rawconfig import Entry from khoj.utils.rawconfig import Entry
from database.models import Entry as DbEntry, KhojUser
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)

View File

@@ -2,15 +2,16 @@
import logging import logging
from pathlib import Path from pathlib import Path
from typing import List, Tuple from typing import List, Tuple
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from khoj.database.models import Entry as DbEntry
from khoj.database.models import KhojUser
# Internal Packages # Internal Packages
from khoj.processor.text_to_entries import TextToEntries from khoj.processor.content.text_to_entries import TextToEntries
from khoj.utils.helpers import timer from khoj.utils.helpers import timer
from khoj.utils.rawconfig import Entry from khoj.utils.rawconfig import Entry
from database.models import Entry as DbEntry, KhojUser
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)

View File

@@ -13,8 +13,8 @@ from khoj.utils.helpers import is_none_or_empty, timer, batcher
# Internal Packages # Internal Packages
from khoj.utils.rawconfig import Entry from khoj.utils.rawconfig import Entry
from khoj.search_filter.date_filter import DateFilter from khoj.search_filter.date_filter import DateFilter
from database.models import KhojUser, Entry as DbEntry, EntryDates from khoj.database.models import KhojUser, Entry as DbEntry, EntryDates
from database.adapters import EntryAdapters from khoj.database.adapters import EntryAdapters
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)

View File

@@ -10,7 +10,7 @@ You are Khoj, a smart, inquisitive and helpful personal assistant.
Use your general knowledge and the past conversation with the user as context to inform your responses. Use your general knowledge and the past conversation with the user as context to inform your responses.
You were created by Khoj Inc. with the following capabilities: You were created by Khoj Inc. with the following capabilities:
- You *CAN REMEMBER ALL NOTES and PERSONAL INFORMATION FOREVER* that the user ever shares with you. They can share files with you using the Khoj desktop application. - You *CAN REMEMBER ALL NOTES and PERSONAL INFORMATION FOREVER* that the user ever shares with you. They can share files with you using any Khoj client, including the native Desktop app, the Obsidian or Emacs plugins, or the web app.
- You cannot set reminders. - You cannot set reminders.
- Say "I don't know" or "I don't understand" if you don't know what to say or if you don't know the answer to a question. - Say "I don't know" or "I don't understand" if you don't know what to say or if you don't know the answer to a question.
- Ask crisp follow-up questions to get additional context, when the answer cannot be inferred from the provided notes or past conversations. - Ask crisp follow-up questions to get additional context, when the answer cannot be inferred from the provided notes or past conversations.
@@ -128,8 +128,9 @@ The user has a question which you can use the internet to respond to. Can you br
Today's date in UTC: {current_date} Today's date in UTC: {current_date}
Here are some examples of questions and subqueries: Here are some examples of questions and subqueries:
Q: What is the weather like in New York?
A: ["weather in new york"] Q: Posts about vector databases on Hacker News
A: ["site:news.ycombinator.com vector database"]
Q: What is the weather like in New York and San Francisco? Q: What is the weather like in New York and San Francisco?
A: ["weather in new york", "weather in san francisco"] A: ["weather in new york", "weather in san francisco"]

View File

@@ -1,64 +1,63 @@
# Standard Packages # Standard Packages
import concurrent.futures import concurrent.futures
import json
import logging
import math import math
import time import time
import logging from typing import Any, Dict, List, Optional, Union
import json
from typing import List, Optional, Union, Any, Dict from asgiref.sync import sync_to_async
# External Packages # External Packages
from fastapi import APIRouter, Depends, HTTPException, Header, Request from fastapi import APIRouter, Depends, Header, HTTPException, Request
from fastapi.requests import Request
from fastapi.responses import Response, StreamingResponse
from starlette.authentication import requires from starlette.authentication import requires
from asgiref.sync import sync_to_async
# Internal Packages # Internal Packages
from khoj.configure import configure_server from khoj.configure import configure_server
from khoj.search_type import image_search, text_search from khoj.database import adapters
from khoj.search_filter.date_filter import DateFilter from khoj.database.adapters import ConversationAdapters, EntryAdapters
from khoj.search_filter.file_filter import FileFilter from khoj.database.models import ChatModelOptions
from khoj.search_filter.word_filter import WordFilter from khoj.database.models import Entry as DbEntry
from khoj.utils.config import TextSearchModel, GPT4AllProcessorModel from khoj.database.models import (
from khoj.utils.helpers import ConversationCommand, is_none_or_empty, timer, command_descriptions GithubConfig,
from khoj.utils.rawconfig import ( KhojUser,
FullConfig,
SearchConfig,
SearchResponse,
GithubContentConfig,
NotionContentConfig,
)
from khoj.utils.state import SearchType
from khoj.utils import state, constants
from khoj.utils.helpers import AsyncIteratorWrapper, get_device
from fastapi.responses import StreamingResponse, Response
from khoj.routers.helpers import (
CommonQueryParams,
get_conversation_command,
validate_conversation_config,
agenerate_chat_response,
update_telemetry_state,
is_ready_to_chat,
ApiUserRateLimiter,
)
from khoj.processor.conversation.prompts import help_message, no_entries_found
from khoj.processor.conversation.openai.gpt import extract_questions
from khoj.processor.conversation.gpt4all.chat_model import extract_questions_offline
from khoj.processor.tools.online_search import search_with_google
from fastapi.requests import Request
from database import adapters
from database.adapters import EntryAdapters, ConversationAdapters
from database.models import (
LocalMarkdownConfig, LocalMarkdownConfig,
LocalOrgConfig, LocalOrgConfig,
LocalPdfConfig, LocalPdfConfig,
LocalPlaintextConfig, LocalPlaintextConfig,
KhojUser,
Entry as DbEntry,
GithubConfig,
NotionConfig, NotionConfig,
ChatModelOptions,
) )
from khoj.processor.conversation.gpt4all.chat_model import extract_questions_offline
from khoj.processor.conversation.openai.gpt import extract_questions
from khoj.processor.conversation.prompts import help_message, no_entries_found
from khoj.processor.tools.online_search import search_with_google
from khoj.routers.helpers import (
ApiUserRateLimiter,
CommonQueryParams,
agenerate_chat_response,
get_conversation_command,
is_ready_to_chat,
update_telemetry_state,
validate_conversation_config,
)
from khoj.search_filter.date_filter import DateFilter
from khoj.search_filter.file_filter import FileFilter
from khoj.search_filter.word_filter import WordFilter
from khoj.search_type import image_search, text_search
from khoj.utils import constants, state
from khoj.utils.config import GPT4AllProcessorModel, TextSearchModel
from khoj.utils.helpers import (
AsyncIteratorWrapper,
ConversationCommand,
command_descriptions,
get_device,
is_none_or_empty,
timer,
)
from khoj.utils.rawconfig import FullConfig, GithubContentConfig, NotionContentConfig, SearchConfig, SearchResponse
from khoj.utils.state import SearchType
# Initialize Router # Initialize Router
api = APIRouter() api = APIRouter()

View File

@@ -15,8 +15,8 @@ from google.oauth2 import id_token
from google.auth.transport import requests as google_requests from google.auth.transport import requests as google_requests
# Internal Packages # Internal Packages
from database.adapters import get_khoj_tokens, get_or_create_user, create_khoj_token, delete_khoj_token from khoj.database.adapters import get_khoj_tokens, get_or_create_user, create_khoj_token, delete_khoj_token
from database.models import KhojApiUser from khoj.database.models import KhojApiUser
from khoj.routers.helpers import update_telemetry_state from khoj.routers.helpers import update_telemetry_state
from khoj.utils import state from khoj.utils import state

View File

@@ -1,31 +1,28 @@
# Standard Packages # Standard Packages
import asyncio import asyncio
import json
import logging
from collections import defaultdict from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from datetime import datetime from datetime import datetime
from functools import partial from functools import partial
import logging
from time import time from time import time
import json from typing import Annotated, Any, Dict, Iterator, List, Optional, Tuple, Union
from typing import Annotated, Iterator, List, Optional, Union, Tuple, Dict, Any
from datetime import datetime
from khoj.processor.conversation import prompts
# External Packages # External Packages
from fastapi import HTTPException, Header, Request, Depends from fastapi import Depends, Header, HTTPException, Request
from khoj.database.adapters import ConversationAdapters
from khoj.database.models import KhojUser, Subscription
from khoj.processor.conversation import prompts
from khoj.processor.conversation.gpt4all.chat_model import converse_offline, send_message_to_model_offline
from khoj.processor.conversation.openai.gpt import converse, send_message_to_model
from khoj.processor.conversation.utils import ThreadedGenerator, message_to_log
# Internal Packages # Internal Packages
from khoj.utils import state from khoj.utils import state
from khoj.utils.config import GPT4AllProcessorModel from khoj.utils.config import GPT4AllProcessorModel
from khoj.utils.helpers import ConversationCommand, log_telemetry from khoj.utils.helpers import ConversationCommand, log_telemetry
from khoj.processor.conversation.openai.gpt import converse, send_message_to_model
from khoj.processor.conversation.gpt4all.chat_model import converse_offline, send_message_to_model_offline
from khoj.processor.conversation.utils import message_to_log, ThreadedGenerator
from database.models import KhojUser, Subscription, ChatModelOptions
from database.adapters import ConversationAdapters
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)

View File

@@ -1,40 +1,25 @@
# Standard Packages
import logging
from typing import Optional, Union, Dict
import asyncio import asyncio
import logging
from typing import Dict, Optional, Union
# External Packages
from fastapi import APIRouter, Header, Request, Response, UploadFile from fastapi import APIRouter, Header, Request, Response, UploadFile
from pydantic import BaseModel from pydantic import BaseModel
from starlette.authentication import requires from starlette.authentication import requires
# Internal Packages from khoj.database.models import GithubConfig, KhojUser, NotionConfig
from khoj.utils import state, constants from khoj.processor.content.github.github_to_entries import GithubToEntries
from khoj.processor.data_sources.markdown.markdown_to_entries import MarkdownToEntries from khoj.processor.content.markdown.markdown_to_entries import MarkdownToEntries
from khoj.processor.data_sources.org_mode.org_to_entries import OrgToEntries from khoj.processor.content.notion.notion_to_entries import NotionToEntries
from khoj.processor.data_sources.pdf.pdf_to_entries import PdfToEntries from khoj.processor.content.org_mode.org_to_entries import OrgToEntries
from khoj.processor.data_sources.github.github_to_entries import GithubToEntries from khoj.processor.content.pdf.pdf_to_entries import PdfToEntries
from khoj.processor.data_sources.notion.notion_to_entries import NotionToEntries from khoj.processor.content.plaintext.plaintext_to_entries import PlaintextToEntries
from khoj.processor.data_sources.plaintext.plaintext_to_entries import PlaintextToEntries
from khoj.search_type import text_search, image_search
from khoj.routers.helpers import update_telemetry_state from khoj.routers.helpers import update_telemetry_state
from khoj.utils.yaml import save_config_to_file_updated_state from khoj.search_type import image_search, text_search
from khoj.utils.config import SearchModels from khoj.utils import constants, state
from khoj.utils.config import ContentIndex, SearchModels
from khoj.utils.helpers import LRU, get_file_type from khoj.utils.helpers import LRU, get_file_type
from khoj.utils.rawconfig import ( from khoj.utils.rawconfig import ContentConfig, FullConfig, SearchConfig
ContentConfig, from khoj.utils.yaml import save_config_to_file_updated_state
FullConfig,
SearchConfig,
)
from khoj.utils.config import (
ContentIndex,
SearchModels,
)
from database.models import (
KhojUser,
GithubConfig,
NotionConfig,
)
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -189,6 +174,9 @@ def configure_content(
content_index = ContentIndex() content_index = ContentIndex()
success = True success = True
if t == None:
t = state.SearchType.All
if t is not None and t in [type.value for type in state.SearchType]: if t is not None and t in [type.value for type in state.SearchType]:
t = state.SearchType(t) t = state.SearchType(t)
@@ -315,7 +303,7 @@ def configure_content(
# Initialize Notion Search # Initialize Notion Search
notion_config = NotionConfig.objects.filter(user=user).first() notion_config = NotionConfig.objects.filter(user=user).first()
if ( if (
search_type == state.SearchType.All.value or search_type in state.SearchType.Notion.value search_type == state.SearchType.All.value or search_type == state.SearchType.Notion.value
) and notion_config: ) and notion_config:
logger.info("🔌 Setting up search for notion") logger.info("🔌 Setting up search for notion")
text_search.setup( text_search.setup(
@@ -328,7 +316,7 @@ def configure_content(
) )
except Exception as e: except Exception as e:
logger.error(f"🚨 Failed to setup GitHub: {e}", exc_info=True) logger.error(f"🚨 Failed to setup Notion: {e}", exc_info=True)
success = False success = False
# Invalidate Query Cache # Invalidate Query Cache

View File

@@ -10,7 +10,7 @@ from starlette.authentication import requires
import stripe import stripe
# Internal Packages # Internal Packages
from database import adapters from khoj.database import adapters
# Stripe integration for Khoj Cloud Subscription # Stripe integration for Khoj Cloud Subscription

View File

@@ -8,8 +8,8 @@ from fastapi import Request
from fastapi.responses import HTMLResponse, FileResponse, RedirectResponse from fastapi.responses import HTMLResponse, FileResponse, RedirectResponse
from fastapi.templating import Jinja2Templates from fastapi.templating import Jinja2Templates
from starlette.authentication import requires from starlette.authentication import requires
from database import adapters from khoj.database import adapters
from database.models import KhojUser from khoj.database.models import KhojUser
from khoj.utils.rawconfig import ( from khoj.utils.rawconfig import (
GithubContentConfig, GithubContentConfig,
GithubRepoConfig, GithubRepoConfig,
@@ -18,7 +18,7 @@ from khoj.utils.rawconfig import (
# Internal Packages # Internal Packages
from khoj.utils import constants, state from khoj.utils import constants, state
from database.adapters import ( from khoj.database.adapters import (
EntryAdapters, EntryAdapters,
get_user_github_config, get_user_github_config,
get_user_notion_config, get_user_notion_config,

View File

@@ -12,7 +12,6 @@ from sentence_transformers import SentenceTransformer, util
from PIL import Image from PIL import Image
from tqdm import trange from tqdm import trange
import torch import torch
from khoj.utils import state
# Internal Packages # Internal Packages
from khoj.utils.helpers import get_absolute_path, get_from_dict, resolve_absolute_path, load_model, timer from khoj.utils.helpers import get_absolute_path, get_from_dict, resolve_absolute_path, load_model, timer
@@ -26,9 +25,6 @@ logger = logging.getLogger(__name__)
def initialize_model(search_config: ImageSearchConfig): def initialize_model(search_config: ImageSearchConfig):
# Initialize Model
torch.set_num_threads(4)
# Convert model directory to absolute path # Convert model directory to absolute path
search_config.model_directory = resolve_absolute_path(search_config.model_directory) search_config.model_directory = resolve_absolute_path(search_config.model_directory)

View File

@@ -18,9 +18,9 @@ from khoj.utils.models import BaseEncoder
from khoj.utils.state import SearchType from khoj.utils.state import SearchType
from khoj.utils.rawconfig import SearchResponse, Entry from khoj.utils.rawconfig import SearchResponse, Entry
from khoj.utils.jsonl import load_jsonl from khoj.utils.jsonl import load_jsonl
from khoj.processor.text_to_entries import TextToEntries from khoj.processor.content.text_to_entries import TextToEntries
from database.adapters import EntryAdapters from khoj.database.adapters import EntryAdapters
from database.models import KhojUser, Entry as DbEntry from khoj.database.models import KhojUser, Entry as DbEntry
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -141,12 +141,13 @@ def collate_results(hits, dedupe=True):
else: else:
hit_ids.add(hit.corpus_id) hit_ids.add(hit.corpus_id)
yield SearchResponse.parse_obj( yield SearchResponse.model_validate(
{ {
"entry": hit.raw, "entry": hit.raw,
"score": hit.distance, "score": hit.distance,
"corpus_id": str(hit.corpus_id), "corpus_id": str(hit.corpus_id),
"additional": { "additional": {
"source": hit.file_source,
"file": hit.file_path, "file": hit.file_path,
"compiled": hit.compiled, "compiled": hit.compiled,
"heading": hit.heading, "heading": hit.heading,
@@ -169,6 +170,7 @@ def deduplicated_search_responses(hits: List[SearchResponse]):
"score": hit.score, "score": hit.score,
"corpus_id": hit.corpus_id, "corpus_id": hit.corpus_id,
"additional": { "additional": {
"source": hit.additional["source"],
"file": hit.additional["file"], "file": hit.additional["file"],
"compiled": hit.additional["compiled"], "compiled": hit.additional["compiled"],
"heading": hit.additional["heading"], "heading": hit.additional["heading"],

View File

@@ -7,6 +7,7 @@ app_env_filepath = "~/.khoj/env"
telemetry_server = "https://khoj.beta.haletic.com/v1/telemetry" telemetry_server = "https://khoj.beta.haletic.com/v1/telemetry"
content_directory = "~/.khoj/content/" content_directory = "~/.khoj/content/"
default_offline_chat_model = "mistral-7b-instruct-v0.1.Q4_0.gguf" default_offline_chat_model = "mistral-7b-instruct-v0.1.Q4_0.gguf"
default_online_chat_model = "gpt-4"
empty_config = { empty_config = {
"search-type": { "search-type": {

View File

@@ -7,7 +7,7 @@ from bs4 import BeautifulSoup
from khoj.utils.helpers import get_absolute_path, is_none_or_empty from khoj.utils.helpers import get_absolute_path, is_none_or_empty
from khoj.utils.rawconfig import TextContentConfig from khoj.utils.rawconfig import TextContentConfig
from khoj.utils.config import SearchType from khoj.utils.config import SearchType
from database.models import LocalMarkdownConfig, LocalOrgConfig, LocalPdfConfig, LocalPlaintextConfig from khoj.database.models import LocalMarkdownConfig, LocalOrgConfig, LocalPdfConfig, LocalPlaintextConfig
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)

View File

@@ -1,16 +1,17 @@
import logging import logging
import os import os
from database.models import ( from khoj.database.models import (
KhojUser, KhojUser,
OfflineChatProcessorConversationConfig, OfflineChatProcessorConversationConfig,
OpenAIProcessorConversationConfig, OpenAIProcessorConversationConfig,
ChatModelOptions, ChatModelOptions,
) )
from khoj.utils.constants import default_offline_chat_model from khoj.utils.constants import default_offline_chat_model, default_online_chat_model
from khoj.processor.conversation.utils import model_to_prompt_size, model_to_tokenizer
from database.adapters import ConversationAdapters from khoj.database.adapters import ConversationAdapters
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -30,11 +31,6 @@ def initialization():
logger.info( logger.info(
"🗣️ Configure chat models available to your server. You can always update these at /server/admin using the credentials of your admin account" "🗣️ Configure chat models available to your server. You can always update these at /server/admin using the credentials of your admin account"
) )
try:
# Some environments don't support interactive input. We catch the exception and return if that's the case. The admin can still configure their settings from the admin page.
input()
except EOFError:
return
try: try:
# Note: gpt4all package is not available on all devices. # Note: gpt4all package is not available on all devices.
@@ -47,15 +43,27 @@ def initialization():
OfflineChatProcessorConversationConfig.objects.create(enabled=True) OfflineChatProcessorConversationConfig.objects.create(enabled=True)
offline_chat_model = input( offline_chat_model = input(
f"Enter the name of the offline chat model you want to use, based on the models in HuggingFace (press enter to use the default: {default_offline_chat_model}): " f"Enter the offline chat model you want to use, See GPT4All for supported models (default: {default_offline_chat_model}): "
) )
if offline_chat_model == "": if offline_chat_model == "":
ChatModelOptions.objects.create( ChatModelOptions.objects.create(
chat_model=default_offline_chat_model, model_type=ChatModelOptions.ModelType.OFFLINE chat_model=default_offline_chat_model, model_type=ChatModelOptions.ModelType.OFFLINE
) )
else: else:
max_tokens = input("Enter the maximum number of tokens to use for the offline chat model:") default_max_tokens = model_to_prompt_size.get(offline_chat_model, 2000)
tokenizer = input("Enter the tokenizer to use for the offline chat model:") max_tokens = input(
f"Enter the maximum number of tokens to use for the offline chat model (default {default_max_tokens}):"
)
max_tokens = max_tokens or default_max_tokens
default_tokenizer = model_to_tokenizer.get(
offline_chat_model, "hf-internal-testing/llama-tokenizer"
)
tokenizer = input(
f"Enter the tokenizer to use for the offline chat model (default: {default_tokenizer}):"
)
tokenizer = tokenizer or default_tokenizer
ChatModelOptions.objects.create( ChatModelOptions.objects.create(
chat_model=offline_chat_model, chat_model=offline_chat_model,
model_type=ChatModelOptions.ModelType.OFFLINE, model_type=ChatModelOptions.ModelType.OFFLINE,
@@ -71,10 +79,19 @@ def initialization():
logger.info("🗣️ Setting up OpenAI chat model") logger.info("🗣️ Setting up OpenAI chat model")
api_key = input("Enter your OpenAI API key: ") api_key = input("Enter your OpenAI API key: ")
OpenAIProcessorConversationConfig.objects.create(api_key=api_key) OpenAIProcessorConversationConfig.objects.create(api_key=api_key)
openai_chat_model = input("Enter the name of the OpenAI chat model you want to use: ")
max_tokens = input("Enter the maximum number of tokens to use for the OpenAI chat model:") openai_chat_model = input(
f"Enter the OpenAI chat model you want to use (default: {default_online_chat_model}): "
)
openai_chat_model = openai_chat_model or default_online_chat_model
default_max_tokens = model_to_prompt_size.get(openai_chat_model, 2000)
max_tokens = input(
f"Enter the maximum number of tokens to use for the OpenAI chat model (default: {default_max_tokens}): "
)
max_tokens = max_tokens or default_max_tokens
ChatModelOptions.objects.create( ChatModelOptions.objects.create(
chat_model=openai_chat_model, model_type=ChatModelOptions.ModelType.OPENAI, max_tokens=max_tokens chat_model=openai_chat_model, model_type=ChatModelOptions.ModelType.OPENAI, max_prompt_size=max_tokens
) )
logger.info("🗣️ Chat model configuration complete") logger.info("🗣️ Chat model configuration complete")
@@ -94,5 +111,8 @@ def initialization():
try: try:
_create_chat_configuration() _create_chat_configuration()
break break
# Some environments don't support interactive input. We catch the exception and return if that's the case. The admin can still configure their settings from the admin page.
except EOFError:
return
except Exception as e: except Exception as e:
logger.error(f"🚨 Failed to create chat configuration: {e}", exc_info=True) logger.error(f"🚨 Failed to create chat configuration: {e}", exc_info=True)

View File

@@ -72,6 +72,9 @@ class ImageSearchConfig(ConfigBase):
encoder_type: Optional[str] = None encoder_type: Optional[str] = None
model_directory: Optional[Path] = None model_directory: Optional[Path] = None
class Config:
protected_namespaces = ()
class SearchConfig(ConfigBase): class SearchConfig(ConfigBase):
image: Optional[ImageSearchConfig] = None image: Optional[ImageSearchConfig] = None

View File

@@ -1,48 +1,40 @@
# External Packages # External Packages
import os import os
from fastapi.testclient import TestClient
from pathlib import Path from pathlib import Path
import pytest
from fastapi.staticfiles import StaticFiles
from fastapi import FastAPI
import os
from fastapi import FastAPI
import pytest
from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles
from fastapi.testclient import TestClient
# Internal Packages # Internal Packages
from khoj.configure import configure_routes, configure_search_types, configure_middleware from khoj.configure import configure_middleware, configure_routes, configure_search_types
from khoj.database.models import (
GithubConfig,
GithubRepoConfig,
KhojApiUser,
KhojUser,
LocalMarkdownConfig,
LocalOrgConfig,
LocalPlaintextConfig,
)
from khoj.processor.content.org_mode.org_to_entries import OrgToEntries
from khoj.processor.content.plaintext.plaintext_to_entries import PlaintextToEntries
from khoj.processor.embeddings import CrossEncoderModel, EmbeddingsModel from khoj.processor.embeddings import CrossEncoderModel, EmbeddingsModel
from khoj.processor.data_sources.plaintext.plaintext_to_entries import PlaintextToEntries from khoj.routers.indexer import configure_content
from khoj.search_type import image_search, text_search from khoj.search_type import image_search, text_search
from khoj.utils import fs_syncer, state
from khoj.utils.config import SearchModels from khoj.utils.config import SearchModels
from khoj.utils.constants import web_directory from khoj.utils.constants import web_directory
from khoj.utils.helpers import resolve_absolute_path from khoj.utils.helpers import resolve_absolute_path
from khoj.utils.rawconfig import ( from khoj.utils.rawconfig import ContentConfig, ImageContentConfig, ImageSearchConfig, SearchConfig
ContentConfig,
ImageContentConfig,
SearchConfig,
ImageSearchConfig,
)
from khoj.utils import state, fs_syncer
from khoj.routers.indexer import configure_content
from khoj.processor.data_sources.org_mode.org_to_entries import OrgToEntries
from database.models import (
KhojApiUser,
LocalOrgConfig,
LocalMarkdownConfig,
LocalPlaintextConfig,
GithubConfig,
KhojUser,
GithubRepoConfig,
)
from tests.helpers import ( from tests.helpers import (
UserFactory,
ChatModelOptionsFactory, ChatModelOptionsFactory,
OpenAIProcessorConversationConfigFactory,
OfflineChatProcessorConversationConfigFactory, OfflineChatProcessorConversationConfigFactory,
UserConversationProcessorConfigFactory, OpenAIProcessorConversationConfigFactory,
SubscriptionFactory, SubscriptionFactory,
UserConversationProcessorConfigFactory,
UserFactory,
) )

View File

@@ -1,7 +1,7 @@
import factory import factory
import os import os
from database.models import ( from khoj.database.models import (
KhojUser, KhojUser,
KhojApiUser, KhojApiUser,
ChatModelOptions, ChatModelOptions,

View File

@@ -1,23 +1,23 @@
# Standard Modules # Standard Modules
from io import BytesIO from io import BytesIO
from PIL import Image
from urllib.parse import quote from urllib.parse import quote
import pytest import pytest
from fastapi import FastAPI
# External Packages # External Packages
from fastapi.testclient import TestClient from fastapi.testclient import TestClient
from fastapi import FastAPI from PIL import Image
import pytest
# Internal Packages # Internal Packages
from khoj.configure import configure_routes, configure_search_types from khoj.configure import configure_routes, configure_search_types
from khoj.database.adapters import EntryAdapters
from khoj.database.models import KhojApiUser, KhojUser
from khoj.processor.content.org_mode.org_to_entries import OrgToEntries
from khoj.search_type import image_search, text_search
from khoj.utils import state from khoj.utils import state
from khoj.utils.state import search_models, content_index, config
from khoj.search_type import text_search, image_search
from khoj.utils.rawconfig import ContentConfig, SearchConfig from khoj.utils.rawconfig import ContentConfig, SearchConfig
from khoj.processor.data_sources.org_mode.org_to_entries import OrgToEntries from khoj.utils.state import config, content_index, search_models
from database.models import KhojUser, KhojApiUser
from database.adapters import EntryAdapters
# Test # Test

View File

@@ -1,5 +1,6 @@
# Standard Packages # Standard Packages
import urllib.parse import urllib.parse
from urllib.parse import quote
# External Packages # External Packages
import pytest import pytest
@@ -54,6 +55,26 @@ def test_chat_with_no_chat_history_or_retrieved_content_gpt4all(client_offline_c
) )
# ----------------------------------------------------------------------------------------------------
@pytest.mark.chatquality
@pytest.mark.django_db(transaction=True)
def test_chat_with_online_content(chat_client):
# Act
q = "/online give me the link to paul graham's essay how to do great work"
encoded_q = quote(q, safe="")
response = chat_client.get(f"/api/chat?q={encoded_q}&stream=true")
response_message = response.content.decode("utf-8")
response_message = response_message.split("### compiled references")[0]
# Assert
expected_responses = ["http://www.paulgraham.com/greatwork.html"]
assert response.status_code == 200
assert any([expected_response in response_message for expected_response in expected_responses]), (
"Expected assistants name, [K|k]hoj, in response but got: " + response_message
)
# ---------------------------------------------------------------------------------------------------- # ----------------------------------------------------------------------------------------------------
@pytest.mark.chatquality @pytest.mark.chatquality
@pytest.mark.django_db(transaction=True) @pytest.mark.django_db(transaction=True)

View File

@@ -4,7 +4,7 @@ from pathlib import Path
import os import os
# Internal Packages # Internal Packages
from khoj.processor.data_sources.markdown.markdown_to_entries import MarkdownToEntries from khoj.processor.content.markdown.markdown_to_entries import MarkdownToEntries
from khoj.utils.fs_syncer import get_markdown_files from khoj.utils.fs_syncer import get_markdown_files
from khoj.utils.rawconfig import TextContentConfig from khoj.utils.rawconfig import TextContentConfig

View File

@@ -1,24 +1,14 @@
# Standard Modules # Standard Modules
from io import BytesIO
from PIL import Image
from urllib.parse import quote from urllib.parse import quote
import pytest
# External Packages # External Packages
from fastapi.testclient import TestClient
from fastapi import FastAPI, UploadFile
from io import BytesIO
import pytest import pytest
from khoj.database.models import KhojApiUser, KhojUser
from khoj.processor.content.org_mode.org_to_entries import OrgToEntries
# Internal Packages # Internal Packages
from khoj.configure import configure_routes, configure_search_types from khoj.search_type import text_search
from khoj.utils import state
from khoj.utils.state import search_models, content_index, config
from khoj.search_type import text_search, image_search
from khoj.utils.rawconfig import ContentConfig, SearchConfig
from khoj.processor.data_sources.org_mode.org_to_entries import OrgToEntries
from database.models import KhojUser, KhojApiUser
from database.adapters import EntryAdapters
# ---------------------------------------------------------------------------------------------------- # ----------------------------------------------------------------------------------------------------

View File

@@ -1,6 +1,7 @@
# Standard Packages # Standard Packages
import os import os
import urllib.parse import urllib.parse
from urllib.parse import quote
# External Packages # External Packages
import pytest import pytest
@@ -10,7 +11,7 @@ from khoj.processor.conversation import prompts
# Internal Packages # Internal Packages
from khoj.processor.conversation.utils import message_to_log from khoj.processor.conversation.utils import message_to_log
from tests.helpers import ConversationFactory from tests.helpers import ConversationFactory
from database.models import KhojUser from khoj.database.models import KhojUser
# Initialize variables for tests # Initialize variables for tests
api_key = os.getenv("OPENAI_API_KEY") api_key = os.getenv("OPENAI_API_KEY")
@@ -54,6 +55,26 @@ def test_chat_with_no_chat_history_or_retrieved_content(chat_client):
) )
# ----------------------------------------------------------------------------------------------------
@pytest.mark.chatquality
@pytest.mark.django_db(transaction=True)
def test_chat_with_online_content(chat_client):
# Act
q = "/online give me the link to paul graham's essay how to do great work"
encoded_q = quote(q, safe="")
response = chat_client.get(f"/api/chat?q={encoded_q}&stream=true")
response_message = response.content.decode("utf-8")
response_message = response_message.split("### compiled references")[0]
# Assert
expected_responses = ["http://www.paulgraham.com/greatwork.html"]
assert response.status_code == 200
assert any([expected_response in response_message for expected_response in expected_responses]), (
"Expected assistants name, [K|k]hoj, in response but got: " + response_message
)
# ---------------------------------------------------------------------------------------------------- # ----------------------------------------------------------------------------------------------------
@pytest.mark.django_db(transaction=True) @pytest.mark.django_db(transaction=True)
@pytest.mark.chatquality @pytest.mark.chatquality

View File

@@ -3,8 +3,8 @@ import json
import os import os
# Internal Packages # Internal Packages
from khoj.processor.data_sources.org_mode.org_to_entries import OrgToEntries from khoj.processor.content.org_mode.org_to_entries import OrgToEntries
from khoj.processor.text_to_entries import TextToEntries from khoj.processor.content.text_to_entries import TextToEntries
from khoj.utils.helpers import is_none_or_empty from khoj.utils.helpers import is_none_or_empty
from khoj.utils.rawconfig import Entry from khoj.utils.rawconfig import Entry
from khoj.utils.fs_syncer import get_org_files from khoj.utils.fs_syncer import get_org_files

View File

@@ -2,7 +2,7 @@
import datetime import datetime
# Internal Packages # Internal Packages
from khoj.processor.data_sources.org_mode import orgnode from khoj.processor.content.org_mode import orgnode
# Test # Test

View File

@@ -3,7 +3,7 @@ import json
import os import os
# Internal Packages # Internal Packages
from khoj.processor.data_sources.pdf.pdf_to_entries import PdfToEntries from khoj.processor.content.pdf.pdf_to_entries import PdfToEntries
from khoj.utils.fs_syncer import get_pdf_files from khoj.utils.fs_syncer import get_pdf_files
from khoj.utils.rawconfig import TextContentConfig from khoj.utils.rawconfig import TextContentConfig

View File

@@ -3,11 +3,12 @@ import json
import os import os
from pathlib import Path from pathlib import Path
from khoj.database.models import KhojUser, LocalPlaintextConfig
from khoj.processor.content.plaintext.plaintext_to_entries import PlaintextToEntries
# Internal Packages # Internal Packages
from khoj.utils.fs_syncer import get_plaintext_files from khoj.utils.fs_syncer import get_plaintext_files
from khoj.utils.rawconfig import TextContentConfig from khoj.utils.rawconfig import TextContentConfig
from khoj.processor.data_sources.plaintext.plaintext_to_entries import PlaintextToEntries
from database.models import LocalPlaintextConfig, KhojUser
def test_plaintext_file(tmp_path): def test_plaintext_file(tmp_path):

View File

@@ -1,19 +1,20 @@
# System Packages # System Packages
import logging
from pathlib import Path
import os
import asyncio import asyncio
import logging
import os
from pathlib import Path
# External Packages # External Packages
import pytest import pytest
from khoj.database.models import Entry, GithubConfig, KhojUser, LocalOrgConfig
from khoj.processor.content.github.github_to_entries import GithubToEntries
from khoj.processor.content.org_mode.org_to_entries import OrgToEntries
# Internal Packages # Internal Packages
from khoj.search_type import text_search from khoj.search_type import text_search
from khoj.utils.rawconfig import ContentConfig, SearchConfig
from khoj.processor.data_sources.org_mode.org_to_entries import OrgToEntries
from khoj.processor.data_sources.github.github_to_entries import GithubToEntries
from khoj.utils.fs_syncer import collect_files, get_org_files from khoj.utils.fs_syncer import collect_files, get_org_files
from database.models import LocalOrgConfig, KhojUser, Entry, GithubConfig from khoj.utils.rawconfig import ContentConfig, SearchConfig
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)