Merge branch 'master' into fix-1000-file-index-update-limit

This commit is contained in:
Debanjum Singh Solanky
2024-01-16 16:50:58 +05:30
80 changed files with 24067 additions and 467 deletions

View File

@@ -1,6 +1,6 @@
{
"name": "Khoj",
"version": "1.2.0",
"version": "1.2.1",
"description": "An AI copilot for your Second Brain",
"author": "Saba Imran, Debanjum Singh Solanky <team@khoj.dev>",
"license": "GPL-3.0-or-later",

View File

@@ -6,7 +6,7 @@
;; Saba Imran <saba@khoj.dev>
;; Description: An AI copilot for your Second Brain
;; Keywords: search, chat, org-mode, outlines, markdown, pdf, image
;; Version: 1.2.0
;; Version: 1.2.1
;; Package-Requires: ((emacs "27.1") (transient "0.3.0") (dash "2.19.1"))
;; URL: https://github.com/khoj-ai/khoj/tree/master/src/interface/emacs

View File

@@ -1,7 +1,7 @@
{
"id": "khoj",
"name": "Khoj",
"version": "1.2.0",
"version": "1.2.1",
"minAppVersion": "0.15.0",
"description": "An AI copilot for your Second Brain",
"author": "Khoj Inc.",

View File

@@ -1,6 +1,6 @@
{
"name": "Khoj",
"version": "1.2.0",
"version": "1.2.1",
"description": "An AI copilot for your Second Brain",
"author": "Debanjum Singh Solanky, Saba Imran <team@khoj.dev>",
"license": "GPL-3.0-or-later",

View File

@@ -30,5 +30,6 @@
"1.0.0": "0.15.0",
"1.0.1": "0.15.0",
"1.1.0": "0.15.0",
"1.2.0": "0.15.0"
"1.2.0": "0.15.0",
"1.2.1": "0.15.0"
}

View File

@@ -144,7 +144,15 @@ def configure_server(
state.cross_encoder_model = dict()
for model in search_models:
state.embeddings_model.update({model.name: EmbeddingsModel(model.bi_encoder)})
state.embeddings_model.update(
{
model.name: EmbeddingsModel(
model.bi_encoder,
model.embeddings_inference_endpoint,
model.embeddings_inference_endpoint_api_key,
)
}
)
state.cross_encoder_model.update({model.name: CrossEncoderModel(model.cross_encoder)})
state.SearchType = configure_search_types()

View File

@@ -0,0 +1,22 @@
# Generated by Django 4.2.7 on 2024-01-15 18:12
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("database", "0024_alter_entry_embeddings"),
]
operations = [
migrations.AddField(
model_name="searchmodelconfig",
name="embeddings_inference_endpoint",
field=models.CharField(blank=True, default=None, max_length=200, null=True),
),
migrations.AddField(
model_name="searchmodelconfig",
name="embeddings_inference_endpoint_api_key",
field=models.CharField(blank=True, default=None, max_length=200, null=True),
),
]

View File

@@ -110,6 +110,8 @@ class SearchModelConfig(BaseModel):
model_type = models.CharField(max_length=200, choices=ModelType.choices, default=ModelType.TEXT)
bi_encoder = models.CharField(max_length=200, default="thenlper/gte-small")
cross_encoder = models.CharField(max_length=200, default="cross-encoder/ms-marco-MiniLM-L-6-v2")
embeddings_inference_endpoint = models.CharField(max_length=200, default=None, null=True, blank=True)
embeddings_inference_endpoint_api_key = models.CharField(max_length=200, default=None, null=True, blank=True)
class TextToImageModelConfig(BaseModel):

View File

@@ -123,9 +123,9 @@ def filter_questions(questions: List[str]):
def converse_offline(
references,
online_results,
user_query,
references=[],
online_results=[],
conversation_log={},
model: str = "mistral-7b-instruct-v0.1.Q4_0.gguf",
loaded_model: Union[Any, None] = None,

View File

@@ -21,9 +21,11 @@ def download_model(model_name: str):
# Try load chat model to GPU if:
# 1. Loading chat model to GPU isn't disabled via CLI and
# 2. Machine has GPU
# 3. GPU has enough free memory to load the chat model
# 3. GPU has enough free memory to load the chat model with max context length of 4096
device = (
"gpu" if state.chat_on_gpu and gpt4all.pyllmodel.LLModel().list_gpu(chat_model_config["path"]) else "cpu"
"gpu"
if state.chat_on_gpu and gpt4all.pyllmodel.LLModel().list_gpu(chat_model_config["path"], 4096)
else "cpu"
)
except ValueError:
device = "cpu"
@@ -35,7 +37,7 @@ def download_model(model_name: str):
raise e
# Now load the downloaded chat model onto appropriate device
chat_model = gpt4all.GPT4All(model_name=model_name, device=device, allow_download=False)
chat_model = gpt4all.GPT4All(model_name=model_name, n_ctx=4096, device=device, allow_download=False)
logger.debug(f"Loaded chat model to {device.upper()}.")
return chat_model

View File

@@ -1,23 +1,69 @@
import logging
from typing import List
import requests
import tqdm
from sentence_transformers import CrossEncoder, SentenceTransformer
from torch import nn
from khoj.utils.helpers import get_device
from khoj.utils.rawconfig import SearchResponse
logger = logging.getLogger(__name__)
class EmbeddingsModel:
def __init__(self, model_name: str = "thenlper/gte-small"):
def __init__(
self,
model_name: str = "thenlper/gte-small",
embeddings_inference_endpoint: str = None,
embeddings_inference_endpoint_api_key: str = None,
):
self.encode_kwargs = {"normalize_embeddings": True}
self.model_kwargs = {"device": get_device()}
self.model_name = model_name
self.inference_endpoint = embeddings_inference_endpoint
self.api_key = embeddings_inference_endpoint_api_key
self.embeddings_model = SentenceTransformer(self.model_name, **self.model_kwargs)
def embed_query(self, query):
if self.api_key is not None and self.inference_endpoint is not None:
target_url = f"{self.inference_endpoint}"
payload = {"inputs": [query]}
headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
response = requests.post(target_url, json=payload, headers=headers)
return response.json()["embeddings"][0]
return self.embeddings_model.encode([query], show_progress_bar=False, **self.encode_kwargs)[0]
def embed_documents(self, docs):
if self.api_key is not None and self.inference_endpoint is not None:
target_url = f"{self.inference_endpoint}"
if "huggingface" not in target_url:
logger.warning(
f"Using custom inference endpoint {target_url} is not yet supported. Please use a HuggingFace inference endpoint."
)
return self.embeddings_model.encode(docs, show_progress_bar=True, **self.encode_kwargs).tolist()
# break up the docs payload in chunks of 1000 to avoid hitting rate limits
headers = {
"Authorization": f"Bearer {self.api_key}",
"Content-Type": "application/json",
}
with tqdm.tqdm(total=len(docs)) as pbar:
for i in range(0, len(docs), 1000):
payload = {"inputs": docs[i : i + 1000]}
response = requests.post(target_url, json=payload, headers=headers)
try:
response.raise_for_status()
except requests.exceptions.HTTPError as e:
print(f"Error: {e}")
print(f"Response: {response.json()}")
raise e
if i == 0:
embeddings = response.json()["embeddings"]
else:
embeddings += response.json()["embeddings"]
pbar.update(1000)
return embeddings
return self.embeddings_model.encode(docs, show_progress_bar=True, **self.encode_kwargs).tolist()

View File

@@ -6,6 +6,7 @@ import os
import time
import uuid
from typing import Any, Dict, List, Optional, Union
from urllib.parse import unquote
from asgiref.sync import sync_to_async
from fastapi import APIRouter, Depends, File, HTTPException, Request, UploadFile
@@ -704,6 +705,7 @@ async def chat(
rate_limiter_per_day=Depends(ApiUserRateLimiter(requests=10, subscribed_requests=600, window=60 * 60 * 24)),
) -> Response:
user: KhojUser = request.user.object
q = unquote(q)
await is_ready_to_chat(user)
conversation_command = get_conversation_command(query=q, any_references=True)