Fix Offline Chat without GPU and Decoding Chat Query before Processing

- Only run /online command offline chat director test when `SERPER_DEV_API_KEY` present
- Decode URL encoded query string in chat API endpoint before processing
- Make references and online_results optional params to converse_offline
- Pass max context length to the updated `GPT4All.list_gpu` method to fix GPU detection
This commit is contained in:
Debanjum
2024-01-16 14:53:34 +05:30
committed by GitHub
5 changed files with 13 additions and 7 deletions

View File

@@ -62,8 +62,8 @@ dependencies = [
"pymupdf >= 1.23.5", "pymupdf >= 1.23.5",
"django == 4.2.7", "django == 4.2.7",
"authlib == 1.2.1", "authlib == 1.2.1",
"gpt4all >= 2.0.0; platform_system == 'Linux' and platform_machine == 'x86_64'", "gpt4all >= 2.1.0; platform_system == 'Linux' and platform_machine == 'x86_64'",
"gpt4all >= 2.0.0; platform_system == 'Windows' or platform_system == 'Darwin'", "gpt4all >= 2.1.0; platform_system == 'Windows' or platform_system == 'Darwin'",
"itsdangerous == 2.1.2", "itsdangerous == 2.1.2",
"httpx == 0.25.0", "httpx == 0.25.0",
"pgvector == 0.2.4", "pgvector == 0.2.4",

View File

@@ -123,9 +123,9 @@ def filter_questions(questions: List[str]):
def converse_offline( def converse_offline(
references,
online_results,
user_query, user_query,
references=[],
online_results=[],
conversation_log={}, conversation_log={},
model: str = "mistral-7b-instruct-v0.1.Q4_0.gguf", model: str = "mistral-7b-instruct-v0.1.Q4_0.gguf",
loaded_model: Union[Any, None] = None, loaded_model: Union[Any, None] = None,

View File

@@ -21,9 +21,11 @@ def download_model(model_name: str):
# Try load chat model to GPU if: # Try load chat model to GPU if:
# 1. Loading chat model to GPU isn't disabled via CLI and # 1. Loading chat model to GPU isn't disabled via CLI and
# 2. Machine has GPU # 2. Machine has GPU
# 3. GPU has enough free memory to load the chat model # 3. GPU has enough free memory to load the chat model with max context length of 4096
device = ( device = (
"gpu" if state.chat_on_gpu and gpt4all.pyllmodel.LLModel().list_gpu(chat_model_config["path"]) else "cpu" "gpu"
if state.chat_on_gpu and gpt4all.pyllmodel.LLModel().list_gpu(chat_model_config["path"], 4096)
else "cpu"
) )
except ValueError: except ValueError:
device = "cpu" device = "cpu"
@@ -35,7 +37,7 @@ def download_model(model_name: str):
raise e raise e
# Now load the downloaded chat model onto appropriate device # Now load the downloaded chat model onto appropriate device
chat_model = gpt4all.GPT4All(model_name=model_name, device=device, allow_download=False) chat_model = gpt4all.GPT4All(model_name=model_name, n_ctx=4096, device=device, allow_download=False)
logger.debug(f"Loaded chat model to {device.upper()}.") logger.debug(f"Loaded chat model to {device.upper()}.")
return chat_model return chat_model

View File

@@ -6,6 +6,7 @@ import os
import time import time
import uuid import uuid
from typing import Any, Dict, List, Optional, Union from typing import Any, Dict, List, Optional, Union
from urllib.parse import unquote
from asgiref.sync import sync_to_async from asgiref.sync import sync_to_async
from fastapi import APIRouter, Depends, File, HTTPException, Request, UploadFile from fastapi import APIRouter, Depends, File, HTTPException, Request, UploadFile
@@ -704,6 +705,7 @@ async def chat(
rate_limiter_per_day=Depends(ApiUserRateLimiter(requests=10, subscribed_requests=600, window=60 * 60 * 24)), rate_limiter_per_day=Depends(ApiUserRateLimiter(requests=10, subscribed_requests=600, window=60 * 60 * 24)),
) -> Response: ) -> Response:
user: KhojUser = request.user.object user: KhojUser = request.user.object
q = unquote(q)
await is_ready_to_chat(user) await is_ready_to_chat(user)
conversation_command = get_conversation_command(query=q, any_references=True) conversation_command = get_conversation_command(query=q, any_references=True)

View File

@@ -1,3 +1,4 @@
import os
import urllib.parse import urllib.parse
from urllib.parse import quote from urllib.parse import quote
@@ -53,6 +54,7 @@ def test_chat_with_no_chat_history_or_retrieved_content_gpt4all(client_offline_c
# ---------------------------------------------------------------------------------------------------- # ----------------------------------------------------------------------------------------------------
@pytest.mark.skipif(os.getenv("SERPER_DEV_API_KEY") is None, reason="requires SERPER_DEV_API_KEY")
@pytest.mark.chatquality @pytest.mark.chatquality
@pytest.mark.django_db(transaction=True) @pytest.mark.django_db(transaction=True)
def test_chat_with_online_content(chat_client): def test_chat_with_online_content(chat_client):