mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-02 21:19:12 +00:00
Drop native offline chat support with llama-cpp-python
It is recommended to chat with open-source models by running an open-source server like Ollama or Llama.cpp on your GPU-powered machine, or by using a commercial provider of open-source models like DeepInfra or OpenRouter. These chat model serving options provide a mature OpenAI-compatible API that already works with Khoj. Directly using offline chat models only worked reasonably well with a pip install on a machine with a GPU. The Docker setup of Khoj had trouble accessing the GPU, and without GPU access offline chat is too slow. Deprecating support for an offline chat provider directly from within Khoj will reduce code complexity and increase development velocity. Offline models are subsumed under the existing OpenAI model provider.
This commit is contained in:
@@ -196,17 +196,6 @@ def default_openai_chat_model_option():
|
||||
return chat_model
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
@pytest.fixture
|
||||
def offline_agent():
|
||||
chat_model = ChatModelFactory()
|
||||
return Agent.objects.create(
|
||||
name="Accountant",
|
||||
chat_model=chat_model,
|
||||
personality="You are a certified CPA. You are able to tell me how much I've spent based on my notes. Regardless of what I ask, you should always respond with the total amount I've spent. ALWAYS RESPOND WITH A SUMMARY TOTAL OF HOW MUCH MONEY I HAVE SPENT.",
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
@pytest.fixture
|
||||
def openai_agent():
|
||||
@@ -516,40 +505,6 @@ def client(
|
||||
return TestClient(app)
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def client_offline_chat(search_config: SearchConfig, default_user2: KhojUser):
|
||||
# Initialize app state
|
||||
state.config.search_type = search_config
|
||||
state.SearchType = configure_search_types()
|
||||
|
||||
LocalMarkdownConfig.objects.create(
|
||||
input_files=None,
|
||||
input_filter=["tests/data/markdown/*.markdown"],
|
||||
user=default_user2,
|
||||
)
|
||||
|
||||
all_files = fs_syncer.collect_files(user=default_user2)
|
||||
configure_content(default_user2, all_files)
|
||||
|
||||
# Initialize Processor from Config
|
||||
ChatModelFactory(
|
||||
name="bartowski/Meta-Llama-3.1-3B-Instruct-GGUF",
|
||||
tokenizer=None,
|
||||
max_prompt_size=None,
|
||||
model_type="offline",
|
||||
)
|
||||
UserConversationProcessorConfigFactory(user=default_user2)
|
||||
|
||||
state.anonymous_mode = True
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
configure_routes(app)
|
||||
configure_middleware(app)
|
||||
app.mount("/static", StaticFiles(directory=web_directory), name="static")
|
||||
return TestClient(app)
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def new_org_file(default_user: KhojUser, content_config: ContentConfig):
|
||||
# Setup
|
||||
|
||||
Reference in New Issue
Block a user