mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-07 21:29:13 +00:00
Resolve merge conflicts
This commit is contained in:
@@ -18,6 +18,7 @@ from khoj.utils.helpers import resolve_absolute_path
|
||||
from khoj.utils.rawconfig import (
|
||||
ContentConfig,
|
||||
ConversationProcessorConfig,
|
||||
OfflineChatProcessorConfig,
|
||||
OpenAIProcessorConfig,
|
||||
ProcessorConfig,
|
||||
TextContentConfig,
|
||||
@@ -207,8 +208,9 @@ def processor_config_offline_chat(tmp_path_factory):
|
||||
|
||||
# Setup conversation processor
|
||||
processor_config = ProcessorConfig()
|
||||
offline_chat = OfflineChatProcessorConfig(enable_offline_chat=True)
|
||||
processor_config.conversation = ConversationProcessorConfig(
|
||||
enable_offline_chat=True,
|
||||
offline_chat=offline_chat,
|
||||
conversation_logfile=processor_dir.joinpath("conversation_logs.json"),
|
||||
)
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@ from urllib.parse import quote
|
||||
|
||||
# External Packages
|
||||
from fastapi.testclient import TestClient
|
||||
import pytest
|
||||
|
||||
# Internal Packages
|
||||
from app.main import app
|
||||
@@ -60,13 +61,13 @@ def test_regenerate_with_invalid_content_type(client):
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------------------------------
|
||||
def test_index_batch(client):
|
||||
def test_index_update(client):
|
||||
# Arrange
|
||||
request_body = get_sample_files_data()
|
||||
files = get_sample_files_data()
|
||||
headers = {"x-api-key": "secret"}
|
||||
|
||||
# Act
|
||||
response = client.post("/v1/indexer/batch", json=request_body, headers=headers)
|
||||
response = client.post("/api/v1/index/update", files=files, headers=headers)
|
||||
|
||||
# Assert
|
||||
assert response.status_code == 200
|
||||
@@ -76,12 +77,11 @@ def test_index_batch(client):
|
||||
def test_regenerate_with_valid_content_type(client):
|
||||
for content_type in ["all", "org", "markdown", "image", "pdf", "notion", "plugin1"]:
|
||||
# Arrange
|
||||
request_body = get_sample_files_data()
|
||||
|
||||
files = get_sample_files_data()
|
||||
headers = {"x-api-key": "secret"}
|
||||
|
||||
# Act
|
||||
response = client.post(f"/v1/indexer/batch?search_type={content_type}", json=request_body, headers=headers)
|
||||
response = client.post(f"/api/v1/index/update?t={content_type}", files=files, headers=headers)
|
||||
# Assert
|
||||
assert response.status_code == 200, f"Returned status: {response.status_code} for content type: {content_type}"
|
||||
|
||||
@@ -92,17 +92,17 @@ def test_regenerate_with_github_fails_without_pat(client):
|
||||
response = client.get(f"/api/update?force=true&t=github")
|
||||
|
||||
# Arrange
|
||||
request_body = get_sample_files_data()
|
||||
|
||||
files = get_sample_files_data()
|
||||
headers = {"x-api-key": "secret"}
|
||||
|
||||
# Act
|
||||
response = client.post(f"/v1/indexer/batch?search_type=github", json=request_body, headers=headers)
|
||||
response = client.post(f"/api/v1/index/update?t=github", files=files, headers=headers)
|
||||
# Assert
|
||||
assert response.status_code == 200, f"Returned status: {response.status_code} for content type: github"
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------------------------------
|
||||
@pytest.mark.skip(reason="Flaky test on parallel test runs")
|
||||
def test_get_configured_types_via_api(client):
|
||||
# Act
|
||||
response = client.get(f"/api/config/types")
|
||||
@@ -288,24 +288,20 @@ def test_notes_search_with_exclude_filter(
|
||||
|
||||
def get_sample_files_data():
|
||||
return {
|
||||
"org": {
|
||||
"path/to/filename.org": "* practicing piano",
|
||||
"path/to/filename1.org": "** top 3 reasons why I moved to SF",
|
||||
"path/to/filename2.org": "* how to build a search engine",
|
||||
},
|
||||
"pdf": {
|
||||
"path/to/filename.pdf": "Moore's law does not apply to consumer hardware",
|
||||
"path/to/filename1.pdf": "The sun is a ball of helium",
|
||||
"path/to/filename2.pdf": "Effect of sunshine on baseline human happiness",
|
||||
},
|
||||
"plaintext": {
|
||||
"path/to/filename.txt": "data,column,value",
|
||||
"path/to/filename1.txt": "<html>my first web page</html>",
|
||||
"path/to/filename2.txt": "2021-02-02 Journal Entry",
|
||||
},
|
||||
"markdown": {
|
||||
"path/to/filename.md": "# Notes from client call",
|
||||
"path/to/filename1.md": "## Studying anthropological records from the Fatimid caliphate",
|
||||
"path/to/filename2.md": "**Understanding science through the lens of art**",
|
||||
},
|
||||
"files": ("path/to/filename.org", "* practicing piano", "text/org"),
|
||||
"files": ("path/to/filename1.org", "** top 3 reasons why I moved to SF", "text/org"),
|
||||
"files": ("path/to/filename2.org", "* how to build a search engine", "text/org"),
|
||||
"files": ("path/to/filename.pdf", "Moore's law does not apply to consumer hardware", "application/pdf"),
|
||||
"files": ("path/to/filename1.pdf", "The sun is a ball of helium", "application/pdf"),
|
||||
"files": ("path/to/filename2.pdf", "Effect of sunshine on baseline human happiness", "application/pdf"),
|
||||
"files": ("path/to/filename.txt", "data,column,value", "text/plain"),
|
||||
"files": ("path/to/filename1.txt", "<html>my first web page</html>", "text/plain"),
|
||||
"files": ("path/to/filename2.txt", "2021-02-02 Journal Entry", "text/plain"),
|
||||
"files": ("path/to/filename.md", "# Notes from client call", "text/markdown"),
|
||||
"files": (
|
||||
"path/to/filename1.md",
|
||||
"## Studying anthropological records from the Fatimid caliphate",
|
||||
"text/markdown",
|
||||
),
|
||||
"files": ("path/to/filename2.md", "**Understanding science through the lens of art**", "text/markdown"),
|
||||
}
|
||||
|
||||
@@ -24,7 +24,7 @@ from khoj.processor.conversation.gpt4all.utils import download_model
|
||||
|
||||
from khoj.processor.conversation.utils import message_to_log
|
||||
|
||||
MODEL_NAME = "llama-2-7b-chat.ggmlv3.q4_K_S.bin"
|
||||
MODEL_NAME = "llama-2-7b-chat.ggmlv3.q4_0.bin"
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
@@ -128,15 +128,15 @@ def test_extract_multiple_explicit_questions_from_message(loaded_model):
|
||||
@pytest.mark.chatquality
|
||||
def test_extract_multiple_implicit_questions_from_message(loaded_model):
|
||||
# Act
|
||||
response = extract_questions_offline("Is Morpheus taller than Neo?", loaded_model=loaded_model)
|
||||
response = extract_questions_offline("Is Carl taller than Ross?", loaded_model=loaded_model)
|
||||
|
||||
# Assert
|
||||
expected_responses = ["height", "taller", "shorter", "heights"]
|
||||
expected_responses = ["height", "taller", "shorter", "heights", "who"]
|
||||
assert len(response) <= 3
|
||||
|
||||
for question in response:
|
||||
assert any([expected_response in question.lower() for expected_response in expected_responses]), (
|
||||
"Expected chat actor to ask follow-up questions about Morpheus and Neo, but got: " + question
|
||||
"Expected chat actor to ask follow-up questions about Carl and Ross, but got: " + question
|
||||
)
|
||||
|
||||
|
||||
@@ -145,7 +145,7 @@ def test_extract_multiple_implicit_questions_from_message(loaded_model):
|
||||
def test_generate_search_query_using_question_from_chat_history(loaded_model):
|
||||
# Arrange
|
||||
message_list = [
|
||||
("What is the name of Mr. Vader's daughter?", "Princess Leia", []),
|
||||
("What is the name of Mr. Anderson's daughter?", "Miss Barbara", []),
|
||||
]
|
||||
|
||||
# Act
|
||||
@@ -156,17 +156,22 @@ def test_generate_search_query_using_question_from_chat_history(loaded_model):
|
||||
use_history=True,
|
||||
)
|
||||
|
||||
expected_responses = [
|
||||
"Vader",
|
||||
"sons",
|
||||
all_expected_in_response = [
|
||||
"Anderson",
|
||||
]
|
||||
|
||||
any_expected_in_response = [
|
||||
"son",
|
||||
"Darth",
|
||||
"sons",
|
||||
"children",
|
||||
]
|
||||
|
||||
# Assert
|
||||
assert len(response) >= 1
|
||||
assert any([expected_response in response[0] for expected_response in expected_responses]), (
|
||||
assert all([expected_response in response[0] for expected_response in all_expected_in_response]), (
|
||||
"Expected chat actor to ask for clarification in response, but got: " + response[0]
|
||||
)
|
||||
assert any([expected_response in response[0] for expected_response in any_expected_in_response]), (
|
||||
"Expected chat actor to ask for clarification in response, but got: " + response[0]
|
||||
)
|
||||
|
||||
@@ -176,20 +181,20 @@ def test_generate_search_query_using_question_from_chat_history(loaded_model):
|
||||
def test_generate_search_query_using_answer_from_chat_history(loaded_model):
|
||||
# Arrange
|
||||
message_list = [
|
||||
("What is the name of Mr. Vader's daughter?", "Princess Leia", []),
|
||||
("What is the name of Mr. Anderson's daughter?", "Miss Barbara", []),
|
||||
]
|
||||
|
||||
# Act
|
||||
response = extract_questions_offline(
|
||||
"Is she a Jedi?",
|
||||
"Is she a Doctor?",
|
||||
conversation_log=populate_chat_history(message_list),
|
||||
loaded_model=loaded_model,
|
||||
use_history=True,
|
||||
)
|
||||
|
||||
expected_responses = [
|
||||
"Leia",
|
||||
"Vader",
|
||||
"Barbara",
|
||||
"Robert",
|
||||
"daughter",
|
||||
]
|
||||
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
# Standard Packages
|
||||
import json
|
||||
import os
|
||||
import base64
|
||||
|
||||
# Internal Packages
|
||||
from khoj.processor.pdf.pdf_to_jsonl import PdfToJsonl
|
||||
@@ -16,7 +15,7 @@ def test_single_page_pdf_to_jsonl():
|
||||
# Extract Entries from specified Pdf files
|
||||
# Read singlepage.pdf into memory as bytes
|
||||
with open("tests/data/pdf/singlepage.pdf", "rb") as f:
|
||||
pdf_bytes = base64.b64encode(f.read()).decode("utf-8")
|
||||
pdf_bytes = f.read()
|
||||
|
||||
data = {"tests/data/pdf/singlepage.pdf": pdf_bytes}
|
||||
entries, entry_to_file_map = PdfToJsonl.extract_pdf_entries(pdf_files=data)
|
||||
@@ -36,7 +35,7 @@ def test_multi_page_pdf_to_jsonl():
|
||||
# Act
|
||||
# Extract Entries from specified Pdf files
|
||||
with open("tests/data/pdf/multipage.pdf", "rb") as f:
|
||||
pdf_bytes = base64.b64encode(f.read()).decode("utf-8")
|
||||
pdf_bytes = f.read()
|
||||
|
||||
data = {"tests/data/pdf/multipage.pdf": pdf_bytes}
|
||||
entries, entry_to_file_map = PdfToJsonl.extract_pdf_entries(pdf_files=data)
|
||||
|
||||
@@ -1,26 +1,25 @@
|
||||
# System Packages
|
||||
import logging
|
||||
import locale
|
||||
from pathlib import Path
|
||||
import os
|
||||
|
||||
# External Packages
|
||||
import pytest
|
||||
from khoj.utils.config import SearchModels
|
||||
|
||||
# Internal Packages
|
||||
from khoj.utils.state import content_index, search_models
|
||||
from khoj.search_type import text_search
|
||||
from khoj.utils.rawconfig import ContentConfig, SearchConfig, TextContentConfig
|
||||
from khoj.processor.org_mode.org_to_jsonl import OrgToJsonl
|
||||
from khoj.processor.github.github_to_jsonl import GithubToJsonl
|
||||
from khoj.utils.config import SearchModels
|
||||
from khoj.utils.fs_syncer import get_org_files
|
||||
from khoj.utils.rawconfig import ContentConfig, SearchConfig, TextContentConfig
|
||||
|
||||
|
||||
# Test
|
||||
# ----------------------------------------------------------------------------------------------------
|
||||
def test_text_search_setup_with_missing_file_raises_error(
|
||||
org_config_with_only_new_file: TextContentConfig, search_config: SearchConfig
|
||||
):
|
||||
def test_text_search_setup_with_missing_file_raises_error(org_config_with_only_new_file: TextContentConfig):
|
||||
# Arrange
|
||||
# Ensure file mentioned in org.input-files is missing
|
||||
single_new_file = Path(org_config_with_only_new_file.input_files[0])
|
||||
@@ -29,7 +28,23 @@ def test_text_search_setup_with_missing_file_raises_error(
|
||||
# Act
|
||||
# Generate notes embeddings during asymmetric setup
|
||||
with pytest.raises(FileNotFoundError):
|
||||
data = get_org_files(org_config_with_only_new_file)
|
||||
get_org_files(org_config_with_only_new_file)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------------------------------
|
||||
def test_get_org_files_with_org_suffixed_dir_doesnt_raise_error(tmp_path: Path):
|
||||
# Arrange
|
||||
orgfile = tmp_path / "directory.org" / "file.org"
|
||||
orgfile.parent.mkdir()
|
||||
with open(orgfile, "w") as f:
|
||||
f.write("* Heading\n- List item\n")
|
||||
org_content_config = TextContentConfig(
|
||||
input_filter=[f"{tmp_path}/**/*"], compressed_jsonl="test.jsonl", embeddings_file="test.pt"
|
||||
)
|
||||
|
||||
# Act
|
||||
# should not raise IsADirectoryError and return orgfile
|
||||
assert get_org_files(org_content_config) == {f"{orgfile}": "* Heading\n- List item\n"}
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------------------------------
|
||||
@@ -48,6 +63,7 @@ def test_text_search_setup_with_empty_file_raises_error(
|
||||
def test_text_search_setup(content_config: ContentConfig, search_models: SearchModels):
|
||||
# Arrange
|
||||
data = get_org_files(content_config.org)
|
||||
|
||||
# Act
|
||||
# Regenerate notes embeddings during asymmetric setup
|
||||
notes_model = text_search.setup(
|
||||
|
||||
Reference in New Issue
Block a user