Merge branch 'master' of github.com:khoj-ai/khoj into features/big-upgrade-chat-ux

2026-03-08 05:39:13 +00:00 · 2024-07-27 14:18:05 +05:30
parent 1685c60e3c eb5af38f33
commit 1a1d9c7257
46 changed files with 1598 additions and 1394 deletions
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -61,7 +61,7 @@ def test_search_with_invalid_content_type(client):
@pytest.mark.django_db(transaction=True)
 def test_search_with_valid_content_type(client):
    headers = {"Authorization": "Bearer kk-secret"}
-    for content_type in ["all", "org", "markdown", "image", "pdf", "github", "notion", "plaintext", "docx"]:
+    for content_type in ["all", "org", "markdown", "image", "pdf", "github", "notion", "plaintext", "docx"]:
        # Act
        response = client.get(f"/api/search?q=random&t={content_type}", headers=headers)
        # Assert
@@ -127,6 +127,8 @@ def test_index_update_big_files(client):
    # Arrange
    state.billing_enabled = True
    files = get_big_size_sample_files_data()
+
+    # Credential for the default_user, who is subscribed
    headers = {"Authorization": "Bearer kk-secret"}

    # Act
@@ -455,13 +457,13 @@ def test_user_no_data_returns_empty(client, sample_org_data, api_user3: KhojApiU

@pytest.mark.skipif(os.getenv("OPENAI_API_KEY") is None, reason="requires OPENAI_API_KEY")
@pytest.mark.django_db(transaction=True)
-def test_chat_with_unauthenticated_user(chat_client_with_auth, api_user2: KhojApiUser):
+def test_chat_with_unauthenticated_user(chat_client_with_auth, api_user2: KhojApiUser):
    # Arrange
    headers = {"Authorization": f"Bearer {api_user2.token}"}

    # Act
-    auth_response = chat_client_with_auth.get(f'/api/chat?q="Hello!"&stream=true', headers=headers)
-    no_auth_response = chat_client_with_auth.get(f'/api/chat?q="Hello!"&stream=true')
+    auth_response = chat_client_with_auth.get(f'/api/chat?q="Hello!"', headers=headers)
+    no_auth_response = chat_client_with_auth.get(f'/api/chat?q="Hello!"')

    # Assert
    assert auth_response.status_code == 200
@@ -497,7 +499,8 @@ def get_sample_files_data():


 def get_big_size_sample_files_data():
-    big_text = "a" * (25 * 1024 * 1024)  # a string of approximately 25 MB
+    # a string of approximately 100 MB
+    big_text = "a" * (100 * 1024 * 1024)
    return [
        (
            "files",
--- a/tests/test_offline_chat_actors.py
+++ b/tests/test_offline_chat_actors.py
@@ -286,7 +286,7 @@ def test_answer_from_chat_history_and_currently_retrieved_content(loaded_model):
    # Act
    response_gen = converse_offline(
        references=[
-            "Testatron was born on 1st April 1984 in Testville."
+            {"compiled": "Testatron was born on 1st April 1984 in Testville."}
        ],  # Assume context retrieved from notes for the user_query
        user_query="Where was I born?",
        conversation_log=populate_chat_history(message_list),
@@ -341,14 +341,22 @@ def test_answer_requires_current_date_awareness(loaded_model):
    "Chat actor should be able to answer questions relative to current date using provided notes"
    # Arrange
    context = [
-        f"""{datetime.now().strftime("%Y-%m-%d")} "Naco Taco" "Tacos for Dinner"
-Expenses:Food:Dining  10.00 USD""",
-        f"""{datetime.now().strftime("%Y-%m-%d")} "Sagar Ratna" "Dosa for Lunch"
-Expenses:Food:Dining  10.00 USD""",
-        f"""2020-04-01 "SuperMercado" "Bananas"
-Expenses:Food:Groceries  10.00 USD""",
-        f"""2020-01-01 "Naco Taco" "Burittos for Dinner"
-Expenses:Food:Dining  10.00 USD""",
+        {
+            "compiled": f"""{datetime.now().strftime("%Y-%m-%d")} "Naco Taco" "Tacos for Dinner"
+Expenses:Food:Dining  10.00 USD"""
+        },
+        {
+            "compiled": f"""{datetime.now().strftime("%Y-%m-%d")} "Sagar Ratna" "Dosa for Lunch"
+Expenses:Food:Dining  10.00 USD"""
+        },
+        {
+            "compiled": f"""2020-04-01 "SuperMercado" "Bananas"
+Expenses:Food:Groceries  10.00 USD"""
+        },
+        {
+            "compiled": f"""2020-01-01 "Naco Taco" "Burittos for Dinner"
+Expenses:Food:Dining  10.00 USD"""
+        },
    ]

    # Act
@@ -373,14 +381,22 @@ def test_answer_requires_date_aware_aggregation_across_provided_notes(loaded_mod
    "Chat actor should be able to answer questions that require date aware aggregation across multiple notes"
    # Arrange
    context = [
-        f"""# {datetime.now().strftime("%Y-%m-%d")} "Naco Taco" "Tacos for Dinner"
-Expenses:Food:Dining  10.00 USD""",
-        f"""{datetime.now().strftime("%Y-%m-%d")} "Sagar Ratna" "Dosa for Lunch"
-Expenses:Food:Dining  10.00 USD""",
-        f"""2020-04-01 "SuperMercado" "Bananas"
-Expenses:Food:Groceries  10.00 USD""",
-        f"""2020-01-01 "Naco Taco" "Burittos for Dinner"
-Expenses:Food:Dining  10.00 USD""",
+        {
+            "compiled": f"""# {datetime.now().strftime("%Y-%m-%d")} "Naco Taco" "Tacos for Dinner"
+Expenses:Food:Dining  10.00 USD"""
+        },
+        {
+            "compiled": f"""{datetime.now().strftime("%Y-%m-%d")} "Sagar Ratna" "Dosa for Lunch"
+Expenses:Food:Dining  10.00 USD"""
+        },
+        {
+            "compiled": f"""2020-04-01 "SuperMercado" "Bananas"
+Expenses:Food:Groceries  10.00 USD"""
+        },
+        {
+            "compiled": f"""2020-01-01 "Naco Taco" "Burittos for Dinner"
+Expenses:Food:Dining  10.00 USD"""
+        },
    ]

    # Act
@@ -430,12 +446,18 @@ def test_ask_for_clarification_if_not_enough_context_in_question(loaded_model):
    "Chat actor should ask for clarification if question cannot be answered unambiguously with the provided context"
    # Arrange
    context = [
-        f"""# Ramya
-My sister, Ramya, is married to Kali Devi. They have 2 kids, Ravi and Rani.""",
-        f"""# Fang
-My sister, Fang Liu is married to Xi Li. They have 1 kid, Xiao Li.""",
-        f"""# Aiyla
-My sister, Aiyla is married to Tolga. They have 3 kids, Yildiz, Ali and Ahmet.""",
+        {
+            "compiled": f"""# Ramya
+My sister, Ramya, is married to Kali Devi. They have 2 kids, Ravi and Rani."""
+        },
+        {
+            "compiled": f"""# Fang
+My sister, Fang Liu is married to Xi Li. They have 1 kid, Xiao Li."""
+        },
+        {
+            "compiled": f"""# Aiyla
+My sister, Aiyla is married to Tolga. They have 3 kids, Yildiz, Ali and Ahmet."""
+        },
    ]

    # Act
@@ -459,9 +481,9 @@ def test_agent_prompt_should_be_used(loaded_model, offline_agent):
    "Chat actor should ask be tuned to think like an accountant based on the agent definition"
    # Arrange
    context = [
-        f"""I went to the store and bought some bananas for 2.20""",
-        f"""I went to the store and bought some apples for 1.30""",
-        f"""I went to the store and bought some oranges for 6.00""",
+        {"compiled": f"""I went to the store and bought some bananas for 2.20"""},
+        {"compiled": f"""I went to the store and bought some apples for 1.30"""},
+        {"compiled": f"""I went to the store and bought some oranges for 6.00"""},
    ]

    # Act
@@ -499,7 +521,7 @@ def test_chat_does_not_exceed_prompt_size(loaded_model):
    "Ensure chat context and response together do not exceed max prompt size for the model"
    # Arrange
    prompt_size_exceeded_error = "ERROR: The prompt size exceeds the context window size and cannot be processed"
-    context = [" ".join([f"{number}" for number in range(2043)])]
+    context = [{"compiled": " ".join([f"{number}" for number in range(2043)])}]

    # Act
    response_gen = converse_offline(
@@ -530,7 +552,7 @@ def test_filter_questions():
 # ----------------------------------------------------------------------------------------------------
@pytest.mark.anyio
@pytest.mark.django_db(transaction=True)
-async def test_use_default_response_mode(client_offline_chat):
+async def test_use_text_response_mode(client_offline_chat):
    # Arrange
    user_query = "What's the latest in the Israel/Palestine conflict?"

@@ -538,7 +560,7 @@ async def test_use_default_response_mode(client_offline_chat):
    mode = await aget_relevant_output_modes(user_query, {})

    # Assert
-    assert mode.value == "default"
+    assert mode.value == "text"


 # ----------------------------------------------------------------------------------------------------
--- a/tests/test_offline_chat_director.py
+++ b/tests/test_offline_chat_director.py
@@ -45,7 +45,6 @@ def create_conversation(message_list, user, agent=None):

 # Tests
 # ----------------------------------------------------------------------------------------------------
-@pytest.mark.xfail(AssertionError, reason="Chat director not capable of answering this question yet")
@pytest.mark.chatquality
@pytest.mark.django_db(transaction=True)
 def test_offline_chat_with_no_chat_history_or_retrieved_content(client_offline_chat):
@@ -68,10 +67,8 @@ def test_chat_with_online_content(client_offline_chat):
    # Act
    q = "/online give me the link to paul graham's essay how to do great work"
    encoded_q = quote(q, safe="")
-    response = client_offline_chat.get(f"/api/chat?q={encoded_q}&stream=true")
-    response_message = response.content.decode("utf-8")
-
-    response_message = response_message.split("### compiled references")[0]
+    response = client_offline_chat.get(f"/api/chat?q={encoded_q}")
+    response_message = response.json()["response"]

    # Assert
    expected_responses = [
@@ -92,10 +89,8 @@ def test_chat_with_online_webpage_content(client_offline_chat):
    # Act
    q = "/online how many firefighters were involved in the great chicago fire and which year did it take place?"
    encoded_q = quote(q, safe="")
-    response = client_offline_chat.get(f"/api/chat?q={encoded_q}&stream=true")
-    response_message = response.content.decode("utf-8")
-
-    response_message = response_message.split("### compiled references")[0]
+    response = client_offline_chat.get(f"/api/chat?q={encoded_q}")
+    response_message = response.json()["response"]

    # Assert
    expected_responses = ["185", "1871", "horse"]
@@ -179,10 +174,6 @@ def test_answer_from_chat_history_and_previously_retrieved_content(client_offlin


 # ----------------------------------------------------------------------------------------------------
-@pytest.mark.xfail(
-    AssertionError,
-    reason="Chat director not capable of answering this question yet because it requires extract_questions",
-)
@pytest.mark.chatquality
@pytest.mark.django_db(transaction=True)
 def test_answer_from_chat_history_and_currently_retrieved_content(client_offline_chat, default_user2):
--- a/tests/test_openai_chat_director.py
+++ b/tests/test_openai_chat_director.py
@@ -49,8 +49,8 @@ def create_conversation(message_list, user, agent=None):
@pytest.mark.django_db(transaction=True)
 def test_chat_with_no_chat_history_or_retrieved_content(chat_client):
    # Act
-    response = chat_client.get(f'/api/chat?q="Hello, my name is Testatron. Who are you?"&stream=true')
-    response_message = response.content.decode("utf-8")
+    response = chat_client.get(f'/api/chat?q="Hello, my name is Testatron. Who are you?"')
+    response_message = response.json()["response"]

    # Assert
    expected_responses = ["Khoj", "khoj"]
@@ -67,10 +67,8 @@ def test_chat_with_online_content(chat_client):
    # Act
    q = "/online give me the link to paul graham's essay how to do great work"
    encoded_q = quote(q, safe="")
-    response = chat_client.get(f"/api/chat?q={encoded_q}&stream=true")
-    response_message = response.content.decode("utf-8")
-
-    response_message = response_message.split("### compiled references")[0]
+    response = chat_client.get(f"/api/chat?q={encoded_q}")
+    response_message = response.json()["response"]

    # Assert
    expected_responses = [
@@ -91,10 +89,8 @@ def test_chat_with_online_webpage_content(chat_client):
    # Act
    q = "/online how many firefighters were involved in the great chicago fire and which year did it take place?"
    encoded_q = quote(q, safe="")
-    response = chat_client.get(f"/api/chat?q={encoded_q}&stream=true")
-    response_message = response.content.decode("utf-8")
-
-    response_message = response_message.split("### compiled references")[0]
+    response = chat_client.get(f"/api/chat?q={encoded_q}")
+    response_message = response.json()["response"]

    # Assert
    expected_responses = ["185", "1871", "horse"]
@@ -144,7 +140,7 @@ def test_answer_from_currently_retrieved_content(chat_client, default_user2: Kho

    # Act
    response = chat_client.get(f'/api/chat?q="Where was Xi Li born?"')
-    response_message = response.content.decode("utf-8")
+    response_message = response.json()["response"]

    # Assert
    assert response.status_code == 200
@@ -168,7 +164,7 @@ def test_answer_from_chat_history_and_previously_retrieved_content(chat_client_n

    # Act
    response = chat_client_no_background.get(f'/api/chat?q="Where was I born?"')
-    response_message = response.content.decode("utf-8")
+    response_message = response.json()["response"]

    # Assert
    assert response.status_code == 200
@@ -191,7 +187,7 @@ def test_answer_from_chat_history_and_currently_retrieved_content(chat_client, d

    # Act
    response = chat_client.get(f'/api/chat?q="Where was I born?"')
-    response_message = response.content.decode("utf-8")
+    response_message = response.json()["response"]

    # Assert
    assert response.status_code == 200
@@ -215,8 +211,8 @@ def test_no_answer_in_chat_history_or_retrieved_content(chat_client, default_use
    create_conversation(message_list, default_user2)

    # Act
-    response = chat_client.get(f'/api/chat?q="Where was I born?"&stream=true')
-    response_message = response.content.decode("utf-8")
+    response = chat_client.get(f'/api/chat?q="Where was I born?"')
+    response_message = response.json()["response"]

    # Assert
    expected_responses = [
@@ -226,6 +222,7 @@ def test_no_answer_in_chat_history_or_retrieved_content(chat_client, default_use
        "do not have",
        "don't have",
        "where were you born?",
+        "where you were born?",
    ]

    assert response.status_code == 200
@@ -280,8 +277,8 @@ def test_answer_not_known_using_notes_command(chat_client_no_background, default
    create_conversation(message_list, default_user2)

    # Act
-    response = chat_client_no_background.get(f"/api/chat?q={query}&stream=true")
-    response_message = response.content.decode("utf-8")
+    response = chat_client_no_background.get(f"/api/chat?q={query}")
+    response_message = response.json()["response"]

    # Assert
    assert response.status_code == 200
@@ -527,8 +524,8 @@ def test_answer_general_question_not_in_chat_history_or_retrieved_content(chat_c
    create_conversation(message_list, default_user2)

    # Act
-    response = chat_client.get(f'/api/chat?q="Write a haiku about unit testing. Do not say anything else."&stream=true')
-    response_message = response.content.decode("utf-8").split("### compiled references")[0]
+    response = chat_client.get(f'/api/chat?q="Write a haiku about unit testing. Do not say anything else."')
+    response_message = response.json()["response"]

    # Assert
    expected_responses = ["test", "Test"]
@@ -544,9 +541,8 @@ def test_answer_general_question_not_in_chat_history_or_retrieved_content(chat_c
@pytest.mark.chatquality
 def test_ask_for_clarification_if_not_enough_context_in_question(chat_client_no_background):
    # Act
-
-    response = chat_client_no_background.get(f'/api/chat?q="What is the name of Namitas older son?"&stream=true')
-    response_message = response.content.decode("utf-8").split("### compiled references")[0].lower()
+    response = chat_client_no_background.get(f'/api/chat?q="What is the name of Namitas older son?"')
+    response_message = response.json()["response"].lower()

    # Assert
    expected_responses = [
@@ -658,8 +654,8 @@ def test_answer_in_chat_history_by_conversation_id_with_agent(
 def test_answer_requires_multiple_independent_searches(chat_client):
    "Chat director should be able to answer by doing multiple independent searches for required information"
    # Act
-    response = chat_client.get(f'/api/chat?q="Is Xi older than Namita? Just the older persons full name"&stream=true')
-    response_message = response.content.decode("utf-8").split("### compiled references")[0].lower()
+    response = chat_client.get(f'/api/chat?q="Is Xi older than Namita? Just the older persons full name"')
+    response_message = response.json()["response"].lower()

    # Assert
    expected_responses = ["he is older than namita", "xi is older than namita", "xi li is older than namita"]
@@ -683,8 +679,8 @@ def test_answer_using_file_filter(chat_client):
        'Is Xi older than Namita? Just say the older persons full name. file:"Namita.markdown" file:"Xi Li.markdown"'
    )

-    response = chat_client.get(f"/api/chat?q={query}&stream=true")
-    response_message = response.content.decode("utf-8").split("### compiled references")[0].lower()
+    response = chat_client.get(f"/api/chat?q={query}")
+    response_message = response.json()["response"].lower()

    # Assert
    expected_responses = ["he is older than namita", "xi is older than namita", "xi li is older than namita"]