Merge branch 'master' of github.com:khoj-ai/khoj into features/big-upgrade-chat-ux

This commit is contained in:
sabaimran
2024-07-27 14:18:05 +05:30
46 changed files with 1598 additions and 1394 deletions

View File

@@ -61,7 +61,7 @@ def test_search_with_invalid_content_type(client):
@pytest.mark.django_db(transaction=True)
def test_search_with_valid_content_type(client):
headers = {"Authorization": "Bearer kk-secret"}
for content_type in ["all", "org", "markdown", "image", "pdf", "github", "notion", "plaintext", "docx"]:
for content_type in ["all", "org", "markdown", "image", "pdf", "github", "notion", "plaintext", "image", "docx"]:
# Act
response = client.get(f"/api/search?q=random&t={content_type}", headers=headers)
# Assert
@@ -127,6 +127,8 @@ def test_index_update_big_files(client):
# Arrange
state.billing_enabled = True
files = get_big_size_sample_files_data()
# Credential for the default_user, who is subscribed
headers = {"Authorization": "Bearer kk-secret"}
# Act
@@ -455,13 +457,13 @@ def test_user_no_data_returns_empty(client, sample_org_data, api_user3: KhojApiU
@pytest.mark.skipif(os.getenv("OPENAI_API_KEY") is None, reason="requires OPENAI_API_KEY")
@pytest.mark.django_db(transaction=True)
def test_chat_with_unauthenticated_user(chat_client_with_auth, api_user2: KhojApiUser):
async def test_chat_with_unauthenticated_user(chat_client_with_auth, api_user2: KhojApiUser):
# Arrange
headers = {"Authorization": f"Bearer {api_user2.token}"}
# Act
auth_response = chat_client_with_auth.get(f'/api/chat?q="Hello!"&stream=true', headers=headers)
no_auth_response = chat_client_with_auth.get(f'/api/chat?q="Hello!"&stream=true')
auth_response = chat_client_with_auth.get(f'/api/chat?q="Hello!"', headers=headers)
no_auth_response = chat_client_with_auth.get(f'/api/chat?q="Hello!"')
# Assert
assert auth_response.status_code == 200
@@ -497,7 +499,8 @@ def get_sample_files_data():
def get_big_size_sample_files_data():
big_text = "a" * (25 * 1024 * 1024) # a string of approximately 25 MB
# a string of approximately 100 MB
big_text = "a" * (100 * 1024 * 1024)
return [
(
"files",

View File

@@ -286,7 +286,7 @@ def test_answer_from_chat_history_and_currently_retrieved_content(loaded_model):
# Act
response_gen = converse_offline(
references=[
"Testatron was born on 1st April 1984 in Testville."
{"compiled": "Testatron was born on 1st April 1984 in Testville."}
], # Assume context retrieved from notes for the user_query
user_query="Where was I born?",
conversation_log=populate_chat_history(message_list),
@@ -341,14 +341,22 @@ def test_answer_requires_current_date_awareness(loaded_model):
"Chat actor should be able to answer questions relative to current date using provided notes"
# Arrange
context = [
f"""{datetime.now().strftime("%Y-%m-%d")} "Naco Taco" "Tacos for Dinner"
Expenses:Food:Dining 10.00 USD""",
f"""{datetime.now().strftime("%Y-%m-%d")} "Sagar Ratna" "Dosa for Lunch"
Expenses:Food:Dining 10.00 USD""",
f"""2020-04-01 "SuperMercado" "Bananas"
Expenses:Food:Groceries 10.00 USD""",
f"""2020-01-01 "Naco Taco" "Burittos for Dinner"
Expenses:Food:Dining 10.00 USD""",
{
"compiled": f"""{datetime.now().strftime("%Y-%m-%d")} "Naco Taco" "Tacos for Dinner"
Expenses:Food:Dining 10.00 USD"""
},
{
"compiled": f"""{datetime.now().strftime("%Y-%m-%d")} "Sagar Ratna" "Dosa for Lunch"
Expenses:Food:Dining 10.00 USD"""
},
{
"compiled": f"""2020-04-01 "SuperMercado" "Bananas"
Expenses:Food:Groceries 10.00 USD"""
},
{
"compiled": f"""2020-01-01 "Naco Taco" "Burittos for Dinner"
Expenses:Food:Dining 10.00 USD"""
},
]
# Act
@@ -373,14 +381,22 @@ def test_answer_requires_date_aware_aggregation_across_provided_notes(loaded_mod
"Chat actor should be able to answer questions that require date aware aggregation across multiple notes"
# Arrange
context = [
f"""# {datetime.now().strftime("%Y-%m-%d")} "Naco Taco" "Tacos for Dinner"
Expenses:Food:Dining 10.00 USD""",
f"""{datetime.now().strftime("%Y-%m-%d")} "Sagar Ratna" "Dosa for Lunch"
Expenses:Food:Dining 10.00 USD""",
f"""2020-04-01 "SuperMercado" "Bananas"
Expenses:Food:Groceries 10.00 USD""",
f"""2020-01-01 "Naco Taco" "Burittos for Dinner"
Expenses:Food:Dining 10.00 USD""",
{
"compiled": f"""# {datetime.now().strftime("%Y-%m-%d")} "Naco Taco" "Tacos for Dinner"
Expenses:Food:Dining 10.00 USD"""
},
{
"compiled": f"""{datetime.now().strftime("%Y-%m-%d")} "Sagar Ratna" "Dosa for Lunch"
Expenses:Food:Dining 10.00 USD"""
},
{
"compiled": f"""2020-04-01 "SuperMercado" "Bananas"
Expenses:Food:Groceries 10.00 USD"""
},
{
"compiled": f"""2020-01-01 "Naco Taco" "Burittos for Dinner"
Expenses:Food:Dining 10.00 USD"""
},
]
# Act
@@ -430,12 +446,18 @@ def test_ask_for_clarification_if_not_enough_context_in_question(loaded_model):
"Chat actor should ask for clarification if question cannot be answered unambiguously with the provided context"
# Arrange
context = [
f"""# Ramya
My sister, Ramya, is married to Kali Devi. They have 2 kids, Ravi and Rani.""",
f"""# Fang
My sister, Fang Liu is married to Xi Li. They have 1 kid, Xiao Li.""",
f"""# Aiyla
My sister, Aiyla is married to Tolga. They have 3 kids, Yildiz, Ali and Ahmet.""",
{
"compiled": f"""# Ramya
My sister, Ramya, is married to Kali Devi. They have 2 kids, Ravi and Rani."""
},
{
"compiled": f"""# Fang
My sister, Fang Liu is married to Xi Li. They have 1 kid, Xiao Li."""
},
{
"compiled": f"""# Aiyla
My sister, Aiyla is married to Tolga. They have 3 kids, Yildiz, Ali and Ahmet."""
},
]
# Act
@@ -459,9 +481,9 @@ def test_agent_prompt_should_be_used(loaded_model, offline_agent):
"Chat actor should be tuned to think like an accountant based on the agent definition"
# Arrange
context = [
f"""I went to the store and bought some bananas for 2.20""",
f"""I went to the store and bought some apples for 1.30""",
f"""I went to the store and bought some oranges for 6.00""",
{"compiled": f"""I went to the store and bought some bananas for 2.20"""},
{"compiled": f"""I went to the store and bought some apples for 1.30"""},
{"compiled": f"""I went to the store and bought some oranges for 6.00"""},
]
# Act
@@ -499,7 +521,7 @@ def test_chat_does_not_exceed_prompt_size(loaded_model):
"Ensure chat context and response together do not exceed max prompt size for the model"
# Arrange
prompt_size_exceeded_error = "ERROR: The prompt size exceeds the context window size and cannot be processed"
context = [" ".join([f"{number}" for number in range(2043)])]
context = [{"compiled": " ".join([f"{number}" for number in range(2043)])}]
# Act
response_gen = converse_offline(
@@ -530,7 +552,7 @@ def test_filter_questions():
# ----------------------------------------------------------------------------------------------------
@pytest.mark.anyio
@pytest.mark.django_db(transaction=True)
async def test_use_default_response_mode(client_offline_chat):
async def test_use_text_response_mode(client_offline_chat):
# Arrange
user_query = "What's the latest in the Israel/Palestine conflict?"
@@ -538,7 +560,7 @@ async def test_use_default_response_mode(client_offline_chat):
mode = await aget_relevant_output_modes(user_query, {})
# Assert
assert mode.value == "default"
assert mode.value == "text"
# ----------------------------------------------------------------------------------------------------

View File

@@ -45,7 +45,6 @@ def create_conversation(message_list, user, agent=None):
# Tests
# ----------------------------------------------------------------------------------------------------
@pytest.mark.xfail(AssertionError, reason="Chat director not capable of answering this question yet")
@pytest.mark.chatquality
@pytest.mark.django_db(transaction=True)
def test_offline_chat_with_no_chat_history_or_retrieved_content(client_offline_chat):
@@ -68,10 +67,8 @@ def test_chat_with_online_content(client_offline_chat):
# Act
q = "/online give me the link to paul graham's essay how to do great work"
encoded_q = quote(q, safe="")
response = client_offline_chat.get(f"/api/chat?q={encoded_q}&stream=true")
response_message = response.content.decode("utf-8")
response_message = response_message.split("### compiled references")[0]
response = client_offline_chat.get(f"/api/chat?q={encoded_q}")
response_message = response.json()["response"]
# Assert
expected_responses = [
@@ -92,10 +89,8 @@ def test_chat_with_online_webpage_content(client_offline_chat):
# Act
q = "/online how many firefighters were involved in the great chicago fire and which year did it take place?"
encoded_q = quote(q, safe="")
response = client_offline_chat.get(f"/api/chat?q={encoded_q}&stream=true")
response_message = response.content.decode("utf-8")
response_message = response_message.split("### compiled references")[0]
response = client_offline_chat.get(f"/api/chat?q={encoded_q}")
response_message = response.json()["response"]
# Assert
expected_responses = ["185", "1871", "horse"]
@@ -179,10 +174,6 @@ def test_answer_from_chat_history_and_previously_retrieved_content(client_offlin
# ----------------------------------------------------------------------------------------------------
@pytest.mark.xfail(
AssertionError,
reason="Chat director not capable of answering this question yet because it requires extract_questions",
)
@pytest.mark.chatquality
@pytest.mark.django_db(transaction=True)
def test_answer_from_chat_history_and_currently_retrieved_content(client_offline_chat, default_user2):

View File

@@ -49,8 +49,8 @@ def create_conversation(message_list, user, agent=None):
@pytest.mark.django_db(transaction=True)
def test_chat_with_no_chat_history_or_retrieved_content(chat_client):
# Act
response = chat_client.get(f'/api/chat?q="Hello, my name is Testatron. Who are you?"&stream=true')
response_message = response.content.decode("utf-8")
response = chat_client.get(f'/api/chat?q="Hello, my name is Testatron. Who are you?"')
response_message = response.json()["response"]
# Assert
expected_responses = ["Khoj", "khoj"]
@@ -67,10 +67,8 @@ def test_chat_with_online_content(chat_client):
# Act
q = "/online give me the link to paul graham's essay how to do great work"
encoded_q = quote(q, safe="")
response = chat_client.get(f"/api/chat?q={encoded_q}&stream=true")
response_message = response.content.decode("utf-8")
response_message = response_message.split("### compiled references")[0]
response = chat_client.get(f"/api/chat?q={encoded_q}")
response_message = response.json()["response"]
# Assert
expected_responses = [
@@ -91,10 +89,8 @@ def test_chat_with_online_webpage_content(chat_client):
# Act
q = "/online how many firefighters were involved in the great chicago fire and which year did it take place?"
encoded_q = quote(q, safe="")
response = chat_client.get(f"/api/chat?q={encoded_q}&stream=true")
response_message = response.content.decode("utf-8")
response_message = response_message.split("### compiled references")[0]
response = chat_client.get(f"/api/chat?q={encoded_q}")
response_message = response.json()["response"]
# Assert
expected_responses = ["185", "1871", "horse"]
@@ -144,7 +140,7 @@ def test_answer_from_currently_retrieved_content(chat_client, default_user2: Kho
# Act
response = chat_client.get(f'/api/chat?q="Where was Xi Li born?"')
response_message = response.content.decode("utf-8")
response_message = response.json()["response"]
# Assert
assert response.status_code == 200
@@ -168,7 +164,7 @@ def test_answer_from_chat_history_and_previously_retrieved_content(chat_client_n
# Act
response = chat_client_no_background.get(f'/api/chat?q="Where was I born?"')
response_message = response.content.decode("utf-8")
response_message = response.json()["response"]
# Assert
assert response.status_code == 200
@@ -191,7 +187,7 @@ def test_answer_from_chat_history_and_currently_retrieved_content(chat_client, d
# Act
response = chat_client.get(f'/api/chat?q="Where was I born?"')
response_message = response.content.decode("utf-8")
response_message = response.json()["response"]
# Assert
assert response.status_code == 200
@@ -215,8 +211,8 @@ def test_no_answer_in_chat_history_or_retrieved_content(chat_client, default_use
create_conversation(message_list, default_user2)
# Act
response = chat_client.get(f'/api/chat?q="Where was I born?"&stream=true')
response_message = response.content.decode("utf-8")
response = chat_client.get(f'/api/chat?q="Where was I born?"')
response_message = response.json()["response"]
# Assert
expected_responses = [
@@ -226,6 +222,7 @@ def test_no_answer_in_chat_history_or_retrieved_content(chat_client, default_use
"do not have",
"don't have",
"where were you born?",
"where you were born?",
]
assert response.status_code == 200
@@ -280,8 +277,8 @@ def test_answer_not_known_using_notes_command(chat_client_no_background, default
create_conversation(message_list, default_user2)
# Act
response = chat_client_no_background.get(f"/api/chat?q={query}&stream=true")
response_message = response.content.decode("utf-8")
response = chat_client_no_background.get(f"/api/chat?q={query}")
response_message = response.json()["response"]
# Assert
assert response.status_code == 200
@@ -527,8 +524,8 @@ def test_answer_general_question_not_in_chat_history_or_retrieved_content(chat_c
create_conversation(message_list, default_user2)
# Act
response = chat_client.get(f'/api/chat?q="Write a haiku about unit testing. Do not say anything else."&stream=true')
response_message = response.content.decode("utf-8").split("### compiled references")[0]
response = chat_client.get(f'/api/chat?q="Write a haiku about unit testing. Do not say anything else."')
response_message = response.json()["response"]
# Assert
expected_responses = ["test", "Test"]
@@ -544,9 +541,8 @@ def test_answer_general_question_not_in_chat_history_or_retrieved_content(chat_c
@pytest.mark.chatquality
def test_ask_for_clarification_if_not_enough_context_in_question(chat_client_no_background):
# Act
response = chat_client_no_background.get(f'/api/chat?q="What is the name of Namitas older son?"&stream=true')
response_message = response.content.decode("utf-8").split("### compiled references")[0].lower()
response = chat_client_no_background.get(f'/api/chat?q="What is the name of Namitas older son?"')
response_message = response.json()["response"].lower()
# Assert
expected_responses = [
@@ -658,8 +654,8 @@ def test_answer_in_chat_history_by_conversation_id_with_agent(
def test_answer_requires_multiple_independent_searches(chat_client):
"Chat director should be able to answer by doing multiple independent searches for required information"
# Act
response = chat_client.get(f'/api/chat?q="Is Xi older than Namita? Just the older persons full name"&stream=true')
response_message = response.content.decode("utf-8").split("### compiled references")[0].lower()
response = chat_client.get(f'/api/chat?q="Is Xi older than Namita? Just the older persons full name"')
response_message = response.json()["response"].lower()
# Assert
expected_responses = ["he is older than namita", "xi is older than namita", "xi li is older than namita"]
@@ -683,8 +679,8 @@ def test_answer_using_file_filter(chat_client):
'Is Xi older than Namita? Just say the older persons full name. file:"Namita.markdown" file:"Xi Li.markdown"'
)
response = chat_client.get(f"/api/chat?q={query}&stream=true")
response_message = response.content.decode("utf-8").split("### compiled references")[0].lower()
response = chat_client.get(f"/api/chat?q={query}")
response_message = response.json()["response"].lower()
# Assert
expected_responses = ["he is older than namita", "xi is older than namita", "xi li is older than namita"]