Improve Khoj Chat in Emacs, Server

Merge pull request #192 from debanjum/improvements-to-khoj-chat-in-emacs

### Khoj Chat on Emacs Improvements
- d78454d Load Khoj Chat buffer before asking for query to provide context
- 93e2aff Use org footnotes to add references, allows jump to def on click
- 5e9558d Stylize reference links as superscripts and show definition on hover
- bc71c19 Use `m` or `C-x m` in-buffer keybindings to send messages to Khoj

### Khoj Chat Server Improvements
- 27217a3 Time chat API sub-components for performance analysis
- 508b217 Update Chat API, Logs, Interfaces to store, use references as list
- d4b3866 Truncate message logs to below max supported prompt size by chat model
- cf28f10 Register separate timestamps for user query and response by Khoj Chat
This commit is contained in:
Debanjum
2023-03-25 05:49:27 +07:00
committed by GitHub
8 changed files with 216 additions and 131 deletions

View File

@@ -41,6 +41,7 @@ dependencies = [
"fastapi == 0.77.1", "fastapi == 0.77.1",
"jinja2 == 3.1.2", "jinja2 == 3.1.2",
"openai >= 0.27.0", "openai >= 0.27.0",
"tiktoken >= 0.3.0",
"pillow == 9.3.0", "pillow == 9.3.0",
"pydantic == 1.9.1", "pydantic == 1.9.1",
"pyqt6 == 6.3.1", "pyqt6 == 6.3.1",

View File

@@ -109,6 +109,8 @@
(defvar khoj--content-type "org" (defvar khoj--content-type "org"
"The type of content to perform search on.") "The type of content to perform search on.")
(declare-function org-element-property "org-mode" (PROPERTY ELEMENT))
(declare-function org-element-type "org-mode" (ELEMENT))
(declare-function beancount-mode "beancount" ()) (declare-function beancount-mode "beancount" ())
(declare-function markdown-mode "markdown-mode" ()) (declare-function markdown-mode "markdown-mode" ())
(declare-function org-music-mode "org-music" ()) (declare-function org-music-mode "org-music" ())
@@ -134,6 +136,7 @@ NO-PAGING FILTER))
"C-x M | music\n")))) "C-x M | music\n"))))
(defvar khoj--rerank nil "Track when re-rank of results triggered.") (defvar khoj--rerank nil "Track when re-rank of results triggered.")
(defvar khoj--reference-count 0 "Track number of references currently in chat buffer.")
(defun khoj--search-markdown () "Set content-type to `markdown'." (interactive) (setq khoj--content-type "markdown")) (defun khoj--search-markdown () "Set content-type to `markdown'." (interactive) (setq khoj--content-type "markdown"))
(defun khoj--search-org () "Set content-type to `org-mode'." (interactive) (setq khoj--content-type "org")) (defun khoj--search-org () "Set content-type to `org-mode'." (interactive) (setq khoj--content-type "org"))
(defun khoj--search-ledger () "Set content-type to `ledger'." (interactive) (setq khoj--content-type "ledger")) (defun khoj--search-ledger () "Set content-type to `ledger'." (interactive) (setq khoj--content-type "ledger"))
@@ -335,15 +338,22 @@ Render results in BUFFER-NAME using QUERY, CONTENT-TYPE."
(defun khoj--chat () (defun khoj--chat ()
"Chat with Khoj." "Chat with Khoj."
(interactive)
(when (not (get-buffer khoj--chat-buffer-name))
(khoj--load-chat-history khoj--chat-buffer-name))
(switch-to-buffer khoj--chat-buffer-name)
(let ((query (read-string "Query: "))) (let ((query (read-string "Query: ")))
(khoj--query-chat-api-and-render-messages query khoj--chat-buffer-name) (when (not (string-empty-p query))
(switch-to-buffer khoj--chat-buffer-name))) (khoj--query-chat-api-and-render-messages query khoj--chat-buffer-name))))
(defun khoj--load-chat-history (buffer-name) (defun khoj--load-chat-history (buffer-name)
"Load Khoj Chat conversation history into BUFFER-NAME."
(let ((json-response (cdr (assoc 'response (khoj--query-chat-api ""))))) (let ((json-response (cdr (assoc 'response (khoj--query-chat-api "")))))
(with-current-buffer (get-buffer-create buffer-name) (with-current-buffer (get-buffer-create buffer-name)
(erase-buffer) (erase-buffer)
(insert "#+STARTUP: showall hidestars\n") (insert "#+STARTUP: showall hidestars\n")
;; allow sub, superscript text within {} for footnotes
(insert "#+OPTIONS: ^:{}\n")
(thread-last (thread-last
json-response json-response
;; generate chat messages from Khoj Chat API response ;; generate chat messages from Khoj Chat API response
@@ -352,23 +362,53 @@ Render results in BUFFER-NAME using QUERY, CONTENT-TYPE."
(mapc #'insert)) (mapc #'insert))
(progn (org-mode) (progn (org-mode)
(visual-line-mode) (visual-line-mode)
(khoj--add-hover-text-to-footnote-refs (point-min))
(use-local-map (copy-keymap org-mode-map))
(local-set-key (kbd "m") #'khoj--chat)
(local-set-key (kbd "C-x m") #'khoj--chat)
(read-only-mode t))))) (read-only-mode t)))))
(defun khoj--add-hover-text-to-footnote-refs (start-pos)
  "Show footnote defs on mouse hover on footnote refs from START-POS.

Scan the widened buffer from START-POS for text matching
`org-footnote-re'.  For each match that is a footnote reference with
a resolvable definition, cover the reference with an overlay whose
`help-echo' property is that definition.  Definitions longer than 70
characters are cut to 70 characters and suffixed with \"...\".

Matches that are not footnote references (for example the footnote
definitions themselves) and references whose definition cannot be
found are skipped, so no overlay is created for them."
  (org-with-wide-buffer
   (goto-char start-pos)
   (while (re-search-forward org-footnote-re nil t)
     ;; Inspect the element on the last character of the match; point is
     ;; restored afterwards so the search always continues past the match.
     (let ((context (save-excursion
                      (backward-char)
                      (org-element-context))))
       ;; `org-footnote-re' also matches footnote definitions; only
       ;; references should get hover text.
       (when (eq (org-element-type context) 'footnote-reference)
         (let* ((max-width 70)
                (label (org-element-property :label context))
                ;; Anonymous inline footnotes have no label to look up.
                (footnote-def (and label
                                   (nth 3 (org-footnote-get-definition label)))))
           ;; A dangling reference has no definition; skip it rather than
           ;; signal an error while rendering the chat buffer.
           (when (stringp footnote-def)
             (overlay-put
              (make-overlay (org-element-property :begin context)
                            (org-element-property :end context))
              'help-echo
              ;; Append the continuation suffix only when text was cut.
              (if (> (length footnote-def) max-width)
                  (concat (substring footnote-def 0 max-width) "...")
                footnote-def)))))))))
(defun khoj--query-chat-api-and-render-messages (query buffer-name) (defun khoj--query-chat-api-and-render-messages (query buffer-name)
"Send QUERY to Khoj Chat. Render the chat messages from exchange in BUFFER-NAME." "Send QUERY to Khoj Chat. Render the chat messages from exchange in BUFFER-NAME."
;; render json response into formatted chat messages ;; render json response into formatted chat messages
(if (not (get-buffer buffer-name))
(khoj--load-chat-history buffer-name)
(with-current-buffer (get-buffer buffer-name) (with-current-buffer (get-buffer buffer-name)
(let ((inhibit-read-only t) (let ((inhibit-read-only t)
(new-content-start-pos (point-max))
(query-time (format-time-string "%F %T"))
(json-response (khoj--query-chat-api query))) (json-response (khoj--query-chat-api query)))
(goto-char (point-max)) (goto-char new-content-start-pos)
(insert (insert
(khoj--render-chat-message query "you") (khoj--render-chat-message query "you" query-time)
(khoj--render-chat-response json-response))) (khoj--render-chat-response json-response))
(progn (org-mode) (khoj--add-hover-text-to-footnote-refs new-content-start-pos))
(visual-line-mode)) (progn
(read-only-mode t)))) (org-set-startup-visibility)
(visual-line-mode)
(re-search-backward "^\*+ 🦅" nil t))))
(defun khoj--query-chat-api (query) (defun khoj--query-chat-api (query)
"Send QUERY to Khoj Chat API." "Send QUERY to Khoj Chat API."
@@ -388,37 +428,46 @@ RECEIVE-DATE is the message receive date."
(let ((first-message-line (car (split-string message "\n" t))) (let ((first-message-line (car (split-string message "\n" t)))
(rest-message-lines (string-join (cdr (split-string message "\n" t)) "\n")) (rest-message-lines (string-join (cdr (split-string message "\n" t)) "\n"))
(heading-level (if (equal sender "you") "**" "***")) (heading-level (if (equal sender "you") "**" "***"))
(emojified-by (if (equal sender "you") "🤔 *You*" "🦅 *Khoj*")) (emojified-sender (if (equal sender "you") "🤔 *You*" "🦅 *Khoj*"))
(suffix-newlines (if (equal sender "khoj") "\n\n" ""))
(received (or receive-date (format-time-string "%F %T")))) (received (or receive-date (format-time-string "%F %T"))))
(format "%s %s: %s\n :PROPERTIES:\n :RECEIVED: [%s]\n :END:\n %s\n" (format "%s %s: %s\n :PROPERTIES:\n :RECEIVED: [%s]\n :END:\n%s\n%s"
heading-level heading-level
emojified-by emojified-sender
first-message-line first-message-line
received received
rest-message-lines))) rest-message-lines
suffix-newlines)))
(defun khoj--generate-reference (index reference) (defun khoj--generate-reference (reference)
"Create `org-mode' links with REFERENCE as link and INDEX as link description." "Create `org-mode' footnotes with REFERENCE."
(with-temp-buffer (setq khoj--reference-count (1+ khoj--reference-count))
(org-insert-link (cons
nil (propertize (format "^{ [fn:%x]}" khoj--reference-count) 'help-echo reference)
(format "%s" (replace-regexp-in-string "\n" " " reference)) (thread-last
(format "%s" index)) reference
(format "[%s]" (buffer-substring-no-properties (point-min) (point-max))))) (replace-regexp-in-string "\n\n" "\n")
(format "\n[fn:%x] %s" khoj--reference-count))))
(defun khoj--render-chat-response (json-response) (defun khoj--render-chat-response (json-response)
"Render chat message using JSON-RESPONSE from Khoj Chat API." "Render chat message using JSON-RESPONSE from Khoj Chat API."
(let* ((message (cdr (or (assoc 'response json-response) (assoc 'message json-response)))) (let* ((message (cdr (or (assoc 'response json-response) (assoc 'message json-response))))
(sender (cdr (assoc 'by json-response))) (sender (cdr (assoc 'by json-response)))
(receive-date (cdr (assoc 'created json-response))) (receive-date (cdr (assoc 'created json-response)))
(context (or (cdr (assoc 'context json-response)) "")) (references (or (cdr (assoc 'context json-response)) '()))
(reference-texts (split-string context "\n\n# " t)) (footnotes (mapcar #'khoj--generate-reference references))
(reference-links (-map-indexed #'khoj--generate-reference reference-texts))) (footnote-links (mapcar #'car footnotes))
(footnote-defs (mapcar #'cdr footnotes)))
(thread-first (thread-first
;; extract khoj message from API response and make it bold ;; concatenate khoj message and references from API
(format "%s" message) (concat
;; append references to khoj message message
(concat " " (string-join reference-links " ")) ;; append reference links to khoj message
(string-join footnote-links "")
;; append reference sub-section to khoj message and fold it
(if footnote-defs "\n**** References\n:PROPERTIES:\n:VISIBILITY: folded\n:END:" "")
;; append reference definitions to references subsection
(string-join footnote-defs " "))
;; Render chat message using data obtained from API ;; Render chat message using data obtained from API
(khoj--render-chat-message sender receive-date)))) (khoj--render-chat-message sender receive-date))))

View File

@@ -39,7 +39,6 @@
let references = ''; let references = '';
if (context) { if (context) {
references = context references = context
.split("\n\n# ")
.map((reference, index) => generateReference(reference, index)) .map((reference, index) => generateReference(reference, index))
.join("<sup>,</sup>"); .join("<sup>,</sup>");
} }

View File

@@ -223,13 +223,14 @@ A:{ "search-type": "notes" }"""
return json.loads(story.strip(empty_escape_sequences)) return json.loads(story.strip(empty_escape_sequences))
def converse(text, user_query, conversation_log={}, api_key=None, temperature=0.2): def converse(references, user_query, conversation_log={}, api_key=None, temperature=0.2):
""" """
Converse with user using OpenAI's ChatGPT Converse with user using OpenAI's ChatGPT
""" """
# Initialize Variables # Initialize Variables
model = "gpt-3.5-turbo" model = "gpt-3.5-turbo"
openai.api_key = api_key or os.getenv("OPENAI_API_KEY") openai.api_key = api_key or os.getenv("OPENAI_API_KEY")
compiled_references = "\n\n".join({f"# {item}" for item in references})
personality_primer = "You are Khoj, a friendly, smart and helpful personal assistant." personality_primer = "You are Khoj, a friendly, smart and helpful personal assistant."
conversation_primer = f""" conversation_primer = f"""
@@ -237,7 +238,7 @@ Using the notes and our past conversations as context, answer the following ques
Current Date: {datetime.now().strftime("%Y-%m-%d")} Current Date: {datetime.now().strftime("%Y-%m-%d")}
Notes: Notes:
{text} {compiled_references}
Question: {user_query}""" Question: {user_query}"""
@@ -246,6 +247,7 @@ Question: {user_query}"""
conversation_primer, conversation_primer,
personality_primer, personality_primer,
conversation_log, conversation_log,
model,
) )
# Get Response from GPT # Get Response from GPT

View File

@@ -1,22 +1,44 @@
# Standard Packages # Standard Packages
from datetime import datetime from datetime import datetime
# External Packages
import tiktoken
# Internal Packages # Internal Packages
from khoj.utils.helpers import merge_dicts from khoj.utils.helpers import merge_dicts
def generate_chatml_messages_with_context(user_message, system_message, conversation_log={}): max_prompt_size = {"gpt-3.5-turbo": 4096, "gpt-4": 8192}
def generate_chatml_messages_with_context(
user_message, system_message, conversation_log={}, model_name="gpt-3.5-turbo", lookback_turns=2
):
"""Generate messages for ChatGPT with context from previous conversation""" """Generate messages for ChatGPT with context from previous conversation"""
# Extract Chat History for Context # Extract Chat History for Context
chat_logs = [f'{chat["message"]}\n\nNotes:\n{chat.get("context","")}' for chat in conversation_log.get("chat", [])] chat_logs = [f'{chat["message"]}\n\nNotes:\n{chat.get("context","")}' for chat in conversation_log.get("chat", [])]
last_backnforth = reciprocal_conversation_to_chatml(chat_logs[-2:]) rest_backnforths = []
rest_backnforth = reciprocal_conversation_to_chatml(chat_logs[-4:-2]) # Extract in reverse chronological order
for user_msg, assistant_msg in zip(chat_logs[-2::-2], chat_logs[::-2]):
if len(rest_backnforths) >= 2 * lookback_turns:
break
rest_backnforths += reciprocal_conversation_to_chatml([user_msg, assistant_msg])[::-1]
# Format user and system messages to chatml format # Format user and system messages to chatml format
system_chatml_message = [message_to_chatml(system_message, "system")] system_chatml_message = [message_to_chatml(system_message, "system")]
user_chatml_message = [message_to_chatml(user_message, "user")] user_chatml_message = [message_to_chatml(user_message, "user")]
return rest_backnforth + system_chatml_message + last_backnforth + user_chatml_message messages = user_chatml_message + rest_backnforths[:2] + system_chatml_message + rest_backnforths[2:]
# Truncate oldest messages from conversation history until under max supported prompt size by model
encoder = tiktoken.encoding_for_model(model_name)
tokens = sum([len(encoder.encode(value)) for message in messages for value in message.values()])
while tokens > max_prompt_size[model_name]:
messages.pop()
tokens = sum([len(encoder.encode(value)) for message in messages for value in message.values()])
# Return message in chronological order
return messages[::-1]
def reciprocal_conversation_to_chatml(message_pair): def reciprocal_conversation_to_chatml(message_pair):
@@ -38,20 +60,20 @@ def message_to_prompt(
return f"{conversation_history}{restart_sequence} {user_message}{start_sequence}{gpt_message}" return f"{conversation_history}{restart_sequence} {user_message}{start_sequence}{gpt_message}"
def message_to_log(user_message, gpt_message, khoj_message_metadata={}, conversation_log=[]): def message_to_log(user_message, gpt_message, user_message_metadata={}, khoj_message_metadata={}, conversation_log=[]):
"""Create json logs from messages, metadata for conversation log""" """Create json logs from messages, metadata for conversation log"""
default_khoj_message_metadata = { default_khoj_message_metadata = {
"intent": {"type": "remember", "memory-type": "notes", "query": user_message}, "intent": {"type": "remember", "memory-type": "notes", "query": user_message},
"trigger-emotion": "calm", "trigger-emotion": "calm",
} }
current_dt = datetime.now().strftime("%Y-%m-%d %H:%M:%S") khoj_response_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
# Create json log from Human's message # Create json log from Human's message
human_log = {"message": user_message, "by": "you", "created": current_dt} human_log = merge_dicts({"message": user_message, "by": "you"}, user_message_metadata)
# Create json log from GPT's response # Create json log from GPT's response
khoj_log = merge_dicts(khoj_message_metadata, default_khoj_message_metadata) khoj_log = merge_dicts(khoj_message_metadata, default_khoj_message_metadata)
khoj_log = merge_dicts({"message": gpt_message, "by": "khoj", "created": current_dt}, khoj_log) khoj_log = merge_dicts({"message": gpt_message, "by": "khoj", "created": khoj_response_time}, khoj_log)
conversation_log.extend([human_log, khoj_log]) conversation_log.extend([human_log, khoj_log])
return conversation_log return conversation_log

View File

@@ -2,6 +2,7 @@
import math import math
import yaml import yaml
import logging import logging
from datetime import datetime
from typing import List, Optional, Union from typing import List, Optional, Union
# External Packages # External Packages
@@ -192,6 +193,7 @@ def chat(q: Optional[str] = None):
# Initialize Variables # Initialize Variables
api_key = state.processor_config.conversation.openai_api_key api_key = state.processor_config.conversation.openai_api_key
model = state.processor_config.conversation.model model = state.processor_config.conversation.model
user_message_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
# Load Conversation History # Load Conversation History
chat_session = state.processor_config.conversation.chat_session chat_session = state.processor_config.conversation.chat_session
@@ -205,16 +207,19 @@ def chat(q: Optional[str] = None):
return {"status": "ok", "response": []} return {"status": "ok", "response": []}
# Infer search queries from user message # Infer search queries from user message
with timer("Extracting search queries took", logger):
inferred_queries = extract_questions(q, model=model, api_key=api_key, conversation_log=meta_log) inferred_queries = extract_questions(q, model=model, api_key=api_key, conversation_log=meta_log)
# Collate search results as context for GPT # Collate search results as context for GPT
with timer("Searching knowledge base took", logger):
result_list = [] result_list = []
for query in inferred_queries: for query in inferred_queries:
result_list.extend(search(query, n=5, r=True, score_threshold=-5.0, dedupe=False)) result_list.extend(search(query, n=5, r=True, score_threshold=-5.0, dedupe=False))
collated_result = "\n\n".join({f"# {item.additional['compiled']}" for item in result_list}) compiled_references = [item.additional["compiled"] for item in result_list]
try: try:
gpt_response = converse(collated_result, q, meta_log, api_key=api_key) with timer("Generating chat response took", logger):
gpt_response = converse(compiled_references, q, meta_log, api_key=api_key)
status = "ok" status = "ok"
except Exception as e: except Exception as e:
gpt_response = str(e) gpt_response = str(e)
@@ -225,8 +230,9 @@ def chat(q: Optional[str] = None):
state.processor_config.conversation.meta_log["chat"] = message_to_log( state.processor_config.conversation.meta_log["chat"] = message_to_log(
q, q,
gpt_response, gpt_response,
khoj_message_metadata={"context": collated_result, "intent": {"inferred-queries": inferred_queries}}, user_message_metadata={"created": user_message_time},
khoj_message_metadata={"context": compiled_references, "intent": {"inferred-queries": inferred_queries}},
conversation_log=meta_log.get("chat", []), conversation_log=meta_log.get("chat", []),
) )
return {"status": status, "response": gpt_response, "context": collated_result} return {"status": status, "response": gpt_response, "context": compiled_references}

View File

@@ -111,7 +111,7 @@ def test_extract_multiple_implicit_questions_from_message():
def test_generate_search_query_using_question_from_chat_history(): def test_generate_search_query_using_question_from_chat_history():
# Arrange # Arrange
message_list = [ message_list = [
("What is the name of Mr. Vaders daughter?", "Princess Leia", ""), ("What is the name of Mr. Vader's daughter?", "Princess Leia", []),
] ]
# Act # Act
@@ -127,7 +127,7 @@ def test_generate_search_query_using_question_from_chat_history():
def test_generate_search_query_using_answer_from_chat_history(): def test_generate_search_query_using_answer_from_chat_history():
# Arrange # Arrange
message_list = [ message_list = [
("What is the name of Mr. Vaders daughter?", "Princess Leia", ""), ("What is the name of Mr. Vader's daughter?", "Princess Leia", []),
] ]
# Act # Act
@@ -143,7 +143,7 @@ def test_generate_search_query_using_answer_from_chat_history():
def test_generate_search_query_using_question_and_answer_from_chat_history(): def test_generate_search_query_using_question_and_answer_from_chat_history():
# Arrange # Arrange
message_list = [ message_list = [
("Does Luke Skywalker have any Siblings?", "Yes, Princess Leia", ""), ("Does Luke Skywalker have any Siblings?", "Yes, Princess Leia", []),
] ]
# Act # Act
@@ -159,7 +159,7 @@ def test_generate_search_query_using_question_and_answer_from_chat_history():
def test_generate_search_query_with_date_and_context_from_chat_history(): def test_generate_search_query_with_date_and_context_from_chat_history():
# Arrange # Arrange
message_list = [ message_list = [
("When did I visit Masai Mara?", "You visited Masai Mara in April 2000", ""), ("When did I visit Masai Mara?", "You visited Masai Mara in April 2000", []),
] ]
# Act # Act
@@ -184,7 +184,7 @@ def test_generate_search_query_with_date_and_context_from_chat_history():
def test_chat_with_no_chat_history_or_retrieved_content(): def test_chat_with_no_chat_history_or_retrieved_content():
# Act # Act
response = converse( response = converse(
text="", # Assume no context retrieved from notes for the user_query references=[], # Assume no context retrieved from notes for the user_query
user_query="Hello, my name is Testatron. Who are you?", user_query="Hello, my name is Testatron. Who are you?",
api_key=api_key, api_key=api_key,
) )
@@ -202,13 +202,13 @@ def test_chat_with_no_chat_history_or_retrieved_content():
def test_answer_from_chat_history_and_no_content(): def test_answer_from_chat_history_and_no_content():
# Arrange # Arrange
message_list = [ message_list = [
("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""), ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", []),
("When was I born?", "You were born on 1st April 1984.", ""), ("When was I born?", "You were born on 1st April 1984.", []),
] ]
# Act # Act
response = converse( response = converse(
text="", # Assume no context retrieved from notes for the user_query references=[], # Assume no context retrieved from notes for the user_query
user_query="What is my name?", user_query="What is my name?",
conversation_log=populate_chat_history(message_list), conversation_log=populate_chat_history(message_list),
api_key=api_key, api_key=api_key,
@@ -228,13 +228,17 @@ def test_answer_from_chat_history_and_previously_retrieved_content():
"Chat actor needs to use context in previous notes and chat history to answer question" "Chat actor needs to use context in previous notes and chat history to answer question"
# Arrange # Arrange
message_list = [ message_list = [
("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""), ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", []),
("When was I born?", "You were born on 1st April 1984.", "Testatron was born on 1st April 1984 in Testville."), (
"When was I born?",
"You were born on 1st April 1984.",
["Testatron was born on 1st April 1984 in Testville."],
),
] ]
# Act # Act
response = converse( response = converse(
text="", # Assume no context retrieved from notes for the user_query references=[], # Assume no context retrieved from notes for the user_query
user_query="Where was I born?", user_query="Where was I born?",
conversation_log=populate_chat_history(message_list), conversation_log=populate_chat_history(message_list),
api_key=api_key, api_key=api_key,
@@ -252,13 +256,15 @@ def test_answer_from_chat_history_and_currently_retrieved_content():
"Chat actor needs to use context across currently retrieved notes and chat history to answer question" "Chat actor needs to use context across currently retrieved notes and chat history to answer question"
# Arrange # Arrange
message_list = [ message_list = [
("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""), ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", []),
("When was I born?", "You were born on 1st April 1984.", ""), ("When was I born?", "You were born on 1st April 1984.", []),
] ]
# Act # Act
response = converse( response = converse(
text="Testatron was born on 1st April 1984 in Testville.", # Assume context retrieved from notes for the user_query references=[
"Testatron was born on 1st April 1984 in Testville."
], # Assume context retrieved from notes for the user_query
user_query="Where was I born?", user_query="Where was I born?",
conversation_log=populate_chat_history(message_list), conversation_log=populate_chat_history(message_list),
api_key=api_key, api_key=api_key,
@@ -275,13 +281,13 @@ def test_no_answer_in_chat_history_or_retrieved_content():
"Chat actor should say don't know as not enough contexts in chat history or retrieved to answer question" "Chat actor should say don't know as not enough contexts in chat history or retrieved to answer question"
# Arrange # Arrange
message_list = [ message_list = [
("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""), ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", []),
("When was I born?", "You were born on 1st April 1984.", ""), ("When was I born?", "You were born on 1st April 1984.", []),
] ]
# Act # Act
response = converse( response = converse(
text="", # Assume no context retrieved from notes for the user_query references=[], # Assume no context retrieved from notes for the user_query
user_query="Where was I born?", user_query="Where was I born?",
conversation_log=populate_chat_history(message_list), conversation_log=populate_chat_history(message_list),
api_key=api_key, api_key=api_key,
@@ -300,23 +306,20 @@ def test_no_answer_in_chat_history_or_retrieved_content():
def test_answer_requires_current_date_awareness(): def test_answer_requires_current_date_awareness():
"Chat actor should be able to answer questions relative to current date using provided notes" "Chat actor should be able to answer questions relative to current date using provided notes"
# Arrange # Arrange
context = f""" context = [
# {datetime.now().strftime("%Y-%m-%d")} "Naco Taco" "Tacos for Dinner" f"""{datetime.now().strftime("%Y-%m-%d")} "Naco Taco" "Tacos for Dinner"
Expenses:Food:Dining 10.00 USD Expenses:Food:Dining 10.00 USD""",
f"""{datetime.now().strftime("%Y-%m-%d")} "Sagar Ratna" "Dosa for Lunch"
# {datetime.now().strftime("%Y-%m-%d")} "Sagar Ratna" "Dosa for Lunch" Expenses:Food:Dining 10.00 USD""",
Expenses:Food:Dining 10.00 USD f"""2020-04-01 "SuperMercado" "Bananas"
Expenses:Food:Groceries 10.00 USD""",
# 2020-04-01 "SuperMercado" "Bananas" f"""2020-01-01 "Naco Taco" "Burittos for Dinner"
Expenses:Food:Groceries 10.00 USD Expenses:Food:Dining 10.00 USD""",
]
# 2020-01-01 "Naco Taco" "Burittos for Dinner"
Expenses:Food:Dining 10.00 USD
"""
# Act # Act
response = converse( response = converse(
text=context, # Assume context retrieved from notes for the user_query references=context, # Assume context retrieved from notes for the user_query
user_query="What did I have for Dinner today?", user_query="What did I have for Dinner today?",
api_key=api_key, api_key=api_key,
) )
@@ -334,23 +337,20 @@ def test_answer_requires_current_date_awareness():
def test_answer_requires_date_aware_aggregation_across_provided_notes(): def test_answer_requires_date_aware_aggregation_across_provided_notes():
"Chat actor should be able to answer questions that require date aware aggregation across multiple notes" "Chat actor should be able to answer questions that require date aware aggregation across multiple notes"
# Arrange # Arrange
context = f""" context = [
# {datetime.now().strftime("%Y-%m-%d")} "Naco Taco" "Tacos for Dinner" f"""# {datetime.now().strftime("%Y-%m-%d")} "Naco Taco" "Tacos for Dinner"
Expenses:Food:Dining 10.00 USD Expenses:Food:Dining 10.00 USD""",
f"""{datetime.now().strftime("%Y-%m-%d")} "Sagar Ratna" "Dosa for Lunch"
# {datetime.now().strftime("%Y-%m-%d")} "Sagar Ratna" "Dosa for Lunch" Expenses:Food:Dining 10.00 USD""",
Expenses:Food:Dining 10.00 USD f"""2020-04-01 "SuperMercado" "Bananas"
Expenses:Food:Groceries 10.00 USD""",
# 2020-04-01 "SuperMercado" "Bananas" f"""2020-01-01 "Naco Taco" "Burittos for Dinner"
Expenses:Food:Groceries 10.00 USD Expenses:Food:Dining 10.00 USD""",
]
# 2020-01-01 "Naco Taco" "Burittos for Dinner"
Expenses:Food:Dining 10.00 USD
"""
# Act # Act
response = converse( response = converse(
text=context, # Assume context retrieved from notes for the user_query references=context, # Assume context retrieved from notes for the user_query
user_query="How much did I spend on dining this year?", user_query="How much did I spend on dining this year?",
api_key=api_key, api_key=api_key,
) )
@@ -366,14 +366,14 @@ def test_answer_general_question_not_in_chat_history_or_retrieved_content():
"Chat actor should be able to answer general questions not requiring looking at chat history or notes" "Chat actor should be able to answer general questions not requiring looking at chat history or notes"
# Arrange # Arrange
message_list = [ message_list = [
("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""), ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", []),
("When was I born?", "You were born on 1st April 1984.", ""), ("When was I born?", "You were born on 1st April 1984.", []),
("Where was I born?", "You were born Testville.", ""), ("Where was I born?", "You were born Testville.", []),
] ]
# Act # Act
response = converse( response = converse(
text="", # Assume no context retrieved from notes for the user_query references=[], # Assume no context retrieved from notes for the user_query
user_query="Write a haiku about unit testing in 3 lines", user_query="Write a haiku about unit testing in 3 lines",
conversation_log=populate_chat_history(message_list), conversation_log=populate_chat_history(message_list),
api_key=api_key, api_key=api_key,
@@ -393,20 +393,18 @@ def test_answer_general_question_not_in_chat_history_or_retrieved_content():
def test_ask_for_clarification_if_not_enough_context_in_question(): def test_ask_for_clarification_if_not_enough_context_in_question():
"Chat actor should ask for clarification if question cannot be answered unambiguously with the provided context" "Chat actor should ask for clarification if question cannot be answered unambiguously with the provided context"
# Arrange # Arrange
context = f""" context = [
# Ramya f"""# Ramya
My sister, Ramya, is married to Kali Devi. They have 2 kids, Ravi and Rani. My sister, Ramya, is married to Kali Devi. They have 2 kids, Ravi and Rani.""",
f"""# Fang
# Fang My sister, Fang Liu is married to Xi Li. They have 1 kid, Xiao Li.""",
My sister, Fang Liu is married to Xi Li. They have 1 kid, Xiao Li. f"""# Aiyla
My sister, Aiyla is married to Tolga. They have 3 kids, Yildiz, Ali and Ahmet.""",
# Aiyla ]
My sister, Aiyla is married to Tolga. They have 3 kids, Yildiz, Ali and Ahmet.
"""
# Act # Act
response = converse( response = converse(
text=context, # Assume context retrieved from notes for the user_query references=context, # Assume context retrieved from notes for the user_query
user_query="How many kids does my older sister have?", user_query="How many kids does my older sister have?",
api_key=api_key, api_key=api_key,
) )

View File

@@ -47,7 +47,7 @@ def test_chat_with_no_chat_history_or_retrieved_content(chat_client):
expected_responses = ["Khoj", "khoj"] expected_responses = ["Khoj", "khoj"]
assert response.status_code == 200 assert response.status_code == 200
assert any([expected_response in response_message for expected_response in expected_responses]), ( assert any([expected_response in response_message for expected_response in expected_responses]), (
"Expected assistants name, [K|k]hoj, in response but got" + response_message "Expected assistants name, [K|k]hoj, in response but got: " + response_message
) )
@@ -56,8 +56,8 @@ def test_chat_with_no_chat_history_or_retrieved_content(chat_client):
def test_answer_from_chat_history(chat_client): def test_answer_from_chat_history(chat_client):
# Arrange # Arrange
message_list = [ message_list = [
("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""), ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", []),
("When was I born?", "You were born on 1st April 1984.", ""), ("When was I born?", "You were born on 1st April 1984.", []),
] ]
populate_chat_history(message_list) populate_chat_history(message_list)
@@ -69,7 +69,7 @@ def test_answer_from_chat_history(chat_client):
expected_responses = ["Testatron", "testatron"] expected_responses = ["Testatron", "testatron"]
assert response.status_code == 200 assert response.status_code == 200
assert any([expected_response in response_message for expected_response in expected_responses]), ( assert any([expected_response in response_message for expected_response in expected_responses]), (
"Expected [T|t]estatron in response but got" + response_message "Expected [T|t]estatron in response but got: " + response_message
) )
@@ -78,8 +78,12 @@ def test_answer_from_chat_history(chat_client):
def test_answer_from_currently_retrieved_content(chat_client): def test_answer_from_currently_retrieved_content(chat_client):
# Arrange # Arrange
message_list = [ message_list = [
("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""), ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", []),
("When was I born?", "You were born on 1st April 1984.", "Testatron was born on 1st April 1984 in Testville."), (
"When was I born?",
"You were born on 1st April 1984.",
["Testatron was born on 1st April 1984 in Testville."],
),
] ]
populate_chat_history(message_list) populate_chat_history(message_list)
@@ -97,8 +101,12 @@ def test_answer_from_currently_retrieved_content(chat_client):
def test_answer_from_chat_history_and_previously_retrieved_content(chat_client): def test_answer_from_chat_history_and_previously_retrieved_content(chat_client):
# Arrange # Arrange
message_list = [ message_list = [
("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""), ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", []),
("When was I born?", "You were born on 1st April 1984.", "Testatron was born on 1st April 1984 in Testville."), (
"When was I born?",
"You were born on 1st April 1984.",
["Testatron was born on 1st April 1984 in Testville."],
),
] ]
populate_chat_history(message_list) populate_chat_history(message_list)
@@ -119,8 +127,8 @@ def test_answer_from_chat_history_and_previously_retrieved_content(chat_client):
def test_answer_from_chat_history_and_currently_retrieved_content(chat_client): def test_answer_from_chat_history_and_currently_retrieved_content(chat_client):
# Arrange # Arrange
message_list = [ message_list = [
("Hello, my name is Xi Li. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""), ("Hello, my name is Xi Li. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", []),
("When was I born?", "You were born on 1st April 1984.", ""), ("When was I born?", "You were born on 1st April 1984.", []),
] ]
populate_chat_history(message_list) populate_chat_history(message_list)
@@ -143,8 +151,8 @@ def test_no_answer_in_chat_history_or_retrieved_content(chat_client):
"Chat director should say don't know as not enough contexts in chat history or retrieved to answer question" "Chat director should say don't know as not enough contexts in chat history or retrieved to answer question"
# Arrange # Arrange
message_list = [ message_list = [
("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""), ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", []),
("When was I born?", "You were born on 1st April 1984.", ""), ("When was I born?", "You were born on 1st April 1984.", []),
] ]
populate_chat_history(message_list) populate_chat_history(message_list)
@@ -197,9 +205,9 @@ def test_answer_requires_date_aware_aggregation_across_provided_notes(chat_clien
def test_answer_general_question_not_in_chat_history_or_retrieved_content(chat_client): def test_answer_general_question_not_in_chat_history_or_retrieved_content(chat_client):
# Arrange # Arrange
message_list = [ message_list = [
("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""), ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", []),
("When was I born?", "You were born on 1st April 1984.", ""), ("When was I born?", "You were born on 1st April 1984.", []),
("Where was I born?", "You were born Testville.", ""), ("Where was I born?", "You were born Testville.", []),
] ]
populate_chat_history(message_list) populate_chat_history(message_list)
@@ -243,9 +251,9 @@ def test_ask_for_clarification_if_not_enough_context_in_question(chat_client):
def test_answer_in_chat_history_beyond_lookback_window(chat_client): def test_answer_in_chat_history_beyond_lookback_window(chat_client):
# Arrange # Arrange
message_list = [ message_list = [
("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""), ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", []),
("When was I born?", "You were born on 1st April 1984.", ""), ("When was I born?", "You were born on 1st April 1984.", []),
("Where was I born?", "You were born Testville.", ""), ("Where was I born?", "You were born Testville.", []),
] ]
populate_chat_history(message_list) populate_chat_history(message_list)