From 37f7f9fd1d75c4593ea01d82e5f04cb7003904a9 Mon Sep 17 00:00:00 2001 From: sabaimran <65192171+sabaimran@users.noreply.github.com> Date: Fri, 14 Jul 2023 10:14:07 -0700 Subject: [PATCH] Add additional telemetry for system understanding (#316) * Add additional telemetry in order to understand which data sources are the most useful * Make actions side by side in the configuration page * Restore main run command * Update links to point to wiki pages for Github, Notion integrations * Standardize nomenclature of the api_type to use _config suffix Remove header fields that aren't actually helpful for understanding config usage --- src/khoj/interface/web/base_config.html | 14 ++ src/khoj/interface/web/config.html | 2 + .../web/content_type_github_input.html | 3 + .../web/content_type_notion_input.html | 3 + src/khoj/routers/api.py | 199 +++++++++++++----- src/khoj/routers/helpers.py | 34 ++- 6 files changed, 195 insertions(+), 60 deletions(-) diff --git a/src/khoj/interface/web/base_config.html b/src/khoj/interface/web/base_config.html index 76a25eeb..70d4be70 100644 --- a/src/khoj/interface/web/base_config.html +++ b/src/khoj/interface/web/base_config.html @@ -51,6 +51,10 @@ body.khoj-configure { padding: 0; } + + div.section { + padding: 12px; + } } img.khoj-logo { @@ -69,6 +73,11 @@ display: grid; justify-self: center; } + + div.instructions { + font-size: large; + } + .section-title { margin: 0; padding: 0 0 16px 0; @@ -162,6 +171,11 @@ max-width: 16px; } + div.finalize-actions { + grid-auto-flow: column; + grid-gap: 24px; + } + @media screen and (max-width: 600px) { .section-cards { grid-template-columns: 1fr; diff --git a/src/khoj/interface/web/config.html b/src/khoj/interface/web/config.html index e69df67e..07ad8189 100644 --- a/src/khoj/interface/web/config.html +++ b/src/khoj/interface/web/config.html @@ -204,6 +204,8 @@ + +
diff --git a/src/khoj/interface/web/content_type_github_input.html b/src/khoj/interface/web/content_type_github_input.html index edbad3e4..8b7190bb 100644 --- a/src/khoj/interface/web/content_type_github_input.html +++ b/src/khoj/interface/web/content_type_github_input.html @@ -5,6 +5,9 @@

Github Github +
+ ⓘ Help +

diff --git a/src/khoj/interface/web/content_type_notion_input.html b/src/khoj/interface/web/content_type_notion_input.html index 494ff7a3..4b54a756 100644 --- a/src/khoj/interface/web/content_type_notion_input.html +++ b/src/khoj/interface/web/content_type_notion_input.html @@ -5,6 +5,9 @@

Notion Notion +
+ ⓘ Help +

diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py index d04284e5..5faefc25 100644 --- a/src/khoj/routers/api.py +++ b/src/khoj/routers/api.py @@ -34,7 +34,7 @@ from khoj.utils.state import SearchType from khoj.utils import state, constants from khoj.utils.yaml import save_config_to_file_updated_state from fastapi.responses import StreamingResponse, Response -from khoj.routers.helpers import perform_chat_checks, generate_chat_response +from khoj.routers.helpers import perform_chat_checks, generate_chat_response, update_telemetry_state from khoj.processor.conversation.gpt import extract_questions from fastapi.requests import Request @@ -56,15 +56,44 @@ if not state.demo: return state.config @api.post("/config/data") - async def set_config_data(updated_config: FullConfig): + async def set_config_data( + request: Request, + updated_config: FullConfig, + client: Optional[str] = None, + ): state.config = updated_config with open(state.config_file, "w") as outfile: yaml.dump(yaml.safe_load(state.config.json(by_alias=True)), outfile) outfile.close() + + configuration_update_metadata = dict() + + if state.config.content_type is not None: + configuration_update_metadata["github"] = state.config.content_type.github is not None + configuration_update_metadata["notion"] = state.config.content_type.notion is not None + configuration_update_metadata["org"] = state.config.content_type.org is not None + configuration_update_metadata["pdf"] = state.config.content_type.pdf is not None + configuration_update_metadata["markdown"] = state.config.content_type.markdown is not None + configuration_update_metadata["plugins"] = state.config.content_type.plugins is not None + + if state.config.processor is not None: + configuration_update_metadata["conversation_processor"] = state.config.processor.conversation is not None + + update_telemetry_state( + request=request, + telemetry_type="api", + api="set_config", + client=client, + metadata=configuration_update_metadata, + ) return 
state.config @api.post("/config/data/content_type/github", status_code=200) - async def set_content_config_github_data(updated_config: Union[GithubContentConfig, None]): + async def set_content_config_github_data( + request: Request, + updated_config: Union[GithubContentConfig, None], + client: Optional[str] = None, + ): _initialize_config() if not state.config.content_type: @@ -72,6 +101,14 @@ if not state.demo: else: state.config.content_type.github = updated_config + update_telemetry_state( + request=request, + telemetry_type="api", + api="set_content_config", + client=client, + metadata={"content_type": "github"}, + ) + try: save_config_to_file_updated_state() return {"status": "ok"} @@ -79,7 +116,11 @@ if not state.demo: return {"status": "error", "message": str(e)} @api.post("/config/data/content_type/notion", status_code=200) - async def set_content_config_notion_data(updated_config: Union[NotionContentConfig, None]): + async def set_content_config_notion_data( + request: Request, + updated_config: Union[NotionContentConfig, None], + client: Optional[str] = None, + ): _initialize_config() if not state.config.content_type: @@ -87,6 +128,14 @@ if not state.demo: else: state.config.content_type.notion = updated_config + update_telemetry_state( + request=request, + telemetry_type="api", + api="set_content_config", + client=client, + metadata={"content_type": "notion"}, + ) + try: save_config_to_file_updated_state() return {"status": "ok"} @@ -94,10 +143,22 @@ if not state.demo: return {"status": "error", "message": str(e)} @api.post("/delete/config/data/content_type/{content_type}", status_code=200) - async def remove_content_config_data(content_type: str): + async def remove_content_config_data( + request: Request, + content_type: str, + client: Optional[str] = None, + ): if not state.config or not state.config.content_type: return {"status": "ok"} + update_telemetry_state( + request=request, + telemetry_type="api", + api="delete_content_config", + 
client=client, + metadata={"content_type": content_type}, + ) + if state.config.content_type: state.config.content_type[content_type] = None @@ -121,12 +182,23 @@ if not state.demo: return {"status": "error", "message": str(e)} @api.post("/delete/config/data/processor/conversation", status_code=200) - async def remove_processor_conversation_config_data(): + async def remove_processor_conversation_config_data( + request: Request, + client: Optional[str] = None, + ): if not state.config or not state.config.processor or not state.config.processor.conversation: return {"status": "ok"} state.config.processor.conversation = None + update_telemetry_state( + request=request, + telemetry_type="api", + api="delete_processor_config", + client=client, + metadata={"processor_type": "conversation"}, + ) + try: save_config_to_file_updated_state() return {"status": "ok"} @@ -134,7 +206,12 @@ if not state.demo: return {"status": "error", "message": str(e)} @api.post("/config/data/content_type/{content_type}", status_code=200) - async def set_content_config_data(content_type: str, updated_config: Union[TextContentConfig, None]): + async def set_content_config_data( + request: Request, + content_type: str, + updated_config: Union[TextContentConfig, None], + client: Optional[str] = None, + ): _initialize_config() if not state.config.content_type: @@ -142,6 +219,14 @@ if not state.demo: else: state.config.content_type[content_type] = updated_config + update_telemetry_state( + request=request, + telemetry_type="api", + api="set_content_config", + client=client, + metadata={"content_type": content_type}, + ) + try: save_config_to_file_updated_state() return {"status": "ok"} @@ -149,11 +234,24 @@ if not state.demo: return {"status": "error", "message": str(e)} @api.post("/config/data/processor/conversation", status_code=200) - async def set_processor_conversation_config_data(updated_config: Union[ConversationProcessorConfig, None]): + async def set_processor_conversation_config_data( + 
request: Request, + updated_config: Union[ConversationProcessorConfig, None], + client: Optional[str] = None, + ): _initialize_config() state.config.processor = ProcessorConfig(conversation=updated_config) state.processor_config = configure_processor(state.config.processor) + + update_telemetry_state( + request=request, + telemetry_type="api", + api="set_content_config", + client=client, + metadata={"processor_type": "conversation"}, + ) + try: save_config_to_file_updated_state() return {"status": "ok"} @@ -369,20 +467,16 @@ async def search( # Cache results state.query_cache[query_cache_key] = results - user_state = { - "client_host": request.client.host if request.client else "unknown", - "user_agent": user_agent or "unknown", - "referer": referer or "unknown", - "host": host or "unknown", - } + update_telemetry_state( + request=request, + telemetry_type="api", + api="search", + client=client, + user_agent=user_agent, + referer=referer, + host=host, + ) - # Only log telemetry if query is new and not a continuation of previous query - if state.previous_query is None or state.previous_query not in user_query: - state.telemetry += [ - log_telemetry( - telemetry_type="api", api="search", client=client, app_config=state.config.app, properties=user_state - ) - ] state.previous_query = user_query end_time = time.time() @@ -425,18 +519,15 @@ def update( else: logger.info("📬 Processor reconfigured via API") - user_state = { - "client_host": request.client.host if request.client else None, - "user_agent": user_agent or "unknown", - "referer": referer or "unknown", - "host": host or "unknown", - } - - state.telemetry += [ - log_telemetry( - telemetry_type="api", api="update", client=client, app_config=state.config.app, properties=user_state - ) - ] + update_telemetry_state( + request=request, + telemetry_type="api", + api="update", + client=client, + user_agent=user_agent, + referer=referer, + host=host, + ) return {"status": "ok", "message": "khoj reloaded"} @@ -454,18 
+545,15 @@ def chat_history( # Load Conversation History meta_log = state.processor_config.conversation.meta_log - user_state = { - "client_host": request.client.host if request.client else None, - "user_agent": user_agent or "unknown", - "referer": referer or "unknown", - "host": host or "unknown", - } - - state.telemetry += [ - log_telemetry( - telemetry_type="api", api="chat", client=client, app_config=state.config.app, properties=user_state - ) - ] + update_telemetry_state( + request=request, + telemetry_type="api", + api="chat", + client=client, + user_agent=user_agent, + referer=referer, + host=host, + ) return {"status": "ok", "response": meta_log.get("chat", [])} @@ -509,18 +597,15 @@ async def chat( response_obj = {"response": actual_response, "context": compiled_references} - user_state = { - "client_host": request.client.host if request.client else None, - "user_agent": user_agent or "unknown", - "referer": referer or "unknown", - "host": host or "unknown", - } - - state.telemetry += [ - log_telemetry( - telemetry_type="api", api="chat", client=client, app_config=state.config.app, properties=user_state - ) - ] + update_telemetry_state( + request=request, + telemetry_type="api", + api="chat", + client=client, + user_agent=user_agent, + referer=referer, + host=host, + ) return Response(content=json.dumps(response_obj), media_type="application/json", status_code=200) diff --git a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py index 0b414cbb..fe4cdac2 100644 --- a/src/khoj/routers/helpers.py +++ b/src/khoj/routers/helpers.py @@ -1,11 +1,12 @@ -from fastapi import HTTPException import logging from datetime import datetime from functools import partial -from typing import List +from typing import List, Optional + +from fastapi import HTTPException, Request from khoj.utils import state -from khoj.utils.helpers import timer +from khoj.utils.helpers import timer, log_telemetry from khoj.processor.conversation.gpt import converse from 
khoj.processor.conversation.utils import message_to_log, reciprocal_conversation_to_chatml @@ -24,6 +25,33 @@ def perform_chat_checks(): ) +def update_telemetry_state( + request: Request, + telemetry_type: str, + api: str, + client: Optional[str] = None, + user_agent: Optional[str] = None, + referer: Optional[str] = None, + host: Optional[str] = None, + metadata: Optional[dict] = None, +): + user_state = { + "client_host": request.client.host if request.client else None, + "user_agent": user_agent or "unknown", + "referer": referer or "unknown", + "host": host or "unknown", + } + + if metadata: + user_state.update(metadata) + + state.telemetry += [ + log_telemetry( + telemetry_type=telemetry_type, api=api, client=client, app_config=state.config.app, properties=user_state + ) + ] + + def generate_chat_response( q: str, meta_log: dict,