mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-07 21:29:13 +00:00
Add additional telemetry for system understanding (#316)
* Add additional telemetry in order to understand which data sources are the most useful * Make actions side by side in the configuration page * Restore main run command * Update links to point to wiki pages for GitHub, Notion integrations * Standardize nomenclature of the api_type to use _config suffix * Remove header fields that aren't actually helpful for understanding config usage
This commit is contained in:
@@ -51,6 +51,10 @@
|
|||||||
body.khoj-configure {
|
body.khoj-configure {
|
||||||
padding: 0;
|
padding: 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
div.section {
|
||||||
|
padding: 12px;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
img.khoj-logo {
|
img.khoj-logo {
|
||||||
@@ -69,6 +73,11 @@
|
|||||||
display: grid;
|
display: grid;
|
||||||
justify-self: center;
|
justify-self: center;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
div.instructions {
|
||||||
|
font-size: large;
|
||||||
|
}
|
||||||
|
|
||||||
.section-title {
|
.section-title {
|
||||||
margin: 0;
|
margin: 0;
|
||||||
padding: 0 0 16px 0;
|
padding: 0 0 16px 0;
|
||||||
@@ -162,6 +171,11 @@
|
|||||||
max-width: 16px;
|
max-width: 16px;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
div.finalize-actions {
|
||||||
|
grid-auto-flow: column;
|
||||||
|
grid-gap: 24px;
|
||||||
|
}
|
||||||
|
|
||||||
@media screen and (max-width: 600px) {
|
@media screen and (max-width: 600px) {
|
||||||
.section-cards {
|
.section-cards {
|
||||||
grid-template-columns: 1fr;
|
grid-template-columns: 1fr;
|
||||||
|
|||||||
@@ -204,6 +204,8 @@
|
|||||||
<input type="range" id="results-count-slider" name="results-count-slider" min="1" max="10" step="1" value="5">
|
<input type="range" id="results-count-slider" name="results-count-slider" min="1" max="10" step="1" value="5">
|
||||||
</div>
|
</div>
|
||||||
<div id="status" style="display: none;"></div>
|
<div id="status" style="display: none;"></div>
|
||||||
|
</div>
|
||||||
|
<div class="section finalize-actions">
|
||||||
<button id="configure" type="submit" title="Update index with the latest changes">⚙️ Configure</button>
|
<button id="configure" type="submit" title="Update index with the latest changes">⚙️ Configure</button>
|
||||||
<button id="reinitialize" type="submit" title="Regenerate index from scratch">🔄 Reinitialize</button>
|
<button id="reinitialize" type="submit" title="Regenerate index from scratch">🔄 Reinitialize</button>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -5,6 +5,9 @@
|
|||||||
<h2 class="section-title">
|
<h2 class="section-title">
|
||||||
<img class="card-icon" src="/static/assets/icons/github.svg" alt="Github">
|
<img class="card-icon" src="/static/assets/icons/github.svg" alt="Github">
|
||||||
<span class="card-title-text">Github</span>
|
<span class="card-title-text">Github</span>
|
||||||
|
<div class="instructions">
|
||||||
|
<a href="https://github.com/khoj-ai/khoj/wiki/Setup-Github-integration">ⓘ Help</a>
|
||||||
|
</div>
|
||||||
</h2>
|
</h2>
|
||||||
<form>
|
<form>
|
||||||
<table>
|
<table>
|
||||||
|
|||||||
@@ -5,6 +5,9 @@
|
|||||||
<h2 class="section-title">
|
<h2 class="section-title">
|
||||||
<img class="card-icon" src="/static/assets/icons/notion.svg" alt="Notion">
|
<img class="card-icon" src="/static/assets/icons/notion.svg" alt="Notion">
|
||||||
<span class="card-title-text">Notion</span>
|
<span class="card-title-text">Notion</span>
|
||||||
|
<div class="instructions">
|
||||||
|
<a href="https://github.com/khoj-ai/khoj/wiki/Setup-Notion-Integration">ⓘ Help</a>
|
||||||
|
</div>
|
||||||
</h2>
|
</h2>
|
||||||
<form>
|
<form>
|
||||||
<table>
|
<table>
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ from khoj.utils.state import SearchType
|
|||||||
from khoj.utils import state, constants
|
from khoj.utils import state, constants
|
||||||
from khoj.utils.yaml import save_config_to_file_updated_state
|
from khoj.utils.yaml import save_config_to_file_updated_state
|
||||||
from fastapi.responses import StreamingResponse, Response
|
from fastapi.responses import StreamingResponse, Response
|
||||||
from khoj.routers.helpers import perform_chat_checks, generate_chat_response
|
from khoj.routers.helpers import perform_chat_checks, generate_chat_response, update_telemetry_state
|
||||||
from khoj.processor.conversation.gpt import extract_questions
|
from khoj.processor.conversation.gpt import extract_questions
|
||||||
from fastapi.requests import Request
|
from fastapi.requests import Request
|
||||||
|
|
||||||
@@ -56,15 +56,44 @@ if not state.demo:
|
|||||||
return state.config
|
return state.config
|
||||||
|
|
||||||
@api.post("/config/data")
|
@api.post("/config/data")
|
||||||
async def set_config_data(updated_config: FullConfig):
|
async def set_config_data(
|
||||||
|
request: Request,
|
||||||
|
updated_config: FullConfig,
|
||||||
|
client: Optional[str] = None,
|
||||||
|
):
|
||||||
state.config = updated_config
|
state.config = updated_config
|
||||||
with open(state.config_file, "w") as outfile:
|
with open(state.config_file, "w") as outfile:
|
||||||
yaml.dump(yaml.safe_load(state.config.json(by_alias=True)), outfile)
|
yaml.dump(yaml.safe_load(state.config.json(by_alias=True)), outfile)
|
||||||
outfile.close()
|
outfile.close()
|
||||||
|
|
||||||
|
configuration_update_metadata = dict()
|
||||||
|
|
||||||
|
if state.config.content_type is not None:
|
||||||
|
configuration_update_metadata["github"] = state.config.content_type.github is not None
|
||||||
|
configuration_update_metadata["notion"] = state.config.content_type.notion is not None
|
||||||
|
configuration_update_metadata["org"] = state.config.content_type.org is not None
|
||||||
|
configuration_update_metadata["pdf"] = state.config.content_type.pdf is not None
|
||||||
|
configuration_update_metadata["markdown"] = state.config.content_type.markdown is not None
|
||||||
|
configuration_update_metadata["plugins"] = state.config.content_type.plugins is not None
|
||||||
|
|
||||||
|
if state.config.processor is not None:
|
||||||
|
configuration_update_metadata["conversation_processor"] = state.config.processor.conversation is not None
|
||||||
|
|
||||||
|
update_telemetry_state(
|
||||||
|
request=request,
|
||||||
|
telemetry_type="api",
|
||||||
|
api="set_config",
|
||||||
|
client=client,
|
||||||
|
metadata=configuration_update_metadata,
|
||||||
|
)
|
||||||
return state.config
|
return state.config
|
||||||
|
|
||||||
@api.post("/config/data/content_type/github", status_code=200)
|
@api.post("/config/data/content_type/github", status_code=200)
|
||||||
async def set_content_config_github_data(updated_config: Union[GithubContentConfig, None]):
|
async def set_content_config_github_data(
|
||||||
|
request: Request,
|
||||||
|
updated_config: Union[GithubContentConfig, None],
|
||||||
|
client: Optional[str] = None,
|
||||||
|
):
|
||||||
_initialize_config()
|
_initialize_config()
|
||||||
|
|
||||||
if not state.config.content_type:
|
if not state.config.content_type:
|
||||||
@@ -72,6 +101,14 @@ if not state.demo:
|
|||||||
else:
|
else:
|
||||||
state.config.content_type.github = updated_config
|
state.config.content_type.github = updated_config
|
||||||
|
|
||||||
|
update_telemetry_state(
|
||||||
|
request=request,
|
||||||
|
telemetry_type="api",
|
||||||
|
api="set_content_config",
|
||||||
|
client=client,
|
||||||
|
metadata={"content_type": "github"},
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
save_config_to_file_updated_state()
|
save_config_to_file_updated_state()
|
||||||
return {"status": "ok"}
|
return {"status": "ok"}
|
||||||
@@ -79,7 +116,11 @@ if not state.demo:
|
|||||||
return {"status": "error", "message": str(e)}
|
return {"status": "error", "message": str(e)}
|
||||||
|
|
||||||
@api.post("/config/data/content_type/notion", status_code=200)
|
@api.post("/config/data/content_type/notion", status_code=200)
|
||||||
async def set_content_config_notion_data(updated_config: Union[NotionContentConfig, None]):
|
async def set_content_config_notion_data(
|
||||||
|
request: Request,
|
||||||
|
updated_config: Union[NotionContentConfig, None],
|
||||||
|
client: Optional[str] = None,
|
||||||
|
):
|
||||||
_initialize_config()
|
_initialize_config()
|
||||||
|
|
||||||
if not state.config.content_type:
|
if not state.config.content_type:
|
||||||
@@ -87,6 +128,14 @@ if not state.demo:
|
|||||||
else:
|
else:
|
||||||
state.config.content_type.notion = updated_config
|
state.config.content_type.notion = updated_config
|
||||||
|
|
||||||
|
update_telemetry_state(
|
||||||
|
request=request,
|
||||||
|
telemetry_type="api",
|
||||||
|
api="set_content_config",
|
||||||
|
client=client,
|
||||||
|
metadata={"content_type": "notion"},
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
save_config_to_file_updated_state()
|
save_config_to_file_updated_state()
|
||||||
return {"status": "ok"}
|
return {"status": "ok"}
|
||||||
@@ -94,10 +143,22 @@ if not state.demo:
|
|||||||
return {"status": "error", "message": str(e)}
|
return {"status": "error", "message": str(e)}
|
||||||
|
|
||||||
@api.post("/delete/config/data/content_type/{content_type}", status_code=200)
|
@api.post("/delete/config/data/content_type/{content_type}", status_code=200)
|
||||||
async def remove_content_config_data(content_type: str):
|
async def remove_content_config_data(
|
||||||
|
request: Request,
|
||||||
|
content_type: str,
|
||||||
|
client: Optional[str] = None,
|
||||||
|
):
|
||||||
if not state.config or not state.config.content_type:
|
if not state.config or not state.config.content_type:
|
||||||
return {"status": "ok"}
|
return {"status": "ok"}
|
||||||
|
|
||||||
|
update_telemetry_state(
|
||||||
|
request=request,
|
||||||
|
telemetry_type="api",
|
||||||
|
api="delete_content_config",
|
||||||
|
client=client,
|
||||||
|
metadata={"content_type": content_type},
|
||||||
|
)
|
||||||
|
|
||||||
if state.config.content_type:
|
if state.config.content_type:
|
||||||
state.config.content_type[content_type] = None
|
state.config.content_type[content_type] = None
|
||||||
|
|
||||||
@@ -121,12 +182,23 @@ if not state.demo:
|
|||||||
return {"status": "error", "message": str(e)}
|
return {"status": "error", "message": str(e)}
|
||||||
|
|
||||||
@api.post("/delete/config/data/processor/conversation", status_code=200)
|
@api.post("/delete/config/data/processor/conversation", status_code=200)
|
||||||
async def remove_processor_conversation_config_data():
|
async def remove_processor_conversation_config_data(
|
||||||
|
request: Request,
|
||||||
|
client: Optional[str] = None,
|
||||||
|
):
|
||||||
if not state.config or not state.config.processor or not state.config.processor.conversation:
|
if not state.config or not state.config.processor or not state.config.processor.conversation:
|
||||||
return {"status": "ok"}
|
return {"status": "ok"}
|
||||||
|
|
||||||
state.config.processor.conversation = None
|
state.config.processor.conversation = None
|
||||||
|
|
||||||
|
update_telemetry_state(
|
||||||
|
request=request,
|
||||||
|
telemetry_type="api",
|
||||||
|
api="delete_processor_config",
|
||||||
|
client=client,
|
||||||
|
metadata={"processor_type": "conversation"},
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
save_config_to_file_updated_state()
|
save_config_to_file_updated_state()
|
||||||
return {"status": "ok"}
|
return {"status": "ok"}
|
||||||
@@ -134,7 +206,12 @@ if not state.demo:
|
|||||||
return {"status": "error", "message": str(e)}
|
return {"status": "error", "message": str(e)}
|
||||||
|
|
||||||
@api.post("/config/data/content_type/{content_type}", status_code=200)
|
@api.post("/config/data/content_type/{content_type}", status_code=200)
|
||||||
async def set_content_config_data(content_type: str, updated_config: Union[TextContentConfig, None]):
|
async def set_content_config_data(
|
||||||
|
request: Request,
|
||||||
|
content_type: str,
|
||||||
|
updated_config: Union[TextContentConfig, None],
|
||||||
|
client: Optional[str] = None,
|
||||||
|
):
|
||||||
_initialize_config()
|
_initialize_config()
|
||||||
|
|
||||||
if not state.config.content_type:
|
if not state.config.content_type:
|
||||||
@@ -142,6 +219,14 @@ if not state.demo:
|
|||||||
else:
|
else:
|
||||||
state.config.content_type[content_type] = updated_config
|
state.config.content_type[content_type] = updated_config
|
||||||
|
|
||||||
|
update_telemetry_state(
|
||||||
|
request=request,
|
||||||
|
telemetry_type="api",
|
||||||
|
api="set_content_config",
|
||||||
|
client=client,
|
||||||
|
metadata={"content_type": content_type},
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
save_config_to_file_updated_state()
|
save_config_to_file_updated_state()
|
||||||
return {"status": "ok"}
|
return {"status": "ok"}
|
||||||
@@ -149,11 +234,24 @@ if not state.demo:
|
|||||||
return {"status": "error", "message": str(e)}
|
return {"status": "error", "message": str(e)}
|
||||||
|
|
||||||
@api.post("/config/data/processor/conversation", status_code=200)
|
@api.post("/config/data/processor/conversation", status_code=200)
|
||||||
async def set_processor_conversation_config_data(updated_config: Union[ConversationProcessorConfig, None]):
|
async def set_processor_conversation_config_data(
|
||||||
|
request: Request,
|
||||||
|
updated_config: Union[ConversationProcessorConfig, None],
|
||||||
|
client: Optional[str] = None,
|
||||||
|
):
|
||||||
_initialize_config()
|
_initialize_config()
|
||||||
|
|
||||||
state.config.processor = ProcessorConfig(conversation=updated_config)
|
state.config.processor = ProcessorConfig(conversation=updated_config)
|
||||||
state.processor_config = configure_processor(state.config.processor)
|
state.processor_config = configure_processor(state.config.processor)
|
||||||
|
|
||||||
|
update_telemetry_state(
|
||||||
|
request=request,
|
||||||
|
telemetry_type="api",
|
||||||
|
api="set_content_config",
|
||||||
|
client=client,
|
||||||
|
metadata={"processor_type": "conversation"},
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
save_config_to_file_updated_state()
|
save_config_to_file_updated_state()
|
||||||
return {"status": "ok"}
|
return {"status": "ok"}
|
||||||
@@ -369,20 +467,16 @@ async def search(
|
|||||||
# Cache results
|
# Cache results
|
||||||
state.query_cache[query_cache_key] = results
|
state.query_cache[query_cache_key] = results
|
||||||
|
|
||||||
user_state = {
|
update_telemetry_state(
|
||||||
"client_host": request.client.host if request.client else "unknown",
|
request=request,
|
||||||
"user_agent": user_agent or "unknown",
|
telemetry_type="api",
|
||||||
"referer": referer or "unknown",
|
api="search",
|
||||||
"host": host or "unknown",
|
client=client,
|
||||||
}
|
user_agent=user_agent,
|
||||||
|
referer=referer,
|
||||||
|
host=host,
|
||||||
|
)
|
||||||
|
|
||||||
# Only log telemetry if query is new and not a continuation of previous query
|
|
||||||
if state.previous_query is None or state.previous_query not in user_query:
|
|
||||||
state.telemetry += [
|
|
||||||
log_telemetry(
|
|
||||||
telemetry_type="api", api="search", client=client, app_config=state.config.app, properties=user_state
|
|
||||||
)
|
|
||||||
]
|
|
||||||
state.previous_query = user_query
|
state.previous_query = user_query
|
||||||
|
|
||||||
end_time = time.time()
|
end_time = time.time()
|
||||||
@@ -425,18 +519,15 @@ def update(
|
|||||||
else:
|
else:
|
||||||
logger.info("📬 Processor reconfigured via API")
|
logger.info("📬 Processor reconfigured via API")
|
||||||
|
|
||||||
user_state = {
|
update_telemetry_state(
|
||||||
"client_host": request.client.host if request.client else None,
|
request=request,
|
||||||
"user_agent": user_agent or "unknown",
|
telemetry_type="api",
|
||||||
"referer": referer or "unknown",
|
api="update",
|
||||||
"host": host or "unknown",
|
client=client,
|
||||||
}
|
user_agent=user_agent,
|
||||||
|
referer=referer,
|
||||||
state.telemetry += [
|
host=host,
|
||||||
log_telemetry(
|
)
|
||||||
telemetry_type="api", api="update", client=client, app_config=state.config.app, properties=user_state
|
|
||||||
)
|
|
||||||
]
|
|
||||||
|
|
||||||
return {"status": "ok", "message": "khoj reloaded"}
|
return {"status": "ok", "message": "khoj reloaded"}
|
||||||
|
|
||||||
@@ -454,18 +545,15 @@ def chat_history(
|
|||||||
# Load Conversation History
|
# Load Conversation History
|
||||||
meta_log = state.processor_config.conversation.meta_log
|
meta_log = state.processor_config.conversation.meta_log
|
||||||
|
|
||||||
user_state = {
|
update_telemetry_state(
|
||||||
"client_host": request.client.host if request.client else None,
|
request=request,
|
||||||
"user_agent": user_agent or "unknown",
|
telemetry_type="api",
|
||||||
"referer": referer or "unknown",
|
api="chat",
|
||||||
"host": host or "unknown",
|
client=client,
|
||||||
}
|
user_agent=user_agent,
|
||||||
|
referer=referer,
|
||||||
state.telemetry += [
|
host=host,
|
||||||
log_telemetry(
|
)
|
||||||
telemetry_type="api", api="chat", client=client, app_config=state.config.app, properties=user_state
|
|
||||||
)
|
|
||||||
]
|
|
||||||
|
|
||||||
return {"status": "ok", "response": meta_log.get("chat", [])}
|
return {"status": "ok", "response": meta_log.get("chat", [])}
|
||||||
|
|
||||||
@@ -509,18 +597,15 @@ async def chat(
|
|||||||
|
|
||||||
response_obj = {"response": actual_response, "context": compiled_references}
|
response_obj = {"response": actual_response, "context": compiled_references}
|
||||||
|
|
||||||
user_state = {
|
update_telemetry_state(
|
||||||
"client_host": request.client.host if request.client else None,
|
request=request,
|
||||||
"user_agent": user_agent or "unknown",
|
telemetry_type="api",
|
||||||
"referer": referer or "unknown",
|
api="chat",
|
||||||
"host": host or "unknown",
|
client=client,
|
||||||
}
|
user_agent=user_agent,
|
||||||
|
referer=referer,
|
||||||
state.telemetry += [
|
host=host,
|
||||||
log_telemetry(
|
)
|
||||||
telemetry_type="api", api="chat", client=client, app_config=state.config.app, properties=user_state
|
|
||||||
)
|
|
||||||
]
|
|
||||||
|
|
||||||
return Response(content=json.dumps(response_obj), media_type="application/json", status_code=200)
|
return Response(content=json.dumps(response_obj), media_type="application/json", status_code=200)
|
||||||
|
|
||||||
|
|||||||
@@ -1,11 +1,12 @@
|
|||||||
from fastapi import HTTPException
|
|
||||||
import logging
|
import logging
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from functools import partial
|
from functools import partial
|
||||||
from typing import List
|
from typing import List, Optional
|
||||||
|
|
||||||
|
from fastapi import HTTPException, Request
|
||||||
|
|
||||||
from khoj.utils import state
|
from khoj.utils import state
|
||||||
from khoj.utils.helpers import timer
|
from khoj.utils.helpers import timer, log_telemetry
|
||||||
from khoj.processor.conversation.gpt import converse
|
from khoj.processor.conversation.gpt import converse
|
||||||
from khoj.processor.conversation.utils import message_to_log, reciprocal_conversation_to_chatml
|
from khoj.processor.conversation.utils import message_to_log, reciprocal_conversation_to_chatml
|
||||||
|
|
||||||
@@ -24,6 +25,33 @@ def perform_chat_checks():
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def update_telemetry_state(
|
||||||
|
request: Request,
|
||||||
|
telemetry_type: str,
|
||||||
|
api: str,
|
||||||
|
client: Optional[str] = None,
|
||||||
|
user_agent: Optional[str] = None,
|
||||||
|
referer: Optional[str] = None,
|
||||||
|
host: Optional[str] = None,
|
||||||
|
metadata: Optional[dict] = None,
|
||||||
|
):
|
||||||
|
user_state = {
|
||||||
|
"client_host": request.client.host if request.client else None,
|
||||||
|
"user_agent": user_agent or "unknown",
|
||||||
|
"referer": referer or "unknown",
|
||||||
|
"host": host or "unknown",
|
||||||
|
}
|
||||||
|
|
||||||
|
if metadata:
|
||||||
|
user_state.update(metadata)
|
||||||
|
|
||||||
|
state.telemetry += [
|
||||||
|
log_telemetry(
|
||||||
|
telemetry_type=telemetry_type, api=api, client=client, app_config=state.config.app, properties=user_state
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
def generate_chat_response(
|
def generate_chat_response(
|
||||||
q: str,
|
q: str,
|
||||||
meta_log: dict,
|
meta_log: dict,
|
||||||
|
|||||||
Reference in New Issue
Block a user