Improve agent creation safety checker with a response schema and a better prompt

This commit is contained in:
Debanjum
2025-04-03 02:49:40 +05:30
parent aab010723c
commit f77e871cc8
2 changed files with 10 additions and 2 deletions

View File

@@ -1252,6 +1252,7 @@ A: {{ "safe": "False", "reason": "The prompt contains sexual content that could
Q: You are an astute financial analyst. Assess my financial situation and provide advice.
A: {{ "safe": "True" }}
# Actual:
Q: {prompt}
A:
""".strip()
@@ -1287,6 +1288,7 @@ A: {{ "safe": "False", "reason": "The prompt contains content that could be cons
Q: You are a great analyst. Assess my financial situation and provide advice.
A: {{ "safe": "True" }}
# Actual:
Q: {prompt}
A:
""".strip()

View File

@@ -321,13 +321,19 @@ async def acheck_if_safe_prompt(system_prompt: str, user: KhojUser = None, lax:
is_safe = True
reason = ""
class SafetyCheck(BaseModel):
safe: bool
reason: str
with timer("Chat actor: Check if safe prompt", logger):
response = await send_message_to_model_wrapper(safe_prompt_check, user=user)
response = await send_message_to_model_wrapper(
safe_prompt_check, user=user, response_type="json_object", response_schema=SafetyCheck
)
response = response.strip()
try:
response = json.loads(clean_json(response))
is_safe = response.get("safe", "True") == "True"
is_safe = str(response.get("safe", "true")).lower() == "true"
if not is_safe:
reason = response.get("reason", "")
except Exception: