Improve agent creation safety checker with a response schema and a better prompt

This commit is contained in:
Debanjum
2025-04-03 02:49:40 +05:30
parent aab010723c
commit f77e871cc8
2 changed files with 10 additions and 2 deletions

View File

@@ -1252,6 +1252,7 @@ A: {{ "safe": "False", "reason": "The prompt contains sexual content that could
Q: You are an astute financial analyst. Assess my financial situation and provide advice.
A: {{ "safe": "True" }}
# Actual:
Q: {prompt}
A:
""".strip()
@@ -1287,6 +1288,7 @@ A: {{ "safe": "False", "reason": "The prompt contains content that could be cons
Q: You are a great analyst. Assess my financial situation and provide advice.
A: {{ "safe": "True" }}
# Actual:
Q: {prompt}
A:
""".strip()

View File

@@ -321,13 +321,19 @@ async def acheck_if_safe_prompt(system_prompt: str, user: KhojUser = None, lax:
is_safe = True
reason = ""
class SafetyCheck(BaseModel):
safe: bool
reason: str
with timer("Chat actor: Check if safe prompt", logger):
response = await send_message_to_model_wrapper(safe_prompt_check, user=user)
response = await send_message_to_model_wrapper(
safe_prompt_check, user=user, response_type="json_object", response_schema=SafetyCheck
)
response = response.strip()
try:
response = json.loads(clean_json(response))
is_safe = response.get("safe", "True") == "True"
is_safe = str(response.get("safe", "true")).lower() == "true"
if not is_safe:
reason = response.get("reason", "")
except Exception: