Improve safety settings for Gemini chat models

- Align the remaining harm categories to refuse only in high-harm
  scenarios as well
- Handle the response for the new "negligible" harm probability as well
This commit is contained in:
Debanjum
2025-03-27 13:48:45 +05:30
parent 2ec5cf3ae7
commit ccd9de7792

View File

@@ -52,6 +52,10 @@ SAFETY_SETTINGS = [
category=gtypes.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
threshold=gtypes.HarmBlockThreshold.BLOCK_ONLY_HIGH,
),
gtypes.SafetySetting(
category=gtypes.HarmCategory.HARM_CATEGORY_CIVIC_INTEGRITY,
threshold=gtypes.HarmBlockThreshold.BLOCK_ONLY_HIGH,
),
]
@@ -246,6 +250,7 @@ def generate_safety_response(safety_ratings: list[gtypes.SafetyRating]):
# Add a bit of variety to the discomfort level based on the safety rating probability
discomfort_level = {
gtypes.HarmProbability.HARM_PROBABILITY_UNSPECIFIED: " ",
gtypes.HarmProbability.NEGLIGIBLE: "a little ",
gtypes.HarmProbability.LOW: "a bit ",
gtypes.HarmProbability.MEDIUM: "moderately ",
gtypes.HarmProbability.HIGH: random.choice(["very ", "quite ", "fairly "]),