mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-02 21:19:12 +00:00
Default to gemini 2.0 flash instead of 1.5 flash on Gemini setup
Add price of gemini 2.0 flash for cost calculations
This commit is contained in:
2
.github/workflows/run_evals.yml
vendored
2
.github/workflows/run_evals.yml
vendored
@@ -147,7 +147,7 @@ jobs:
|
|||||||
echo "## Evaluation Summary of Khoj on ${{ matrix.dataset }} in ${{ matrix.khoj_mode }} mode" >> $GITHUB_STEP_SUMMARY
|
echo "## Evaluation Summary of Khoj on ${{ matrix.dataset }} in ${{ matrix.khoj_mode }} mode" >> $GITHUB_STEP_SUMMARY
|
||||||
echo "**$(head -n 1 *_evaluation_summary_*.txt)**" >> $GITHUB_STEP_SUMMARY
|
echo "**$(head -n 1 *_evaluation_summary_*.txt)**" >> $GITHUB_STEP_SUMMARY
|
||||||
echo "- Khoj Version: ${{ steps.hatch.outputs.version }}" >> $GITHUB_STEP_SUMMARY
|
echo "- Khoj Version: ${{ steps.hatch.outputs.version }}" >> $GITHUB_STEP_SUMMARY
|
||||||
echo "- Chat Model: Gemini 1.5 Flash 002" >> $GITHUB_STEP_SUMMARY
|
echo "- Chat Model: Gemini 2.0 Flash" >> $GITHUB_STEP_SUMMARY
|
||||||
echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
|
echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
|
||||||
tail -n +2 *_evaluation_summary_*.txt >> $GITHUB_STEP_SUMMARY
|
tail -n +2 *_evaluation_summary_*.txt >> $GITHUB_STEP_SUMMARY
|
||||||
echo "" >> $GITHUB_STEP_SUMMARY
|
echo "" >> $GITHUB_STEP_SUMMARY
|
||||||
|
|||||||
@@ -333,7 +333,7 @@ Using Ollama? See the [Ollama Integration](/advanced/ollama) section for more cu
|
|||||||
- Add your [Gemini API key](https://aistudio.google.com/app/apikey)
|
- Add your [Gemini API key](https://aistudio.google.com/app/apikey)
|
||||||
- Give the configuration a friendly name like `Gemini`. Do not configure the API base url.
|
- Give the configuration a friendly name like `Gemini`. Do not configure the API base url.
|
||||||
2. Create a new [chat model](http://localhost:42110/server/admin/database/chatmodel/add)
|
2. Create a new [chat model](http://localhost:42110/server/admin/database/chatmodel/add)
|
||||||
- Set the `chat-model` field to a [Google Gemini chat model](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models). Example: `gemini-1.5-flash`.
|
- Set the `chat-model` field to a [Google Gemini chat model](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models). Example: `gemini-2.0-flash`.
|
||||||
- Set the `model-type` field to `Google`.
|
- Set the `model-type` field to `Google`.
|
||||||
- Set the `ai model api` field to the Gemini AI Model API you created in step 1.
|
- Set the `ai model api` field to the Gemini AI Model API you created in step 1.
|
||||||
|
|
||||||
|
|||||||
@@ -31,7 +31,7 @@ logger = logging.getLogger(__name__)
|
|||||||
|
|
||||||
def extract_questions_gemini(
|
def extract_questions_gemini(
|
||||||
text,
|
text,
|
||||||
model: Optional[str] = "gemini-1.5-flash",
|
model: Optional[str] = "gemini-2.0-flash",
|
||||||
conversation_log={},
|
conversation_log={},
|
||||||
api_key=None,
|
api_key=None,
|
||||||
temperature=0,
|
temperature=0,
|
||||||
@@ -132,9 +132,9 @@ def gemini_send_message_to_model(
|
|||||||
|
|
||||||
model_kwargs = {}
|
model_kwargs = {}
|
||||||
|
|
||||||
# Sometimes, this causes unwanted behavior and terminates response early. Disable for now while it's flaky.
|
# This caused unwanted behavior and terminated responses early for the gemini 1.5 series. Monitor for flakiness with the 2.0 series.
|
||||||
# if response_type == "json_object":
|
if response_type == "json_object" and model in ["gemini-2.0-flash"]:
|
||||||
# model_kwargs["response_mime_type"] = "application/json"
|
model_kwargs["response_mime_type"] = "application/json"
|
||||||
|
|
||||||
# Get Response from Gemini
|
# Get Response from Gemini
|
||||||
return gemini_completion_with_backoff(
|
return gemini_completion_with_backoff(
|
||||||
@@ -154,7 +154,7 @@ def converse_gemini(
|
|||||||
online_results: Optional[Dict[str, Dict]] = None,
|
online_results: Optional[Dict[str, Dict]] = None,
|
||||||
code_results: Optional[Dict[str, Dict]] = None,
|
code_results: Optional[Dict[str, Dict]] = None,
|
||||||
conversation_log={},
|
conversation_log={},
|
||||||
model: Optional[str] = "gemini-1.5-flash",
|
model: Optional[str] = "gemini-2.0-flash",
|
||||||
api_key: Optional[str] = None,
|
api_key: Optional[str] = None,
|
||||||
temperature: float = 0.2,
|
temperature: float = 0.2,
|
||||||
completion_func=None,
|
completion_func=None,
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ default_offline_chat_models = [
|
|||||||
"bartowski/Qwen2.5-14B-Instruct-GGUF",
|
"bartowski/Qwen2.5-14B-Instruct-GGUF",
|
||||||
]
|
]
|
||||||
default_openai_chat_models = ["gpt-4o-mini", "gpt-4o"]
|
default_openai_chat_models = ["gpt-4o-mini", "gpt-4o"]
|
||||||
default_gemini_chat_models = ["gemini-1.5-flash", "gemini-1.5-pro"]
|
default_gemini_chat_models = ["gemini-2.0-flash", "gemini-1.5-pro"]
|
||||||
default_anthropic_chat_models = ["claude-3-5-sonnet-20241022", "claude-3-5-haiku-20241022"]
|
default_anthropic_chat_models = ["claude-3-5-sonnet-20241022", "claude-3-5-haiku-20241022"]
|
||||||
|
|
||||||
empty_config = {
|
empty_config = {
|
||||||
@@ -46,6 +46,7 @@ model_to_cost: Dict[str, Dict[str, float]] = {
|
|||||||
"gemini-1.5-flash-002": {"input": 0.075, "output": 0.30},
|
"gemini-1.5-flash-002": {"input": 0.075, "output": 0.30},
|
||||||
"gemini-1.5-pro": {"input": 1.25, "output": 5.00},
|
"gemini-1.5-pro": {"input": 1.25, "output": 5.00},
|
||||||
"gemini-1.5-pro-002": {"input": 1.25, "output": 5.00},
|
"gemini-1.5-pro-002": {"input": 1.25, "output": 5.00},
|
||||||
|
"gemini-2.0-flash": {"input": 0.10, "output": 0.40},
|
||||||
# Anthropic Pricing: https://www.anthropic.com/pricing#anthropic-api_
|
# Anthropic Pricing: https://www.anthropic.com/pricing#anthropic-api_
|
||||||
"claude-3-5-sonnet-20241022": {"input": 3.0, "output": 15.0},
|
"claude-3-5-sonnet-20241022": {"input": 3.0, "output": 15.0},
|
||||||
"claude-3-5-haiku-20241022": {"input": 1.0, "output": 5.0},
|
"claude-3-5-haiku-20241022": {"input": 1.0, "output": 5.0},
|
||||||
|
|||||||
@@ -315,7 +315,7 @@ def chat_client_builder(search_config, user, index_content=True, require_auth=Fa
|
|||||||
if chat_provider == ChatModel.ModelType.OPENAI:
|
if chat_provider == ChatModel.ModelType.OPENAI:
|
||||||
online_chat_model = ChatModelFactory(name="gpt-4o-mini", model_type="openai")
|
online_chat_model = ChatModelFactory(name="gpt-4o-mini", model_type="openai")
|
||||||
elif chat_provider == ChatModel.ModelType.GOOGLE:
|
elif chat_provider == ChatModel.ModelType.GOOGLE:
|
||||||
online_chat_model = ChatModelFactory(name="gemini-1.5-flash", model_type="google")
|
online_chat_model = ChatModelFactory(name="gemini-2.0-flash", model_type="google")
|
||||||
elif chat_provider == ChatModel.ModelType.ANTHROPIC:
|
elif chat_provider == ChatModel.ModelType.ANTHROPIC:
|
||||||
online_chat_model = ChatModelFactory(name="claude-3-5-haiku-20241022", model_type="anthropic")
|
online_chat_model = ChatModelFactory(name="claude-3-5-haiku-20241022", model_type="anthropic")
|
||||||
if online_chat_model:
|
if online_chat_model:
|
||||||
|
|||||||
@@ -629,7 +629,7 @@ def main():
|
|||||||
response_evaluator = evaluate_response_with_mcq_match
|
response_evaluator = evaluate_response_with_mcq_match
|
||||||
elif args.dataset == "math500":
|
elif args.dataset == "math500":
|
||||||
response_evaluator = partial(
|
response_evaluator = partial(
|
||||||
evaluate_response_with_gemini, eval_model=os.getenv("GEMINI_EVAL_MODEL", "gemini-1.5-flash-002")
|
evaluate_response_with_gemini, eval_model=os.getenv("GEMINI_EVAL_MODEL", "gemini-2.0-flash-001")
|
||||||
)
|
)
|
||||||
elif args.dataset == "frames_ir":
|
elif args.dataset == "frames_ir":
|
||||||
response_evaluator = evaluate_response_for_ir
|
response_evaluator = evaluate_response_for_ir
|
||||||
|
|||||||
Reference in New Issue
Block a user