mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-07 21:29:13 +00:00
Use Groq with service tier "auto" to fall back to flex on rate limit
Merge the gpt-oss config with the OpenAI reasoning config, as both use similar tuning. Add pricing for the gpt-oss-20b model.
This commit is contained in:
@@ -145,11 +145,8 @@ def completion_with_backoff(
|
||||
# See https://qwenlm.github.io/blog/qwen3/#advanced-usages
|
||||
if not deepthought:
|
||||
add_qwen_no_think_tag(formatted_messages)
|
||||
elif "gpt-oss" in model_name.lower():
|
||||
model_kwargs["temperature"] = 1
|
||||
reasoning_effort = "medium" if deepthought else "low"
|
||||
model_kwargs["reasoning_effort"] = reasoning_effort
|
||||
model_kwargs["top_p"] = 1.0
|
||||
elif is_groq_api(api_base_url):
|
||||
model_kwargs["service_tier"] = "auto"
|
||||
|
||||
read_timeout = 300 if is_local_api(api_base_url) else 60
|
||||
if os.getenv("KHOJ_LLM_SEED"):
|
||||
@@ -355,11 +352,8 @@ async def chat_completion_with_backoff(
|
||||
# See https://qwenlm.github.io/blog/qwen3/#advanced-usages
|
||||
if not deepthought:
|
||||
add_qwen_no_think_tag(formatted_messages)
|
||||
elif "gpt-oss" in model_name.lower():
|
||||
temperature = 1
|
||||
reasoning_effort = "medium" if deepthought else "low"
|
||||
model_kwargs["reasoning_effort"] = reasoning_effort
|
||||
model_kwargs["top_p"] = 1.0
|
||||
elif is_groq_api(api_base_url):
|
||||
model_kwargs["service_tier"] = "auto"
|
||||
|
||||
read_timeout = 300 if is_local_api(api_base_url) else 60
|
||||
if os.getenv("KHOJ_LLM_SEED"):
|
||||
@@ -854,8 +848,10 @@ def is_openai_reasoning_model(model_name: str, api_base_url: str = None) -> bool
|
||||
"""
|
||||
Check if the model is an OpenAI reasoning model
|
||||
"""
|
||||
return is_openai_api(api_base_url) and (
|
||||
model_name.lower().startswith("o") or model_name.lower().startswith("gpt-5")
|
||||
return (
|
||||
is_openai_api(api_base_url)
|
||||
and (model_name.lower().startswith("o") or model_name.lower().startswith("gpt-5"))
|
||||
or model_name.lower().startswith("gpt-oss")
|
||||
)
|
||||
|
||||
|
||||
@@ -879,6 +875,13 @@ def is_twitter_reasoning_model(model_name: str, api_base_url: str = None) -> boo
|
||||
)
|
||||
|
||||
|
||||
def is_groq_api(api_base_url: str = None) -> bool:
    """
    Check if the model is served over the Groq API

    Args:
        api_base_url: Base URL of the chat API endpoint. May be None (the default).

    Returns:
        True only when api_base_url is set and begins with the Groq API
        origin "https://api.groq.com"; False otherwise, including for None.
    """
    # Guard against None first: a missing base URL is never a Groq endpoint.
    if api_base_url is None:
        return False
    return api_base_url.startswith("https://api.groq.com")
|
||||
|
||||
|
||||
def is_qwen_style_reasoning_model(model_name: str, api_base_url: str = None) -> bool:
|
||||
"""
|
||||
Check if the model is a Qwen style reasoning model
|
||||
|
||||
@@ -75,4 +75,5 @@ model_to_cost: Dict[str, Dict[str, float]] = {
|
||||
# Groq pricing
|
||||
"moonshotai/kimi-k2-instruct": {"input": 1.00, "output": 3.00},
|
||||
"openai/gpt-oss-120b": {"input": 0.15, "output": 0.75},
|
||||
"openai/gpt-oss-20b": {"input": 0.10, "output": 0.50},
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user