Set reasoning effort for OpenAI reasoning models when picking a tool in research mode

This is analogous to how we enable extended thinking for Claude models
in research mode.

Default OpenAI reasoning models to medium effort irrespective of the
deepthought setting, as high effort is currently flaky (regular
timeouts) and low effort isn't great.
This commit is contained in:
Debanjum
2025-03-24 07:02:57 +05:30
parent b4929905b2
commit 39aa48738f
3 changed files with 14 additions and 4 deletions

View File

@@ -124,6 +124,7 @@ def send_message_to_model(
model,
response_type="text",
response_schema=None,
deepthought=False,
api_base_url=None,
tracer: dict = {},
):
@@ -144,6 +145,7 @@ def send_message_to_model(
model_name=model,
openai_api_key=api_key,
api_base_url=api_base_url,
deepthought=deepthought,
model_kwargs=model_kwargs,
tracer=tracer,
)
@@ -172,6 +174,7 @@ def converse_openai(
generated_files: List[FileAttachment] = None,
generated_asset_results: Dict[str, Dict] = {},
program_execution_context: List[str] = None,
deepthought: Optional[bool] = False,
tracer: dict = {},
):
"""
@@ -250,6 +253,7 @@ def converse_openai(
openai_api_key=api_key,
api_base_url=api_base_url,
completion_func=completion_func,
deepthought=deepthought,
model_kwargs={"stop": ["Notes:\n["]},
tracer=tracer,
)

View File

@@ -51,6 +51,7 @@ def completion_with_backoff(
temperature=0.8,
openai_api_key=None,
api_base_url=None,
deepthought: bool = False,
model_kwargs: dict = {},
tracer: dict = {},
) -> str:
@@ -128,13 +129,14 @@ def chat_completion_with_backoff(
openai_api_key=None,
api_base_url=None,
completion_func=None,
deepthought=False,
model_kwargs=None,
tracer: dict = {},
):
g = ThreadedGenerator(compiled_references, online_results, completion_func=completion_func)
t = Thread(
target=llm_thread,
args=(g, messages, model_name, temperature, openai_api_key, api_base_url, model_kwargs, tracer),
args=(g, messages, model_name, temperature, openai_api_key, api_base_url, deepthought, model_kwargs, tracer),
)
t.start()
return g
@@ -147,6 +149,7 @@ def llm_thread(
temperature,
openai_api_key=None,
api_base_url=None,
deepthought=False,
model_kwargs: dict = {},
tracer: dict = {},
):
@@ -160,10 +163,11 @@ def llm_thread(
formatted_messages = [{"role": message.role, "content": message.content} for message in messages]
# Tune reasoning models arguments
if model_name.startswith("o1"):
temperature = 1
elif model_name.startswith("o3"):
if model_name.startswith("o1") or model_name.startswith("o3"):
temperature = 1
model_kwargs["reasoning_effort"] = "medium"
if model_name.startswith("o3"):
# Get the first system message and add the string `Formatting re-enabled` to it.
# See https://platform.openai.com/docs/guides/reasoning-best-practices
if len(formatted_messages) > 0:

View File

@@ -1215,6 +1215,7 @@ async def send_message_to_model_wrapper(
model=chat_model_name,
response_type=response_type,
response_schema=response_schema,
deepthought=deepthought,
api_base_url=api_base_url,
tracer=tracer,
)
@@ -1511,6 +1512,7 @@ def generate_chat_response(
generated_files=raw_generated_files,
generated_asset_results=generated_asset_results,
program_execution_context=program_execution_context,
deepthought=deepthought,
tracer=tracer,
)