Mirror of https://github.com/khoaliber/khoj.git (synced 2026-03-09 21:29:11 +00:00)
Gracefully close thread when there's an exception in the openai llm thread. Closes #894.
@@ -100,34 +100,37 @@ def chat_completion_with_backoff(
 
 def llm_thread(g, messages, model_name, temperature, openai_api_key=None, api_base_url=None, model_kwargs=None):
-    client_key = f"{openai_api_key}--{api_base_url}"
-    if client_key not in openai_clients:
-        client: openai.OpenAI = openai.OpenAI(
-            api_key=openai_api_key,
-            base_url=api_base_url,
-        )
-        openai_clients[client_key] = client
-    else:
-        client: openai.OpenAI = openai_clients[client_key]
-
-    formatted_messages = [{"role": message.role, "content": message.content} for message in messages]
-
-    chat = client.chat.completions.create(
-        stream=True,
-        messages=formatted_messages,
-        model=model_name,  # type: ignore
-        temperature=temperature,
-        timeout=20,
-        **(model_kwargs or dict()),
-    )
-
-    for chunk in chat:
-        if len(chunk.choices) == 0:
-            continue
-        delta_chunk = chunk.choices[0].delta
-        if isinstance(delta_chunk, str):
-            g.send(delta_chunk)
-        elif delta_chunk.content:
-            g.send(delta_chunk.content)
-
-    g.close()
+    try:
+        client_key = f"{openai_api_key}--{api_base_url}"
+        if client_key not in openai_clients:
+            client: openai.OpenAI = openai.OpenAI(
+                api_key=openai_api_key,
+                base_url=api_base_url,
+            )
+            openai_clients[client_key] = client
+        else:
+            client: openai.OpenAI = openai_clients[client_key]
+
+        formatted_messages = [{"role": message.role, "content": message.content} for message in messages]
+
+        chat = client.chat.completions.create(
+            stream=True,
+            messages=formatted_messages,
+            model=model_name,  # type: ignore
+            temperature=temperature,
+            timeout=20,
+            **(model_kwargs or dict()),
+        )
+
+        for chunk in chat:
+            if len(chunk.choices) == 0:
+                continue
+            delta_chunk = chunk.choices[0].delta
+            if isinstance(delta_chunk, str):
+                g.send(delta_chunk)
+            elif delta_chunk.content:
+                g.send(delta_chunk.content)
+    except Exception as e:
+        logger.error(f"Error in llm_thread: {e}")
+    finally:
+        g.close()
 
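Why the finally matters: llm_thread is the producer half of a streaming pipeline, pushing chunks into g while a consumer thread iterates over them. If the OpenAI call raises and g.close() is never reached, the consumer blocks indefinitely waiting for the next chunk. Below is a minimal, self-contained sketch of this pattern; the ThreadedGenerator class and flaky_producer function are illustrative assumptions inferred from the g.send()/g.close() interface visible in the diff, not code taken from the khoj repository.

    import queue
    import threading

    class ThreadedGenerator:
        """Queue-backed generator bridging a producer thread and a consumer.

        A minimal sketch inferred from the g.send()/g.close() calls in the
        diff above; the actual khoj implementation may differ.
        """

        def __init__(self):
            self.queue = queue.Queue()

        def __iter__(self):
            return self

        def __next__(self):
            item = self.queue.get()  # blocks until the producer sends or closes
            if item is StopIteration:
                raise StopIteration
            return item

        def send(self, data):
            self.queue.put(data)

        def close(self):
            # Sentinel wakes the consumer; if the producer dies without
            # calling close(), the consumer blocks on queue.get() forever.
            self.queue.put(StopIteration)

    def flaky_producer(g):
        # Stand-in for llm_thread: streams one chunk, then fails mid-stream.
        try:
            g.send("Hello, ")
            raise RuntimeError("simulated OpenAI API error")
        except Exception as e:
            print(f"producer error: {e}")
        finally:
            g.close()  # the fix: always unblock the consumer

    g = ThreadedGenerator()
    threading.Thread(target=flaky_producer, args=(g,)).start()
    for chunk in g:  # terminates cleanly instead of hanging
        print(chunk)

Run as-is, the consumer prints the chunk sent before the failure and exits cleanly; remove the finally: g.close() and the for loop hangs on queue.get(), which is the failure mode this commit's try/except/finally guards against.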