From 703a7c89c0f493089aae903f62cf66e562e2e48f Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Wed, 31 May 2023 10:52:59 +0530 Subject: [PATCH] Reduce retry count and request timeout for faster response or failure - Fix bug where both LangChain and Khoj retry requests 6 times each, for a total of 12 requests at >1 minute intervals for each chat response when the OpenAI API is down - Retrying too many times when the API is failing doesn't help - The earlier 60-second request timeout was spacing out the interval between retries way too much. This slowed down chat response times quite a bit when the API was being flaky - With these updates you'll know if a call to the chat API failed in under a minute --- src/khoj/processor/conversation/utils.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py index e18f7bbd..f058b877 100644 --- a/src/khoj/processor/conversation/utils.py +++ b/src/khoj/processor/conversation/utils.py @@ -34,15 +34,15 @@ max_prompt_size = {"gpt-3.5-turbo": 4096, "gpt-4": 8192} | retry_if_exception_type(openai.error.RateLimitError) | retry_if_exception_type(openai.error.ServiceUnavailableError) ), - wait=wait_random_exponential(min=1, max=30), - stop=stop_after_attempt(6), + wait=wait_random_exponential(min=1, max=10), + stop=stop_after_attempt(3), before_sleep=before_sleep_log(logger, logging.DEBUG), reraise=True, ) def completion_with_backoff(**kwargs): prompt = kwargs.pop("prompt") kwargs["openai_api_key"] = kwargs["api_key"] if kwargs.get("api_key") else os.getenv("OPENAI_API_KEY") - llm = OpenAI(**kwargs, request_timeout=60) + llm = OpenAI(**kwargs, request_timeout=10, max_retries=1) return llm(prompt) @@ -55,7 +55,7 @@ def completion_with_backoff(**kwargs): | retry_if_exception_type(openai.error.ServiceUnavailableError) ), wait=wait_exponential(multiplier=1, min=4, max=10), - stop=stop_after_attempt(6), + stop=stop_after_attempt(3), 
before_sleep=before_sleep_log(logger, logging.DEBUG), reraise=True, ) @@ -65,7 +65,8 @@ def chat_completion_with_backoff(messages, model, temperature, **kwargs): model_name=model, temperature=temperature, openai_api_key=openai_api_key, - request_timeout=60, + request_timeout=10, + max_retries=1, ) return chat(messages).content