mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-08 05:39:13 +00:00
Reduce retry count and request timeout for faster response or failure
- Fix bug where both LangChain and Khoj retry requests 6 times each, resulting in a total of 12 requests at >1 minute intervals for each chat response when the OpenAI API is down
- Retrying too many times when the API is failing doesn't help
- The earlier 60-second request timeout was spacing out the interval between retries far too much. This slowed down chat response times considerably when the API was flaky
- With these updates, you'll know if a call to the chat API failed in under a minute
This commit is contained in:
@@ -34,15 +34,15 @@ max_prompt_size = {"gpt-3.5-turbo": 4096, "gpt-4": 8192}
|
|||||||
| retry_if_exception_type(openai.error.RateLimitError)
|
| retry_if_exception_type(openai.error.RateLimitError)
|
||||||
| retry_if_exception_type(openai.error.ServiceUnavailableError)
|
| retry_if_exception_type(openai.error.ServiceUnavailableError)
|
||||||
),
|
),
|
||||||
wait=wait_random_exponential(min=1, max=30),
|
wait=wait_random_exponential(min=1, max=10),
|
||||||
stop=stop_after_attempt(6),
|
stop=stop_after_attempt(3),
|
||||||
before_sleep=before_sleep_log(logger, logging.DEBUG),
|
before_sleep=before_sleep_log(logger, logging.DEBUG),
|
||||||
reraise=True,
|
reraise=True,
|
||||||
)
|
)
|
||||||
def completion_with_backoff(**kwargs):
|
def completion_with_backoff(**kwargs):
|
||||||
prompt = kwargs.pop("prompt")
|
prompt = kwargs.pop("prompt")
|
||||||
kwargs["openai_api_key"] = kwargs["api_key"] if kwargs.get("api_key") else os.getenv("OPENAI_API_KEY")
|
kwargs["openai_api_key"] = kwargs["api_key"] if kwargs.get("api_key") else os.getenv("OPENAI_API_KEY")
|
||||||
llm = OpenAI(**kwargs, request_timeout=60)
|
llm = OpenAI(**kwargs, request_timeout=10, max_retries=1)
|
||||||
return llm(prompt)
|
return llm(prompt)
|
||||||
|
|
||||||
|
|
||||||
@@ -55,7 +55,7 @@ def completion_with_backoff(**kwargs):
|
|||||||
| retry_if_exception_type(openai.error.ServiceUnavailableError)
|
| retry_if_exception_type(openai.error.ServiceUnavailableError)
|
||||||
),
|
),
|
||||||
wait=wait_exponential(multiplier=1, min=4, max=10),
|
wait=wait_exponential(multiplier=1, min=4, max=10),
|
||||||
stop=stop_after_attempt(6),
|
stop=stop_after_attempt(3),
|
||||||
before_sleep=before_sleep_log(logger, logging.DEBUG),
|
before_sleep=before_sleep_log(logger, logging.DEBUG),
|
||||||
reraise=True,
|
reraise=True,
|
||||||
)
|
)
|
||||||
@@ -65,7 +65,8 @@ def chat_completion_with_backoff(messages, model, temperature, **kwargs):
|
|||||||
model_name=model,
|
model_name=model,
|
||||||
temperature=temperature,
|
temperature=temperature,
|
||||||
openai_api_key=openai_api_key,
|
openai_api_key=openai_api_key,
|
||||||
request_timeout=60,
|
request_timeout=10,
|
||||||
|
max_retries=1,
|
||||||
)
|
)
|
||||||
return chat(messages).content
|
return chat(messages).content
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user