From 8e77b3dc82c9d61f4871370bfbc8a73077df8f7f Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky
Date: Tue, 23 Apr 2024 23:29:15 +0530
Subject: [PATCH] Fix infer_max_tokens func when configured_max_tokens is set to None

---
 src/khoj/processor/conversation/offline/utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/khoj/processor/conversation/offline/utils.py b/src/khoj/processor/conversation/offline/utils.py
index 24d39ca4..c43c7353 100644
--- a/src/khoj/processor/conversation/offline/utils.py
+++ b/src/khoj/processor/conversation/offline/utils.py
@@ -65,8 +65,9 @@ def load_model_from_cache(repo_id: str, filename: str, repo_type="models"):
     return None
 
 
-def infer_max_tokens(model_context_window: int, configured_max_tokens=math.inf) -> int:
+def infer_max_tokens(model_context_window: int, configured_max_tokens=None) -> int:
     """Infer max prompt size based on device memory and max context window supported by the model"""
     configured_max_tokens = math.inf if configured_max_tokens is None else configured_max_tokens
     vram_based_n_ctx = int(get_device_memory() / 2e6)  # based on heuristic
+    configured_max_tokens = configured_max_tokens or math.inf  # do not use if set to None
     return min(configured_max_tokens, vram_based_n_ctx, model_context_window)