From 02086888014e65e816981bb3f3c66c4d562624fd Mon Sep 17 00:00:00 2001 From: sabaimran Date: Tue, 16 Apr 2024 22:48:45 +0530 Subject: [PATCH] Increase factor for n_ctx reduction to 2e6 --- src/khoj/processor/conversation/offline/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/khoj/processor/conversation/offline/utils.py b/src/khoj/processor/conversation/offline/utils.py index c2b08bfa..4a7c69a9 100644 --- a/src/khoj/processor/conversation/offline/utils.py +++ b/src/khoj/processor/conversation/offline/utils.py @@ -67,7 +67,7 @@ def load_model_from_cache(repo_id: str, filename: str, repo_type="models"): def infer_max_tokens(model_context_window: int, configured_max_tokens=math.inf) -> int: """Infer max prompt size based on device memory and max context window supported by the model""" - vram_based_n_ctx = int(get_device_memory() / 1e6) # based on heuristic + vram_based_n_ctx = int(get_device_memory() / 2e6) # based on heuristic if configured_max_tokens: return min(configured_max_tokens, model_context_window) else: