mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-02 21:19:12 +00:00
Increase factor for n_ctx reduction to 2e6
This commit is contained in:
@@ -67,7 +67,7 @@ def load_model_from_cache(repo_id: str, filename: str, repo_type="models"):
|
||||
|
||||
def infer_max_tokens(model_context_window: int, configured_max_tokens=math.inf) -> int:
|
||||
"""Infer max prompt size based on device memory and max context window supported by the model"""
|
||||
vram_based_n_ctx = int(get_device_memory() / 1e6) # based on heuristic
|
||||
vram_based_n_ctx = int(get_device_memory() / 2e6) # based on heuristic
|
||||
if configured_max_tokens:
|
||||
return min(configured_max_tokens, model_context_window)
|
||||
else:
|
||||
|
||||
Reference in New Issue
Block a user