Bump default max prompt size for commercial chat models

This commit is contained in:
Debanjum
2024-12-21 17:31:05 -08:00
parent 37ae48d9cf
commit 8d129c4675

View File

@@ -34,7 +34,6 @@ from khoj.search_filter.word_filter import WordFilter
from khoj.utils import state
from khoj.utils.helpers import (
ConversationCommand,
in_debug_mode,
is_none_or_empty,
is_promptrace_enabled,
merge_dicts,
@@ -47,28 +46,27 @@ logger = logging.getLogger(__name__)
try:
from git import Repo
except ImportError:
if in_debug_mode(): if is_promptrace_enabled():
logger.warning("GitPython not installed. `pip install gitpython` to enable prompt tracer.") logger.warning("GitPython not installed. `pip install gitpython` to use prompt tracer.")
model_to_prompt_size = {
# OpenAI Models
"gpt-4o": 20000, "gpt-4o": 60000,
"gpt-4o-mini": 20000, "gpt-4o-mini": 60000,
"o1": 20000,
"o1-mini": 20000, "o1-mini": 60000,
# Google Models
"gemini-1.5-flash": 20000, "gemini-1.5-flash": 60000,
"gemini-1.5-pro": 20000, "gemini-1.5-pro": 60000,
# Anthropic Models
"claude-3-5-sonnet-20241022": 20000, "claude-3-5-sonnet-20241022": 60000,
"claude-3-5-haiku-20241022": 20000, "claude-3-5-haiku-20241022": 60000,
# Offline Models
"bartowski/Meta-Llama-3.1-8B-Instruct-GGUF": 20000, "Qwen/Qwen2.5-14B-Instruct-GGUF": 20000,
"bartowski/Meta-Llama-3.1-8B-Instruct-GGUF": 20000,
"bartowski/Llama-3.2-3B-Instruct-GGUF": 20000,
"bartowski/gemma-2-9b-it-GGUF": 6000,
"bartowski/gemma-2-2b-it-GGUF": 6000,
"Qwen/Qwen2.5-14B-Instruct-GGUF": 20000,
}
model_to_tokenizer: Dict[str, str] = {}