diff --git a/src/interface/web/app/common/layoutHelper.tsx b/src/interface/web/app/common/layoutHelper.tsx index 5c34e641..2d5159d4 100644 --- a/src/interface/web/app/common/layoutHelper.tsx +++ b/src/interface/web/app/common/layoutHelper.tsx @@ -9,7 +9,7 @@ export function ContentSecurityPolicy() { style-src 'self' https://assets.khoj.dev 'unsafe-inline' https://fonts.googleapis.com https://accounts.google.com; img-src 'self' data: blob: https://*.khoj.dev https://accounts.google.com https://*.googleusercontent.com https://*.google.com/ https://*.gstatic.com; font-src 'self' https://assets.khoj.dev https://fonts.gstatic.com; - frame-src 'self' https://accounts.google.com; + frame-src 'self' https://accounts.google.com https://app.chatwoot.com; child-src 'self' https://app.chatwoot.com; object-src 'none';" > diff --git a/src/khoj/processor/conversation/openai/utils.py b/src/khoj/processor/conversation/openai/utils.py index 82f68259..8af836f1 100644 --- a/src/khoj/processor/conversation/openai/utils.py +++ b/src/khoj/processor/conversation/openai/utils.py @@ -58,13 +58,17 @@ def completion_with_backoff( openai_clients[client_key] = client formatted_messages = [{"role": message.role, "content": message.content} for message in messages] - stream = True # Update request parameters for compatability with o1 model series # Refer: https://platform.openai.com/docs/guides/reasoning/beta-limitations - if model_name.startswith("o1"): + stream = True + model_kwargs["stream_options"] = {"include_usage": True} + if model_name == "o1": + temperature = 1 + stream = False + model_kwargs.pop("stream_options", None) + elif model_name.startswith("o1-"): temperature = 1 - model_kwargs.pop("stop", None) model_kwargs.pop("response_format", None) if os.getenv("KHOJ_LLM_SEED"): @@ -74,7 +78,6 @@ def completion_with_backoff( messages=formatted_messages, # type: ignore model=model_name, # type: ignore stream=stream, - stream_options={"include_usage": True} if stream else {}, 
temperature=temperature, timeout=20, **model_kwargs, @@ -165,13 +168,17 @@ def llm_thread( client = openai_clients[client_key] formatted_messages = [{"role": message.role, "content": message.content} for message in messages] - stream = True # Update request parameters for compatability with o1 model series # Refer: https://platform.openai.com/docs/guides/reasoning/beta-limitations - if model_name.startswith("o1"): + stream = True + model_kwargs["stream_options"] = {"include_usage": True} + if model_name == "o1": + temperature = 1 + stream = False + model_kwargs.pop("stream_options", None) + elif model_name.startswith("o1-"): temperature = 1 - model_kwargs.pop("stop", None) model_kwargs.pop("response_format", None) if os.getenv("KHOJ_LLM_SEED"): @@ -181,7 +188,6 @@ def llm_thread( messages=formatted_messages, model=model_name, # type: ignore stream=stream, - stream_options={"include_usage": True} if stream else {}, temperature=temperature, timeout=20, **model_kwargs, diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py index 64a46efc..883650ec 100644 --- a/src/khoj/processor/conversation/utils.py +++ b/src/khoj/processor/conversation/utils.py @@ -34,7 +34,6 @@ from khoj.search_filter.word_filter import WordFilter from khoj.utils import state from khoj.utils.helpers import ( ConversationCommand, - in_debug_mode, is_none_or_empty, is_promptrace_enabled, merge_dicts, @@ -47,28 +46,27 @@ logger = logging.getLogger(__name__) try: from git import Repo except ImportError: - if in_debug_mode(): - logger.warning("GitPython not installed. `pip install gitpython` to enable prompt tracer.") + if is_promptrace_enabled(): + logger.warning("GitPython not installed. 
`pip install gitpython` to use prompt tracer.") model_to_prompt_size = { # OpenAI Models - "gpt-4o": 20000, - "gpt-4o-mini": 20000, - "o1-preview": 20000, - "o1-mini": 20000, + "gpt-4o": 60000, + "gpt-4o-mini": 60000, + "o1": 20000, + "o1-mini": 60000, # Google Models - "gemini-1.5-flash": 20000, - "gemini-1.5-pro": 20000, + "gemini-1.5-flash": 60000, + "gemini-1.5-pro": 60000, # Anthropic Models - "claude-3-5-sonnet-20241022": 20000, - "claude-3-5-haiku-20241022": 20000, + "claude-3-5-sonnet-20241022": 60000, + "claude-3-5-haiku-20241022": 60000, # Offline Models - "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF": 20000, + "Qwen/Qwen2.5-14B-Instruct-GGUF": 20000, "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF": 20000, "bartowski/Llama-3.2-3B-Instruct-GGUF": 20000, "bartowski/gemma-2-9b-it-GGUF": 6000, "bartowski/gemma-2-2b-it-GGUF": 6000, - "Qwen/Qwen2.5-14B-Instruct-GGUF": 20000, } model_to_tokenizer: Dict[str, str] = {} diff --git a/src/khoj/utils/constants.py b/src/khoj/utils/constants.py index f2ab40c6..59534895 100644 --- a/src/khoj/utils/constants.py +++ b/src/khoj/utils/constants.py @@ -38,7 +38,7 @@ model_to_cost: Dict[str, Dict[str, float]] = { # OpenAI Pricing: https://openai.com/api/pricing/ "gpt-4o": {"input": 2.50, "output": 10.00}, "gpt-4o-mini": {"input": 0.15, "output": 0.60}, - "o1-preview": {"input": 15.0, "output": 60.00}, + "o1": {"input": 15.0, "output": 60.00}, "o1-mini": {"input": 3.0, "output": 12.0}, # Gemini Pricing: https://ai.google.dev/pricing "gemini-1.5-flash": {"input": 0.075, "output": 0.30},