Upgrade default offline chat model to Llama 3.1

Debanjum Singh Solanky
2024-08-16 07:58:04 -05:00
parent acdc3f9470
commit 58c8068079
12 changed files with 30 additions and 12 deletions

View File

@@ -0,0 +1,17 @@
+# Generated by Django 5.0.7 on 2024-08-19 12:37
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("database", "0057_remove_serverchatsettings_default_model_and_more"),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name="chatmodeloptions",
+            name="chat_model",
+            field=models.CharField(default="bartowski/Meta-Llama-3.1-8B-Instruct-GGUF", max_length=200),
+        ),
+    ]
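This migration only changes the field default, which Django enforces at the application layer, so applying it is effectively a no-op on existing rows. A minimal sketch of applying it programmatically, equivalent to `python manage.py migrate database` (assumes DJANGO_SETTINGS_MODULE is configured):

import django
from django.core.management import call_command

django.setup()  # requires DJANGO_SETTINGS_MODULE to point at the project settings
call_command("migrate", "database")  # apply pending migrations for the "database" app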

View File

@@ -91,7 +91,7 @@ class ChatModelOptions(BaseModel):
     max_prompt_size = models.IntegerField(default=None, null=True, blank=True)
     subscribed_max_prompt_size = models.IntegerField(default=None, null=True, blank=True)
     tokenizer = models.CharField(max_length=200, default=None, null=True, blank=True)
-    chat_model = models.CharField(max_length=200, default="NousResearch/Hermes-2-Pro-Mistral-7B-GGUF")
+    chat_model = models.CharField(max_length=200, default="bartowski/Meta-Llama-3.1-8B-Instruct-GGUF")
     model_type = models.CharField(max_length=200, choices=ModelType.choices, default=ModelType.OFFLINE)
     openai_config = models.ForeignKey(
         OpenAIProcessorConversationConfig, on_delete=models.CASCADE, default=None, null=True, blank=True
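Since the default is applied in Python rather than in the database, existing ChatModelOptions rows keep their stored chat_model; only newly created rows pick up Llama 3.1. A quick check, assuming a configured Django context (the import path is an assumption, not shown in this diff):

from khoj.database.models import ChatModelOptions  # assumed import path

option = ChatModelOptions()  # unsaved instance picks up the new field default
assert option.chat_model == "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF"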

View File

@@ -24,7 +24,7 @@ logger = logging.getLogger(__name__)
 def extract_questions_offline(
     text: str,
-    model: str = "NousResearch/Hermes-2-Pro-Mistral-7B-GGUF",
+    model: str = "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
     loaded_model: Union[Any, None] = None,
     conversation_log={},
     use_history: bool = True,
@@ -141,7 +141,7 @@ def converse_offline(
     references=[],
     online_results=[],
     conversation_log={},
-    model: str = "NousResearch/Hermes-2-Pro-Mistral-7B-GGUF",
+    model: str = "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
     loaded_model: Union[Any, None] = None,
     completion_func=None,
     conversation_commands=[ConversationCommand.Default],
@@ -240,7 +240,7 @@ def llm_thread(g, messages: List[ChatMessage], model: Any, max_prompt_size: int
 def send_message_to_model_offline(
     messages: List[ChatMessage],
     loaded_model=None,
-    model="NousResearch/Hermes-2-Pro-Mistral-7B-GGUF",
+    model="bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
     temperature: float = 0.2,
     streaming=False,
     stop=[],
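A hedged usage sketch of the updated signature above. It assumes send_message_to_model_offline returns the reply text when streaming=False and that ChatMessage comes from langchain, neither of which this hunk shows:

from langchain.schema import ChatMessage  # assumption: the ChatMessage type referenced in the signature

# Without a pre-loaded model, the first call is assumed to download and load the GGUF weights.
reply = send_message_to_model_offline(
    messages=[ChatMessage(role="user", content="What changed in this release?")],
    model="bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
    temperature=0.2,
    streaming=False,
)
print(reply)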

View File

@@ -75,6 +75,6 @@ def load_model_from_cache(repo_id: str, filename: str, repo_type="models"):
 def infer_max_tokens(model_context_window: int, configured_max_tokens=None) -> int:
     """Infer max prompt size based on device memory and max context window supported by the model"""
     configured_max_tokens = math.inf if configured_max_tokens is None else configured_max_tokens
-    vram_based_n_ctx = int(get_device_memory() / 2e6)  # based on heuristic
+    vram_based_n_ctx = int(get_device_memory() / 1e6)  # based on heuristic
     configured_max_tokens = configured_max_tokens or math.inf  # do not use if set to None
     return min(configured_max_tokens, vram_based_n_ctx, model_context_window)
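Halving the divisor from 2e6 to 1e6 doubles the memory-based token budget: roughly one context token per MB of device memory instead of one per 2 MB. A self-contained restatement of the heuristic for illustration, with device memory passed in explicitly because get_device_memory() is not shown here:

import math

def infer_max_tokens_sketch(model_context_window: int, configured_max_tokens=None, device_memory_bytes: float = 8e9) -> int:
    # Same min() over three limits as above; device_memory_bytes stands in for get_device_memory().
    configured_max_tokens = math.inf if configured_max_tokens is None else configured_max_tokens
    vram_based_n_ctx = int(device_memory_bytes / 1e6)  # ~1 token per MB of device memory
    return min(configured_max_tokens, vram_based_n_ctx, model_context_window)

# With 8 GB of memory the heuristic budgets ~8000 tokens, far below Llama 3.1's
# 128K context window, so device memory is the binding limit on most machines.
print(infer_max_tokens_sketch(131072))  # -> 8000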

View File

@@ -25,6 +25,7 @@ model_to_prompt_size = {
     "gpt-4-turbo-preview": 20000,
     "TheBloke/Mistral-7B-Instruct-v0.2-GGUF": 3500,
     "NousResearch/Hermes-2-Pro-Mistral-7B-GGUF": 3500,
+    "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF": 20000,
 }
 model_to_tokenizer: Dict[str, str] = {}
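The new entry budgets 20000 prompt tokens for Llama 3.1, matching gpt-4-turbo-preview and well above the 3500 used for the older 7B GGUF models. A sketch of how such a map is typically consulted; the fallback value is hypothetical, not taken from this diff:

configured_model = "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF"
# Fall back to a conservative budget for models missing from the map.
max_prompt_size = model_to_prompt_size.get(configured_model, 3500)
print(max_prompt_size)  # -> 20000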

View File

@@ -70,7 +70,7 @@ class OfflineChatProcessorConfig:
 class OfflineChatProcessorModel:
-    def __init__(self, chat_model: str = "NousResearch/Hermes-2-Pro-Mistral-7B-GGUF", max_tokens: int = None):
+    def __init__(self, chat_model: str = "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF", max_tokens: int = None):
         self.chat_model = chat_model
         self.loaded_model = None
         try:
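A minimal usage sketch of the constructor above, assuming the try block (not shown in full) downloads and loads the GGUF weights, so first instantiation can take a while:

# max_tokens=None is assumed to defer the context size to infer_max_tokens() and device memory.
processor = OfflineChatProcessorModel(max_tokens=None)
print(processor.chat_model)  # -> bartowski/Meta-Llama-3.1-8B-Instruct-GGUF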

View File

@@ -8,7 +8,7 @@ empty_escape_sequences = "\n|\r|\t| "
 app_env_filepath = "~/.khoj/env"
 telemetry_server = "https://khoj.beta.haletic.com/v1/telemetry"
 content_directory = "~/.khoj/content/"
-default_offline_chat_model = "NousResearch/Hermes-2-Pro-Mistral-7B-GGUF"
+default_offline_chat_model = "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF"
 default_online_chat_model = "gpt-4-turbo-preview"
 empty_config = {

View File

@@ -93,7 +93,7 @@ class OpenAIProcessorConfig(ConfigBase):
 class OfflineChatProcessorConfig(ConfigBase):
-    chat_model: Optional[str] = "NousResearch/Hermes-2-Pro-Mistral-7B-GGUF"
+    chat_model: Optional[str] = "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF"
 
 
 class ConversationProcessorConfig(ConfigBase):
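The same default lands in the raw-config schema, so a config file that omits chat_model now resolves to Llama 3.1. A quick sketch, assuming ConfigBase behaves like a standard pydantic BaseModel:

config = OfflineChatProcessorConfig()  # no explicit chat_model in the raw config
print(config.chat_model)  # -> bartowski/Meta-Llama-3.1-8B-Instruct-GGUF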