mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-07 21:29:13 +00:00
Allow using OpenAI models for search in Khoj
- Init processor before search to instantiate `openai_api_key` from `khoj.yml`. The key is used to configure search with OpenAI models - To use OpenAI models for search in Khoj - Set `encoder` to the name of an OpenAI model, e.g. `text-embedding-ada-002` - Set `encoder-type` in `khoj.yml` to `src.utils.models.OpenAI` - Set `model-directory` to `null`, as an online model cannot be stored on disk
This commit is contained in:
@@ -34,14 +34,14 @@ def configure_server(args, required=False):
|
|||||||
else:
|
else:
|
||||||
state.config = args.config
|
state.config = args.config
|
||||||
|
|
||||||
|
# Initialize Processor from Config
|
||||||
|
state.processor_config = configure_processor(args.config.processor)
|
||||||
|
|
||||||
# Initialize the search model from Config
|
# Initialize the search model from Config
|
||||||
state.search_index_lock.acquire()
|
state.search_index_lock.acquire()
|
||||||
state.model = configure_search(state.model, state.config, args.regenerate)
|
state.model = configure_search(state.model, state.config, args.regenerate)
|
||||||
state.search_index_lock.release()
|
state.search_index_lock.release()
|
||||||
|
|
||||||
# Initialize Processor from Config
|
|
||||||
state.processor_config = configure_processor(args.config.processor)
|
|
||||||
|
|
||||||
|
|
||||||
@schedule.repeat(schedule.every(1).hour)
|
@schedule.repeat(schedule.every(1).hour)
|
||||||
def update_search_index():
|
def update_search_index():
|
||||||
|
|||||||
28
src/utils/models.py
Normal file
28
src/utils/models.py
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
# External Packages
|
||||||
|
import openai
|
||||||
|
import torch
|
||||||
|
from tqdm import trange
|
||||||
|
|
||||||
|
# Internal Packages
|
||||||
|
from src.utils.state import processor_config
|
||||||
|
|
||||||
|
|
||||||
|
class OpenAI:
    """Embed text entries with an OpenAI embedding model.

    Exposes the same `encode(entries, device=None, **kwargs)` interface as the
    local sentence-transformer encoders, so it can be dropped in as the search
    model via the `encoder-type` config option.
    """

    def __init__(self, model_name, device=None):
        # Name of the OpenAI embedding model, e.g. "text-embedding-ada-002".
        self.model_name = model_name
        # Configure the OpenAI client from the processor config; the processor
        # must be initialized before search so this key is available.
        openai.api_key = processor_config.conversation.openai_api_key
        # Default to embedding dimensions of the text-embedding-ada-002 model.
        # Updated to the actual size after the first successful API response.
        self.embedding_dimensions = 1536

    def encode(self, entries, device=None, **kwargs):
        """Return a tensor of shape (len(entries), embedding_dimensions).

        Best-effort: an entry that fails to embed is logged and replaced with
        a zero vector, so the output always has one row per input entry.
        """
        # torch.stack raises on an empty list, so short-circuit empty input
        # with an empty (0, dims) tensor instead of crashing.
        if not entries:
            return torch.empty((0, self.embedding_dimensions), device=device)

        embedding_tensors = []
        for index in trange(len(entries)):
            try:
                response = openai.Embedding.create(input=entries[index], model=self.model_name)
                embedding_tensors += [torch.tensor(response.data[0].embedding, device=device)]
                # Record this model's actual embedding size so that any
                # zero-vector fallbacks below match it and torch.stack succeeds.
                self.embedding_dimensions = len(response.data[0].embedding)
            except Exception as e:
                # Deliberate broad catch: indexing must stay aligned with the
                # input entries, so substitute zeros rather than aborting.
                print(f"Failed to encode entry {index} of length: {len(entries[index])}\n\n{entries[index][:1000]}...\n\n{e}")
                embedding_tensors += [torch.zeros(self.embedding_dimensions, device=device)]
        return torch.stack(embedding_tensors)
|
||||||
|
|
||||||
|
|
||||||
Reference in New Issue
Block a user