Resolve relative file paths to model embeddings in all search types
@@ -15,7 +15,7 @@ import torch
 from sentence_transformers import SentenceTransformer, CrossEncoder, util

 # Internal Packages
-from utils.helpers import get_absolute_path
+from utils.helpers import get_absolute_path, resolve_absolute_path
 from processor.org_mode.org_to_jsonl import org_to_jsonl


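The newly imported resolve_absolute_path is used alongside get_absolute_path below, but its implementation is not part of this diff. A minimal sketch of what such a helper plausibly looks like, assuming it mirrors get_absolute_path while returning a pathlib.Path instead of a string (illustrative, not the repo's actual code):

from pathlib import Path

def resolve_absolute_path(filepath):
    "Expand ~ and make a possibly relative filepath absolute (sketch of the assumed helper)"
    return Path(filepath).expanduser().absolute()

Returning a Path keeps .exists() available at the call sites below, while get_absolute_path presumably keeps returning a string for consumers that want one.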
@@ -50,7 +50,7 @@ def extract_entries(notesfile, verbose=0):
 def compute_embeddings(entries, bi_encoder, embeddings_file, regenerate=False, verbose=0):
     "Compute (and Save) Embeddings or Load Pre-Computed Embeddings"
     # Load pre-computed embeddings from file if exists
-    if embeddings_file.exists() and not regenerate:
+    if resolve_absolute_path(embeddings_file).exists() and not regenerate:
         corpus_embeddings = torch.load(get_absolute_path(embeddings_file))
         if verbose > 0:
             print(f"Loaded embeddings from {embeddings_file}")
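The motivation for routing the existence check through resolve_absolute_path: pathlib's Path.exists() expands neither ~ nor relative segments, so an unresolved check never matches a ~-prefixed config value and the embeddings get regenerated on every run. A short illustration with a hypothetical path:

from pathlib import Path

raw = Path("~/.cache/embeddings.pt")    # hypothetical config value
raw.exists()                            # False: "~" is taken literally, relative to the cwd
raw.expanduser().absolute().exists()    # True whenever the file actually exists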
@@ -152,7 +152,7 @@ def setup(input_files, input_filter, compressed_jsonl, embeddings, regenerate=Fa
     bi_encoder, cross_encoder, top_k = initialize_model()

     # Map notes in Org-Mode files to (compressed) JSONL formatted file
-    if not compressed_jsonl.exists() or regenerate:
+    if not resolve_absolute_path(compressed_jsonl).exists() or regenerate:
         org_to_jsonl(input_files, input_filter, compressed_jsonl, verbose)

     # Extract Entries
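The setup() change applies the same resolve-before-check pattern to the compressed JSONL cache. Distilled into a standalone sketch (names here are illustrative, not the repo's API):

from pathlib import Path

def use_cache_or_rebuild(cache_file, rebuild, regenerate=False):
    "Rebuild the cache unless a resolved, already-existing cache file can be reused"
    if not Path(cache_file).expanduser().absolute().exists() or regenerate:
        rebuild()  # in the hunk above: org_to_jsonl(input_files, input_filter, compressed_jsonl, verbose)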
@@ -32,7 +32,7 @@ def compute_embeddings(image_names, model, embeddings_file, regenerate=False, ve
     image_embeddings = None

     # Load pre-computed embeddings from file if exists
-    if embeddings_file.exists() and not regenerate:
+    if resolve_absolute_path(embeddings_file).exists() and not regenerate:
         image_embeddings = torch.load(embeddings_file)
         if verbose:
             print(f"Loaded pre-computed embeddings from {embeddings_file}")
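In this image-search hunk only the existence check is resolved; torch.load(embeddings_file) still receives the original value. That is fine whenever the configured path is already usable as-is, since torch.load, like open(), does not expand ~ either. Resolving once and reusing the result keeps both steps consistent (sketch with a hypothetical path):

from pathlib import Path
import torch

embeddings_path = Path("~/.cache/image_embeddings.pt").expanduser().absolute()  # hypothetical path
if embeddings_path.exists():
    image_embeddings = torch.load(embeddings_path)  # torch.load accepts str or Path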
@@ -13,7 +13,7 @@ import torch
 from sentence_transformers import SentenceTransformer, CrossEncoder, util

 # Internal Packages
-from utils.helpers import get_absolute_path
+from utils.helpers import get_absolute_path, resolve_absolute_path
 from processor.ledger.beancount_to_jsonl import beancount_to_jsonl


@@ -44,7 +44,7 @@ def extract_entries(notesfile, verbose=0):
 def compute_embeddings(entries, bi_encoder, embeddings_file, regenerate=False, verbose=0):
     "Compute (and Save) Embeddings or Load Pre-Computed Embeddings"
     # Load pre-computed embeddings from file if exists
-    if embeddings_file.exists() and not regenerate:
+    if resolve_absolute_path(embeddings_file).exists() and not regenerate:
         corpus_embeddings = torch.load(get_absolute_path(embeddings_file))
         if verbose > 0:
             print(f"Loaded embeddings from {embeddings_file}")
@@ -146,7 +146,7 @@ def setup(input_files, input_filter, compressed_jsonl, embeddings, regenerate=Fa
     bi_encoder, cross_encoder, top_k = initialize_model()

     # Map notes in Org-Mode files to (compressed) JSONL formatted file
-    if not compressed_jsonl.exists() or regenerate:
+    if not resolve_absolute_path(compressed_jsonl).exists() or regenerate:
         beancount_to_jsonl(input_files, input_filter, compressed_jsonl, verbose)

     # Extract Entries
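Taken together, the existence checks in all three search types now see the same resolved path that the subsequent load uses. For a relative config value, resolution is still anchored at the current working directory, which is why an unresolved relative path can point at different files depending on where the process is launched from:

from pathlib import Path
import os

cache = Path(".khoj/embeddings.pt")   # hypothetical relative config value
os.chdir("/tmp")
print(cache.expanduser().absolute())  # /tmp/.khoj/embeddings.pt
os.chdir(Path.home())
print(cache.expanduser().absolute())  # <home>/.khoj/embeddings.pt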