mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-04 21:29:12 +00:00
Merge branch 'master' into support-incremental-updates-of-embeddings
This commit is contained in:
@@ -92,7 +92,7 @@ class MainWindow(QtWidgets.QMainWindow):
|
||||
search_type_layout = QtWidgets.QVBoxLayout(search_type_settings)
|
||||
enable_search_type = SearchCheckBox(f"Search {search_type.name}", search_type)
|
||||
# Add file browser to set input files for given search type
|
||||
input_files = FileBrowser(file_input_text, search_type, current_content_files)
|
||||
input_files = FileBrowser(file_input_text, search_type, current_content_files or [])
|
||||
|
||||
# Set enabled/disabled based on checkbox state
|
||||
enable_search_type.setChecked(current_content_files is not None and len(current_content_files) > 0)
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
;; Author: Debanjum Singh Solanky <debanjum@gmail.com>
|
||||
;; Description: Natural, Incremental Search for your Second Brain
|
||||
;; Keywords: search, org-mode, outlines, markdown, beancount, ledger, image
|
||||
;; Version: 0.1.6
|
||||
;; Version: 0.1.9
|
||||
;; Package-Requires: ((emacs "27.1"))
|
||||
;; URL: http://github.com/debanjum/khoj/interface/emacs
|
||||
|
||||
|
||||
@@ -3,8 +3,12 @@ import os
|
||||
import signal
|
||||
import sys
|
||||
import logging
|
||||
import warnings
|
||||
from platform import system
|
||||
|
||||
# Ignore non-actionable warnings
|
||||
warnings.filterwarnings("ignore", message=r'snapshot_download.py has been made private', category=FutureWarning)
|
||||
|
||||
# External Packages
|
||||
import uvicorn
|
||||
from fastapi import FastAPI
|
||||
@@ -63,6 +67,9 @@ def run():
|
||||
args = cli(state.cli_args)
|
||||
set_state(args)
|
||||
|
||||
# Create app directory, if it doesn't exist
|
||||
state.config_file.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Setup Logger
|
||||
if args.verbose == 0:
|
||||
logger.setLevel(logging.WARN)
|
||||
|
||||
@@ -41,8 +41,13 @@ from os.path import relpath
|
||||
indent_regex = re.compile(r'^\s*')
|
||||
|
||||
def normalize_filename(filename):
|
||||
file_relative_to_home = f'~/{relpath(filename, start=Path.home())}'
|
||||
escaped_filename = f'{file_relative_to_home}'.replace("[","\[").replace("]","\]")
|
||||
"Normalize and escape filename for rendering"
|
||||
if not Path(filename).is_absolute():
|
||||
# Normalize relative filename to be relative to current directory
|
||||
normalized_filename = f'~/{relpath(filename, start=Path.home())}'
|
||||
else:
|
||||
normalized_filename = filename
|
||||
escaped_filename = f'{normalized_filename}'.replace("[","\[").replace("]","\]")
|
||||
return escaped_filename
|
||||
|
||||
def makelist(filename):
|
||||
@@ -61,7 +66,7 @@ def makelist(filename):
|
||||
|
||||
todos = { "TODO": "", "WAITING": "", "ACTIVE": "",
|
||||
"DONE": "", "CANCELLED": "", "FAILED": ""} # populated from #+SEQ_TODO line
|
||||
level = 0
|
||||
level = ""
|
||||
heading = ""
|
||||
bodytext = ""
|
||||
tags = list() # set of all tags in headline
|
||||
@@ -73,6 +78,7 @@ def makelist(filename):
|
||||
propdict = dict()
|
||||
in_properties_drawer = False
|
||||
in_logbook_drawer = False
|
||||
file_title = f'{filename}'
|
||||
|
||||
for line in f:
|
||||
ctr += 1
|
||||
@@ -111,6 +117,16 @@ def makelist(filename):
|
||||
kwlist = re.findall(r'([A-Z]+)\(', line)
|
||||
for kw in kwlist: todos[kw] = ""
|
||||
|
||||
# Set file title to TITLE property, if it exists
|
||||
title_search = re.search(r'^#\+TITLE:\s*(.*)$', line)
|
||||
if title_search and title_search.group(1).strip() != '':
|
||||
title_text = title_search.group(1).strip()
|
||||
if file_title == f'{filename}':
|
||||
file_title = title_text
|
||||
else:
|
||||
file_title += f' {title_text}'
|
||||
continue
|
||||
|
||||
# Ignore Properties Drawers Completely
|
||||
if re.search(':PROPERTIES:', line):
|
||||
in_properties_drawer=True
|
||||
@@ -167,7 +183,7 @@ def makelist(filename):
|
||||
bodytext = bodytext + line
|
||||
|
||||
# write out last node
|
||||
thisNode = Orgnode(level, heading, bodytext, tags)
|
||||
thisNode = Orgnode(level, heading or file_title, bodytext, tags)
|
||||
thisNode.setProperties(propdict)
|
||||
if sched_date:
|
||||
thisNode.setScheduled(sched_date)
|
||||
@@ -196,8 +212,12 @@ def makelist(filename):
|
||||
n.setHeading(prtysrch.group(2))
|
||||
|
||||
# Set SOURCE property to a file+heading based org-mode link to the entry
|
||||
escaped_heading = n.Heading().replace("[","\\[").replace("]","\\]")
|
||||
n.properties['SOURCE'] = f'[[file:{normalize_filename(filename)}::*{escaped_heading}]]'
|
||||
if n.Level() == 0:
|
||||
n.properties['LINE'] = f'file:{normalize_filename(filename)}::0'
|
||||
n.properties['SOURCE'] = f'[[file:{normalize_filename(filename)}]]'
|
||||
else:
|
||||
escaped_heading = n.Heading().replace("[","\\[").replace("]","\\]")
|
||||
n.properties['SOURCE'] = f'[[file:{normalize_filename(filename)}::*{escaped_heading}]]'
|
||||
|
||||
return nodelist
|
||||
|
||||
|
||||
@@ -27,7 +27,7 @@ class WordFilter(BaseFilter):
|
||||
|
||||
def load(self, entries, regenerate=False):
|
||||
start = time.time()
|
||||
self.cache = {} # Clear cache on reload of filter
|
||||
self.cache = {} # Clear cache on filter (re-)load
|
||||
entry_splitter = r',|\.| |\]|\[\(|\)|\{|\}|\<|\>|\t|\n|\:|\;|\?|\!|\(|\)|\&|\^|\$|\@|\%|\+|\=|\/|\\|\||\~|\`|\"|\''
|
||||
# Create map of words to entries they exist in
|
||||
for entry_index, entry in enumerate(entries):
|
||||
|
||||
@@ -11,7 +11,7 @@ from src.search_filter.base_filter import BaseFilter
|
||||
|
||||
# Internal Packages
|
||||
from src.utils import state
|
||||
from src.utils.helpers import get_absolute_path, resolve_absolute_path, load_model
|
||||
from src.utils.helpers import get_absolute_path, is_none_or_empty, resolve_absolute_path, load_model
|
||||
from src.utils.config import TextSearchModel
|
||||
from src.utils.rawconfig import TextSearchConfig, TextContentConfig
|
||||
from src.utils.jsonl import load_jsonl
|
||||
@@ -187,6 +187,8 @@ def setup(text_to_jsonl, config: TextContentConfig, search_config: TextSearchCon
|
||||
|
||||
# Extract Updated Entries
|
||||
entries = extract_entries(config.compressed_jsonl)
|
||||
if is_none_or_empty(entries):
|
||||
raise ValueError(f"No valid entries found in specified files: {config.input_files} or {config.input_filter}")
|
||||
top_k = min(len(entries), top_k) # top_k hits can't be more than the total entries in corpus
|
||||
|
||||
# Compute or Load Embeddings
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
# Standard Packages
|
||||
import argparse
|
||||
import pathlib
|
||||
from importlib.metadata import version
|
||||
|
||||
# Internal Packages
|
||||
from src.utils.helpers import resolve_absolute_path
|
||||
@@ -17,9 +18,15 @@ def cli(args=None):
|
||||
parser.add_argument('--host', type=str, default='127.0.0.1', help="Host address of the server. Default: 127.0.0.1")
|
||||
parser.add_argument('--port', '-p', type=int, default=8000, help="Port of the server. Default: 8000")
|
||||
parser.add_argument('--socket', type=pathlib.Path, help="Path to UNIX socket for server. Use to run server behind reverse proxy. Default: /tmp/uvicorn.sock")
|
||||
parser.add_argument('--version', '-V', action='store_true', help="Print the installed Khoj version and exit")
|
||||
|
||||
args = parser.parse_args(args)
|
||||
|
||||
if args.version:
|
||||
# Show version of khoj installed and exit
|
||||
print(version('khoj-assistant'))
|
||||
exit(0)
|
||||
|
||||
# Normalize config_file path to absolute path
|
||||
args.config_file = resolve_absolute_path(args.config_file)
|
||||
|
||||
|
||||
@@ -44,7 +44,7 @@ def dump_jsonl(jsonl_data, output_path):
|
||||
with open(output_path, 'w', encoding='utf-8') as f:
|
||||
f.write(jsonl_data)
|
||||
|
||||
logger.info(f'Wrote {len(jsonl_data)} lines to jsonl at {output_path}')
|
||||
logger.info(f'Wrote jsonl data to {output_path}')
|
||||
|
||||
|
||||
def compress_jsonl_data(jsonl_data, output_path):
|
||||
|
||||
Reference in New Issue
Block a user