Automate updating embeddings, search index on an hourly schedule

- Use the schedule pypi package
- Use QTimer to poll schedule.run_pending() regularly for jobs to run
This commit is contained in:
Debanjum Singh Solanky
2023-01-01 16:22:35 -03:00
parent a58c243bc0
commit 3b0783aab9
4 changed files with 15 additions and 2 deletions

View File

@@ -41,6 +41,7 @@ setup(
"dateparser == 1.1.1", "dateparser == 1.1.1",
"pyqt6 == 6.3.1", "pyqt6 == 6.3.1",
"defusedxml == 0.7.1", "defusedxml == 0.7.1",
'schedule == 1.1.0',
], ],
include_package_data=True, include_package_data=True,
entry_points={"console_scripts": ["khoj = src.main:run"]}, entry_points={"console_scripts": ["khoj = src.main:run"]},

View File

@@ -3,6 +3,9 @@ import sys
import logging import logging
import json import json
# External Packages
import schedule
# Internal Packages # Internal Packages
from src.processor.ledger.beancount_to_jsonl import BeancountToJsonl from src.processor.ledger.beancount_to_jsonl import BeancountToJsonl
from src.processor.markdown.markdown_to_jsonl import MarkdownToJsonl from src.processor.markdown.markdown_to_jsonl import MarkdownToJsonl
@@ -37,6 +40,12 @@ def configure_server(args, required=False):
state.processor_config = configure_processor(args.config.processor) state.processor_config = configure_processor(args.config.processor)
@schedule.repeat(schedule.every(1).hour)
def update_search_index():
    """Refresh the search index; registered with `schedule` to run once an hour.

    Calls configure_search() with regenerate=False on the current state.model
    and state.config, and stores the result back on state.model.

    Failures are logged and swallowed here on purpose: `schedule` does not
    catch exceptions raised by jobs, so an error would propagate out of
    schedule.run_pending() into whatever is polling it. When that poller is a
    Qt timer slot, an unhandled Python exception aborts the application. A
    failed background refresh should leave the previous model in place and be
    retried on the next scheduled run instead.
    """
    try:
        refreshed_model = configure_search(state.model, state.config, regenerate=False)
    except Exception:
        # Top-level job boundary: record the traceback, keep the old model.
        logger.exception("Search Index update via Scheduler failed")
    else:
        state.model = refreshed_model
        logger.info("Search Index updated via Scheduler")
def configure_search(model: SearchModels, config: FullConfig, regenerate: bool, t: SearchType = None): def configure_search(model: SearchModels, config: FullConfig, regenerate: bool, t: SearchType = None):
# Initialize Org Notes Search # Initialize Org Notes Search
if (t == SearchType.Org or t == None) and config.content_type.org: if (t == SearchType.Org or t == None) and config.content_type.org:

View File

@@ -16,6 +16,7 @@ from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles from fastapi.staticfiles import StaticFiles
from PyQt6 import QtWidgets from PyQt6 import QtWidgets
from PyQt6.QtCore import QThread, QTimer from PyQt6.QtCore import QThread, QTimer
import schedule
# Internal Packages # Internal Packages
from src.configure import configure_server from src.configure import configure_server
@@ -99,10 +100,10 @@ def run():
# Setup Signal Handlers # Setup Signal Handlers
signal.signal(signal.SIGINT, sigint_handler) signal.signal(signal.SIGINT, sigint_handler)
# Invoke python Interpreter every 500ms to handle signals # Invoke Python interpreter every 500ms to handle signals, run scheduled tasks
timer = QTimer() timer = QTimer()
timer.start(500) timer.start(500)
timer.timeout.connect(lambda: None) timer.timeout.connect(schedule.run_pending)
# Start Application # Start Application
server.start() server.start()

View File

@@ -126,4 +126,6 @@ def search(q: str, n: Optional[int] = 5, t: Optional[SearchType] = None, r: Opti
@api.get('/update')
def update(t: Optional[SearchType] = None, force: Optional[bool] = False):
    """Rebuild the search model on request and report success.

    Args:
        t: Search type forwarded to configure_search() as `t`; defaults to None.
        force: Forwarded to configure_search() as `regenerate`; defaults to False.

    Returns:
        A dict with 'status' and 'message' keys confirming the update.
    """
    # Build the new model first, then publish it on the shared state.
    refreshed_model = configure_search(state.model, state.config, regenerate=force, t=t)
    state.model = refreshed_model

    logger.info("Search Index updated via API call")

    response = {'status': 'ok', 'message': 'index updated'}
    return response