From a08b15235851432fbf1aa0f6e31f18b7a8ac7538 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Mon, 6 Nov 2023 19:26:54 -0800 Subject: [PATCH 01/26] Improve log messages in text_entries and memory leak unit test --- src/khoj/processor/text_to_entries.py | 2 +- tests/test_helpers.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/khoj/processor/text_to_entries.py b/src/khoj/processor/text_to_entries.py index 501ef5d3..4661fd9b 100644 --- a/src/khoj/processor/text_to_entries.py +++ b/src/khoj/processor/text_to_entries.py @@ -93,7 +93,7 @@ class TextToEntries(ABC): num_deleted_entries = 0 if regenerate: - with timer("Prepared dataset for regeneration in", logger): + with timer("Cleared existing dataset for regeneration in", logger): logger.debug(f"Deleting all entries for file type {file_type}") num_deleted_entries = EntryAdapters.delete_all_entries(user, file_type) diff --git a/tests/test_helpers.py b/tests/test_helpers.py index 30499049..fdd29b02 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -64,6 +64,7 @@ def test_encode_docs_memory_leak(): batch_size = 20 embeddings_model = EmbeddingsModel() memory_usage_trend = [] + device = f"{helpers.get_device()}".upper() # Act # Encode random strings repeatedly and record memory usage trend @@ -76,8 +77,9 @@ def test_encode_docs_memory_leak(): # Calculate slope of line fitting memory usage history memory_usage_trend = np.array(memory_usage_trend) slope, _, _, _, _ = linregress(np.arange(len(memory_usage_trend)), memory_usage_trend) + print(f"Memory usage increased at ~{slope:.2f} MB per iteration on {device}") # Assert # If slope is positive memory utilization is increasing # Positive threshold of 2, from observing memory usage trend on MPS vs CPU device - assert slope < 2, f"Memory usage increasing at ~{slope:.2f} MB per iteration" + assert slope < 2, f"Memory leak suspected on {device}. Memory usage increased at ~{slope:.2f} MB per iteration" From 97cf8339aa7c61d3b57801f999ae11742367ab50 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Mon, 6 Nov 2023 21:57:37 -0800 Subject: [PATCH 02/26] Rename Sync button, Force Sync toggle to Save, Save All buttons --- src/interface/desktop/config.html | 11 +++++------ src/interface/desktop/renderer.js | 10 +++++++--- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/interface/desktop/config.html b/src/interface/desktop/config.html index 0629a5f7..3f8e19d9 100644 --- a/src/interface/desktop/config.html +++ b/src/interface/desktop/config.html @@ -91,11 +91,10 @@
- +
-
- - +
+
@@ -336,7 +335,7 @@ padding: 4px; cursor: pointer; } - #sync-data { + button.sync-data { background-color: var(--primary); border: none; color: var(--main-text-color); @@ -351,7 +350,7 @@ box-shadow: 0px 5px 0px var(--background-color); } - #sync-data:hover { + button.sync-data:hover { background-color: var(--primary-hover); box-shadow: 0px 3px 0px var(--background-color); } diff --git a/src/interface/desktop/renderer.js b/src/interface/desktop/renderer.js index 1e1fae32..26765bf0 100644 --- a/src/interface/desktop/renderer.js +++ b/src/interface/desktop/renderer.js @@ -196,9 +196,13 @@ khojKeyInput.addEventListener('blur', async () => { }); const syncButton = document.getElementById('sync-data'); -const syncForceToggle = document.getElementById('sync-force'); syncButton.addEventListener('click', async () => { loadingBar.style.display = 'block'; - const regenerate = syncForceToggle.checked; - await window.syncDataAPI.syncData(regenerate); + await window.syncDataAPI.syncData(false); +}); + +const syncForceButton = document.getElementById('sync-force'); +syncForceButton.addEventListener('click', async () => { + loadingBar.style.display = 'block'; + await window.syncDataAPI.syncData(true); }); From 9f47fc8e34d2bac840f6677d45b5856d38df3b80 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Mon, 6 Nov 2023 21:58:33 -0800 Subject: [PATCH 03/26] Upgrade langchain version since adding support for OCR-ing PDFs --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e87d205e..f6080ce6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,7 +55,7 @@ dependencies = [ "torch == 2.0.1", "uvicorn == 0.17.6", "aiohttp == 3.8.5", - "langchain >= 0.0.187", + "langchain >= 0.0.331", "requests >= 2.26.0", "bs4 >= 0.0.1", "anyio == 3.7.1", From c82cd0862aa79edf92a19c708bef8589f9ab1a18 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Mon, 6 Nov 2023 23:11:22 -0800 Subject: [PATCH 04/26] Delete deprecated content config pages for local files from web client The desktop app now manages syncing local computer files to index The server only manages "cloud" data source like github and notion. --- .../interface/web/content_type_input.html | 159 ------------------ src/khoj/routers/web_client.py | 47 ------ 2 files changed, 206 deletions(-) delete mode 100644 src/khoj/interface/web/content_type_input.html diff --git a/src/khoj/interface/web/content_type_input.html b/src/khoj/interface/web/content_type_input.html deleted file mode 100644 index f8751ddc..00000000 --- a/src/khoj/interface/web/content_type_input.html +++ /dev/null @@ -1,159 +0,0 @@ -{% extends "base_config.html" %} -{% block content %} -
-
-

- {{ content_type|capitalize }} - {{ content_type|capitalize }} -

-
- - - - - - - - - - - -
- - - {% if current_config['input_files'] is none %} - - {% else %} - {% for input_file in current_config['input_files'] %} - - {% endfor %} - {% endif %} - - -
- - - {% if current_config['input_filter'] is none %} - - {% else %} - {% for input_filter in current_config['input_filter'] %} - - {% endfor %} - {% endif %} - - -
-
- - -
-
-
-
- -{% endblock %} diff --git a/src/khoj/routers/web_client.py b/src/khoj/routers/web_client.py index 35603e18..65292ccf 100644 --- a/src/khoj/routers/web_client.py +++ b/src/khoj/routers/web_client.py @@ -9,7 +9,6 @@ from fastapi.responses import HTMLResponse, FileResponse, RedirectResponse from fastapi.templating import Jinja2Templates from starlette.authentication import requires from khoj.utils.rawconfig import ( - TextContentConfig, GithubContentConfig, GithubRepoConfig, NotionContentConfig, @@ -18,14 +17,11 @@ from khoj.utils.rawconfig import ( # Internal Packages from khoj.utils import constants, state from database.adapters import EntryAdapters, get_user_github_config, get_user_notion_config, ConversationAdapters -from database.models import LocalOrgConfig, LocalMarkdownConfig, LocalPdfConfig, LocalPlaintextConfig # Initialize Router web_client = APIRouter() templates = Jinja2Templates(directory=constants.web_directory) -VALID_TEXT_CONTENT_TYPES = ["org", "markdown", "pdf", "plaintext"] - # Create Routes @web_client.get("/", response_class=FileResponse) @@ -109,17 +105,6 @@ def login_page(request: Request): ) -def map_config_to_object(content_type: str): - if content_type == "org": - return LocalOrgConfig - if content_type == "markdown": - return LocalMarkdownConfig - if content_type == "pdf": - return LocalPdfConfig - if content_type == "plaintext": - return LocalPlaintextConfig - - @web_client.get("/config", response_class=HTMLResponse) @requires(["authenticated"], redirect="login_page") def config_page(request: Request): @@ -224,35 +209,3 @@ def notion_config_page(request: Request): "user_photo": user_picture, }, ) - - -@web_client.get("/config/content_type/{content_type}", response_class=HTMLResponse) -@requires(["authenticated"], redirect="login_page") -def content_config_page(request: Request, content_type: str): - if content_type not in VALID_TEXT_CONTENT_TYPES: - return templates.TemplateResponse("config.html", context={"request": request}) - - object = map_config_to_object(content_type) - user = request.user.object - user_picture = request.session.get("user", {}).get("picture") - config = object.objects.filter(user=user).first() - if config == None: - config = object.objects.create(user=user) - - current_config = TextContentConfig( - input_files=config.input_files, - input_filter=config.input_filter, - index_heading_entries=config.index_heading_entries, - ) - current_config = json.loads(current_config.json()) - - return templates.TemplateResponse( - "content_type_input.html", - context={ - "request": request, - "current_config": current_config, - "content_type": content_type, - "username": user.username, - "user_photo": user_picture, - }, - ) From 9ab327a2b6b30891896786c6f53408479dfa243c Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Mon, 6 Nov 2023 23:49:08 -0800 Subject: [PATCH 05/26] Store the data source of each entry in database This will be useful for updating, deleting entries by their data source. Data source can be one of Computer, Github or Notion for now Store each file/entries source in database --- src/database/adapters/__init__.py | 22 ++++++++++++++++--- .../migrations/0012_entry_file_source.py | 21 ++++++++++++++++++ src/database/models/__init__.py | 6 +++++ .../processor/github/github_to_entries.py | 7 +++++- .../processor/markdown/markdown_to_entries.py | 1 + .../processor/notion/notion_to_entries.py | 7 +++++- src/khoj/processor/org_mode/org_to_entries.py | 1 + src/khoj/processor/pdf/pdf_to_entries.py | 1 + .../plaintext/plaintext_to_entries.py | 1 + src/khoj/processor/text_to_entries.py | 4 +++- src/khoj/search_type/text_search.py | 9 ++++---- tests/test_text_search.py | 2 +- 12 files changed, 71 insertions(+), 11 deletions(-) create mode 100644 src/database/migrations/0012_entry_file_source.py diff --git a/src/database/adapters/__init__.py b/src/database/adapters/__init__.py index fa37aa99..69a3c1f4 100644 --- a/src/database/adapters/__init__.py +++ b/src/database/adapters/__init__.py @@ -287,13 +287,21 @@ class EntryAdapters: return deleted_count @staticmethod - def delete_all_entries(user: KhojUser, file_type: str = None): + def delete_all_entries_by_type(user: KhojUser, file_type: str = None): if file_type is None: deleted_count, _ = Entry.objects.filter(user=user).delete() else: deleted_count, _ = Entry.objects.filter(user=user, file_type=file_type).delete() return deleted_count + @staticmethod + def delete_all_entries_by_source(user: KhojUser, file_source: str = None): + if file_source is None: + deleted_count, _ = Entry.objects.filter(user=user).delete() + else: + deleted_count, _ = Entry.objects.filter(user=user, file_source=file_source).delete() + return deleted_count + @staticmethod def get_existing_entry_hashes_by_file(user: KhojUser, file_path: str): return Entry.objects.filter(user=user, file_path=file_path).values_list("hashed_value", flat=True) @@ -318,8 +326,12 @@ class EntryAdapters: return await Entry.objects.filter(user=user, file_path=file_path).adelete() @staticmethod - def aget_all_filenames(user: KhojUser): - return Entry.objects.filter(user=user).distinct("file_path").values_list("file_path", flat=True) + def aget_all_filenames_by_source(user: KhojUser, file_source: str): + return ( + Entry.objects.filter(user=user, file_source=file_source) + .distinct("file_path") + .values_list("file_path", flat=True) + ) @staticmethod async def adelete_all_entries(user: KhojUser): @@ -384,3 +396,7 @@ class EntryAdapters: @staticmethod def get_unique_file_types(user: KhojUser): return Entry.objects.filter(user=user).values_list("file_type", flat=True).distinct() + + @staticmethod + def get_unique_file_source(user: KhojUser): + return Entry.objects.filter(user=user).values_list("file_source", flat=True).distinct() diff --git a/src/database/migrations/0012_entry_file_source.py b/src/database/migrations/0012_entry_file_source.py new file mode 100644 index 00000000..187136ae --- /dev/null +++ b/src/database/migrations/0012_entry_file_source.py @@ -0,0 +1,21 @@ +# Generated by Django 4.2.5 on 2023-11-07 07:24 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("database", "0011_merge_20231102_0138"), + ] + + operations = [ + migrations.AddField( + model_name="entry", + name="file_source", + field=models.CharField( + choices=[("computer", "Computer"), ("notion", "Notion"), ("github", "Github")], + default="computer", + max_length=30, + ), + ), + ] diff --git a/src/database/models/__init__.py b/src/database/models/__init__.py index 5dd9622b..b1be9ded 100644 --- a/src/database/models/__init__.py +++ b/src/database/models/__init__.py @@ -131,11 +131,17 @@ class Entry(BaseModel): GITHUB = "github" CONVERSATION = "conversation" + class EntrySource(models.TextChoices): + COMPUTER = "computer" + NOTION = "notion" + GITHUB = "github" + user = models.ForeignKey(KhojUser, on_delete=models.CASCADE, default=None, null=True, blank=True) embeddings = VectorField(dimensions=384) raw = models.TextField() compiled = models.TextField() heading = models.CharField(max_length=1000, default=None, null=True, blank=True) + file_source = models.CharField(max_length=30, choices=EntrySource.choices, default=EntrySource.COMPUTER) file_type = models.CharField(max_length=30, choices=EntryType.choices, default=EntryType.PLAINTEXT) file_path = models.CharField(max_length=400, default=None, null=True, blank=True) file_name = models.CharField(max_length=400, default=None, null=True, blank=True) diff --git a/src/khoj/processor/github/github_to_entries.py b/src/khoj/processor/github/github_to_entries.py index 14e9b696..56279453 100644 --- a/src/khoj/processor/github/github_to_entries.py +++ b/src/khoj/processor/github/github_to_entries.py @@ -104,7 +104,12 @@ class GithubToEntries(TextToEntries): # Identify, mark and merge any new entries with previous entries with timer("Identify new or updated entries", logger): num_new_embeddings, num_deleted_embeddings = self.update_embeddings( - current_entries, DbEntry.EntryType.GITHUB, key="compiled", logger=logger, user=user + current_entries, + DbEntry.EntryType.GITHUB, + DbEntry.EntrySource.GITHUB, + key="compiled", + logger=logger, + user=user, ) return num_new_embeddings, num_deleted_embeddings diff --git a/src/khoj/processor/markdown/markdown_to_entries.py b/src/khoj/processor/markdown/markdown_to_entries.py index e0b76368..0dd71740 100644 --- a/src/khoj/processor/markdown/markdown_to_entries.py +++ b/src/khoj/processor/markdown/markdown_to_entries.py @@ -47,6 +47,7 @@ class MarkdownToEntries(TextToEntries): num_new_embeddings, num_deleted_embeddings = self.update_embeddings( current_entries, DbEntry.EntryType.MARKDOWN, + DbEntry.EntrySource.COMPUTER, "compiled", logger, deletion_file_names, diff --git a/src/khoj/processor/notion/notion_to_entries.py b/src/khoj/processor/notion/notion_to_entries.py index a4b15d4e..7a88e2a1 100644 --- a/src/khoj/processor/notion/notion_to_entries.py +++ b/src/khoj/processor/notion/notion_to_entries.py @@ -250,7 +250,12 @@ class NotionToEntries(TextToEntries): # Identify, mark and merge any new entries with previous entries with timer("Identify new or updated entries", logger): num_new_embeddings, num_deleted_embeddings = self.update_embeddings( - current_entries, DbEntry.EntryType.NOTION, key="compiled", logger=logger, user=user + current_entries, + DbEntry.EntryType.NOTION, + DbEntry.EntrySource.NOTION, + key="compiled", + logger=logger, + user=user, ) return num_new_embeddings, num_deleted_embeddings diff --git a/src/khoj/processor/org_mode/org_to_entries.py b/src/khoj/processor/org_mode/org_to_entries.py index bf6df6dc..04ce97e4 100644 --- a/src/khoj/processor/org_mode/org_to_entries.py +++ b/src/khoj/processor/org_mode/org_to_entries.py @@ -48,6 +48,7 @@ class OrgToEntries(TextToEntries): num_new_embeddings, num_deleted_embeddings = self.update_embeddings( current_entries, DbEntry.EntryType.ORG, + DbEntry.EntrySource.COMPUTER, "compiled", logger, deletion_file_names, diff --git a/src/khoj/processor/pdf/pdf_to_entries.py b/src/khoj/processor/pdf/pdf_to_entries.py index 81c2250f..3a47096a 100644 --- a/src/khoj/processor/pdf/pdf_to_entries.py +++ b/src/khoj/processor/pdf/pdf_to_entries.py @@ -46,6 +46,7 @@ class PdfToEntries(TextToEntries): num_new_embeddings, num_deleted_embeddings = self.update_embeddings( current_entries, DbEntry.EntryType.PDF, + DbEntry.EntrySource.COMPUTER, "compiled", logger, deletion_file_names, diff --git a/src/khoj/processor/plaintext/plaintext_to_entries.py b/src/khoj/processor/plaintext/plaintext_to_entries.py index fd5e1de2..d42dae30 100644 --- a/src/khoj/processor/plaintext/plaintext_to_entries.py +++ b/src/khoj/processor/plaintext/plaintext_to_entries.py @@ -56,6 +56,7 @@ class PlaintextToEntries(TextToEntries): num_new_embeddings, num_deleted_embeddings = self.update_embeddings( current_entries, DbEntry.EntryType.PLAINTEXT, + DbEntry.EntrySource.COMPUTER, key="compiled", logger=logger, deletion_filenames=deletion_file_names, diff --git a/src/khoj/processor/text_to_entries.py b/src/khoj/processor/text_to_entries.py index 4661fd9b..3d79e02e 100644 --- a/src/khoj/processor/text_to_entries.py +++ b/src/khoj/processor/text_to_entries.py @@ -78,6 +78,7 @@ class TextToEntries(ABC): self, current_entries: List[Entry], file_type: str, + file_source: str, key="compiled", logger: logging.Logger = None, deletion_filenames: Set[str] = None, @@ -95,7 +96,7 @@ class TextToEntries(ABC): if regenerate: with timer("Cleared existing dataset for regeneration in", logger): logger.debug(f"Deleting all entries for file type {file_type}") - num_deleted_entries = EntryAdapters.delete_all_entries(user, file_type) + num_deleted_entries = EntryAdapters.delete_all_entries_by_type(user, file_type) hashes_to_process = set() with timer("Identified entries to add to database in", logger): @@ -132,6 +133,7 @@ class TextToEntries(ABC): compiled=entry.compiled, heading=entry.heading[:1000], # Truncate to max chars of field allowed file_path=entry.file, + file_source=file_source, file_type=file_type, hashed_value=entry_hash, corpus_id=entry.corpus_id, diff --git a/src/khoj/search_type/text_search.py b/src/khoj/search_type/text_search.py index 14f5b770..ba2fc9ec 100644 --- a/src/khoj/search_type/text_search.py +++ b/src/khoj/search_type/text_search.py @@ -204,11 +204,12 @@ def setup( files=files, full_corpus=full_corpus, user=user, regenerate=regenerate ) - file_names = [file_name for file_name in files] + if files: + file_names = [file_name for file_name in files] - logger.info( - f"Deleted {num_deleted_embeddings} entries. Created {num_new_embeddings} new entries for user {user} from files {file_names}" - ) + logger.info( + f"Deleted {num_deleted_embeddings} entries. Created {num_new_embeddings} new entries for user {user} from files {file_names}" + ) def cross_encoder_score(query: str, hits: List[SearchResponse]) -> List[SearchResponse]: diff --git a/tests/test_text_search.py b/tests/test_text_search.py index 7d8c30fb..3d729ab5 100644 --- a/tests/test_text_search.py +++ b/tests/test_text_search.py @@ -58,7 +58,7 @@ def test_get_org_files_with_org_suffixed_dir_doesnt_raise_error(tmp_path, defaul # ---------------------------------------------------------------------------------------------------- @pytest.mark.django_db -def test_text_search_setup_with_empty_file_raises_error( +def test_text_search_setup_with_empty_file_creates_no_entries( org_config_with_only_new_file: LocalOrgConfig, default_user: KhojUser, caplog ): # Arrange From d527b644f4a36f607f46b7ef56c68ffc2a3903db Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Mon, 6 Nov 2023 23:51:43 -0800 Subject: [PATCH 06/26] Update content by source via API. Make web client use this API for config --- docs/github_integration.md | 2 +- docs/notion_integration.md | 2 +- src/khoj/interface/web/config.html | 15 +++++----- ....html => content_source_github_input.html} | 2 +- ....html => content_source_notion_input.html} | 2 +- src/khoj/routers/api.py | 30 ++++++++++--------- src/khoj/routers/web_client.py | 8 ++--- 7 files changed, 32 insertions(+), 29 deletions(-) rename src/khoj/interface/web/{content_type_github_input.html => content_source_github_input.html} (99%) rename src/khoj/interface/web/{content_type_notion_input.html => content_source_notion_input.html} (97%) diff --git a/docs/github_integration.md b/docs/github_integration.md index 6b8dce48..413dd41e 100644 --- a/docs/github_integration.md +++ b/docs/github_integration.md @@ -9,6 +9,6 @@ The Github integration allows you to index as many repositories as you want. It' ## Use the Github plugin 1. Generate a [classic PAT (personal access token)](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens) from [Github](https://github.com/settings/tokens) with `repo` and `admin:org` scopes at least. -2. Navigate to [http://localhost:42110/config/content_type/github](http://localhost:42110/config/content_type/github) to configure your Github settings. Enter in your PAT, along with details for each repository you want to index. +2. Navigate to [http://localhost:42110/config/content-source/github](http://localhost:42110/config/content-source/github) to configure your Github settings. Enter in your PAT, along with details for each repository you want to index. 3. Click `Save`. Go back to the settings page and click `Configure`. 4. Go to [http://localhost:42110/](http://localhost:42110/) and start searching! diff --git a/docs/notion_integration.md b/docs/notion_integration.md index 5fee7ff6..d3b645ca 100644 --- a/docs/notion_integration.md +++ b/docs/notion_integration.md @@ -8,7 +8,7 @@ We haven't setup a fancy integration with OAuth yet, so this integration still r ![setup_new_integration](https://github.com/khoj-ai/khoj/assets/65192171/b056e057-d4dc-47dc-aad3-57b59a22c68b) 3. Share all the workspaces that you want to integrate with the Khoj integration you just made in the previous step ![enable_workspace](https://github.com/khoj-ai/khoj/assets/65192171/98290303-b5b8-4cb0-b32c-f68c6923a3d0) -4. In the first step, you generated an API key. Use the newly generated API Key in your Khoj settings, by default at http://localhost:42110/config/content_type/notion. Click `Save`. +4. In the first step, you generated an API key. Use the newly generated API Key in your Khoj settings, by default at http://localhost:42110/config/content-source/notion. Click `Save`. 5. Click `Configure` in http://localhost:42110/config to index your Notion workspace(s). That's it! You should be ready to start searching and chatting. Make sure you've configured your OpenAI API Key for chat. diff --git a/src/khoj/interface/web/config.html b/src/khoj/interface/web/config.html index b19bbff6..4e77a8ef 100644 --- a/src/khoj/interface/web/config.html +++ b/src/khoj/interface/web/config.html @@ -19,7 +19,7 @@

Set repositories to index

- + {% if current_model_state.content %} Update {% else %} @@ -176,8 +176,9 @@ }) }; - function clearContentType(content_type) { - fetch('/api/config/data/content_type/' + content_type, { + function clearContentType(content_source) { + + fetch('/api/config/data/content-source/' + content_source, { method: 'DELETE', headers: { 'Content-Type': 'application/json', @@ -186,15 +187,15 @@ .then(response => response.json()) .then(data => { if (data.status == "ok") { - var contentTypeClearButton = document.getElementById("clear-" + content_type); + var contentTypeClearButton = document.getElementById("clear-" + content_source); contentTypeClearButton.style.display = "none"; - var configuredIcon = document.getElementById("configured-icon-" + content_type); + var configuredIcon = document.getElementById("configured-icon-" + content_source); if (configuredIcon) { configuredIcon.style.display = "none"; } - var misconfiguredIcon = document.getElementById("misconfigured-icon-" + content_type); + var misconfiguredIcon = document.getElementById("misconfigured-icon-" + content_source); if (misconfiguredIcon) { misconfiguredIcon.style.display = "none"; } diff --git a/src/khoj/interface/web/content_type_github_input.html b/src/khoj/interface/web/content_source_github_input.html similarity index 99% rename from src/khoj/interface/web/content_type_github_input.html rename to src/khoj/interface/web/content_source_github_input.html index 0e41645a..ff82b1f2 100644 --- a/src/khoj/interface/web/content_type_github_input.html +++ b/src/khoj/interface/web/content_source_github_input.html @@ -125,7 +125,7 @@ } const csrfToken = document.cookie.split('; ').find(row => row.startsWith('csrftoken'))?.split('=')[1]; - fetch('/api/config/data/content_type/github', { + fetch('/api/config/data/content-source/github', { method: 'POST', headers: { 'Content-Type': 'application/json', diff --git a/src/khoj/interface/web/content_type_notion_input.html b/src/khoj/interface/web/content_source_notion_input.html similarity index 97% rename from src/khoj/interface/web/content_type_notion_input.html rename to src/khoj/interface/web/content_source_notion_input.html index 965c1ef5..18eb5a7f 100644 --- a/src/khoj/interface/web/content_type_notion_input.html +++ b/src/khoj/interface/web/content_source_notion_input.html @@ -42,7 +42,7 @@ } const csrfToken = document.cookie.split('; ').find(row => row.startsWith('csrftoken'))?.split('=')[1]; - fetch('/api/config/data/content_type/notion', { + fetch('/api/config/data/content-source/notion', { method: 'POST', headers: { 'Content-Type': 'application/json', diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py index 84e63b09..c2002048 100644 --- a/src/khoj/routers/api.py +++ b/src/khoj/routers/api.py @@ -61,11 +61,13 @@ api = APIRouter() logger = logging.getLogger(__name__) -def map_config_to_object(content_type: str): - if content_type == "github": +def map_config_to_object(content_source: str): + if content_source == "github": return GithubConfig - if content_type == "notion": + if content_source == "notion": return NotionConfig + if content_source == "computer": + return "Computer" async def map_config_to_db(config: FullConfig, user: KhojUser): @@ -164,7 +166,7 @@ async def set_config_data( return state.config -@api.post("/config/data/content_type/github", status_code=200) +@api.post("/config/data/content-source/github", status_code=200) @requires(["authenticated"]) async def set_content_config_github_data( request: Request, @@ -192,7 +194,7 @@ async def set_content_config_github_data( return {"status": "ok"} -@api.post("/config/data/content_type/notion", status_code=200) +@api.post("/config/data/content-source/notion", status_code=200) @requires(["authenticated"]) async def set_content_config_notion_data( request: Request, @@ -219,11 +221,11 @@ async def set_content_config_notion_data( return {"status": "ok"} -@api.delete("/config/data/content_type/{content_type}", status_code=200) +@api.delete("/config/data/content-source/{content_source}", status_code=200) @requires(["authenticated"]) -async def remove_content_config_data( +async def remove_content_source_data( request: Request, - content_type: str, + content_source: str, client: Optional[str] = None, ): user = request.user.object @@ -233,15 +235,15 @@ async def remove_content_config_data( telemetry_type="api", api="delete_content_config", client=client, - metadata={"content_type": content_type}, + metadata={"content_source": content_source}, ) - content_object = map_config_to_object(content_type) + content_object = map_config_to_object(content_source) if content_object is None: - raise ValueError(f"Invalid content type: {content_type}") - - await content_object.objects.filter(user=user).adelete() - await sync_to_async(EntryAdapters.delete_all_entries)(user, content_type) + raise ValueError(f"Invalid content source: {content_source}") + elif content_object != "Computer": + await content_object.objects.filter(user=user).adelete() + await sync_to_async(EntryAdapters.delete_all_entries_by_source)(user, content_source) enabled_content = await sync_to_async(EntryAdapters.get_unique_file_types)(user) return {"status": "ok"} diff --git a/src/khoj/routers/web_client.py b/src/khoj/routers/web_client.py index 65292ccf..8016cfce 100644 --- a/src/khoj/routers/web_client.py +++ b/src/khoj/routers/web_client.py @@ -150,7 +150,7 @@ def config_page(request: Request): ) -@web_client.get("/config/content_type/github", response_class=HTMLResponse) +@web_client.get("/config/content-source/github", response_class=HTMLResponse) @requires(["authenticated"], redirect="login_page") def github_config_page(request: Request): user = request.user.object @@ -177,7 +177,7 @@ def github_config_page(request: Request): current_config = {} # type: ignore return templates.TemplateResponse( - "content_type_github_input.html", + "content_source_github_input.html", context={ "request": request, "current_config": current_config, @@ -187,7 +187,7 @@ def github_config_page(request: Request): ) -@web_client.get("/config/content_type/notion", response_class=HTMLResponse) +@web_client.get("/config/content-source/notion", response_class=HTMLResponse) @requires(["authenticated"], redirect="login_page") def notion_config_page(request: Request): user = request.user.object @@ -201,7 +201,7 @@ def notion_config_page(request: Request): current_config = json.loads(current_config.json()) return templates.TemplateResponse( - "content_type_notion_input.html", + "content_source_notion_input.html", context={ "request": request, "current_config": current_config, From 6e957584acd7d1df87b73a70fefe94fc6ab217ec Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Tue, 7 Nov 2023 02:08:06 -0800 Subject: [PATCH 07/26] Create config page on web app to manage computer files indexed by Khoj Remove the table of all files indexed by Khoj. This seems overkill and doesn't match the UI semantics of the other data sources like Github, Notion. Create instead a data source card for computer files with the same update, disable semantics of the Github and Notion data source cards Users can disable each data source from its card on the main config page. They can see/delete individual files indexed from the computer data source once they click into the computer files data source card on the config page --- .../interface/web/assets/icons/computer.png | Bin 0 -> 10517 bytes src/khoj/interface/web/config.html | 114 ++++++------------ .../web/content_source_computer_input.html | 107 ++++++++++++++++ src/khoj/routers/api.py | 25 +--- src/khoj/routers/web_client.py | 35 +++--- 5 files changed, 165 insertions(+), 116 deletions(-) create mode 100644 src/khoj/interface/web/assets/icons/computer.png create mode 100644 src/khoj/interface/web/content_source_computer_input.html diff --git a/src/khoj/interface/web/assets/icons/computer.png b/src/khoj/interface/web/assets/icons/computer.png new file mode 100644 index 0000000000000000000000000000000000000000..12473485486c7dbf1b90dbc3d19ae9cc0e340171 GIT binary patch literal 10517 zcmV+wDeBgVP)005u}1^@s6i_d2*001c5NklH{_ z$7W{cJhM|n$ax4`m3AL?4H)V@2J(S>7E_E zed^KPpMe5#;K-2^V*o})q$WvwAOMi~k3vaq09Z!^a_I1Jc!5J{6z0JfDcoy+HJfsteJp*V!t7brH@rexXI0=!J}F#ylOK@J}|(K1NR=6U{L zS(bPI^2<*nzx?tuwl=p=Y6%ZXO*sBq({Ydb@ifx8oUg{bNJy4pW_lWfgM(02RDoy{ zWGC_q${uC_bWAk(V^)@_SQ(3CBIxTMz|hd}?$2hHWe=r*e*4YAFFyYiMbT$Pd+4$X&dp(V zb{3^7z;;O#zu}8{B-j==az{v2SxZDJN#qe958qhY8_JI@%Mz1Q)4L=*Eh5+DH{EpW z13&)w!$qHb@)1BH%d$UZS18@gV&>l#P60+xOknlp0QSndDXtD#phru(EpAHw6GPB(stf*hGAN zsnN{+$vtF^JT>ar3P#6?NbAo`tX>^_-E0b?_Q(N3X|v9&zX}4LtK?06-Gc3AdG;~ zcS5>&S~Tl+abD8@{EpE-T&=Gxjb;BfHDZdY-$D2{L2!4l`(m}H-5_WuwWlayacxyAV+Y8Rj%(Q#) zy!|IUs$sR5kYMF)Sb@aOz5qPm{5{l`5(*;!oxSrw?j+aEctg@=Y3+JB9KhimFCD@0 z6njv|%Fa25ot*Gl{1?>otFIcSIq$F9*|mB~qizNe?1o5WqO%SNCrgnOrNW7-m9^ZV zrg;Wg`>2|vUs`V~*p;ps1rOU&)SPl(+|~FnK=YNQW*|wa?y=p;H;furxW5CTz!)VN z4#o7%=QrEc}S9D;vfFu?{2>Q^1ru; zix&F*I7UB6#6w;2;RhcMVZ>auRu##tkDX7I#1?0;IXmlVFyM3`$##gUOKl&2^zm>? zcyv{esFS4_%SbZ&=y2R;XXP7E^hvB)H*(7`({4MaG2-( zl6rmYM5O}7Z_PfUP*x|NZ5EZE<xo>^zJCCmD=;?|@ zLDWud+XD^W(^V_{t5>haDmct@*v4u{Nz*UZrBgYC#C5`(J47U5&n+sNuyo4JrQjYI z$uU)pvGqjNxjEoysi&QoZsh}t0-XVw!p{Jy0&v${r~cSZ3cqmHpg3$*yTS{xX$337 z<7uk}=Yl4#&i328@oK(d_7Zo!fAsj#a8|A8Iqhu|j5~O(!pkie0ki-g@67o!Ro~^+ z(gl(`VwTt*c#e^>>T3)ByTAL}n=fw-<9$rX2v%aEU}hc){}t7$lHJg+&6S zIP7Ch39z7MCjMw_d(PZG{p8bI6XbWHh%N?_rYq_uQ|TVN_jEnD?GO9#SOo_mN^+iS zS?HI9vYe}85g+8d+jLCCrBLG`N>nP?l!Cczx>qzU{SVjuuY%8?Ki?iddVFiD`c>Lg zmsh-`MMEZBwDILMVXsBr2|j;ye@Ij2klHYfhTZDq=b2&d^NPicE`8m4TaG=e>N5;{ z`RGJswosI88a?U_J@ zfKhw`zD3O-(f`P(8P&aQhbscLhF)ZBImWf_=_~qZkTwU-q0c}&~YW7PfHIl6{fI3?hdo>K|a?uobve% zK|uot<||<9&nzw)J&tjVhTHT8XS0+_R9a}Z_{(`=w^jW1=LeME25Osa394#!%?YZZL-<)f3z z!v7$3@PSgllIl2|LynC8gyc~22-X%gSDRZwD)ZdQG9o$Q zHv~q(g4d!A+3UC%SOr(;vUTXh6IYSLXV*{gOO2jwt% zskxuj`3l%$;`cLH{>B(R2lHGyfwp?L+(s&FaMEIsCjJR9;`ZsF{Qd0tU3e3>HpkZphq{dWckX&LKWiYFk zk)4fIl@W2^Q;4*?h`KMODygoz$zYBPKrBmX zA=IGc%T-CXRWExzFwc#+k?ZS|t5&9`$CJUe043Y9N!;(AO-nYPdIYW6u4;VmE`)d< z-}+H*Qr1$Mn?~8V&r)|OxA9NoXOvaFjOa6~U6NJ5tE`4uJm3DW>N3$b_wiMlbOF?< z{Pa_O0D4FI?wMdU-~vG5QFAC1st$d1YExyxef=&$&!wYMk;GtCS0gy3~zYD{QQf*{;R*d`H%nn5A%irB}beb zQX}Tc%=ia6b7g1U`h|%(4rdN>QxC`Sn2$4S$2#n`MKEnbBdO1_-mZE+9(ZmeUl&S` z)BA$Zb2y~-8B$NH`S*YD!$(&|G-u)gh4x+T`H9d*w&yd9vw*L>ezl#q0bSjh=Wag# ze7C)&RP`|J6v3{zQ&p{nTnds!kjSdIrw_qdcC@Ddro^yJ{qMvQhWuB0J!IOiv~Fge zGedz=>kEiP?IH>LDX0`c4%rqasKX8U2w$xPEo&rBYGZ(jna*K7H;aoN@rd123C(-Uc)^1 z?AcRySe<9IS{FLVyxD?OW%WMoeI4_m0(?rrXas3F_m>{hTUkZ2mxpJ;nK8n7nYXU; zHE@nL0+G8Im7}nNv%yqmK@C~XmBOPix9usQ;nkfWV!=MSg{5-|%$XEVe3=}Fp$byr z82-M>@+z^eE_)%WdR(vHyuMupZ{tQj`D8iYpHj24LJs*tvX?24LejE-w;M*tcLyRi`RXNWoj4>U+n-*m59OMt!EK&148AG8xo=Icn<#Wt<97 zUt=d7K@ZIOboQ*K={Q(cL6sddx5Sv$(-Uz73#vt;Et;TNZD_gUlEgo%Kg#55UEz~N z0nQlPv2$;&f+zFb>C`1jbC-E5@06rHF{2TPV<|3pFHcL+o2P+oa64|ZB;YI*2s^t7 zS$8^+yxuynNIrY7F&wzUI4{%t3MUwBUmoUqKshe`T%B?0W-FGm!pGAfz@g9`(Q`<7 z=7c~XQzC49LQ&4bIROF=%yTJ=id!&NpI^!Qeq>V9dyQisJvS87Binw`E1KlJmoFwA z%7rbscgY9w$@u*w0X}%ce4S&6Ke^QjUJifF?6RPCyU03WPTllTpUR!+s6tC$0e3HE zsTD-kq~NHQY&`M3532%9O1Svt4xGo)qcU6!o_ZZH_{Uyib;-<|Z^3R>**fhJ4j-0Q z!KY770*M8ncfsr~=@Q}P)y~PG^`iC_G9^#XRvkiHWRy687pEFVM>r$n_$2S1v*M8S zz->nx${}#)1{#epZUUBwf`iZ@&cYbD0SsqijnXG84W{!>8}!hrJ$~|db0>ua)9>l5a*AV=23jHL8VTQd*$jMYvkT(Z^WF)8U*SzF)6izaF^-_qY+J*iD`1*!|vB4_kbM zpQ6tA%<>Sga`J$)D>ExiZ?`Gdz}=fB+SW$F#=K=Wm*B!+hqX23F3W`hoJtEJMWZBd zDF-l}?;#T82RR1`@WThdixRBOxIwXLgvze)~3 zo*5x`6V?G0T$D8|8#PN#ifQliHn^FD?80VeW!VkEawO_bwj~~f&HfJC#tX0X=PIf) zA14B^jwWC2w;&j{9Zm{EF+EOR;zz~`1DkQ*(%<`lLdG0EaX@%yT}55g3Vl zJ^OxN=0u9czwT2i+A0Ffvd@j*(YzetzR7`t;+VJFyt;E3u7WA7XmZ5#Zc{!mVm5KF zURN8jg*=3`!8P!#xu>{@Qq`EQiFh%FcnBmTkll!a{(R5?Xyx2sKw5A{AgU7dgmzwV z5n6Kzlb)UALMXnS_$8kaf3Luj!7^oZdDZlA1a}rArtF49n_RRV2S zpSA*Gr-5|%R!X}VrnrlSc-|WY^ur}{QW&e?un{YBruUohbE~dgIq%nI$=@#%+rM^c z@*j@<8mr*2jF6Bz^}L(GB`v$@PwIe2jvw-X+W!T(7MY08_&mu3y9jE0%@a!XymtiO zRdmn(VTThcAY7A6D2(C2&;~ql2;faefk1z4!NZ%)1dbD)$jC}Iu5XaS|Bk3fHM;`6 zB<>ok&T^^N=B-_B+?$Z&3levs4+4l3GXe>`Zp1Z7`=)E-|ux zwgPI+xG~G>v|Kj78uRc1ZtIuM2y1CxXP6ISh)Xg3E8uVjA+9=|_WAxuK_EW-PBv#c zXgH)=O8+QS2}Ik)eqv-g3lo*JGGx1H*)G9{6{lJ45adnmkHSCCmf$0+A0$5tO%_EP z;A~}@zpz+hm?xwQZOlzcPm;Zlv5GP?JzoBu zpFwyU@~DqdjJ)%AuE@qCc_v;1-yF#D$k$PZ4=uomnm~s6OW>}z*%V|`j;oN*5~c)W zOzOy6AN(I)STfSsl(EA+Hxe4%TwiZ;qRK}1SI^OfqdFf#9e#N{XXOcsUaiyffV|=& zwhKdio@0(>>wJwmVcV#JMrkA`YSY;^(DPCfzN;L3*j9a7pbugmwE1D>dnG> zEo4XN`~bx!65;nepaXw*=TbA(dHJHi5@&RF<@oo{@2xw;_H$puHzH2vxe`c>0M|WU zCLNu~nz&(8!^ow}*prdhBAJ5&td5>IC$^~P^jvwxPv*W%hpS;Mfs4O>s@VWGj28(H z_o{IVR>zSgl9#7>&=^kOy&5l?eJDQ~o*|xLj-U-*N3wgy;V$) zghl~p?`YHc^Fhs4u^6Z!`LnfCH^#&u zE_QdMW$nGp+UlU(F`3#@Wnk2So$v$p-G-~yicqzs?pc z?VXt=A35Kvfhu1;dBg5oR~P%i|6)(^j!4aOus9SyrK>+2)`hC$b~0Ry>{>$d`FOeE z$y)m!j&D16zB}Ig1<)gB-{1xJ(+vC=ScX%k>UUk^5r!>966jtp}^ZZ91b{FO3#RJA^EsG6w~2(^Z9%n?u<!idMgF4?#=4VyIXGp^^HR5VB1w2^&2g(%j5c_twWONDkSeE=*7eCJOZ=q4q)&X@3EE ztb#)t9?^4Lk*0x162^&`!Am8Tln0aO^X;SHN0eoV!F?W

1k z=MD*VZ&%C0UCm#nMYpw=V|V_`)}S-s@V;Ai;@-#8DU-tUTYm79?IQ#)xq~X%2qm^+ zYbPYUPpMbsxDetiphvpDzWw)beKW7u()Re#qb;s)psQ=Q|Ea1TJ(Ze*J^dX`X%Nl63l6V8NPy_U0$)udn~1L^LFaOGN~YRmB_lL zeR5unCa}=(NcJQQR>{jeyRE^|+>d=4-(n#B!Ny---5tjIcQ9}1i3I!N`SX*<^yTGc z@oymW84R>rndQ|1Chvj&k@{z?+ricmn7@ZPd@s^h;J2k|U#^0~D)Qn*DW;*ia-FM% z0hk(;x-pp!7PoSLKtD)DYW})SLCi})t_?Pg2HE2nRr5H(Gx0-pHIS;UrMFP^Z(tR? zz&kFyWbG|h*Cy!-Zy|h=}92Ig3CUVE3sAhhh^_Kr8J_|Rp9+-{enfrcoy8D0z4eC zQmwpt{c2ZCAGsc-vWAI?-dO!Jw&}~C{_GcD{P7?E;mtq)^FN*T{)g+~1kQ1Nc*_gM zTxpLsYhKyk{qFa+U;p*rd^A?T;hM8Y&_7%c*YK9k%yHg`E1TD^E-vNvXWxRyvG3pd z*0(CPzon|5UUS_{%sVcO#oKdSF|CTP6rO*N{0d6V=%Xu&?uZ8?i2}j`vT17vuNE@irf}JJl>Sp_f-5d@v+!bIJck zN5gXLbu6TB!aLA>9-iAm_}QP)LTvtSGrL-7`;aOBJ)3`@T03inqg~g?s3Jw`v1Ff^f<2P5A^uo z#sasXsyIDad`VQjn+EUuN@=@{_&O6{cV#WY&F2=Mv?;=ZdaA!v<;YP@VL)XYzJSEu zZB5Pg6jLAUKz*C4@9W7eh&Jal6QIN+G#zUJAE(be~`P(HPi0Jj{Rrfd|8rrB>$H$v|RsaAc|P4Cb{k+ zv!3xAG}CNOkf{I3_(SqzEkqnt{b`%V{};uip-*2a#x_}g8vki(%K*J`9B03JnKED@ z0|O-m-t`)|!f&|?271W(QB3a?VvGUtA~kmG81BIS&LHktE{kvrj@G((+6}CL6jEuJ z4%~A(Zrpe^44VS<$zS8o zpz*lI!?pFUm&RsSzT*DnmtU!`zy78N{MrvmPwI2eyGSzc51i-w{`@?&(DOVHkvE!| zLO>B?Y6jOiKOF!I{_sG6sE2XW*&tbx3SjF z4lued;cBJP=3D+>BY7LRzs6{q^{e-=L~6+mG@eImJ%fAd-zVSZkjc^d_t#IWO>ZQh zq&J+WPQECLaq2?9>h0|tgXZ@cYn_Xc^seMV6O8L=WffozJdF9FZT9+9m1#P@80mXs z#*E$-#+^@V6DrlTQGcB!yNW^CN*x`YkIg#ktbfVrq(lC!hJH{PYOVh?#(aSAC$)*< z0dparefr5;#&K2yA>$Mcwi2&b(NPrkyVh=rsE<`nL?3+cVL7R(p2J%ED*Cp|LnqC# zP6q+u?ZPPH%Q}Pb)H@*RIh7N!d08|Kg22G@8R+U8UkImW$|(lw2C~4@d&0#`#B)MBkbpjfG|W}2nhRn&O`$GDI{R5T(6{} zIA;W;LGmER=)iMb$M-$u`)7^9C4kk4&zZ&MwBwJ9QC z3<7E%k5}R`DIG;0ur(*5yWsh}w)S>KxG5U_M{%s^0!t)dxtuQow&i3ZI%xB|(FAKY z!SsSQMFivsSd)OI62D&AUaW+)W)s}qmXHrWy3`Sn+fxRD0Ps6=MnFF=^MFf`c&INB z@H$Nd1pi_Vj`+o*h0zStD-7gkWVI|jbl_=q!Lg3syNH%``*f7 zIJg?3-%>dc{ph2Q=750a)UK|s)90LX4#js~Kt_@>bq9m@^zF@7<9Z^@eJOR<)Xl3&468Qd=Al|Hi(ut>u z!m#F`oaTvm;+Q{(IIi|hEyQSj&OoWee&Edo0W&7}0(b{(cqBZ@i7;rlc=;0QIw<5z z838FIppek9wp*9WSHnI_#&LWBlE=0r0Xg^7Dgr~f;L|q7=p$g*tr9~ja*XvXmj`RU z^j%B9nN3D5Uml<)SO=g&5)!^K=2XOOjJS)D{1ks*Ek!^YAnO7nEe0cP0t4;^BW-OV z9O)7?GMinDEJ?Ox>yr>L2Aby$jnP6vh5>mGy8>vIU(PbzD2l=jP`;Ou_aWqa1?2p$ z6{c$RJ@?#`)nhG|27xw1a8qCdU?CVrJO^N@#j6k`9}preXDY{4Ey8%MXIlADK9a3U zJb>B(qz-0M*~3uEJ*m8-fscWAkO$K>`b~t#WRigQ-~T|yrc~5wFv1ZCZmqSR-XR4_UV8amqxrE z4F)9hXoSl!R~ESbTMY+68?OPMV4QfGCQVFhdN9_7M8fEO0Qei2z#kelCZ)$QjR*UX za3tkaU5W|><>&fwfd8X2N5EmjhN&#c@ZrPP2I-aql#oxFT_GHa*47rlIPJ(|NHC;R z+bZ#R_!8HH3Eo36y%{2h_mFH9_Vsuz@$hN_l8^)xPbl|m#4QZ$gv;xJC{qN4K~

  • aXw=eA6-b=){p6n7dkwC+xwEV?BzlJ(cF5Y= z;X-)OyCJw1^penX108^iYX_Rp;vuG|fwW9SPxnYSqV^h6fsPXoHwct~QHc8)`ZMUq zpg+voAVlx7%PuOz(gm~w9SH)!15AsZAFPx5loRwPm;j6i;>=bIwOSVXYt;lPq>z6A XUvqKATJtxd00000NkvXXu0mjfHN?UZ literal 0 HcmV?d00001 diff --git a/src/khoj/interface/web/config.html b/src/khoj/interface/web/config.html index 4e77a8ef..1410f20b 100644 --- a/src/khoj/interface/web/config.html +++ b/src/khoj/interface/web/config.html @@ -3,9 +3,40 @@
    -

    Plugins

    +

    Content

    + -
    -

    Manage Data

    -
    -
    - -
    -
    -
    -
    -
    @@ -363,70 +384,5 @@ } }) } - - // Get all currently indexed files - function getAllFilenames() { - fetch('/api/config/data/all') - .then(response => response.json()) - .then(data => { - var indexedFiles = document.getElementsByClassName("indexed-files")[0]; - indexedFiles.innerHTML = ""; - - if (data.length == 0) { - document.getElementById("delete-all-files").style.display = "none"; - indexedFiles.innerHTML = ""; - } else { - document.getElementById("delete-all-files").style.display = "block"; - } - - for (var filename of data) { - let fileElement = document.createElement("div"); - fileElement.classList.add("file-element"); - - let fileNameElement = document.createElement("div"); - fileNameElement.classList.add("content-name"); - fileNameElement.innerHTML = filename; - fileElement.appendChild(fileNameElement); - - let buttonContainer = document.createElement("div"); - buttonContainer.classList.add("remove-button-container"); - let removeFileButton = document.createElement("button"); - removeFileButton.classList.add("remove-file-button"); - removeFileButton.innerHTML = "🗑️"; - removeFileButton.addEventListener("click", ((filename) => { - return () => { - removeFile(filename); - }; - })(filename)); - buttonContainer.appendChild(removeFileButton); - fileElement.appendChild(buttonContainer); - indexedFiles.appendChild(fileElement); - } - }) - .catch((error) => { - console.error('Error:', error); - }); - } - - // Get all currently indexed files on page load - getAllFilenames(); - - let deleteAllFilesButton = document.getElementById("delete-all-files"); - deleteAllFilesButton.addEventListener("click", function(event) { - event.preventDefault(); - fetch('/api/config/data/all', { - method: 'DELETE', - headers: { - 'Content-Type': 'application/json', - } - }) - .then(response => response.json()) - .then(data => { - if (data.status == "ok") { - getAllFilenames(); - } - }) - }); - {% endblock %} diff --git a/src/khoj/interface/web/content_source_computer_input.html b/src/khoj/interface/web/content_source_computer_input.html new file mode 100644 index 00000000..01992d5e --- /dev/null +++ b/src/khoj/interface/web/content_source_computer_input.html @@ -0,0 +1,107 @@ +{% extends "base_config.html" %} +{% block content %} +
    +
    +

    + files + Files +
    +

    Manage files from your computer

    +

    Download the Khoj Desktop app to sync files from your computer

    +
    +

    +
    +
    + +
    +
    +
    +
    +
    +
    + + +{% endblock %} diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py index c2002048..fabfebe1 100644 --- a/src/khoj/routers/api.py +++ b/src/khoj/routers/api.py @@ -270,10 +270,11 @@ async def remove_file_data( return {"status": "ok"} -@api.get("/config/data/all", response_model=List[str]) +@api.get("/config/data/{content_source}", response_model=List[str]) @requires(["authenticated"]) async def get_all_filenames( request: Request, + content_source: str, client: Optional[str] = None, ): user = request.user.object @@ -285,27 +286,7 @@ async def get_all_filenames( client=client, ) - return await sync_to_async(list)(EntryAdapters.aget_all_filenames(user)) - - -@api.delete("/config/data/all", status_code=200) -@requires(["authenticated"]) -async def remove_all_config_data( - request: Request, - client: Optional[str] = None, -): - user = request.user.object - - update_telemetry_state( - request=request, - telemetry_type="api", - api="delete_all_config", - client=client, - ) - - await EntryAdapters.adelete_all_entries(user) - - return {"status": "ok"} + return await sync_to_async(list)(EntryAdapters.aget_all_filenames_by_source(user, content_source)) @api.post("/config/data/conversation/model", status_code=200) diff --git a/src/khoj/routers/web_client.py b/src/khoj/routers/web_client.py index 8016cfce..3e568bc7 100644 --- a/src/khoj/routers/web_client.py +++ b/src/khoj/routers/web_client.py @@ -110,25 +110,14 @@ def login_page(request: Request): def config_page(request: Request): user = request.user.object user_picture = request.session.get("user", {}).get("picture") - enabled_content = set(EntryAdapters.get_unique_file_types(user).all()) + enabled_content_source = set(EntryAdapters.get_unique_file_source(user).all()) successfully_configured = { - "pdf": ("pdf" in enabled_content), - "markdown": ("markdown" in enabled_content), - "org": ("org" in enabled_content), - "image": False, - "github": ("github" in enabled_content), - "notion": ("notion" in enabled_content), - "plaintext": ("plaintext" in enabled_content), + "computer": ("computer" in enabled_content_source), + "github": ("github" in enabled_content_source), + "notion": ("notion" in enabled_content_source), } - if state.content_index: - successfully_configured.update( - { - "image": state.content_index.image is not None, - } - ) - conversation_options = ConversationAdapters.get_conversation_processor_options().all() all_conversation_options = list() for conversation_option in conversation_options: @@ -209,3 +198,19 @@ def notion_config_page(request: Request): "user_photo": user_picture, }, ) + + +@web_client.get("/config/content-source/computer", response_class=HTMLResponse) +@requires(["authenticated"], redirect="login_page") +def computer_config_page(request: Request): + user = request.user.object + user_picture = request.session.get("user", {}).get("picture") + + return templates.TemplateResponse( + "content_source_computer_input.html", + context={ + "request": request, + "username": user.username, + "user_photo": user_picture, + }, + ) From 404d47f1a1fbf1ea37c539f393695f92b6f94aec Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Tue, 7 Nov 2023 02:20:11 -0800 Subject: [PATCH 08/26] Bubble up content indexing errors to notify user on client apps --- src/khoj/configure.py | 13 +++++++------ src/khoj/routers/indexer.py | 21 ++++++++++++++++----- tests/conftest.py | 2 +- 3 files changed, 24 insertions(+), 12 deletions(-) diff --git a/src/khoj/configure.py b/src/khoj/configure.py index bc9e9bf8..ecd35cf9 100644 --- a/src/khoj/configure.py +++ b/src/khoj/configure.py @@ -1,5 +1,4 @@ # Standard Packages -import sys import logging import json from enum import Enum @@ -109,7 +108,6 @@ def configure_server( state.search_models = configure_search(state.search_models, state.config.search_type) initialize_content(regenerate, search_type, init, user) except Exception as e: - logger.error(f"🚨 Failed to configure search models", exc_info=True) raise e finally: state.config_lock.release() @@ -125,7 +123,7 @@ def initialize_content(regenerate: bool, search_type: Optional[SearchType] = Non else: logger.info("📬 Updating content index...") all_files = collect_files(user=user) - state.content_index = configure_content( + state.content_index, status = configure_content( state.content_index, state.config.content_type, all_files, @@ -134,8 +132,9 @@ def initialize_content(regenerate: bool, search_type: Optional[SearchType] = Non search_type, user=user, ) + if not status: + raise RuntimeError("Failed to update content index") except Exception as e: - logger.error(f"🚨 Failed to index content", exc_info=True) raise e @@ -165,13 +164,15 @@ def update_search_index(): logger.info("📬 Updating content index via Scheduler") for user in get_all_users(): all_files = collect_files(user=user) - state.content_index = configure_content( + state.content_index, success = configure_content( state.content_index, state.config.content_type, all_files, state.search_models, user=user ) all_files = collect_files(user=None) - state.content_index = configure_content( + state.content_index, success = configure_content( state.content_index, state.config.content_type, all_files, state.search_models, user=None ) + if not success: + raise RuntimeError("Failed to update content index") logger.info("📪 Content index updated via Scheduler") except Exception as e: logger.error(f"🚨 Error updating content index via Scheduler: {e}", exc_info=True) diff --git a/src/khoj/routers/indexer.py b/src/khoj/routers/indexer.py index 1bbf53c2..a7a1249d 100644 --- a/src/khoj/routers/indexer.py +++ b/src/khoj/routers/indexer.py @@ -126,7 +126,7 @@ async def update( # Extract required fields from config loop = asyncio.get_event_loop() - state.content_index = await loop.run_in_executor( + state.content_index, success = await loop.run_in_executor( None, configure_content, state.content_index, @@ -138,6 +138,8 @@ async def update( False, user, ) + if not success: + raise RuntimeError("Failed to update content index") logger.info(f"Finished processing batch indexing request") except Exception as e: logger.error(f"Failed to process batch indexing request: {e}", exc_info=True) @@ -145,6 +147,7 @@ async def update( f"🚨 Failed to {force} update {t} content index triggered via API call by {client} client: {e}", exc_info=True, ) + return Response(content="Failed", status_code=500) update_telemetry_state( request=request, @@ -182,18 +185,19 @@ def configure_content( t: Optional[state.SearchType] = None, full_corpus: bool = True, user: KhojUser = None, -) -> Optional[ContentIndex]: +) -> tuple[Optional[ContentIndex], bool]: content_index = ContentIndex() + success = True if t is not None and not t.value in [type.value for type in state.SearchType]: logger.warning(f"🚨 Invalid search type: {t}") - return None + return None, False search_type = t.value if t else None if files is None: logger.warning(f"🚨 No files to process for {search_type} search.") - return None + return None, True try: # Initialize Org Notes Search @@ -209,6 +213,7 @@ def configure_content( ) except Exception as e: logger.error(f"🚨 Failed to setup org: {e}", exc_info=True) + success = False try: # Initialize Markdown Search @@ -225,6 +230,7 @@ def configure_content( except Exception as e: logger.error(f"🚨 Failed to setup markdown: {e}", exc_info=True) + success = False try: # Initialize PDF Search @@ -241,6 +247,7 @@ def configure_content( except Exception as e: logger.error(f"🚨 Failed to setup PDF: {e}", exc_info=True) + success = False try: # Initialize Plaintext Search @@ -257,6 +264,7 @@ def configure_content( except Exception as e: logger.error(f"🚨 Failed to setup plaintext: {e}", exc_info=True) + success = False try: # Initialize Image Search @@ -274,6 +282,7 @@ def configure_content( except Exception as e: logger.error(f"🚨 Failed to setup images: {e}", exc_info=True) + success = False try: github_config = GithubConfig.objects.filter(user=user).prefetch_related("githubrepoconfig").first() @@ -291,6 +300,7 @@ def configure_content( except Exception as e: logger.error(f"🚨 Failed to setup GitHub: {e}", exc_info=True) + success = False try: # Initialize Notion Search @@ -308,12 +318,13 @@ def configure_content( except Exception as e: logger.error(f"🚨 Failed to setup GitHub: {e}", exc_info=True) + success = False # Invalidate Query Cache if user: state.query_cache[user.uuid] = LRU() - return content_index + return content_index, success def load_content( diff --git a/tests/conftest.py b/tests/conftest.py index fbb98476..59104123 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -196,7 +196,7 @@ def chat_client(search_config: SearchConfig, default_user2: KhojUser): # Index Markdown Content for Search all_files = fs_syncer.collect_files(user=default_user2) - state.content_index = configure_content( + state.content_index, _ = configure_content( state.content_index, state.config.content_type, all_files, state.search_models, user=default_user2 ) From 779fa531a5ff806766bc25cda501aaff0ad23afe Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Tue, 7 Nov 2023 03:17:42 -0800 Subject: [PATCH 09/26] Prevent Desktop app triggering multiple simultaneous syncs to server Lock syncing to server if a sync is already in progress. While the sync save button gets disabled while sync is in progress, the background sync job can still trigger a sync in parallel. This sync lock prevents that --- src/interface/desktop/main.js | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/interface/desktop/main.js b/src/interface/desktop/main.js index 1d5c4be2..045144cc 100644 --- a/src/interface/desktop/main.js +++ b/src/interface/desktop/main.js @@ -110,6 +110,15 @@ function filenameToMimeType (filename) { } function pushDataToKhoj (regenerate = false) { + // Don't sync if token or hostURL is not set or if already syncing + if (store.get('khojToken') === '' || store.get('hostURL') === '' || store.get('syncing') === true) { + const win = BrowserWindow.getAllWindows()[0]; + if (win) win.webContents.send('update-state', state); + return; + } else { + store.set('syncing', true); + } + let filesToPush = []; const files = store.get('files') || []; const folders = store.get('folders') || []; @@ -192,11 +201,13 @@ function pushDataToKhoj (regenerate = false) { }) .finally(() => { // Syncing complete + store.set('syncing', false); const win = BrowserWindow.getAllWindows()[0]; if (win) win.webContents.send('update-state', state); }); } else { // Syncing complete + store.set('syncing', false); const win = BrowserWindow.getAllWindows()[0]; if (win) win.webContents.send('update-state', state); } From 7c424e0d5f15dea82cac9932d6425aa2c7b7428c Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Tue, 7 Nov 2023 03:37:16 -0800 Subject: [PATCH 10/26] Enable deleting all indexed desktop files from Khoj via Desktop app --- src/interface/desktop/config.html | 3 +++ src/interface/desktop/main.js | 14 ++++++++++++++ src/interface/desktop/preload.js | 3 ++- src/interface/desktop/renderer.js | 6 ++++++ 4 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/interface/desktop/config.html b/src/interface/desktop/config.html index 3f8e19d9..c63a2a5c 100644 --- a/src/interface/desktop/config.html +++ b/src/interface/desktop/config.html @@ -96,6 +96,9 @@
    +
    + +
    diff --git a/src/interface/desktop/main.js b/src/interface/desktop/main.js index 045144cc..e6524d73 100644 --- a/src/interface/desktop/main.js +++ b/src/interface/desktop/main.js @@ -317,6 +317,19 @@ async function syncData (regenerate = false) { } } +async function deleteAllFiles () { + try { + store.set('files', []); + store.set('folders', []); + pushDataToKhoj(true); + const date = new Date(); + console.log('Pushing data to Khoj at: ', date); + } catch (err) { + console.error(err); + } +} + + let firstRun = true; let win = null; const createWindow = (tab = 'chat.html') => { @@ -397,6 +410,7 @@ app.whenReady().then(() => { ipcMain.handle('syncData', (event, regenerate) => { syncData(regenerate); }); + ipcMain.handle('deleteAllFiles', deleteAllFiles); createWindow() diff --git a/src/interface/desktop/preload.js b/src/interface/desktop/preload.js index 3228fdb0..eb5a6cc2 100644 --- a/src/interface/desktop/preload.js +++ b/src/interface/desktop/preload.js @@ -45,7 +45,8 @@ contextBridge.exposeInMainWorld('hostURLAPI', { }) contextBridge.exposeInMainWorld('syncDataAPI', { - syncData: (regenerate) => ipcRenderer.invoke('syncData', regenerate) + syncData: (regenerate) => ipcRenderer.invoke('syncData', regenerate), + deleteAllFiles: () => ipcRenderer.invoke('deleteAllFiles') }) contextBridge.exposeInMainWorld('tokenAPI', { diff --git a/src/interface/desktop/renderer.js b/src/interface/desktop/renderer.js index 26765bf0..849a8293 100644 --- a/src/interface/desktop/renderer.js +++ b/src/interface/desktop/renderer.js @@ -206,3 +206,9 @@ syncForceButton.addEventListener('click', async () => { loadingBar.style.display = 'block'; await window.syncDataAPI.syncData(true); }); + +const deleteAllButton = document.getElementById('delete-all'); +deleteAllButton.addEventListener('click', async () => { + loadingBar.style.display = 'block'; + await window.syncDataAPI.deleteAllFiles(); +}); From 045c2252d6e25143e7b5648b2d310446af7d7013 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Tue, 7 Nov 2023 04:45:25 -0800 Subject: [PATCH 11/26] Set content enabled status on update via config buttons on web app Previously hitting configure or disable wouldn't update the state of the content cards. It needed page refresh to see if the content was synced correctly. Now cards automatically get set to new state on hitting disable button on card or global configure buttons --- src/khoj/interface/web/config.html | 105 +++++++++++++++++------------ 1 file changed, 62 insertions(+), 43 deletions(-) diff --git a/src/khoj/interface/web/config.html b/src/khoj/interface/web/config.html index 1410f20b..5a2089ca 100644 --- a/src/khoj/interface/web/config.html +++ b/src/khoj/interface/web/config.html @@ -8,11 +8,13 @@
    Computer -

    +

    Files - {% if current_model_state.computer == True %} - Configured - {% endif %} + Configured

    @@ -28,22 +30,23 @@
    - {% if current_model_state.computer %} -
    - -
    - {% endif %} +
    + +
    Github

    Github - {% if current_model_state.github == True %} - Configured - {% endif %} + Configured

    @@ -59,22 +62,24 @@
    - {% if current_model_state.github %} -
    - -
    - {% endif %} +
    + +
    Notion

    Notion - {% if current_model_state.notion == True %} - Configured - {% endif %} + Configured

    @@ -90,13 +95,13 @@
    - {% if current_model_state.notion %} -
    - -
    - {% endif %} +
    + +
    @@ -208,18 +213,11 @@ .then(response => response.json()) .then(data => { if (data.status == "ok") { - var contentTypeClearButton = document.getElementById("clear-" + content_source); - contentTypeClearButton.style.display = "none"; - - var configuredIcon = document.getElementById("configured-icon-" + content_source); - if (configuredIcon) { - configuredIcon.style.display = "none"; - } - - var misconfiguredIcon = document.getElementById("misconfigured-icon-" + content_source); - if (misconfiguredIcon) { - misconfiguredIcon.style.display = "none"; - } + document.getElementById("configured-icon-" + content_source).style.display = "none"; + document.getElementById("clear-" + content_source).style.display = "none"; + } else { + document.getElementById("configured-icon-" + content_source).style.display = ""; + document.getElementById("clear-" + content_source).style.display = ""; } }) }; @@ -265,6 +263,7 @@ if (data.detail != null) { throw new Error(data.detail); } + document.getElementById("status").innerHTML = emoji + " " + successText; document.getElementById("status").style.display = "block"; button.disabled = false; @@ -277,6 +276,26 @@ button.disabled = false; button.innerHTML = '⚠️ Unsuccessful'; }); + + content_sources = ["computer", "github", "notion"]; + content_sources.forEach(content_source => { + fetch(`/api/config/data/${content_source}`, { + method: 'GET', + headers: { + 'Content-Type': 'application/json', + } + }) + .then(response => response.json()) + .then(data => { + if (data.length > 0) { + document.getElementById("configured-icon-" + content_source).style.display = ""; + document.getElementById("clear-" + content_source).style.display = ""; + } else { + document.getElementById("configured-icon-" + content_source).style.display = "none"; + document.getElementById("clear-" + content_source).style.display = "none"; + } + }); + }); } // Setup the results count slider From 156421d30a6a1ee4217644b8757c71726cf12c8f Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Tue, 7 Nov 2023 05:16:41 -0800 Subject: [PATCH 12/26] Show file type icons for each indexed file in config card of web app --- src/khoj/interface/web/base_config.html | 8 +++++-- .../web/content_source_computer_input.html | 22 +++++++++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/src/khoj/interface/web/base_config.html b/src/khoj/interface/web/base_config.html index 8e33677c..619c34c0 100644 --- a/src/khoj/interface/web/base_config.html +++ b/src/khoj/interface/web/base_config.html @@ -209,16 +209,20 @@ border: none; color: var(--flower); padding: 4px; + width: 32px; + margin-bottom: 0px } div.file-element { display: grid; - grid-template-columns: 1fr auto; + grid-template-columns: 1fr 5fr 1fr; border: 1px solid rgb(229, 229, 229); border-radius: 4px; box-shadow: 0px 1px 3px 0px rgba(0,0,0,0.1),0px 1px 2px -1px rgba(0,0,0,0.8); - padding: 4px; + padding: 4px 0; margin-bottom: 8px; + justify-items: center; + align-items: center; } div.remove-button-container { diff --git a/src/khoj/interface/web/content_source_computer_input.html b/src/khoj/interface/web/content_source_computer_input.html index 01992d5e..aba3d8ee 100644 --- a/src/khoj/interface/web/content_source_computer_input.html +++ b/src/khoj/interface/web/content_source_computer_input.html @@ -23,6 +23,12 @@ #desktop-client { font-weight: normal; } + .indexed-files { + width: 100%; + } + .content-name { + font-size: smaller; + }