From 1a0923538e17060d050cc22167275ffef124339d Mon Sep 17 00:00:00 2001 From: sabaimran Date: Wed, 22 Jan 2025 19:03:25 -0800 Subject: [PATCH 1/6] Release Khoj version 1.35.0 --- manifest.json | 2 +- src/interface/desktop/package.json | 2 +- src/interface/emacs/khoj.el | 2 +- src/interface/obsidian/manifest.json | 2 +- src/interface/obsidian/package.json | 2 +- src/interface/obsidian/versions.json | 3 ++- src/interface/web/package.json | 2 +- versions.json | 3 ++- 8 files changed, 10 insertions(+), 8 deletions(-) diff --git a/manifest.json b/manifest.json index 6c8ad3bf..4fb4234d 100644 --- a/manifest.json +++ b/manifest.json @@ -1,7 +1,7 @@ { "id": "khoj", "name": "Khoj", - "version": "1.34.0", + "version": "1.35.0", "minAppVersion": "0.15.0", "description": "Your Second Brain", "author": "Khoj Inc.", diff --git a/src/interface/desktop/package.json b/src/interface/desktop/package.json index e7d0a4c2..c9d661c7 100644 --- a/src/interface/desktop/package.json +++ b/src/interface/desktop/package.json @@ -1,6 +1,6 @@ { "name": "Khoj", - "version": "1.34.0", + "version": "1.35.0", "description": "Your Second Brain", "author": "Khoj Inc. ", "license": "GPL-3.0-or-later", diff --git a/src/interface/emacs/khoj.el b/src/interface/emacs/khoj.el index d832fc51..e0879d5d 100644 --- a/src/interface/emacs/khoj.el +++ b/src/interface/emacs/khoj.el @@ -6,7 +6,7 @@ ;; Saba Imran ;; Description: Your Second Brain ;; Keywords: search, chat, ai, org-mode, outlines, markdown, pdf, image -;; Version: 1.34.0 +;; Version: 1.35.0 ;; Package-Requires: ((emacs "27.1") (transient "0.3.0") (dash "2.19.1")) ;; URL: https://github.com/khoj-ai/khoj/tree/master/src/interface/emacs diff --git a/src/interface/obsidian/manifest.json b/src/interface/obsidian/manifest.json index 6c8ad3bf..4fb4234d 100644 --- a/src/interface/obsidian/manifest.json +++ b/src/interface/obsidian/manifest.json @@ -1,7 +1,7 @@ { "id": "khoj", "name": "Khoj", - "version": "1.34.0", + "version": "1.35.0", "minAppVersion": "0.15.0", "description": "Your Second Brain", "author": "Khoj Inc.", diff --git a/src/interface/obsidian/package.json b/src/interface/obsidian/package.json index 2ef8b849..894ea030 100644 --- a/src/interface/obsidian/package.json +++ b/src/interface/obsidian/package.json @@ -1,6 +1,6 @@ { "name": "Khoj", - "version": "1.34.0", + "version": "1.35.0", "description": "Your Second Brain", "author": "Debanjum Singh Solanky, Saba Imran ", "license": "GPL-3.0-or-later", diff --git a/src/interface/obsidian/versions.json b/src/interface/obsidian/versions.json index 47ff6efa..e32dd18f 100644 --- a/src/interface/obsidian/versions.json +++ b/src/interface/obsidian/versions.json @@ -109,5 +109,6 @@ "1.33.0": "0.15.0", "1.33.1": "0.15.0", "1.33.2": "0.15.0", - "1.34.0": "0.15.0" + "1.34.0": "0.15.0", + "1.35.0": "0.15.0" } diff --git a/src/interface/web/package.json b/src/interface/web/package.json index b7f6bb5d..abb64738 100644 --- a/src/interface/web/package.json +++ b/src/interface/web/package.json @@ -1,6 +1,6 @@ { "name": "khoj-ai", - "version": "1.34.0", + "version": "1.35.0", "private": true, "scripts": { "dev": "next dev", diff --git a/versions.json b/versions.json index 47ff6efa..e32dd18f 100644 --- a/versions.json +++ b/versions.json @@ -109,5 +109,6 @@ "1.33.0": "0.15.0", "1.33.1": "0.15.0", "1.33.2": "0.15.0", - "1.34.0": "0.15.0" + "1.34.0": "0.15.0", + "1.35.0": "0.15.0" } From fd90842d38c0de91acaea5177fab1d43f877107a Mon Sep 17 00:00:00 2001 From: sabaimran Date: Wed, 22 Jan 2025 19:07:54 -0800 Subject: [PATCH 2/6] Bump postgresql server dev version to 16 for latest ubuntu --- .github/workflows/run_evals.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run_evals.yml b/.github/workflows/run_evals.yml index 9695cfef..914a0835 100644 --- a/.github/workflows/run_evals.yml +++ b/.github/workflows/run_evals.yml @@ -77,7 +77,7 @@ jobs: run: | # install postgres and other dependencies sudo apt update && sudo apt install -y git python3-pip libegl1 sqlite3 libsqlite3-dev libsqlite3-0 ffmpeg libsm6 libxext6 - sudo apt install -y postgresql postgresql-client && sudo apt install -y postgresql-server-dev-14 + sudo apt install -y postgresql postgresql-client && sudo apt install -y postgresql-server-dev-16 # upgrade pip python -m ensurepip --upgrade && python -m pip install --upgrade pip # install terrarium for code sandbox From 5a3a8970802c141a7df19f4ce5aa9416ab484033 Mon Sep 17 00:00:00 2001 From: sabaimran Date: Wed, 22 Jan 2025 19:50:22 -0800 Subject: [PATCH 3/6] Temporarily move logic to associate entry and fileobject objects into the management command, out of automatic migrations --- .../commands/delete_orphaned_fileobjects.py | 47 +++++++++++++ .../migrations/0079_entry_file_object.py | 69 ++++++++++--------- 2 files changed, 82 insertions(+), 34 deletions(-) diff --git a/src/khoj/database/management/commands/delete_orphaned_fileobjects.py b/src/khoj/database/management/commands/delete_orphaned_fileobjects.py index 99d45c6f..09a8ac29 100644 --- a/src/khoj/database/management/commands/delete_orphaned_fileobjects.py +++ b/src/khoj/database/management/commands/delete_orphaned_fileobjects.py @@ -1,4 +1,5 @@ from django.core.management.base import BaseCommand +from django.db import transaction from django.db.models import Exists, OuterRef from khoj.database.models import Entry, FileObject @@ -15,6 +16,52 @@ class Command(BaseCommand): ) def handle(self, *args, **options): + apply = options["apply"] + + mode = "UPDATE" if apply else "DRY RUN" + self.stdout.write(f"[{mode}] Processing entries with null file_objects...") + + # Create lookup dictionary of all file objects + file_objects_map = {(fo.user_id, fo.file_name): fo for fo in FileObject.objects.all()} + + chunk_size = 1000 + processed = 0 + processed_entry_ids = set() + + while True: + entries = list( + Entry.objects.select_related("user") + .filter(file_object__isnull=True) + .exclude(id__in=processed_entry_ids) + .only("id", "user", "file_path")[:chunk_size] + ) + + if not entries: + break + + processed_entry_ids.update([entry.id for entry in entries]) + entries_to_update = [] + + for entry in entries: + try: + file_object = file_objects_map.get((entry.user_id, entry.file_path)) + if file_object: + entry.file_object = file_object + entries_to_update.append(entry) + except Exception as e: + self.stdout.write(self.style.WARNING(f"Error processing entry {entry.id}: {str(e)}")) + continue + + if entries_to_update and apply: + with transaction.atomic(): + Entry.objects.bulk_update(entries_to_update, ["file_object"], batch_size=chunk_size) + + processed += len(entries) + self.stdout.write(f"Processed {processed} entries") + + action = "Updated" if apply else "Would update" + self.stdout.write(self.style.SUCCESS(f"{action} {len(processed_entry_ids)} entries")) + # Find FileObjects with no related entries using subquery orphaned_files = FileObject.objects.annotate( has_entries=Exists(Entry.objects.filter(file_object=OuterRef("pk"))) diff --git a/src/khoj/database/migrations/0079_entry_file_object.py b/src/khoj/database/migrations/0079_entry_file_object.py index 3846dd9d..d9a6166f 100644 --- a/src/khoj/database/migrations/0079_entry_file_object.py +++ b/src/khoj/database/migrations/0079_entry_file_object.py @@ -5,49 +5,50 @@ from django.db import migrations, models def migrate_entry_objects(apps, schema_editor): - Entry = apps.get_model("database", "Entry") - FileObject = apps.get_model("database", "FileObject") - db_alias = schema_editor.connection.alias + pass + # Entry = apps.get_model("database", "Entry") + # FileObject = apps.get_model("database", "FileObject") + # db_alias = schema_editor.connection.alias - # Create lookup dictionary of all file objects - file_objects_map = {(fo.user_id, fo.file_name): fo for fo in FileObject.objects.using(db_alias).all()} + # # Create lookup dictionary of all file objects + # file_objects_map = {(fo.user_id, fo.file_name): fo for fo in FileObject.objects.using(db_alias).all()} - # Process entries in chunks of 1000 - chunk_size = 1000 - processed = 0 + # # Process entries in chunks of 1000 + # chunk_size = 1000 + # processed = 0 - processed_entry_ids = set() + # processed_entry_ids = set() - while True: - entries = list( - Entry.objects.using(db_alias) - .select_related("user") - .filter(file_object__isnull=True) - .exclude(id__in=processed_entry_ids) - .only("id", "user", "file_path")[:chunk_size] - ) + # while True: + # entries = list( + # Entry.objects.using(db_alias) + # .select_related("user") + # .filter(file_object__isnull=True) + # .exclude(id__in=processed_entry_ids) + # .only("id", "user", "file_path")[:chunk_size] + # ) - if not entries: - break + # if not entries: + # break - processed_entry_ids.update([entry.id for entry in entries]) + # processed_entry_ids.update([entry.id for entry in entries]) - entries_to_update = [] - for entry in entries: - try: - file_object = file_objects_map.get((entry.user_id, entry.file_path)) - if file_object: - entry.file_object = file_object - entries_to_update.append(entry) - except Exception as e: - print(f"Error processing entry {entry.id}: {str(e)}") - continue + # entries_to_update = [] + # for entry in entries: + # try: + # file_object = file_objects_map.get((entry.user_id, entry.file_path)) + # if file_object: + # entry.file_object = file_object + # entries_to_update.append(entry) + # except Exception as e: + # print(f"Error processing entry {entry.id}: {str(e)}") + # continue - if entries_to_update: - Entry.objects.using(db_alias).bulk_update(entries_to_update, ["file_object"], batch_size=chunk_size) + # if entries_to_update: + # Entry.objects.using(db_alias).bulk_update(entries_to_update, ["file_object"], batch_size=chunk_size) - processed += len(entries) - print(f"Processed {processed} entries") + # processed += len(entries) + # print(f"Processed {processed} entries") def reverse_migration(apps, schema_editor): From 9fc825d7a627555e3b4054ec5c02e0da7b4f98ea Mon Sep 17 00:00:00 2001 From: sabaimran Date: Wed, 22 Jan 2025 19:51:28 -0800 Subject: [PATCH 4/6] Release Khoj version 1.35.1 --- manifest.json | 2 +- src/interface/desktop/package.json | 2 +- src/interface/emacs/khoj.el | 2 +- src/interface/obsidian/manifest.json | 2 +- src/interface/obsidian/package.json | 2 +- src/interface/obsidian/versions.json | 3 ++- src/interface/web/package.json | 2 +- versions.json | 3 ++- 8 files changed, 10 insertions(+), 8 deletions(-) diff --git a/manifest.json b/manifest.json index 4fb4234d..aa3524d1 100644 --- a/manifest.json +++ b/manifest.json @@ -1,7 +1,7 @@ { "id": "khoj", "name": "Khoj", - "version": "1.35.0", + "version": "1.35.1", "minAppVersion": "0.15.0", "description": "Your Second Brain", "author": "Khoj Inc.", diff --git a/src/interface/desktop/package.json b/src/interface/desktop/package.json index c9d661c7..6a5a9613 100644 --- a/src/interface/desktop/package.json +++ b/src/interface/desktop/package.json @@ -1,6 +1,6 @@ { "name": "Khoj", - "version": "1.35.0", + "version": "1.35.1", "description": "Your Second Brain", "author": "Khoj Inc. ", "license": "GPL-3.0-or-later", diff --git a/src/interface/emacs/khoj.el b/src/interface/emacs/khoj.el index e0879d5d..fd354295 100644 --- a/src/interface/emacs/khoj.el +++ b/src/interface/emacs/khoj.el @@ -6,7 +6,7 @@ ;; Saba Imran ;; Description: Your Second Brain ;; Keywords: search, chat, ai, org-mode, outlines, markdown, pdf, image -;; Version: 1.35.0 +;; Version: 1.35.1 ;; Package-Requires: ((emacs "27.1") (transient "0.3.0") (dash "2.19.1")) ;; URL: https://github.com/khoj-ai/khoj/tree/master/src/interface/emacs diff --git a/src/interface/obsidian/manifest.json b/src/interface/obsidian/manifest.json index 4fb4234d..aa3524d1 100644 --- a/src/interface/obsidian/manifest.json +++ b/src/interface/obsidian/manifest.json @@ -1,7 +1,7 @@ { "id": "khoj", "name": "Khoj", - "version": "1.35.0", + "version": "1.35.1", "minAppVersion": "0.15.0", "description": "Your Second Brain", "author": "Khoj Inc.", diff --git a/src/interface/obsidian/package.json b/src/interface/obsidian/package.json index 894ea030..d810a2f8 100644 --- a/src/interface/obsidian/package.json +++ b/src/interface/obsidian/package.json @@ -1,6 +1,6 @@ { "name": "Khoj", - "version": "1.35.0", + "version": "1.35.1", "description": "Your Second Brain", "author": "Debanjum Singh Solanky, Saba Imran ", "license": "GPL-3.0-or-later", diff --git a/src/interface/obsidian/versions.json b/src/interface/obsidian/versions.json index e32dd18f..cfbf0600 100644 --- a/src/interface/obsidian/versions.json +++ b/src/interface/obsidian/versions.json @@ -110,5 +110,6 @@ "1.33.1": "0.15.0", "1.33.2": "0.15.0", "1.34.0": "0.15.0", - "1.35.0": "0.15.0" + "1.35.0": "0.15.0", + "1.35.1": "0.15.0" } diff --git a/src/interface/web/package.json b/src/interface/web/package.json index abb64738..5a4a5b07 100644 --- a/src/interface/web/package.json +++ b/src/interface/web/package.json @@ -1,6 +1,6 @@ { "name": "khoj-ai", - "version": "1.35.0", + "version": "1.35.1", "private": true, "scripts": { "dev": "next dev", diff --git a/versions.json b/versions.json index e32dd18f..cfbf0600 100644 --- a/versions.json +++ b/versions.json @@ -110,5 +110,6 @@ "1.33.1": "0.15.0", "1.33.2": "0.15.0", "1.34.0": "0.15.0", - "1.35.0": "0.15.0" + "1.35.0": "0.15.0", + "1.35.1": "0.15.0" } From 938ef0a27ba8ad14d170268297a411a27c78ddb4 Mon Sep 17 00:00:00 2001 From: sabaimran Date: Wed, 22 Jan 2025 20:48:37 -0800 Subject: [PATCH 5/6] Update DB migration for better memory and speed efficiency --- .../commands/delete_orphaned_fileobjects.py | 46 ------------- .../migrations/0079_entry_file_object.py | 68 +++++++++---------- 2 files changed, 31 insertions(+), 83 deletions(-) diff --git a/src/khoj/database/management/commands/delete_orphaned_fileobjects.py b/src/khoj/database/management/commands/delete_orphaned_fileobjects.py index 09a8ac29..f7efa1dd 100644 --- a/src/khoj/database/management/commands/delete_orphaned_fileobjects.py +++ b/src/khoj/database/management/commands/delete_orphaned_fileobjects.py @@ -16,52 +16,6 @@ class Command(BaseCommand): ) def handle(self, *args, **options): - apply = options["apply"] - - mode = "UPDATE" if apply else "DRY RUN" - self.stdout.write(f"[{mode}] Processing entries with null file_objects...") - - # Create lookup dictionary of all file objects - file_objects_map = {(fo.user_id, fo.file_name): fo for fo in FileObject.objects.all()} - - chunk_size = 1000 - processed = 0 - processed_entry_ids = set() - - while True: - entries = list( - Entry.objects.select_related("user") - .filter(file_object__isnull=True) - .exclude(id__in=processed_entry_ids) - .only("id", "user", "file_path")[:chunk_size] - ) - - if not entries: - break - - processed_entry_ids.update([entry.id for entry in entries]) - entries_to_update = [] - - for entry in entries: - try: - file_object = file_objects_map.get((entry.user_id, entry.file_path)) - if file_object: - entry.file_object = file_object - entries_to_update.append(entry) - except Exception as e: - self.stdout.write(self.style.WARNING(f"Error processing entry {entry.id}: {str(e)}")) - continue - - if entries_to_update and apply: - with transaction.atomic(): - Entry.objects.bulk_update(entries_to_update, ["file_object"], batch_size=chunk_size) - - processed += len(entries) - self.stdout.write(f"Processed {processed} entries") - - action = "Updated" if apply else "Would update" - self.stdout.write(self.style.SUCCESS(f"{action} {len(processed_entry_ids)} entries")) - # Find FileObjects with no related entries using subquery orphaned_files = FileObject.objects.annotate( has_entries=Exists(Entry.objects.filter(file_object=OuterRef("pk"))) diff --git a/src/khoj/database/migrations/0079_entry_file_object.py b/src/khoj/database/migrations/0079_entry_file_object.py index d9a6166f..1b15b0a6 100644 --- a/src/khoj/database/migrations/0079_entry_file_object.py +++ b/src/khoj/database/migrations/0079_entry_file_object.py @@ -5,50 +5,44 @@ from django.db import migrations, models def migrate_entry_objects(apps, schema_editor): - pass - # Entry = apps.get_model("database", "Entry") - # FileObject = apps.get_model("database", "FileObject") - # db_alias = schema_editor.connection.alias + Entry = apps.get_model("database", "Entry") + FileObject = apps.get_model("database", "FileObject") + db_alias = schema_editor.connection.alias - # # Create lookup dictionary of all file objects - # file_objects_map = {(fo.user_id, fo.file_name): fo for fo in FileObject.objects.using(db_alias).all()} + # Process file objects in chunks + chunk_size = 1000 + processed = 0 + processed_file_ids = set() - # # Process entries in chunks of 1000 - # chunk_size = 1000 - # processed = 0 + while True: + file_objects = list( + FileObject.objects.using(db_alias) + .exclude(id__in=processed_file_ids) + .select_related("user") + .only("id", "user", "file_name")[:chunk_size] + ) - # processed_entry_ids = set() + if not file_objects: + break - # while True: - # entries = list( - # Entry.objects.using(db_alias) - # .select_related("user") - # .filter(file_object__isnull=True) - # .exclude(id__in=processed_entry_ids) - # .only("id", "user", "file_path")[:chunk_size] - # ) + processed_file_ids.update([fo.id for fo in file_objects]) - # if not entries: - # break + for file_object in file_objects: + try: + # Find all entries matching this file object + matching_entries = Entry.objects.using(db_alias).filter( + user_id=file_object.user_id, file_path=file_object.file_name, file_object__isnull=True + ) - # processed_entry_ids.update([entry.id for entry in entries]) + if matching_entries.exists(): + # Update all matching entries in bulk + matching_entries.update(file_object=file_object) + except Exception as e: + print(f"Error processing file object {file_object.id}: {str(e)}") + continue - # entries_to_update = [] - # for entry in entries: - # try: - # file_object = file_objects_map.get((entry.user_id, entry.file_path)) - # if file_object: - # entry.file_object = file_object - # entries_to_update.append(entry) - # except Exception as e: - # print(f"Error processing entry {entry.id}: {str(e)}") - # continue - - # if entries_to_update: - # Entry.objects.using(db_alias).bulk_update(entries_to_update, ["file_object"], batch_size=chunk_size) - - # processed += len(entries) - # print(f"Processed {processed} entries") + processed += len(file_objects) + print(f"Processed {processed} file objects") def reverse_migration(apps, schema_editor): From a3b5ec4737c1acf52738d803bc98d5d936e6abeb Mon Sep 17 00:00:00 2001 From: sabaimran Date: Wed, 22 Jan 2025 21:42:14 -0800 Subject: [PATCH 6/6] Release Khoj version 1.35.2 --- manifest.json | 2 +- src/interface/desktop/package.json | 2 +- src/interface/emacs/khoj.el | 2 +- src/interface/obsidian/manifest.json | 2 +- src/interface/obsidian/package.json | 2 +- src/interface/obsidian/versions.json | 3 ++- src/interface/web/package.json | 2 +- versions.json | 3 ++- 8 files changed, 10 insertions(+), 8 deletions(-) diff --git a/manifest.json b/manifest.json index aa3524d1..8cdf3f2f 100644 --- a/manifest.json +++ b/manifest.json @@ -1,7 +1,7 @@ { "id": "khoj", "name": "Khoj", - "version": "1.35.1", + "version": "1.35.2", "minAppVersion": "0.15.0", "description": "Your Second Brain", "author": "Khoj Inc.", diff --git a/src/interface/desktop/package.json b/src/interface/desktop/package.json index 6a5a9613..d43bd1a6 100644 --- a/src/interface/desktop/package.json +++ b/src/interface/desktop/package.json @@ -1,6 +1,6 @@ { "name": "Khoj", - "version": "1.35.1", + "version": "1.35.2", "description": "Your Second Brain", "author": "Khoj Inc. ", "license": "GPL-3.0-or-later", diff --git a/src/interface/emacs/khoj.el b/src/interface/emacs/khoj.el index fd354295..b77c70e9 100644 --- a/src/interface/emacs/khoj.el +++ b/src/interface/emacs/khoj.el @@ -6,7 +6,7 @@ ;; Saba Imran ;; Description: Your Second Brain ;; Keywords: search, chat, ai, org-mode, outlines, markdown, pdf, image -;; Version: 1.35.1 +;; Version: 1.35.2 ;; Package-Requires: ((emacs "27.1") (transient "0.3.0") (dash "2.19.1")) ;; URL: https://github.com/khoj-ai/khoj/tree/master/src/interface/emacs diff --git a/src/interface/obsidian/manifest.json b/src/interface/obsidian/manifest.json index aa3524d1..8cdf3f2f 100644 --- a/src/interface/obsidian/manifest.json +++ b/src/interface/obsidian/manifest.json @@ -1,7 +1,7 @@ { "id": "khoj", "name": "Khoj", - "version": "1.35.1", + "version": "1.35.2", "minAppVersion": "0.15.0", "description": "Your Second Brain", "author": "Khoj Inc.", diff --git a/src/interface/obsidian/package.json b/src/interface/obsidian/package.json index d810a2f8..8861f177 100644 --- a/src/interface/obsidian/package.json +++ b/src/interface/obsidian/package.json @@ -1,6 +1,6 @@ { "name": "Khoj", - "version": "1.35.1", + "version": "1.35.2", "description": "Your Second Brain", "author": "Debanjum Singh Solanky, Saba Imran ", "license": "GPL-3.0-or-later", diff --git a/src/interface/obsidian/versions.json b/src/interface/obsidian/versions.json index cfbf0600..21346aa4 100644 --- a/src/interface/obsidian/versions.json +++ b/src/interface/obsidian/versions.json @@ -111,5 +111,6 @@ "1.33.2": "0.15.0", "1.34.0": "0.15.0", "1.35.0": "0.15.0", - "1.35.1": "0.15.0" + "1.35.1": "0.15.0", + "1.35.2": "0.15.0" } diff --git a/src/interface/web/package.json b/src/interface/web/package.json index 5a4a5b07..05270d36 100644 --- a/src/interface/web/package.json +++ b/src/interface/web/package.json @@ -1,6 +1,6 @@ { "name": "khoj-ai", - "version": "1.35.1", + "version": "1.35.2", "private": true, "scripts": { "dev": "next dev", diff --git a/versions.json b/versions.json index cfbf0600..21346aa4 100644 --- a/versions.json +++ b/versions.json @@ -111,5 +111,6 @@ "1.33.2": "0.15.0", "1.34.0": "0.15.0", "1.35.0": "0.15.0", - "1.35.1": "0.15.0" + "1.35.1": "0.15.0", + "1.35.2": "0.15.0" }