mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-07 21:29:13 +00:00
Temporarily move the logic that associates Entry and FileObject records into the management command, out of automatic migrations
This commit is contained in:
@@ -1,4 +1,5 @@
|
|||||||
from django.core.management.base import BaseCommand
|
from django.core.management.base import BaseCommand
|
||||||
|
from django.db import transaction
|
||||||
from django.db.models import Exists, OuterRef
|
from django.db.models import Exists, OuterRef
|
||||||
|
|
||||||
from khoj.database.models import Entry, FileObject
|
from khoj.database.models import Entry, FileObject
|
||||||
@@ -15,6 +16,52 @@ class Command(BaseCommand):
|
|||||||
)
|
)
|
||||||
|
|
||||||
def handle(self, *args, **options):
    """Associate entries that have no FileObject with a matching FileObject.

    Each Entry with a null ``file_object`` is matched to the FileObject
    owned by the same user whose ``file_name`` equals the entry's
    ``file_path``. With ``--apply`` the links are persisted in bulk;
    otherwise this is a dry run that only reports what would change.
    """
    apply = options["apply"]
    mode = "UPDATE" if apply else "DRY RUN"
    self.stdout.write(f"[{mode}] Processing entries with null file_objects...")

    # Create lookup dictionary of all file objects, keyed by (owner id, file name).
    file_objects_map = {(fo.user_id, fo.file_name): fo for fo in FileObject.objects.all()}

    chunk_size = 1000
    processed = 0
    total_matched = 0  # entries matched to a FileObject (updated, or would be)
    processed_entry_ids = set()

    while True:
        # Exclude already-seen ids so entries that stay null (no matching
        # FileObject, or dry-run mode where nothing is written) do not make
        # this loop spin forever.
        entries = list(
            Entry.objects.select_related("user")
            .filter(file_object__isnull=True)
            .exclude(id__in=processed_entry_ids)
            .only("id", "user", "file_path")[:chunk_size]
        )
        if not entries:
            break

        processed_entry_ids.update(entry.id for entry in entries)

        entries_to_update = []
        for entry in entries:
            try:
                file_object = file_objects_map.get((entry.user_id, entry.file_path))
                if file_object:
                    entry.file_object = file_object
                    entries_to_update.append(entry)
            except Exception as e:
                self.stdout.write(self.style.WARNING(f"Error processing entry {entry.id}: {str(e)}"))
                continue

        total_matched += len(entries_to_update)

        if entries_to_update and apply:
            with transaction.atomic():
                Entry.objects.bulk_update(entries_to_update, ["file_object"], batch_size=chunk_size)

        processed += len(entries)
        self.stdout.write(f"Processed {processed} entries")

    action = "Updated" if apply else "Would update"
    # Report the number of entries actually matched to a FileObject; the
    # previous code reported len(processed_entry_ids) — every scanned
    # null-file_object entry — which over-counted whenever an entry had no
    # matching FileObject.
    self.stdout.write(self.style.SUCCESS(f"{action} {total_matched} entries"))
# Find FileObjects with no related entries using subquery
|
# Find FileObjects with no related entries using subquery
|
||||||
orphaned_files = FileObject.objects.annotate(
|
orphaned_files = FileObject.objects.annotate(
|
||||||
has_entries=Exists(Entry.objects.filter(file_object=OuterRef("pk")))
|
has_entries=Exists(Entry.objects.filter(file_object=OuterRef("pk")))
|
||||||
|
|||||||
@@ -5,49 +5,50 @@ from django.db import migrations, models
|
|||||||
|
|
||||||
|
|
||||||
def migrate_entry_objects(apps, schema_editor):
    """Intentionally do nothing.

    The logic that associated Entry rows with their FileObject was moved out
    of automatic migrations and into a management command (see the
    corresponding command that processes entries with null file_objects), so
    this data-migration step is deliberately a no-op. The function is kept so
    the migration's RunPython operation remains valid for databases that have
    already recorded it.
    """
    # No-op: run the management command instead of migrating data here.
    # The previous commented-out implementation was removed; it is preserved
    # in version control history and mirrored in the management command.
def reverse_migration(apps, schema_editor):
|
def reverse_migration(apps, schema_editor):
|
||||||
|
|||||||
Reference in New Issue
Block a user