mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-02 13:18:18 +00:00
Remove unsupported NUL char from file, chat before save to DB
This commit is contained in:
@@ -72,6 +72,8 @@ from khoj.search_filter.word_filter import WordFilter
|
||||
from khoj.utils import state
|
||||
from khoj.utils.config import OfflineChatProcessorModel
|
||||
from khoj.utils.helpers import (
|
||||
clean_object_for_db,
|
||||
clean_text_for_db,
|
||||
generate_random_internal_agent_name,
|
||||
generate_random_name,
|
||||
in_debug_mode,
|
||||
@@ -1032,7 +1034,7 @@ class ConversationAdapters:
|
||||
user=user, client=client_application, id=conversation_id
|
||||
).afirst()
|
||||
if conversation:
|
||||
conversation.title = title
|
||||
conversation.title = clean_text_for_db(title)
|
||||
await conversation.asave()
|
||||
return conversation
|
||||
return None
|
||||
@@ -1432,14 +1434,15 @@ class ConversationAdapters:
|
||||
await Conversation.objects.filter(user=user, client=client_application).order_by("-updated_at").afirst()
|
||||
)
|
||||
|
||||
cleaned_conversation_log = clean_object_for_db(conversation_log)
|
||||
if conversation:
|
||||
conversation.conversation_log = conversation_log
|
||||
conversation.conversation_log = cleaned_conversation_log
|
||||
conversation.slug = slug
|
||||
conversation.updated_at = django_timezone.now()
|
||||
await conversation.asave()
|
||||
else:
|
||||
await Conversation.objects.acreate(
|
||||
user=user, conversation_log=conversation_log, client=client_application, slug=slug
|
||||
user=user, conversation_log=cleaned_conversation_log, client=client_application, slug=slug
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
@@ -1610,6 +1613,7 @@ class ConversationAdapters:
|
||||
conversation_log = conversation.conversation_log
|
||||
updated_log = [msg for msg in conversation_log["chat"] if msg.get("turnId") != turn_id]
|
||||
conversation.conversation_log["chat"] = updated_log
|
||||
conversation.conversation_log = clean_object_for_db(conversation.conversation_log)
|
||||
conversation.save()
|
||||
return True
|
||||
|
||||
@@ -1617,13 +1621,15 @@ class ConversationAdapters:
|
||||
class FileObjectAdapters:
|
||||
@staticmethod
|
||||
def update_raw_text(file_object: FileObject, new_raw_text: str):
|
||||
file_object.raw_text = new_raw_text
|
||||
cleaned_raw_text = clean_text_for_db(new_raw_text)
|
||||
file_object.raw_text = cleaned_raw_text
|
||||
file_object.save()
|
||||
|
||||
@staticmethod
|
||||
@require_valid_user
|
||||
def create_file_object(user: KhojUser, file_name: str, raw_text: str):
|
||||
return FileObject.objects.create(user=user, file_name=file_name, raw_text=raw_text)
|
||||
cleaned_raw_text = clean_text_for_db(raw_text)
|
||||
return FileObject.objects.create(user=user, file_name=file_name, raw_text=cleaned_raw_text)
|
||||
|
||||
@staticmethod
|
||||
@require_valid_user
|
||||
@@ -1647,13 +1653,15 @@ class FileObjectAdapters:
|
||||
|
||||
@staticmethod
|
||||
async def aupdate_raw_text(file_object: FileObject, new_raw_text: str):
|
||||
file_object.raw_text = new_raw_text
|
||||
cleaned_raw_text = clean_text_for_db(new_raw_text)
|
||||
file_object.raw_text = cleaned_raw_text
|
||||
await file_object.asave()
|
||||
|
||||
@staticmethod
|
||||
@arequire_valid_user
|
||||
async def acreate_file_object(user: KhojUser, file_name: str, raw_text: str):
|
||||
return await FileObject.objects.acreate(user=user, file_name=file_name, raw_text=raw_text)
|
||||
cleaned_raw_text = clean_text_for_db(raw_text)
|
||||
return await FileObject.objects.acreate(user=user, file_name=file_name, raw_text=cleaned_raw_text)
|
||||
|
||||
@staticmethod
|
||||
@arequire_valid_user
|
||||
|
||||
@@ -71,6 +71,7 @@ from khoj.routers.storage import upload_user_image_to_bucket
|
||||
from khoj.utils import state
|
||||
from khoj.utils.helpers import (
|
||||
ConversationCommand,
|
||||
clean_text_for_db,
|
||||
command_descriptions,
|
||||
convert_image_to_webp,
|
||||
get_country_code_from_timezone,
|
||||
@@ -631,7 +632,7 @@ async def generate_chat_title(
|
||||
raise HTTPException(status_code=404, detail="Conversation not found")
|
||||
|
||||
new_title = await acreate_title_from_history(request.user.object, conversation=conversation)
|
||||
conversation.slug = new_title[:200]
|
||||
conversation.slug = clean_text_for_db(new_title[:200])
|
||||
|
||||
await conversation.asave()
|
||||
|
||||
|
||||
@@ -833,3 +833,26 @@ def normalize_email(email: str, check_deliverability=False) -> tuple[str, bool]:
|
||||
return valid_email.normalized, True
|
||||
except (EmailNotValidError, EmailUndeliverableError):
|
||||
return lower_email, False
|
||||
|
||||
|
||||
def clean_text_for_db(text):
    """Strip characters that a PostgreSQL text field is unable to hold.

    PostgreSQL enforces, at the database level, that text values contain
    no NUL (0x00) characters, so every NUL is dropped from string input.
    Anything that is not a ``str`` is handed back unchanged.
    """
    if isinstance(text, str):
        return text.replace("\x00", "")
    # Non-string values (None, numbers, bytes, ...) pass through untouched.
    return text
|
||||
|
||||
|
||||
def clean_object_for_db(data):
    """Recursively clean PostgreSQL-incompatible characters from nested data structures.

    Strings are cleaned with ``clean_text_for_db``. Dicts and lists are
    rebuilt with every nested value cleaned; the input object is not
    mutated. Any other value (numbers, booleans, None, ...) is returned
    unchanged.

    Dict keys that are strings are cleaned as well: a key is stored as a
    string too, so a NUL left in a key would be rejected just like one in
    a value. Non-string keys are kept as they are.
    """
    if isinstance(data, str):
        return clean_text_for_db(data)
    if isinstance(data, dict):
        # clean_text_for_db returns non-string keys untouched, so it is
        # safe to apply to every key.
        return {clean_text_for_db(key): clean_object_for_db(value) for key, value in data.items()}
    if isinstance(data, list):
        return [clean_object_for_db(item) for item in data]
    return data
|
||||
|
||||
Reference in New Issue
Block a user