mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-08 05:39:13 +00:00
Remove unsupported NUL char from file, chat before save to DB
This commit is contained in:
@@ -72,6 +72,8 @@ from khoj.search_filter.word_filter import WordFilter
|
|||||||
from khoj.utils import state
|
from khoj.utils import state
|
||||||
from khoj.utils.config import OfflineChatProcessorModel
|
from khoj.utils.config import OfflineChatProcessorModel
|
||||||
from khoj.utils.helpers import (
|
from khoj.utils.helpers import (
|
||||||
|
clean_object_for_db,
|
||||||
|
clean_text_for_db,
|
||||||
generate_random_internal_agent_name,
|
generate_random_internal_agent_name,
|
||||||
generate_random_name,
|
generate_random_name,
|
||||||
in_debug_mode,
|
in_debug_mode,
|
||||||
@@ -1032,7 +1034,7 @@ class ConversationAdapters:
|
|||||||
user=user, client=client_application, id=conversation_id
|
user=user, client=client_application, id=conversation_id
|
||||||
).afirst()
|
).afirst()
|
||||||
if conversation:
|
if conversation:
|
||||||
conversation.title = title
|
conversation.title = clean_text_for_db(title)
|
||||||
await conversation.asave()
|
await conversation.asave()
|
||||||
return conversation
|
return conversation
|
||||||
return None
|
return None
|
||||||
@@ -1432,14 +1434,15 @@ class ConversationAdapters:
|
|||||||
await Conversation.objects.filter(user=user, client=client_application).order_by("-updated_at").afirst()
|
await Conversation.objects.filter(user=user, client=client_application).order_by("-updated_at").afirst()
|
||||||
)
|
)
|
||||||
|
|
||||||
|
cleaned_conversation_log = clean_object_for_db(conversation_log)
|
||||||
if conversation:
|
if conversation:
|
||||||
conversation.conversation_log = conversation_log
|
conversation.conversation_log = cleaned_conversation_log
|
||||||
conversation.slug = slug
|
conversation.slug = slug
|
||||||
conversation.updated_at = django_timezone.now()
|
conversation.updated_at = django_timezone.now()
|
||||||
await conversation.asave()
|
await conversation.asave()
|
||||||
else:
|
else:
|
||||||
await Conversation.objects.acreate(
|
await Conversation.objects.acreate(
|
||||||
user=user, conversation_log=conversation_log, client=client_application, slug=slug
|
user=user, conversation_log=cleaned_conversation_log, client=client_application, slug=slug
|
||||||
)
|
)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@@ -1610,6 +1613,7 @@ class ConversationAdapters:
|
|||||||
conversation_log = conversation.conversation_log
|
conversation_log = conversation.conversation_log
|
||||||
updated_log = [msg for msg in conversation_log["chat"] if msg.get("turnId") != turn_id]
|
updated_log = [msg for msg in conversation_log["chat"] if msg.get("turnId") != turn_id]
|
||||||
conversation.conversation_log["chat"] = updated_log
|
conversation.conversation_log["chat"] = updated_log
|
||||||
|
conversation.conversation_log = clean_object_for_db(conversation.conversation_log)
|
||||||
conversation.save()
|
conversation.save()
|
||||||
return True
|
return True
|
||||||
|
|
||||||
@@ -1617,13 +1621,15 @@ class ConversationAdapters:
|
|||||||
class FileObjectAdapters:
|
class FileObjectAdapters:
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def update_raw_text(file_object: FileObject, new_raw_text: str):
|
def update_raw_text(file_object: FileObject, new_raw_text: str):
|
||||||
file_object.raw_text = new_raw_text
|
cleaned_raw_text = clean_text_for_db(new_raw_text)
|
||||||
|
file_object.raw_text = cleaned_raw_text
|
||||||
file_object.save()
|
file_object.save()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@require_valid_user
|
@require_valid_user
|
||||||
def create_file_object(user: KhojUser, file_name: str, raw_text: str):
|
def create_file_object(user: KhojUser, file_name: str, raw_text: str):
|
||||||
return FileObject.objects.create(user=user, file_name=file_name, raw_text=raw_text)
|
cleaned_raw_text = clean_text_for_db(raw_text)
|
||||||
|
return FileObject.objects.create(user=user, file_name=file_name, raw_text=cleaned_raw_text)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@require_valid_user
|
@require_valid_user
|
||||||
@@ -1647,13 +1653,15 @@ class FileObjectAdapters:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
async def aupdate_raw_text(file_object: FileObject, new_raw_text: str):
|
async def aupdate_raw_text(file_object: FileObject, new_raw_text: str):
|
||||||
file_object.raw_text = new_raw_text
|
cleaned_raw_text = clean_text_for_db(new_raw_text)
|
||||||
|
file_object.raw_text = cleaned_raw_text
|
||||||
await file_object.asave()
|
await file_object.asave()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@arequire_valid_user
|
@arequire_valid_user
|
||||||
async def acreate_file_object(user: KhojUser, file_name: str, raw_text: str):
|
async def acreate_file_object(user: KhojUser, file_name: str, raw_text: str):
|
||||||
return await FileObject.objects.acreate(user=user, file_name=file_name, raw_text=raw_text)
|
cleaned_raw_text = clean_text_for_db(raw_text)
|
||||||
|
return await FileObject.objects.acreate(user=user, file_name=file_name, raw_text=cleaned_raw_text)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@arequire_valid_user
|
@arequire_valid_user
|
||||||
|
|||||||
@@ -71,6 +71,7 @@ from khoj.routers.storage import upload_user_image_to_bucket
|
|||||||
from khoj.utils import state
|
from khoj.utils import state
|
||||||
from khoj.utils.helpers import (
|
from khoj.utils.helpers import (
|
||||||
ConversationCommand,
|
ConversationCommand,
|
||||||
|
clean_text_for_db,
|
||||||
command_descriptions,
|
command_descriptions,
|
||||||
convert_image_to_webp,
|
convert_image_to_webp,
|
||||||
get_country_code_from_timezone,
|
get_country_code_from_timezone,
|
||||||
@@ -631,7 +632,7 @@ async def generate_chat_title(
|
|||||||
raise HTTPException(status_code=404, detail="Conversation not found")
|
raise HTTPException(status_code=404, detail="Conversation not found")
|
||||||
|
|
||||||
new_title = await acreate_title_from_history(request.user.object, conversation=conversation)
|
new_title = await acreate_title_from_history(request.user.object, conversation=conversation)
|
||||||
conversation.slug = new_title[:200]
|
conversation.slug = clean_text_for_db(new_title[:200])
|
||||||
|
|
||||||
await conversation.asave()
|
await conversation.asave()
|
||||||
|
|
||||||
|
|||||||
@@ -833,3 +833,26 @@ def normalize_email(email: str, check_deliverability=False) -> tuple[str, bool]:
|
|||||||
return valid_email.normalized, True
|
return valid_email.normalized, True
|
||||||
except (EmailNotValidError, EmailUndeliverableError):
|
except (EmailNotValidError, EmailUndeliverableError):
|
||||||
return lower_email, False
|
return lower_email, False
|
||||||
|
|
||||||
|
|
||||||
|
def clean_text_for_db(text):
    """Remove characters that PostgreSQL DB cannot store in text fields.

    PostgreSQL text fields cannot contain NUL (0x00) characters.
    This is a database-level constraint.

    Args:
        text: Value about to be written to the database.

    Returns:
        ``text`` with every NUL character removed when it is a ``str``.
        Any other value (``None``, numbers, ...) is returned unchanged.
    """
    if not isinstance(text, str):
        return text
    return text.replace("\x00", "")


def clean_object_for_db(data):
    """Recursively clean PostgreSQL-incompatible characters from nested data structures.

    Strings are cleaned with ``clean_text_for_db``. Dicts, lists and tuples are
    rebuilt with their contents cleaned; the input object is never mutated.

    Args:
        data: A string, dict, list, tuple, or any other value.

    Returns:
        A cleaned copy for strings and supported containers; any other value
        is returned unchanged.
    """
    if isinstance(data, str):
        return clean_text_for_db(data)
    if isinstance(data, dict):
        # Keys are serialized as strings too, so a NUL in a key is just as
        # unsafe to store as one in a value. Non-string keys pass through.
        return {clean_text_for_db(k): clean_object_for_db(v) for k, v in data.items()}
    if isinstance(data, list):
        return [clean_object_for_db(item) for item in data]
    if isinstance(data, tuple):
        cleaned = [clean_object_for_db(item) for item in data]
        # Namedtuples take positional fields, so rebuild them through their
        # own constructor to keep the original type.
        return type(data)(*cleaned) if hasattr(data, "_fields") else tuple(cleaned)
    return data
|
||||||
|
|||||||
Reference in New Issue
Block a user