mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-10 13:26:13 +00:00
Collect files to index in single dict to simplify index/update controller
Simplifies code while maintaining typing
This commit is contained in:
@@ -63,37 +63,23 @@ async def update(
|
|||||||
),
|
),
|
||||||
):
|
):
|
||||||
user = request.user.object
|
user = request.user.object
|
||||||
|
index_files: Dict[str, Dict[str, str]] = {"org": {}, "markdown": {}, "pdf": {}, "plaintext": {}}
|
||||||
try:
|
try:
|
||||||
logger.info(f"📬 Updating content index via API call by {client} client")
|
logger.info(f"📬 Updating content index via API call by {client} client")
|
||||||
org_files: Dict[str, str] = {}
|
|
||||||
markdown_files: Dict[str, str] = {}
|
|
||||||
pdf_files: Dict[str, bytes] = {}
|
|
||||||
plaintext_files: Dict[str, str] = {}
|
|
||||||
|
|
||||||
for file in files:
|
for file in files:
|
||||||
file_type, encoding = get_file_type(file.content_type)
|
file_type, encoding = get_file_type(file.content_type)
|
||||||
dict_to_update = None
|
if file_type in index_files:
|
||||||
if file_type == "org":
|
index_files[file_type][file.filename] = (
|
||||||
dict_to_update = org_files
|
|
||||||
elif file_type == "markdown":
|
|
||||||
dict_to_update = markdown_files
|
|
||||||
elif file_type == "pdf":
|
|
||||||
dict_to_update = pdf_files # type: ignore
|
|
||||||
elif file_type == "plaintext":
|
|
||||||
dict_to_update = plaintext_files
|
|
||||||
|
|
||||||
if dict_to_update is not None:
|
|
||||||
dict_to_update[file.filename] = (
|
|
||||||
file.file.read().decode("utf-8") if encoding == "utf-8" else file.file.read() # type: ignore
|
file.file.read().decode("utf-8") if encoding == "utf-8" else file.file.read() # type: ignore
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
logger.warning(f"Skipped indexing unsupported file type sent by {client} client: {file.filename}")
|
logger.warning(f"Skipped indexing unsupported file type sent by {client} client: {file.filename}")
|
||||||
|
|
||||||
indexer_input = IndexerInput(
|
indexer_input = IndexerInput(
|
||||||
org=org_files,
|
org=index_files["org"],
|
||||||
markdown=markdown_files,
|
markdown=index_files["markdown"],
|
||||||
pdf=pdf_files,
|
pdf=index_files["pdf"],
|
||||||
plaintext=plaintext_files,
|
plaintext=index_files["plaintext"],
|
||||||
)
|
)
|
||||||
|
|
||||||
if state.config == None:
|
if state.config == None:
|
||||||
@@ -143,10 +129,10 @@ async def update(
|
|||||||
return Response(content="Failed", status_code=500)
|
return Response(content="Failed", status_code=500)
|
||||||
|
|
||||||
indexing_metadata = {
|
indexing_metadata = {
|
||||||
"num_org": len(org_files),
|
"num_org": len(index_files["org"]),
|
||||||
"num_markdown": len(markdown_files),
|
"num_markdown": len(index_files["markdown"]),
|
||||||
"num_pdf": len(pdf_files),
|
"num_pdf": len(index_files["pdf"]),
|
||||||
"num_plaintext": len(plaintext_files),
|
"num_plaintext": len(index_files["plaintext"]),
|
||||||
}
|
}
|
||||||
|
|
||||||
update_telemetry_state(
|
update_telemetry_state(
|
||||||
|
|||||||
Reference in New Issue
Block a user