Collect files to index in a single dict to simplify the index/update controller

Simplifies code while maintaining typing
This commit is contained in:
Debanjum Singh Solanky
2024-01-09 15:10:41 +05:30
parent efe41aaaca
commit 5f9ac5a630

View File

@@ -63,37 +63,23 @@ async def update(
), ),
): ):
user = request.user.object user = request.user.object
index_files: Dict[str, Dict[str, str]] = {"org": {}, "markdown": {}, "pdf": {}, "plaintext": {}}
try: try:
logger.info(f"📬 Updating content index via API call by {client} client") logger.info(f"📬 Updating content index via API call by {client} client")
org_files: Dict[str, str] = {}
markdown_files: Dict[str, str] = {}
pdf_files: Dict[str, bytes] = {}
plaintext_files: Dict[str, str] = {}
for file in files: for file in files:
file_type, encoding = get_file_type(file.content_type) file_type, encoding = get_file_type(file.content_type)
dict_to_update = None if file_type in index_files:
if file_type == "org": index_files[file_type][file.filename] = (
dict_to_update = org_files
elif file_type == "markdown":
dict_to_update = markdown_files
elif file_type == "pdf":
dict_to_update = pdf_files # type: ignore
elif file_type == "plaintext":
dict_to_update = plaintext_files
if dict_to_update is not None:
dict_to_update[file.filename] = (
file.file.read().decode("utf-8") if encoding == "utf-8" else file.file.read() # type: ignore file.file.read().decode("utf-8") if encoding == "utf-8" else file.file.read() # type: ignore
) )
else: else:
logger.warning(f"Skipped indexing unsupported file type sent by {client} client: {file.filename}") logger.warning(f"Skipped indexing unsupported file type sent by {client} client: {file.filename}")
indexer_input = IndexerInput( indexer_input = IndexerInput(
org=org_files, org=index_files["org"],
markdown=markdown_files, markdown=index_files["markdown"],
pdf=pdf_files, pdf=index_files["pdf"],
plaintext=plaintext_files, plaintext=index_files["plaintext"],
) )
if state.config == None: if state.config == None:
@@ -143,10 +129,10 @@ async def update(
return Response(content="Failed", status_code=500) return Response(content="Failed", status_code=500)
indexing_metadata = { indexing_metadata = {
"num_org": len(org_files), "num_org": len(index_files["org"]),
"num_markdown": len(markdown_files), "num_markdown": len(index_files["markdown"]),
"num_pdf": len(pdf_files), "num_pdf": len(index_files["pdf"]),
"num_plaintext": len(plaintext_files), "num_plaintext": len(index_files["plaintext"]),
} }
update_telemetry_state( update_telemetry_state(