mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-08 05:39:13 +00:00
Add a context URI field to deep-link to the line number in the original doc
This commit is contained in:
@@ -23,6 +23,7 @@ logger = logging.getLogger(__name__)
|
|||||||
class Context(PydanticBaseModel):
|
class Context(PydanticBaseModel):
|
||||||
compiled: str
|
compiled: str
|
||||||
file: str
|
file: str
|
||||||
|
uri: str
|
||||||
query: Optional[str] = None
|
query: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -99,6 +99,7 @@ class TextToEntries(ABC):
|
|||||||
entry.raw = compiled_entry_chunk if raw_is_compiled else TextToEntries.clean_field(entry.raw)
|
entry.raw = compiled_entry_chunk if raw_is_compiled else TextToEntries.clean_field(entry.raw)
|
||||||
entry.heading = TextToEntries.clean_field(entry.heading)
|
entry.heading = TextToEntries.clean_field(entry.heading)
|
||||||
entry.file = TextToEntries.clean_field(entry.file)
|
entry.file = TextToEntries.clean_field(entry.file)
|
||||||
|
entry_uri = TextToEntries.clean_field(entry_uri)
|
||||||
|
|
||||||
chunked_entries.append(
|
chunked_entries.append(
|
||||||
Entry(
|
Entry(
|
||||||
@@ -107,6 +108,7 @@ class TextToEntries(ABC):
|
|||||||
heading=entry.heading,
|
heading=entry.heading,
|
||||||
file=entry.file,
|
file=entry.file,
|
||||||
corpus_id=corpus_id,
|
corpus_id=corpus_id,
|
||||||
|
uri=entry_uri,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -192,6 +194,7 @@ class TextToEntries(ABC):
|
|||||||
file_type=file_type,
|
file_type=file_type,
|
||||||
hashed_value=entry_hash,
|
hashed_value=entry_hash,
|
||||||
corpus_id=entry.corpus_id,
|
corpus_id=entry.corpus_id,
|
||||||
|
url=entry.uri,
|
||||||
search_model=model,
|
search_model=model,
|
||||||
file_object=file_object,
|
file_object=file_object,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -646,7 +646,7 @@ def generate_chatml_messages_with_context(
|
|||||||
|
|
||||||
if not is_none_or_empty(chat.context):
|
if not is_none_or_empty(chat.context):
|
||||||
references = "\n\n".join(
|
references = "\n\n".join(
|
||||||
{f"# File: {item.file}\n## {item.compiled}\n" for item in chat.context or [] if isinstance(item, dict)}
|
{f"# URI: {item.uri}\n## {item.compiled}\n" for item in chat.context or [] if isinstance(item, dict)}
|
||||||
)
|
)
|
||||||
message_context += [{"type": "text", "text": f"{prompts.notes_conversation.format(references=references)}"}]
|
message_context += [{"type": "text", "text": f"{prompts.notes_conversation.format(references=references)}"}]
|
||||||
|
|
||||||
|
|||||||
@@ -1265,6 +1265,7 @@ async def search_documents(
|
|||||||
"query": item.additional["query"],
|
"query": item.additional["query"],
|
||||||
"compiled": item["entry"],
|
"compiled": item["entry"],
|
||||||
"file": item.additional["file"],
|
"file": item.additional["file"],
|
||||||
|
"uri": item.additional["uri"],
|
||||||
}
|
}
|
||||||
for item in search_results
|
for item in search_results
|
||||||
]
|
]
|
||||||
@@ -2867,6 +2868,7 @@ async def view_file_content(
|
|||||||
{
|
{
|
||||||
"query": query,
|
"query": query,
|
||||||
"file": path,
|
"file": path,
|
||||||
|
"uri": path,
|
||||||
"compiled": filtered_text,
|
"compiled": filtered_text,
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
@@ -2878,7 +2880,7 @@ async def view_file_content(
|
|||||||
logger.error(error_msg, exc_info=True)
|
logger.error(error_msg, exc_info=True)
|
||||||
|
|
||||||
# Return an error result in the expected format
|
# Return an error result in the expected format
|
||||||
yield [{"query": query, "file": path, "compiled": error_msg}]
|
yield [{"query": query, "file": path, "uri": path, "compiled": error_msg}]
|
||||||
|
|
||||||
|
|
||||||
async def grep_files(
|
async def grep_files(
|
||||||
@@ -2982,7 +2984,7 @@ async def grep_files(
|
|||||||
max_results,
|
max_results,
|
||||||
)
|
)
|
||||||
if not line_matches:
|
if not line_matches:
|
||||||
yield {"query": query, "file": path_prefix, "compiled": "No matches found."}
|
yield {"query": query, "file": path_prefix, "uri": path_prefix, "compiled": "No matches found."}
|
||||||
return
|
return
|
||||||
|
|
||||||
# Truncate matched lines list if too long
|
# Truncate matched lines list if too long
|
||||||
@@ -2991,7 +2993,7 @@ async def grep_files(
|
|||||||
f"... {len(line_matches) - max_results} more results found. Use stricter regex or path to narrow down results."
|
f"... {len(line_matches) - max_results} more results found. Use stricter regex or path to narrow down results."
|
||||||
]
|
]
|
||||||
|
|
||||||
yield {"query": query, "file": path_prefix or "", "compiled": "\n".join(line_matches)}
|
yield {"query": query, "file": path_prefix, "uri": path_prefix, "compiled": "\n".join(line_matches)}
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
error_msg = f"Error using grep files tool: {str(e)}"
|
error_msg = f"Error using grep files tool: {str(e)}"
|
||||||
@@ -3000,6 +3002,7 @@ async def grep_files(
|
|||||||
{
|
{
|
||||||
"query": _generate_query(0, 0, path_prefix or "", regex_pattern, lines_before, lines_after),
|
"query": _generate_query(0, 0, path_prefix or "", regex_pattern, lines_before, lines_after),
|
||||||
"file": path_prefix,
|
"file": path_prefix,
|
||||||
|
"uri": path_prefix,
|
||||||
"compiled": error_msg,
|
"compiled": error_msg,
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
@@ -3032,7 +3035,7 @@ async def list_files(
|
|||||||
file_objects = await FileObjectAdapters.aget_file_objects_by_path_prefix(user, path)
|
file_objects = await FileObjectAdapters.aget_file_objects_by_path_prefix(user, path)
|
||||||
|
|
||||||
if not file_objects:
|
if not file_objects:
|
||||||
yield {"query": _generate_query(0, path, pattern), "file": path, "compiled": "No files found."}
|
yield {"query": _generate_query(0, path, pattern), "file": path, "uri": path, "compiled": "No files found."}
|
||||||
return
|
return
|
||||||
|
|
||||||
# Extract file names from file objects
|
# Extract file names from file objects
|
||||||
@@ -3047,7 +3050,7 @@ async def list_files(
|
|||||||
|
|
||||||
query = _generate_query(len(files), path, pattern)
|
query = _generate_query(len(files), path, pattern)
|
||||||
if not files:
|
if not files:
|
||||||
yield {"query": query, "file": path, "compiled": "No files found."}
|
yield {"query": query, "file": path, "uri": path, "compiled": "No files found."}
|
||||||
return
|
return
|
||||||
|
|
||||||
# Truncate the list if it's too long
|
# Truncate the list if it's too long
|
||||||
@@ -3057,9 +3060,9 @@ async def list_files(
|
|||||||
f"... {len(files) - max_files} more files found. Use glob pattern to narrow down results."
|
f"... {len(files) - max_files} more files found. Use glob pattern to narrow down results."
|
||||||
]
|
]
|
||||||
|
|
||||||
yield {"query": query, "file": path, "compiled": "\n- ".join(files)}
|
yield {"query": query, "file": path, "uri": path, "compiled": "\n- ".join(files)}
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
error_msg = f"Error listing files in {path}: {str(e)}"
|
error_msg = f"Error listing files in {path}: {str(e)}"
|
||||||
logger.error(error_msg, exc_info=True)
|
logger.error(error_msg, exc_info=True)
|
||||||
yield {"query": query, "file": path, "compiled": error_msg}
|
yield {"query": query, "file": path, "uri": path, "compiled": error_msg}
|
||||||
|
|||||||
@@ -157,6 +157,7 @@ def collate_results(hits, dedupe=True):
|
|||||||
"additional": {
|
"additional": {
|
||||||
"source": hit.file_source,
|
"source": hit.file_source,
|
||||||
"file": hit.file_path,
|
"file": hit.file_path,
|
||||||
|
"uri": hit.url,
|
||||||
"compiled": hit.compiled,
|
"compiled": hit.compiled,
|
||||||
"heading": hit.heading,
|
"heading": hit.heading,
|
||||||
},
|
},
|
||||||
@@ -180,6 +181,7 @@ def deduplicated_search_responses(hits: List[SearchResponse]):
|
|||||||
"additional": {
|
"additional": {
|
||||||
"source": hit.additional["source"],
|
"source": hit.additional["source"],
|
||||||
"file": hit.additional["file"],
|
"file": hit.additional["file"],
|
||||||
|
"uri": hit.additional["uri"],
|
||||||
"query": hit.additional["query"],
|
"query": hit.additional["query"],
|
||||||
"compiled": hit.additional["compiled"],
|
"compiled": hit.additional["compiled"],
|
||||||
"heading": hit.additional["heading"],
|
"heading": hit.additional["heading"],
|
||||||
|
|||||||
@@ -176,6 +176,7 @@ class Entry:
|
|||||||
compiled: str
|
compiled: str
|
||||||
heading: Optional[str]
|
heading: Optional[str]
|
||||||
file: Optional[str]
|
file: Optional[str]
|
||||||
|
uri: Optional[str] = None
|
||||||
corpus_id: str
|
corpus_id: str
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
@@ -184,6 +185,7 @@ class Entry:
|
|||||||
compiled: str = None,
|
compiled: str = None,
|
||||||
heading: Optional[str] = None,
|
heading: Optional[str] = None,
|
||||||
file: Optional[str] = None,
|
file: Optional[str] = None,
|
||||||
|
uri: Optional[str] = None,
|
||||||
corpus_id: uuid.UUID = None,
|
corpus_id: uuid.UUID = None,
|
||||||
):
|
):
|
||||||
self.raw = raw
|
self.raw = raw
|
||||||
@@ -191,6 +193,14 @@ class Entry:
|
|||||||
self.heading = heading
|
self.heading = heading
|
||||||
self.file = file
|
self.file = file
|
||||||
self.corpus_id = str(corpus_id)
|
self.corpus_id = str(corpus_id)
|
||||||
|
if uri:
|
||||||
|
self.uri = uri
|
||||||
|
elif file and (file.startswith("http") or file.startswith("file://")):
|
||||||
|
self.uri = file
|
||||||
|
elif file:
|
||||||
|
self.uri = f"file://{file}"
|
||||||
|
else:
|
||||||
|
self.uri = None
|
||||||
|
|
||||||
def to_json(self) -> str:
|
def to_json(self) -> str:
|
||||||
return json.dumps(self.__dict__, ensure_ascii=False)
|
return json.dumps(self.__dict__, ensure_ascii=False)
|
||||||
@@ -206,4 +216,5 @@ class Entry:
|
|||||||
file=dictionary.get("file", None),
|
file=dictionary.get("file", None),
|
||||||
heading=dictionary.get("heading", None),
|
heading=dictionary.get("heading", None),
|
||||||
corpus_id=dictionary.get("corpus_id", None),
|
corpus_id=dictionary.get("corpus_id", None),
|
||||||
|
uri=dictionary.get("uri", None),
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user