Mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-02 13:18:18 +00:00
Add context uri field to deep-link to the line number in the original doc
This commit is contained in:
@@ -23,6 +23,7 @@ logger = logging.getLogger(__name__)
|
||||
class Context(PydanticBaseModel):
|
||||
compiled: str
|
||||
file: str
|
||||
uri: str
|
||||
query: Optional[str] = None
|
||||
|
||||
|
||||
|
||||
@@ -99,6 +99,7 @@ class TextToEntries(ABC):
|
||||
entry.raw = compiled_entry_chunk if raw_is_compiled else TextToEntries.clean_field(entry.raw)
|
||||
entry.heading = TextToEntries.clean_field(entry.heading)
|
||||
entry.file = TextToEntries.clean_field(entry.file)
|
||||
entry_uri = TextToEntries.clean_field(entry_uri)
|
||||
|
||||
chunked_entries.append(
|
||||
Entry(
|
||||
@@ -107,6 +108,7 @@ class TextToEntries(ABC):
|
||||
heading=entry.heading,
|
||||
file=entry.file,
|
||||
corpus_id=corpus_id,
|
||||
uri=entry_uri,
|
||||
)
|
||||
)
|
||||
|
||||
@@ -192,6 +194,7 @@ class TextToEntries(ABC):
|
||||
file_type=file_type,
|
||||
hashed_value=entry_hash,
|
||||
corpus_id=entry.corpus_id,
|
||||
url=entry.uri,
|
||||
search_model=model,
|
||||
file_object=file_object,
|
||||
)
|
||||
|
||||
@@ -646,7 +646,7 @@ def generate_chatml_messages_with_context(
|
||||
|
||||
if not is_none_or_empty(chat.context):
|
||||
references = "\n\n".join(
|
||||
{f"# File: {item.file}\n## {item.compiled}\n" for item in chat.context or [] if isinstance(item, dict)}
|
||||
{f"# URI: {item.uri}\n## {item.compiled}\n" for item in chat.context or [] if isinstance(item, dict)}
|
||||
)
|
||||
message_context += [{"type": "text", "text": f"{prompts.notes_conversation.format(references=references)}"}]
|
||||
|
||||
|
||||
@@ -1265,6 +1265,7 @@ async def search_documents(
|
||||
"query": item.additional["query"],
|
||||
"compiled": item["entry"],
|
||||
"file": item.additional["file"],
|
||||
"uri": item.additional["uri"],
|
||||
}
|
||||
for item in search_results
|
||||
]
|
||||
@@ -2867,6 +2868,7 @@ async def view_file_content(
|
||||
{
|
||||
"query": query,
|
||||
"file": path,
|
||||
"uri": path,
|
||||
"compiled": filtered_text,
|
||||
}
|
||||
]
|
||||
@@ -2878,7 +2880,7 @@ async def view_file_content(
|
||||
logger.error(error_msg, exc_info=True)
|
||||
|
||||
# Return an error result in the expected format
|
||||
yield [{"query": query, "file": path, "compiled": error_msg}]
|
||||
yield [{"query": query, "file": path, "uri": path, "compiled": error_msg}]
|
||||
|
||||
|
||||
async def grep_files(
|
||||
@@ -2982,7 +2984,7 @@ async def grep_files(
|
||||
max_results,
|
||||
)
|
||||
if not line_matches:
|
||||
yield {"query": query, "file": path_prefix, "compiled": "No matches found."}
|
||||
yield {"query": query, "file": path_prefix, "uri": path_prefix, "compiled": "No matches found."}
|
||||
return
|
||||
|
||||
# Truncate matched lines list if too long
|
||||
@@ -2991,7 +2993,7 @@ async def grep_files(
|
||||
f"... {len(line_matches) - max_results} more results found. Use stricter regex or path to narrow down results."
|
||||
]
|
||||
|
||||
yield {"query": query, "file": path_prefix or "", "compiled": "\n".join(line_matches)}
|
||||
yield {"query": query, "file": path_prefix, "uri": path_prefix, "compiled": "\n".join(line_matches)}
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"Error using grep files tool: {str(e)}"
|
||||
@@ -3000,6 +3002,7 @@ async def grep_files(
|
||||
{
|
||||
"query": _generate_query(0, 0, path_prefix or "", regex_pattern, lines_before, lines_after),
|
||||
"file": path_prefix,
|
||||
"uri": path_prefix,
|
||||
"compiled": error_msg,
|
||||
}
|
||||
]
|
||||
@@ -3032,7 +3035,7 @@ async def list_files(
|
||||
file_objects = await FileObjectAdapters.aget_file_objects_by_path_prefix(user, path)
|
||||
|
||||
if not file_objects:
|
||||
yield {"query": _generate_query(0, path, pattern), "file": path, "compiled": "No files found."}
|
||||
yield {"query": _generate_query(0, path, pattern), "file": path, "uri": path, "compiled": "No files found."}
|
||||
return
|
||||
|
||||
# Extract file names from file objects
|
||||
@@ -3047,7 +3050,7 @@ async def list_files(
|
||||
|
||||
query = _generate_query(len(files), path, pattern)
|
||||
if not files:
|
||||
yield {"query": query, "file": path, "compiled": "No files found."}
|
||||
yield {"query": query, "file": path, "uri": path, "compiled": "No files found."}
|
||||
return
|
||||
|
||||
# Truncate the list if it's too long
|
||||
@@ -3057,9 +3060,9 @@ async def list_files(
|
||||
f"... {len(files) - max_files} more files found. Use glob pattern to narrow down results."
|
||||
]
|
||||
|
||||
yield {"query": query, "file": path, "compiled": "\n- ".join(files)}
|
||||
yield {"query": query, "file": path, "uri": path, "compiled": "\n- ".join(files)}
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"Error listing files in {path}: {str(e)}"
|
||||
logger.error(error_msg, exc_info=True)
|
||||
yield {"query": query, "file": path, "compiled": error_msg}
|
||||
yield {"query": query, "file": path, "uri": path, "compiled": error_msg}
|
||||
|
||||
@@ -157,6 +157,7 @@ def collate_results(hits, dedupe=True):
|
||||
"additional": {
|
||||
"source": hit.file_source,
|
||||
"file": hit.file_path,
|
||||
"uri": hit.url,
|
||||
"compiled": hit.compiled,
|
||||
"heading": hit.heading,
|
||||
},
|
||||
@@ -180,6 +181,7 @@ def deduplicated_search_responses(hits: List[SearchResponse]):
|
||||
"additional": {
|
||||
"source": hit.additional["source"],
|
||||
"file": hit.additional["file"],
|
||||
"uri": hit.additional["uri"],
|
||||
"query": hit.additional["query"],
|
||||
"compiled": hit.additional["compiled"],
|
||||
"heading": hit.additional["heading"],
|
||||
|
||||
@@ -176,6 +176,7 @@ class Entry:
|
||||
compiled: str
|
||||
heading: Optional[str]
|
||||
file: Optional[str]
|
||||
uri: Optional[str] = None
|
||||
corpus_id: str
|
||||
|
||||
def __init__(
|
||||
@@ -184,6 +185,7 @@ class Entry:
|
||||
compiled: str = None,
|
||||
heading: Optional[str] = None,
|
||||
file: Optional[str] = None,
|
||||
uri: Optional[str] = None,
|
||||
corpus_id: uuid.UUID = None,
|
||||
):
|
||||
self.raw = raw
|
||||
@@ -191,6 +193,14 @@ class Entry:
|
||||
self.heading = heading
|
||||
self.file = file
|
||||
self.corpus_id = str(corpus_id)
|
||||
if uri:
|
||||
self.uri = uri
|
||||
elif file and (file.startswith("http") or file.startswith("file://")):
|
||||
self.uri = file
|
||||
elif file:
|
||||
self.uri = f"file://{file}"
|
||||
else:
|
||||
self.uri = None
|
||||
|
||||
def to_json(self) -> str:
|
||||
return json.dumps(self.__dict__, ensure_ascii=False)
|
||||
@@ -206,4 +216,5 @@ class Entry:
|
||||
file=dictionary.get("file", None),
|
||||
heading=dictionary.get("heading", None),
|
||||
corpus_id=dictionary.get("corpus_id", None),
|
||||
uri=dictionary.get("uri", None),
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user