Add context uri field to deep-link to the line number in the original doc

This commit is contained in:
Debanjum
2025-06-23 19:04:06 -07:00
parent 820b4523fd
commit e90ab5341a
6 changed files with 28 additions and 8 deletions

View File

@@ -23,6 +23,7 @@ logger = logging.getLogger(__name__)
class Context(PydanticBaseModel):
compiled: str
file: str
uri: str
query: Optional[str] = None

View File

@@ -99,6 +99,7 @@ class TextToEntries(ABC):
entry.raw = compiled_entry_chunk if raw_is_compiled else TextToEntries.clean_field(entry.raw)
entry.heading = TextToEntries.clean_field(entry.heading)
entry.file = TextToEntries.clean_field(entry.file)
entry_uri = TextToEntries.clean_field(entry_uri)
chunked_entries.append(
Entry(
@@ -107,6 +108,7 @@ class TextToEntries(ABC):
heading=entry.heading,
file=entry.file,
corpus_id=corpus_id,
uri=entry_uri,
)
)
@@ -192,6 +194,7 @@ class TextToEntries(ABC):
file_type=file_type,
hashed_value=entry_hash,
corpus_id=entry.corpus_id,
url=entry.uri,
search_model=model,
file_object=file_object,
)

View File

@@ -646,7 +646,7 @@ def generate_chatml_messages_with_context(
if not is_none_or_empty(chat.context):
references = "\n\n".join(
{f"# File: {item.file}\n## {item.compiled}\n" for item in chat.context or [] if isinstance(item, dict)}
{f"# URI: {item.uri}\n## {item.compiled}\n" for item in chat.context or [] if isinstance(item, dict)}
)
message_context += [{"type": "text", "text": f"{prompts.notes_conversation.format(references=references)}"}]

View File

@@ -1265,6 +1265,7 @@ async def search_documents(
"query": item.additional["query"],
"compiled": item["entry"],
"file": item.additional["file"],
"uri": item.additional["uri"],
}
for item in search_results
]
@@ -2867,6 +2868,7 @@ async def view_file_content(
{
"query": query,
"file": path,
"uri": path,
"compiled": filtered_text,
}
]
@@ -2878,7 +2880,7 @@ async def view_file_content(
logger.error(error_msg, exc_info=True)
# Return an error result in the expected format
yield [{"query": query, "file": path, "compiled": error_msg}]
yield [{"query": query, "file": path, "uri": path, "compiled": error_msg}]
async def grep_files(
@@ -2982,7 +2984,7 @@ async def grep_files(
max_results,
)
if not line_matches:
yield {"query": query, "file": path_prefix, "compiled": "No matches found."}
yield {"query": query, "file": path_prefix, "uri": path_prefix, "compiled": "No matches found."}
return
# Truncate matched lines list if too long
@@ -2991,7 +2993,7 @@ async def grep_files(
f"... {len(line_matches) - max_results} more results found. Use stricter regex or path to narrow down results."
]
yield {"query": query, "file": path_prefix or "", "compiled": "\n".join(line_matches)}
yield {"query": query, "file": path_prefix, "uri": path_prefix, "compiled": "\n".join(line_matches)}
except Exception as e:
error_msg = f"Error using grep files tool: {str(e)}"
@@ -3000,6 +3002,7 @@ async def grep_files(
{
"query": _generate_query(0, 0, path_prefix or "", regex_pattern, lines_before, lines_after),
"file": path_prefix,
"uri": path_prefix,
"compiled": error_msg,
}
]
@@ -3032,7 +3035,7 @@ async def list_files(
file_objects = await FileObjectAdapters.aget_file_objects_by_path_prefix(user, path)
if not file_objects:
yield {"query": _generate_query(0, path, pattern), "file": path, "compiled": "No files found."}
yield {"query": _generate_query(0, path, pattern), "file": path, "uri": path, "compiled": "No files found."}
return
# Extract file names from file objects
@@ -3047,7 +3050,7 @@ async def list_files(
query = _generate_query(len(files), path, pattern)
if not files:
yield {"query": query, "file": path, "compiled": "No files found."}
yield {"query": query, "file": path, "uri": path, "compiled": "No files found."}
return
# Truncate the list if it's too long
@@ -3057,9 +3060,9 @@ async def list_files(
f"... {len(files) - max_files} more files found. Use glob pattern to narrow down results."
]
yield {"query": query, "file": path, "compiled": "\n- ".join(files)}
yield {"query": query, "file": path, "uri": path, "compiled": "\n- ".join(files)}
except Exception as e:
error_msg = f"Error listing files in {path}: {str(e)}"
logger.error(error_msg, exc_info=True)
yield {"query": query, "file": path, "compiled": error_msg}
yield {"query": query, "file": path, "uri": path, "compiled": error_msg}

View File

@@ -157,6 +157,7 @@ def collate_results(hits, dedupe=True):
"additional": {
"source": hit.file_source,
"file": hit.file_path,
"uri": hit.url,
"compiled": hit.compiled,
"heading": hit.heading,
},
@@ -180,6 +181,7 @@ def deduplicated_search_responses(hits: List[SearchResponse]):
"additional": {
"source": hit.additional["source"],
"file": hit.additional["file"],
"uri": hit.additional["uri"],
"query": hit.additional["query"],
"compiled": hit.additional["compiled"],
"heading": hit.additional["heading"],

View File

@@ -176,6 +176,7 @@ class Entry:
compiled: str
heading: Optional[str]
file: Optional[str]
uri: Optional[str] = None
corpus_id: str
def __init__(
@@ -184,6 +185,7 @@ class Entry:
compiled: str = None,
heading: Optional[str] = None,
file: Optional[str] = None,
uri: Optional[str] = None,
corpus_id: uuid.UUID = None,
):
self.raw = raw
@@ -191,6 +193,14 @@ class Entry:
self.heading = heading
self.file = file
self.corpus_id = str(corpus_id)
if uri:
self.uri = uri
elif file and (file.startswith("http") or file.startswith("file://")):
self.uri = file
elif file:
self.uri = f"file://{file}"
else:
self.uri = None
def to_json(self) -> str:
return json.dumps(self.__dict__, ensure_ascii=False)
@@ -206,4 +216,5 @@ class Entry:
file=dictionary.get("file", None),
heading=dictionary.get("heading", None),
corpus_id=dictionary.get("corpus_id", None),
uri=dictionary.get("uri", None),
)