Make regex search tool results look more like grep results

This commit is contained in:
Debanjum
2025-08-18 16:52:59 -07:00
parent 9a8c707f84
commit 452c794e93
3 changed files with 18 additions and 13 deletions

View File

@@ -3023,6 +3023,7 @@ async def grep_files(
file_matches = await FileObjectAdapters.aget_file_objects_by_regex(user, db_pattern, path_prefix)
line_matches = []
line_matches_count = 0
for file_object in file_matches:
lines = file_object.raw_text.split("\n")
matched_line_numbers = []
@@ -3031,6 +3032,7 @@ async def grep_files(
for i, line in enumerate(lines, 1):
if regex.search(line):
matched_line_numbers.append(i)
line_matches_count += len(matched_line_numbers)
# Build context for each match
for line_num in matched_line_numbers:
@@ -3047,10 +3049,10 @@ async def grep_files(
if current_line_num == line_num:
# This is the matching line, mark it
context_lines.append(f"{file_object.file_name}:{current_line_num}:> {line_content}")
context_lines.append(f"{file_object.file_name}:{current_line_num}: {line_content}")
else:
# This is a context line
context_lines.append(f"{file_object.file_name}:{current_line_num}: {line_content}")
context_lines.append(f"{file_object.file_name}-{current_line_num}- {line_content}")
# Add separator between matches if showing context
if lines_before > 0 or lines_after > 0:
@@ -3065,7 +3067,7 @@ async def grep_files(
# Check if no results found
max_results = 1000
query = _generate_query(
len([m for m in line_matches if ":>" in m]),
line_matches_count,
len(file_matches),
path_prefix,
regex_pattern,

View File

@@ -613,9 +613,12 @@ tools_for_research_llm = {
Helpful to answer questions for which all relevant notes or documents are needed to complete the search. Example: "Notes that mention Tom".
You need to know all the correct keywords or regex patterns for this tool to be useful.
REMEMBER:
IMPORTANT:
- The regex pattern will ONLY match content on a single line. Multi-line matches are NOT supported (even if you use \\n).
TIPS:
- The output follows a grep-like format. Matches are prefixed with the file path and line number. Useful to combine with viewing file around specific line numbers.
An optional path prefix can restrict search to specific files/directories.
Use lines_before, lines_after to show context around matches.
"""

View File

@@ -46,8 +46,8 @@ async def test_grep_files_simple_match(default_user: KhojUser):
assert len(results) == 1
result = results[0]
assert "Found 2 matches for 'hello' in 1 documents" in result["query"]
assert "test.txt:1:> hello world" in result["compiled"]
assert "test.txt:3:> hello again" in result["compiled"]
assert "test.txt:1: hello world" in result["compiled"]
assert "test.txt:3: hello again" in result["compiled"]
@pytest.mark.django_db
@@ -110,7 +110,7 @@ async def test_grep_files_with_path_prefix(default_user: KhojUser):
result = results[0]
assert "Found 1 matches for 'hello' in 1 documents" in result["query"]
assert "in dir1/" in result["query"]
assert "dir1/test1.txt:1:> hello from dir1" in result["compiled"]
assert "dir1/test1.txt:1: hello from dir1" in result["compiled"]
assert "dir2/test2.txt" not in result["compiled"]
@@ -142,9 +142,9 @@ async def test_grep_files_with_context(default_user: KhojUser):
result = results[0]
assert "Found 1 matches for 'match' in 1 documents" in result["query"]
assert "Showing 1 lines before and 1 lines after" in result["query"]
assert "test.txt:2: line 2" in result["compiled"]
assert "test.txt:3:> line 3 (match)" in result["compiled"]
assert "test.txt:4: line 4" in result["compiled"]
assert "test.txt-2- line 2" in result["compiled"]
assert "test.txt:3: line 3 (match)" in result["compiled"]
assert "test.txt-4- line 4" in result["compiled"]
assert "line 1" not in result["compiled"]
assert "line 5" not in result["compiled"]
@@ -199,8 +199,8 @@ async def test_grep_files_multiple_files(default_user: KhojUser):
assert len(results) == 1
result = results[0]
assert "Found 2 matches for 'hello' in 2 documents" in result["query"]
assert "file1.txt:1:> hello from file1" in result["compiled"]
assert "file2.txt:1:> hello from file2" in result["compiled"]
assert "file1.txt:1: hello from file1" in result["compiled"]
assert "file2.txt:1: hello from file2" in result["compiled"]
@pytest.mark.parametrize(
@@ -272,4 +272,4 @@ async def test_grep_files_financial_entries_regex_patterns(
# All patterns should find the sailing entry
assert f"Found {expected_matches} matches" in result["query"]
assert 'ledger.txt:8:> 1984-06-24 * "Center for Boats" "Sailing" #bob' in result["compiled"]
assert 'ledger.txt:8: 1984-06-24 * "Center for Boats" "Sailing" #bob' in result["compiled"]