Mirror of https://github.com/khoaliber/khoj.git (synced 2026-03-02 05:29:12 +00:00)
Make regex search tool results look more like grep results
This commit is contained in:
@@ -3023,6 +3023,7 @@ async def grep_files(
|
||||
file_matches = await FileObjectAdapters.aget_file_objects_by_regex(user, db_pattern, path_prefix)
|
||||
|
||||
line_matches = []
|
||||
+ line_matches_count = 0
|
||||
for file_object in file_matches:
|
||||
lines = file_object.raw_text.split("\n")
|
||||
matched_line_numbers = []
|
||||
@@ -3031,6 +3032,7 @@ async def grep_files(
|
||||
for i, line in enumerate(lines, 1):
|
||||
if regex.search(line):
|
||||
matched_line_numbers.append(i)
|
||||
+ line_matches_count += len(matched_line_numbers)
|
||||
|
||||
# Build context for each match
|
||||
for line_num in matched_line_numbers:
|
||||
@@ -3047,10 +3049,10 @@ async def grep_files(
|
||||
|
||||
if current_line_num == line_num:
|
||||
# This is the matching line, mark it
|
||||
- context_lines.append(f"{file_object.file_name}:{current_line_num}:> {line_content}")
|
||||
+ context_lines.append(f"{file_object.file_name}:{current_line_num}: {line_content}")
|
||||
else:
|
||||
# This is a context line
|
||||
- context_lines.append(f"{file_object.file_name}:{current_line_num}: {line_content}")
|
||||
+ context_lines.append(f"{file_object.file_name}-{current_line_num}- {line_content}")
|
||||
|
||||
# Add separator between matches if showing context
|
||||
if lines_before > 0 or lines_after > 0:
|
||||
@@ -3065,7 +3067,7 @@ async def grep_files(
|
||||
# Check if no results found
|
||||
max_results = 1000
|
||||
query = _generate_query(
|
||||
- len([m for m in line_matches if ":>" in m]),
|
||||
+ line_matches_count,
|
||||
len(file_matches),
|
||||
path_prefix,
|
||||
regex_pattern,
|
||||
|
||||
@@ -613,9 +613,12 @@ tools_for_research_llm = {
|
||||
Helpful to answer questions for which all relevant notes or documents are needed to complete the search. Example: "Notes that mention Tom".
|
||||
You need to know all the correct keywords or regex patterns for this tool to be useful.
|
||||
|
||||
- REMEMBER:
|
||||
+ IMPORTANT:
|
||||
  - The regex pattern will ONLY match content on a single line. Multi-line matches are NOT supported (even if you use \\n).
|
||||
+
|
||||
+ TIPS:
|
||||
+ - The output follows a grep-like format. Matches are prefixed with the file path and line number. Useful to combine with viewing file around specific line numbers.
|
||||
|
||||
An optional path prefix can restrict search to specific files/directories.
|
||||
Use lines_before, lines_after to show context around matches.
|
||||
"""
|
||||
|
||||
@@ -46,8 +46,8 @@ async def test_grep_files_simple_match(default_user: KhojUser):
|
||||
assert len(results) == 1
|
||||
result = results[0]
|
||||
assert "Found 2 matches for 'hello' in 1 documents" in result["query"]
|
||||
- assert "test.txt:1:> hello world" in result["compiled"]
|
||||
- assert "test.txt:3:> hello again" in result["compiled"]
|
||||
+ assert "test.txt:1: hello world" in result["compiled"]
|
||||
+ assert "test.txt:3: hello again" in result["compiled"]
|
||||
|
||||
|
||||
@pytest.mark.django_db
|
||||
@@ -110,7 +110,7 @@ async def test_grep_files_with_path_prefix(default_user: KhojUser):
|
||||
result = results[0]
|
||||
assert "Found 1 matches for 'hello' in 1 documents" in result["query"]
|
||||
assert "in dir1/" in result["query"]
|
||||
- assert "dir1/test1.txt:1:> hello from dir1" in result["compiled"]
|
||||
+ assert "dir1/test1.txt:1: hello from dir1" in result["compiled"]
|
||||
assert "dir2/test2.txt" not in result["compiled"]
|
||||
|
||||
|
||||
@@ -142,9 +142,9 @@ async def test_grep_files_with_context(default_user: KhojUser):
|
||||
result = results[0]
|
||||
assert "Found 1 matches for 'match' in 1 documents" in result["query"]
|
||||
assert "Showing 1 lines before and 1 lines after" in result["query"]
|
||||
- assert "test.txt:2: line 2" in result["compiled"]
|
||||
- assert "test.txt:3:> line 3 (match)" in result["compiled"]
|
||||
- assert "test.txt:4: line 4" in result["compiled"]
|
||||
+ assert "test.txt-2- line 2" in result["compiled"]
|
||||
+ assert "test.txt:3: line 3 (match)" in result["compiled"]
|
||||
+ assert "test.txt-4- line 4" in result["compiled"]
|
||||
assert "line 1" not in result["compiled"]
|
||||
assert "line 5" not in result["compiled"]
|
||||
|
||||
@@ -199,8 +199,8 @@ async def test_grep_files_multiple_files(default_user: KhojUser):
|
||||
assert len(results) == 1
|
||||
result = results[0]
|
||||
assert "Found 2 matches for 'hello' in 2 documents" in result["query"]
|
||||
- assert "file1.txt:1:> hello from file1" in result["compiled"]
|
||||
- assert "file2.txt:1:> hello from file2" in result["compiled"]
|
||||
+ assert "file1.txt:1: hello from file1" in result["compiled"]
|
||||
+ assert "file2.txt:1: hello from file2" in result["compiled"]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
@@ -272,4 +272,4 @@ async def test_grep_files_financial_entries_regex_patterns(
|
||||
|
||||
# All patterns should find the sailing entry
|
||||
assert f"Found {expected_matches} matches" in result["query"]
|
||||
- assert 'ledger.txt:8:> 1984-06-24 * "Center for Boats" "Sailing" #bob' in result["compiled"]
|
||||
+ assert 'ledger.txt:8: 1984-06-24 * "Center for Boats" "Sailing" #bob' in result["compiled"]
|
||||
|
||||
Reference in New Issue
Block a user