mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-02 13:18:18 +00:00
Make regex search tool results look more like grep results
This commit is contained in:
@@ -3023,6 +3023,7 @@ async def grep_files(
|
|||||||
file_matches = await FileObjectAdapters.aget_file_objects_by_regex(user, db_pattern, path_prefix)
|
file_matches = await FileObjectAdapters.aget_file_objects_by_regex(user, db_pattern, path_prefix)
|
||||||
|
|
||||||
line_matches = []
|
line_matches = []
|
||||||
|
line_matches_count = 0
|
||||||
for file_object in file_matches:
|
for file_object in file_matches:
|
||||||
lines = file_object.raw_text.split("\n")
|
lines = file_object.raw_text.split("\n")
|
||||||
matched_line_numbers = []
|
matched_line_numbers = []
|
||||||
@@ -3031,6 +3032,7 @@ async def grep_files(
|
|||||||
for i, line in enumerate(lines, 1):
|
for i, line in enumerate(lines, 1):
|
||||||
if regex.search(line):
|
if regex.search(line):
|
||||||
matched_line_numbers.append(i)
|
matched_line_numbers.append(i)
|
||||||
|
line_matches_count += len(matched_line_numbers)
|
||||||
|
|
||||||
# Build context for each match
|
# Build context for each match
|
||||||
for line_num in matched_line_numbers:
|
for line_num in matched_line_numbers:
|
||||||
@@ -3047,10 +3049,10 @@ async def grep_files(
|
|||||||
|
|
||||||
if current_line_num == line_num:
|
if current_line_num == line_num:
|
||||||
# This is the matching line, mark it
|
# This is the matching line, mark it
|
||||||
context_lines.append(f"{file_object.file_name}:{current_line_num}:> {line_content}")
|
context_lines.append(f"{file_object.file_name}:{current_line_num}: {line_content}")
|
||||||
else:
|
else:
|
||||||
# This is a context line
|
# This is a context line
|
||||||
context_lines.append(f"{file_object.file_name}:{current_line_num}: {line_content}")
|
context_lines.append(f"{file_object.file_name}-{current_line_num}- {line_content}")
|
||||||
|
|
||||||
# Add separator between matches if showing context
|
# Add separator between matches if showing context
|
||||||
if lines_before > 0 or lines_after > 0:
|
if lines_before > 0 or lines_after > 0:
|
||||||
@@ -3065,7 +3067,7 @@ async def grep_files(
|
|||||||
# Check if no results found
|
# Check if no results found
|
||||||
max_results = 1000
|
max_results = 1000
|
||||||
query = _generate_query(
|
query = _generate_query(
|
||||||
len([m for m in line_matches if ":>" in m]),
|
line_matches_count,
|
||||||
len(file_matches),
|
len(file_matches),
|
||||||
path_prefix,
|
path_prefix,
|
||||||
regex_pattern,
|
regex_pattern,
|
||||||
|
|||||||
@@ -613,9 +613,12 @@ tools_for_research_llm = {
|
|||||||
Helpful to answer questions for which all relevant notes or documents are needed to complete the search. Example: "Notes that mention Tom".
|
Helpful to answer questions for which all relevant notes or documents are needed to complete the search. Example: "Notes that mention Tom".
|
||||||
You need to know all the correct keywords or regex patterns for this tool to be useful.
|
You need to know all the correct keywords or regex patterns for this tool to be useful.
|
||||||
|
|
||||||
REMEMBER:
|
IMPORTANT:
|
||||||
- The regex pattern will ONLY match content on a single line. Multi-line matches are NOT supported (even if you use \\n).
|
- The regex pattern will ONLY match content on a single line. Multi-line matches are NOT supported (even if you use \\n).
|
||||||
|
|
||||||
|
TIPS:
|
||||||
|
- The output follows a grep-like format. Matches are prefixed with the file path and line number. Useful to combine with viewing file around specific line numbers.
|
||||||
|
|
||||||
An optional path prefix can restrict search to specific files/directories.
|
An optional path prefix can restrict search to specific files/directories.
|
||||||
Use lines_before, lines_after to show context around matches.
|
Use lines_before, lines_after to show context around matches.
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -46,8 +46,8 @@ async def test_grep_files_simple_match(default_user: KhojUser):
|
|||||||
assert len(results) == 1
|
assert len(results) == 1
|
||||||
result = results[0]
|
result = results[0]
|
||||||
assert "Found 2 matches for 'hello' in 1 documents" in result["query"]
|
assert "Found 2 matches for 'hello' in 1 documents" in result["query"]
|
||||||
assert "test.txt:1:> hello world" in result["compiled"]
|
assert "test.txt:1: hello world" in result["compiled"]
|
||||||
assert "test.txt:3:> hello again" in result["compiled"]
|
assert "test.txt:3: hello again" in result["compiled"]
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.django_db
|
@pytest.mark.django_db
|
||||||
@@ -110,7 +110,7 @@ async def test_grep_files_with_path_prefix(default_user: KhojUser):
|
|||||||
result = results[0]
|
result = results[0]
|
||||||
assert "Found 1 matches for 'hello' in 1 documents" in result["query"]
|
assert "Found 1 matches for 'hello' in 1 documents" in result["query"]
|
||||||
assert "in dir1/" in result["query"]
|
assert "in dir1/" in result["query"]
|
||||||
assert "dir1/test1.txt:1:> hello from dir1" in result["compiled"]
|
assert "dir1/test1.txt:1: hello from dir1" in result["compiled"]
|
||||||
assert "dir2/test2.txt" not in result["compiled"]
|
assert "dir2/test2.txt" not in result["compiled"]
|
||||||
|
|
||||||
|
|
||||||
@@ -142,9 +142,9 @@ async def test_grep_files_with_context(default_user: KhojUser):
|
|||||||
result = results[0]
|
result = results[0]
|
||||||
assert "Found 1 matches for 'match' in 1 documents" in result["query"]
|
assert "Found 1 matches for 'match' in 1 documents" in result["query"]
|
||||||
assert "Showing 1 lines before and 1 lines after" in result["query"]
|
assert "Showing 1 lines before and 1 lines after" in result["query"]
|
||||||
assert "test.txt:2: line 2" in result["compiled"]
|
assert "test.txt-2- line 2" in result["compiled"]
|
||||||
assert "test.txt:3:> line 3 (match)" in result["compiled"]
|
assert "test.txt:3: line 3 (match)" in result["compiled"]
|
||||||
assert "test.txt:4: line 4" in result["compiled"]
|
assert "test.txt-4- line 4" in result["compiled"]
|
||||||
assert "line 1" not in result["compiled"]
|
assert "line 1" not in result["compiled"]
|
||||||
assert "line 5" not in result["compiled"]
|
assert "line 5" not in result["compiled"]
|
||||||
|
|
||||||
@@ -199,8 +199,8 @@ async def test_grep_files_multiple_files(default_user: KhojUser):
|
|||||||
assert len(results) == 1
|
assert len(results) == 1
|
||||||
result = results[0]
|
result = results[0]
|
||||||
assert "Found 2 matches for 'hello' in 2 documents" in result["query"]
|
assert "Found 2 matches for 'hello' in 2 documents" in result["query"]
|
||||||
assert "file1.txt:1:> hello from file1" in result["compiled"]
|
assert "file1.txt:1: hello from file1" in result["compiled"]
|
||||||
assert "file2.txt:1:> hello from file2" in result["compiled"]
|
assert "file2.txt:1: hello from file2" in result["compiled"]
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
@@ -272,4 +272,4 @@ async def test_grep_files_financial_entries_regex_patterns(
|
|||||||
|
|
||||||
# All patterns should find the sailing entry
|
# All patterns should find the sailing entry
|
||||||
assert f"Found {expected_matches} matches" in result["query"]
|
assert f"Found {expected_matches} matches" in result["query"]
|
||||||
assert 'ledger.txt:8:> 1984-06-24 * "Center for Boats" "Sailing" #bob' in result["compiled"]
|
assert 'ledger.txt:8: 1984-06-24 * "Center for Boats" "Sailing" #bob' in result["compiled"]
|
||||||
|
|||||||
Reference in New Issue
Block a user