mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-02 21:19:12 +00:00
Let research chat director decide which webpage to read, if any
Make the number of webpages that search_online reads automatically configurable via an argument. It defaults to 1, so other callers of the function are unaffected. The iterative chat director passes 0 instead, so it can still decide which webpages to read, if any, based on the online search it performs.
This commit is contained in:
@@ -46,8 +46,7 @@ OLOSTEP_QUERY_PARAMS = {
|
||||
"expandHtml": "False",
|
||||
}
|
||||
|
||||
# TODO: Should this be 0 to let advanced model decide which web pages to read?
|
||||
MAX_WEBPAGES_TO_READ = 1
|
||||
DEFAULT_MAX_WEBPAGES_TO_READ = 1
|
||||
|
||||
|
||||
async def search_online(
|
||||
@@ -58,6 +57,7 @@ async def search_online(
|
||||
subscribed: bool = False,
|
||||
send_status_func: Optional[Callable] = None,
|
||||
custom_filters: List[str] = [],
|
||||
max_webpages_to_read: int = DEFAULT_MAX_WEBPAGES_TO_READ,
|
||||
uploaded_image_url: str = None,
|
||||
agent: Agent = None,
|
||||
):
|
||||
@@ -91,7 +91,7 @@ async def search_online(
|
||||
webpages = {
|
||||
(organic.get("link"), subquery, organic.get("content"))
|
||||
for subquery in response_dict
|
||||
for organic in response_dict[subquery].get("organic", [])[:MAX_WEBPAGES_TO_READ]
|
||||
for organic in response_dict[subquery].get("organic", [])[:max_webpages_to_read]
|
||||
if "answerBox" not in response_dict[subquery]
|
||||
}
|
||||
|
||||
|
||||
@@ -199,6 +199,7 @@ async def execute_information_collection(
|
||||
subscribed,
|
||||
send_status_func,
|
||||
[],
|
||||
max_webpages_to_read=0,
|
||||
uploaded_image_url=uploaded_image_url,
|
||||
agent=agent,
|
||||
):
|
||||
|
||||
Reference in New Issue
Block a user