From a52a06ad9d123d96056e5a200b4b4931774db687 Mon Sep 17 00:00:00 2001 From: Debanjum Date: Tue, 26 Aug 2025 17:59:33 -0700 Subject: [PATCH] Prefer olostep over firecrawl for webpage read by default Default to Olostep as it is faster and has a higher webpage read success rate. Fallback logic will use Firecrawl if Olostep fails. --- src/khoj/database/adapters/__init__.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/khoj/database/adapters/__init__.py b/src/khoj/database/adapters/__init__.py index e1711025..bf046e42 100644 --- a/src/khoj/database/adapters/__init__.py +++ b/src/khoj/database/adapters/__init__.py @@ -1398,16 +1398,6 @@ class ConversationAdapters: enabled_scrapers = [scraper async for scraper in WebScraper.objects.all().order_by("priority").aiterator()] if not enabled_scrapers: # Use scrapers enabled via environment variables - if os.getenv("FIRECRAWL_API_KEY"): - api_url = os.getenv("FIRECRAWL_API_URL", "https://api.firecrawl.dev") - enabled_scrapers.append( - WebScraper( - type=WebScraper.WebScraperType.FIRECRAWL, - name=WebScraper.WebScraperType.FIRECRAWL.capitalize(), - api_key=os.getenv("FIRECRAWL_API_KEY"), - api_url=api_url, - ) - ) if os.getenv("OLOSTEP_API_KEY"): api_url = os.getenv("OLOSTEP_API_URL", "https://agent.olostep.com/olostep-p2p-incomingAPI") enabled_scrapers.append( @@ -1418,6 +1408,16 @@ class ConversationAdapters: api_url=api_url, ) ) + if os.getenv("FIRECRAWL_API_KEY"): + api_url = os.getenv("FIRECRAWL_API_URL", "https://api.firecrawl.dev") + enabled_scrapers.append( + WebScraper( + type=WebScraper.WebScraperType.FIRECRAWL, + name=WebScraper.WebScraperType.FIRECRAWL.capitalize(), + api_key=os.getenv("FIRECRAWL_API_KEY"), + api_url=api_url, + ) + ) # Jina is the default fallback scrapers to use as it does not require an API key api_url = os.getenv("JINA_READER_API_URL", "https://r.jina.ai/") enabled_scrapers.append(