mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-02 21:19:12 +00:00
Prefer olostep over firecrawl for webpage read by default
Default to Olostep, as it is faster and has a higher webpage read success rate. The fallback logic will use Firecrawl if Olostep fails.
This commit is contained in:
@@ -1398,16 +1398,6 @@ class ConversationAdapters:
|
||||
enabled_scrapers = [scraper async for scraper in WebScraper.objects.all().order_by("priority").aiterator()]
|
||||
if not enabled_scrapers:
|
||||
# Use scrapers enabled via environment variables
|
||||
if os.getenv("FIRECRAWL_API_KEY"):
|
||||
api_url = os.getenv("FIRECRAWL_API_URL", "https://api.firecrawl.dev")
|
||||
enabled_scrapers.append(
|
||||
WebScraper(
|
||||
type=WebScraper.WebScraperType.FIRECRAWL,
|
||||
name=WebScraper.WebScraperType.FIRECRAWL.capitalize(),
|
||||
api_key=os.getenv("FIRECRAWL_API_KEY"),
|
||||
api_url=api_url,
|
||||
)
|
||||
)
|
||||
if os.getenv("OLOSTEP_API_KEY"):
|
||||
api_url = os.getenv("OLOSTEP_API_URL", "https://agent.olostep.com/olostep-p2p-incomingAPI")
|
||||
enabled_scrapers.append(
|
||||
@@ -1418,6 +1408,16 @@ class ConversationAdapters:
|
||||
api_url=api_url,
|
||||
)
|
||||
)
|
||||
if os.getenv("FIRECRAWL_API_KEY"):
|
||||
api_url = os.getenv("FIRECRAWL_API_URL", "https://api.firecrawl.dev")
|
||||
enabled_scrapers.append(
|
||||
WebScraper(
|
||||
type=WebScraper.WebScraperType.FIRECRAWL,
|
||||
name=WebScraper.WebScraperType.FIRECRAWL.capitalize(),
|
||||
api_key=os.getenv("FIRECRAWL_API_KEY"),
|
||||
api_url=api_url,
|
||||
)
|
||||
)
|
||||
# Jina is the default fallback scraper to use, as it does not require an API key
|
||||
api_url = os.getenv("JINA_READER_API_URL", "https://r.jina.ai/")
|
||||
enabled_scrapers.append(
|
||||
|
||||
Reference in New Issue
Block a user