Allow directly reading web pages, even when SERP not enabled (#676)

### Overview
Khoj can now read websites directly, without needing to go through the search step first.

### Details
- Parallelize simple webpage read and extractor
- Rename extract_content online results field to web pages
- Tweak prompts to extract information from webpages, online results
- Test the chat actors that select webpage as a data source and extract web URLs

- Render webpage read in chat response references on Web, Desktop apps
- Pass multiple webpages with their urls in online results context

- Support webpage command in chat API
- Add webpage chat command to read web pages requested by the user
- Create chat actor for directly reading webpages based on user message
This commit is contained in:
Debanjum
2024-03-24 16:25:25 +05:30
committed by GitHub
11 changed files with 237 additions and 48 deletions

View File

@@ -87,7 +87,7 @@
function generateOnlineReference(reference, index) {
// Generate HTML for Chat Reference
let title = reference.title;
let title = reference.title || reference.link;
let link = reference.link;
let snippet = reference.snippet;
let question = reference.question;
@@ -191,6 +191,15 @@
referenceSection.appendChild(polishedReference);
}
}
if (onlineReference.webpages && onlineReference.webpages.length > 0) {
numOnlineReferences += onlineReference.webpages.length;
for (let index in onlineReference.webpages) {
let reference = onlineReference.webpages[index];
let polishedReference = generateOnlineReference(reference, index);
referenceSection.appendChild(polishedReference);
}
}
}
return numOnlineReferences;