mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-09 21:29:11 +00:00
Initial commit: add a dedicated page for managing the knowledge base
- One current issue in the Khoj application is that managing the files being referenced as the user's knowledge base is slightly opaque and difficult to access - Add a migration for associating the fileobjects directly with the Entry objects, making it easier to get data via foreign key - Add the new page that shows all indexed files in the search view, also allowing you to upload new docs directly from that page - Support new APIs for getting / deleting files
This commit is contained in:
@@ -94,3 +94,33 @@ export function useDebounce<T>(value: T, delay: number): T {
|
|||||||
|
|
||||||
return debouncedValue;
|
return debouncedValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Format a timestamp string for display.
 *
 * Timestamps from the last 24 hours are shown relative to the current time
 * ("just now", "5 minutes ago", "3 hours ago"). Anything older, or more than
 * about a minute in the future, is shown as a full en-US date and time with a
 * short time zone name.
 *
 * @param isoString - A date string that `Date` can parse (e.g. ISO 8601).
 * @returns The display string, or `isoString` unchanged when it cannot be
 *          formatted (the failure is logged to the console).
 */
export const formatDateTime = (isoString: string): string => {
    try {
        const date = new Date(isoString);
        const diffInMinutes = Math.floor((Date.now() - date.getTime()) / 60000);

        // Allow up to a minute of clock skew between client and server, but do
        // not describe timestamps that are clearly in the future as "just now".
        if (diffInMinutes >= -1 && diffInMinutes < 1) return "just now";

        if (diffInMinutes >= 1 && diffInMinutes < 60) {
            const unit = diffInMinutes === 1 ? "minute" : "minutes";
            return `${diffInMinutes} ${unit} ago`;
        }

        if (diffInMinutes >= 60 && diffInMinutes < 1440) {
            const hours = Math.floor(diffInMinutes / 60);
            return `${hours} ${hours === 1 ? "hour" : "hours"} ago`;
        }

        // Older dates, future dates and invalid dates end up here. For an
        // invalid date every comparison above is false (NaN) and format()
        // throws a RangeError, which the catch below turns into the raw input.
        const formatter = new Intl.DateTimeFormat("en-US", {
            month: "long",
            day: "numeric",
            year: "numeric",
            hour: "numeric",
            minute: "2-digit",
            hour12: true,
            timeZoneName: "short",
        });

        return formatter.format(date);
    } catch (error) {
        console.error("Error formatting date:", error);
        return isoString;
    }
};
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ import {
|
|||||||
KhojSearchLogo,
|
KhojSearchLogo,
|
||||||
} from "../logo/khojLogo";
|
} from "../logo/khojLogo";
|
||||||
import { Gear } from "@phosphor-icons/react/dist/ssr";
|
import { Gear } from "@phosphor-icons/react/dist/ssr";
|
||||||
import { Plus } from "@phosphor-icons/react";
|
import { Book, Plus } from "@phosphor-icons/react";
|
||||||
import { useEffect, useState } from "react";
|
import { useEffect, useState } from "react";
|
||||||
import AllConversations from "../allConversations/allConversations";
|
import AllConversations from "../allConversations/allConversations";
|
||||||
import FooterMenu from "../navMenu/navMenu";
|
import FooterMenu from "../navMenu/navMenu";
|
||||||
@@ -26,6 +26,7 @@ import { useIsMobileWidth } from "@/app/common/utils";
|
|||||||
import { UserPlusIcon } from "lucide-react";
|
import { UserPlusIcon } from "lucide-react";
|
||||||
import { useAuthenticatedData } from "@/app/common/auth";
|
import { useAuthenticatedData } from "@/app/common/auth";
|
||||||
import LoginPrompt from "../loginPrompt/loginPrompt";
|
import LoginPrompt from "../loginPrompt/loginPrompt";
|
||||||
|
import { url } from "inspector";
|
||||||
|
|
||||||
// Menu items.
|
// Menu items.
|
||||||
const items = [
|
const items = [
|
||||||
@@ -54,6 +55,11 @@ const items = [
|
|||||||
url: "/settings",
|
url: "/settings",
|
||||||
icon: Gear,
|
icon: Gear,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
title: "Knowledge Base",
|
||||||
|
url: "/knowledge",
|
||||||
|
icon: Book,
|
||||||
|
},
|
||||||
];
|
];
|
||||||
|
|
||||||
const SIDEBAR_KEYBOARD_SHORTCUT = "b";
|
const SIDEBAR_KEYBOARD_SHORTCUT = "b";
|
||||||
|
|||||||
93
src/interface/web/app/knowledge/page.tsx
Normal file
93
src/interface/web/app/knowledge/page.tsx
Normal file
@@ -0,0 +1,93 @@
|
|||||||
|
"use client";
|
||||||
|
|
||||||
|
import { useState, useEffect } from "react";
|
||||||
|
import { SidebarInset, SidebarProvider, SidebarTrigger } from "@/components/ui/sidebar";
|
||||||
|
import { AppSidebar } from "../components/appSidebar/appSidebar";
|
||||||
|
import { Separator } from "@/components/ui/separator";
|
||||||
|
import { KhojLogoType } from "../components/logo/khojLogo";
|
||||||
|
import { Card, CardHeader, CardTitle, CardContent } from "@/components/ui/card";
|
||||||
|
import { useIsMobileWidth } from "../common/utils";
|
||||||
|
import { InlineLoading } from "../components/loading/loading";
|
||||||
|
|
||||||
|
/** One file entry as consumed by the knowledge base page. */
interface FileObject {
|
||||||
|
/** File name; the page displays only the segment after the last "/". */
file_name: string;
|
||||||
|
/** Text content of the file; the page previews its first 100 characters. */
raw_text: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Knowledge base page: fetches the file list from `/api/content/all` once on
 * mount and renders one card per file with a short preview of its text.
 *
 * Files are listed alphabetically by `file_name`. A loading indicator is shown
 * while the request is in flight and an error line is shown if it fails.
 */
export default function KnowledgeBase() {
    // Number of characters of `raw_text` shown on each card.
    const previewLength = 100;

    const [files, setFiles] = useState<FileObject[]>([]);
    const [loading, setLoading] = useState(true);
    const [error, setError] = useState<string | null>(null);
    const isMobileWidth = useIsMobileWidth();

    useEffect(() => {
        const fetchFiles = async () => {
            try {
                const response = await fetch("/api/content/all");
                if (!response.ok) throw new Error("Failed to fetch files");

                const filesList = await response.json();
                if (Array.isArray(filesList)) {
                    // Sort on a copy with an explicit comparator: the default
                    // sort compares objects as "[object Object]" strings and
                    // therefore leaves them unordered.
                    const fileObjects: FileObject[] = filesList;
                    setFiles(
                        [...fileObjects].sort((a, b) => a.file_name.localeCompare(b.file_name)),
                    );
                }
            } catch (err) {
                setError("Failed to load files");
                console.error("Error fetching files:", err);
            } finally {
                setLoading(false);
            }
        };

        fetchFiles();
    }, []);

    return (
        <SidebarProvider>
            <AppSidebar conversationId={""} />
            <SidebarInset>
                <header className="flex h-16 shrink-0 items-center gap-2 border-b px-4">
                    <SidebarTrigger className="-ml-1" />
                    <Separator orientation="vertical" className="mr-2 h-4" />
                    {isMobileWidth ? (
                        <a className="p-0 no-underline" href="/">
                            <KhojLogoType className="h-auto w-16" />
                        </a>
                    ) : (
                        <h2 className="text-lg">Knowledge Base</h2>
                    )}
                </header>
                <main>
                    <div className="md:w-3/4 sm:w-full mx-auto pt-6 md:pt-8">
                        {loading && (
                            <div className="mt-4 flex items-center justify-center">
                                <InlineLoading
                                    className="mt-4"
                                    message={"Loading"}
                                    iconClassName="h-5 w-5"
                                />
                            </div>
                        )}
                        {error && <div className="text-red-500">{error}</div>}

                        <div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4">
                            {files.map((file, index) => (
                                <Card key={index}>
                                    <CardHeader>
                                        <CardTitle className="text-sm font-medium">
                                            {file.file_name.split("/").pop()}
                                        </CardTitle>
                                    </CardHeader>
                                    <CardContent>
                                        <p className="text-sm text-muted-foreground">
                                            {file.raw_text.slice(0, previewLength)}
                                            {/* Ellipsis only when text was actually cut off. */}
                                            {file.raw_text.length > previewLength ? "..." : ""}
                                        </p>
                                    </CardContent>
                                </Card>
                            ))}
                        </div>
                    </div>
                </main>
            </SidebarInset>
        </SidebarProvider>
    );
}
|
||||||
@@ -2,6 +2,7 @@ import type { Metadata } from "next";
|
|||||||
|
|
||||||
import "../globals.css";
|
import "../globals.css";
|
||||||
import { ContentSecurityPolicy } from "../common/layoutHelper";
|
import { ContentSecurityPolicy } from "../common/layoutHelper";
|
||||||
|
import { Toaster } from "@/components/ui/toaster";
|
||||||
|
|
||||||
export const metadata: Metadata = {
|
export const metadata: Metadata = {
|
||||||
title: "Khoj AI - Search",
|
title: "Khoj AI - Search",
|
||||||
@@ -35,7 +36,10 @@ export default function RootLayout({
|
|||||||
return (
|
return (
|
||||||
<html>
|
<html>
|
||||||
<ContentSecurityPolicy />
|
<ContentSecurityPolicy />
|
||||||
<body>{children}</body>
|
<body>
|
||||||
|
{children}
|
||||||
|
<Toaster />
|
||||||
|
</body>
|
||||||
</html>
|
</html>
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -24,16 +24,52 @@ import {
|
|||||||
MagnifyingGlass,
|
MagnifyingGlass,
|
||||||
NoteBlank,
|
NoteBlank,
|
||||||
NotionLogo,
|
NotionLogo,
|
||||||
|
Eye,
|
||||||
|
Trash,
|
||||||
|
ArrowsOutSimple,
|
||||||
|
DotsThreeVertical,
|
||||||
|
Waveform,
|
||||||
|
Plus,
|
||||||
} from "@phosphor-icons/react";
|
} from "@phosphor-icons/react";
|
||||||
import { Button } from "@/components/ui/button";
|
import { Button } from "@/components/ui/button";
|
||||||
import Link from "next/link";
|
import Link from "next/link";
|
||||||
import { getIconFromFilename } from "../common/iconUtils";
|
import { getIconFromFilename } from "../common/iconUtils";
|
||||||
import { useIsMobileWidth } from "../common/utils";
|
import { formatDateTime, useIsMobileWidth } from "../common/utils";
|
||||||
import { SidebarInset, SidebarProvider, SidebarTrigger } from "@/components/ui/sidebar";
|
import { SidebarInset, SidebarProvider, SidebarTrigger } from "@/components/ui/sidebar";
|
||||||
import { AppSidebar } from "../components/appSidebar/appSidebar";
|
import { AppSidebar } from "../components/appSidebar/appSidebar";
|
||||||
import { Separator } from "@/components/ui/separator";
|
import { Separator } from "@/components/ui/separator";
|
||||||
import { KhojLogoType } from "../components/logo/khojLogo";
|
import { KhojLogoType } from "../components/logo/khojLogo";
|
||||||
|
import { InlineLoading } from "../components/loading/loading";
|
||||||
|
import {
|
||||||
|
AlertDialog,
|
||||||
|
AlertDialogContent,
|
||||||
|
AlertDialogDescription,
|
||||||
|
AlertDialogFooter,
|
||||||
|
AlertDialogHeader,
|
||||||
|
AlertDialogTitle,
|
||||||
|
AlertDialogCancel,
|
||||||
|
AlertDialogAction,
|
||||||
|
AlertDialogTrigger,
|
||||||
|
} from "@/components/ui/alert-dialog";
|
||||||
|
import {
|
||||||
|
Dialog,
|
||||||
|
DialogContent,
|
||||||
|
DialogHeader,
|
||||||
|
DialogTitle,
|
||||||
|
DialogTrigger,
|
||||||
|
} from "@/components/ui/dialog";
|
||||||
|
import { useToast } from "@/components/ui/use-toast";
|
||||||
|
import { Scroll } from "lucide-react";
|
||||||
|
import {
|
||||||
|
DropdownMenu,
|
||||||
|
DropdownMenuContent,
|
||||||
|
DropdownMenuItem,
|
||||||
|
DropdownMenuLabel,
|
||||||
|
DropdownMenuTrigger,
|
||||||
|
} from "@/components/ui/dropdown-menu";
|
||||||
|
import { uploadDataForIndexing } from "../common/chatFunctions";
|
||||||
|
import { CommandDialog } from "@/components/ui/command";
|
||||||
|
import { Progress } from "@/components/ui/progress";
|
||||||
interface AdditionalData {
|
interface AdditionalData {
|
||||||
file: string;
|
file: string;
|
||||||
source: string;
|
source: string;
|
||||||
@@ -49,6 +85,12 @@ interface SearchResult {
|
|||||||
"corpus-id": string;
|
"corpus-id": string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** One indexed file as rendered by the search page's file grid. */
interface FileObject {
|
||||||
|
/** File name; shown as the segment after the last "/" and sent to the file view/delete requests. */
file_name: string;
|
||||||
|
/** Text content of the file; each card previews its first 100 characters. */
raw_text: string;
|
||||||
|
/** Last-updated timestamp string; rendered through `formatDateTime`. */
updated_at: string;
|
||||||
|
}
|
||||||
|
|
||||||
function getNoteTypeIcon(source: string) {
|
function getNoteTypeIcon(source: string) {
|
||||||
if (source === "notion") {
|
if (source === "notion") {
|
||||||
return <NotionLogo className="text-muted-foreground" />;
|
return <NotionLogo className="text-muted-foreground" />;
|
||||||
@@ -92,7 +134,7 @@ function Note(props: NoteResultProps) {
|
|||||||
const fileIcon = getIconFromFilename(fileName || ".txt", "h-4 w-4 inline mr-2");
|
const fileIcon = getIconFromFilename(fileName || ".txt", "h-4 w-4 inline mr-2");
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<Card className="bg-secondary h-full shadow-sm rounded-lg border border-muted mb-4">
|
<Card className="bg-secondary h-full shadow-sm rounded-lg border border-muted mb-4 animate-fade-in-up">
|
||||||
<CardHeader>
|
<CardHeader>
|
||||||
<CardTitle className="inline-flex gap-2">
|
<CardTitle className="inline-flex gap-2">
|
||||||
{getNoteTypeIcon(note.additional.source)}
|
{getNoteTypeIcon(note.additional.source)}
|
||||||
@@ -139,7 +181,7 @@ function focusNote(note: SearchResult) {
|
|||||||
const fileIcon = getIconFromFilename(fileName || ".txt", "h-4 w-4 inline mr-2");
|
const fileIcon = getIconFromFilename(fileName || ".txt", "h-4 w-4 inline mr-2");
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<Card className="bg-secondary h-full shadow-sm rounded-lg bg-gradient-to-b from-background to-slate-50 dark:to-gray-950 border border-muted mb-4">
|
<Card className="bg-secondary h-full shadow-sm rounded-lg border border-muted mb-4">
|
||||||
<CardHeader>
|
<CardHeader>
|
||||||
<CardTitle>{fileName}</CardTitle>
|
<CardTitle>{fileName}</CardTitle>
|
||||||
</CardHeader>
|
</CardHeader>
|
||||||
@@ -167,27 +209,147 @@ function focusNote(note: SearchResult) {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Drag-and-drop / click-to-browse upload area.
 *
 * Dropped or selected files are handed to `uploadDataForIndexing`, which
 * drives the `uploading` flag and reports the result via `setUploadedFiles`.
 * While an upload is in flight a progress bar is shown; its value is simulated
 * on a timer and is not tied to real transfer progress.
 *
 * Props:
 * - `onClose`: accepted for interface compatibility; not used by this component.
 * - `setUploadedFiles`: forwarded to `uploadDataForIndexing`.
 */
const UploadFiles: React.FC<{
    onClose: () => void;
    setUploadedFiles: (files: string[]) => void;
}> = ({ setUploadedFiles }) => {
    const [isDragAndDropping, setIsDragAndDropping] = useState(false);

    // Only the setters are needed: they are passed to uploadDataForIndexing.
    // NOTE(review): warning/error values are not rendered anywhere in this
    // component, so upload failures are currently invisible to the user.
    const [, setWarning] = useState<string | null>(null);
    const [, setError] = useState<string | null>(null);
    const [uploading, setUploading] = useState(false);
    const [progressValue, setProgressValue] = useState(0);
    const fileInputRef = useRef<HTMLInputElement>(null);

    useEffect(() => {
        if (!uploading) {
            setProgressValue(0);
            return;
        }

        // Simulated progress: advance by a random 1-5 every 800ms, capped at 100.
        const interval = setInterval(() => {
            setProgressValue((prev) => {
                const increment = Math.floor(Math.random() * 5) + 1;
                return Math.min(prev + increment, 100);
            });
        }, 800);
        return () => clearInterval(interval);
    }, [uploading]);

    function handleDragOver(event: React.DragEvent<HTMLDivElement>) {
        event.preventDefault();
        setIsDragAndDropping(true);
    }

    function handleDragLeave(event: React.DragEvent<HTMLDivElement>) {
        event.preventDefault();
        setIsDragAndDropping(false);
    }

    function handleDragAndDropFiles(event: React.DragEvent<HTMLDivElement>) {
        event.preventDefault();
        setIsDragAndDropping(false);
        uploadFiles(event.dataTransfer.files);
    }

    function openFileInput() {
        fileInputRef.current?.click();
    }

    function handleFileChange(event: React.ChangeEvent<HTMLInputElement>) {
        uploadFiles(event.target.files);
    }

    function uploadFiles(files: FileList | null) {
        // Dropping non-file content or cancelling the picker yields no files;
        // do not start an upload in that case.
        if (!files || files.length === 0) return;

        uploadDataForIndexing(files, setWarning, setUploading, setError, setUploadedFiles);
    }

    return (
        <div
            className={`flex flex-col h-full`}
            onDragOver={handleDragOver}
            onDragLeave={handleDragLeave}
            onDrop={handleDragAndDropFiles}
            onClick={openFileInput}
        >
            <input
                type="file"
                multiple
                ref={fileInputRef}
                style={{ display: "none" }}
                onChange={handleFileChange}
            />
            <div className="flex-none p-4">
                {uploading && (
                    <Progress
                        indicatorColor="bg-slate-500"
                        className="w-full h-2 rounded-full"
                        value={progressValue}
                    />
                )}
            </div>
            <div
                className={`flex-none p-4 bg-secondary border-b ${isDragAndDropping ? "animate-pulse" : ""} rounded-lg`}
            >
                <div className="flex items-center justify-center w-full h-32 border-2 border-dashed border-gray-300 rounded-lg">
                    {isDragAndDropping ? (
                        <div className="flex items-center justify-center w-full h-full">
                            <Waveform className="h-6 w-6 mr-2" />
                            <span>Drop files to upload</span>
                        </div>
                    ) : (
                        <div className="flex items-center justify-center w-full h-full">
                            <Plus className="h-6 w-6 mr-2" />
                            <span>Drag and drop files here</span>
                        </div>
                    )}
                </div>
            </div>
        </div>
    );
};
|
||||||
|
|
||||||
export default function Search() {
|
export default function Search() {
|
||||||
const [searchQuery, setSearchQuery] = useState("");
|
const [searchQuery, setSearchQuery] = useState("");
|
||||||
const [searchResults, setSearchResults] = useState<SearchResult[] | null>(null);
|
const [searchResults, setSearchResults] = useState<SearchResult[] | null>(null);
|
||||||
const [searchResultsLoading, setSearchResultsLoading] = useState(false);
|
const [searchResultsLoading, setSearchResultsLoading] = useState(false);
|
||||||
const [focusSearchResult, setFocusSearchResult] = useState<SearchResult | null>(null);
|
const [focusSearchResult, setFocusSearchResult] = useState<SearchResult | null>(null);
|
||||||
const [exampleQuery, setExampleQuery] = useState("");
|
const [files, setFiles] = useState<FileObject[]>([]);
|
||||||
|
const [error, setError] = useState<string | null>(null);
|
||||||
|
const [fileObjectsLoading, setFileObjectsLoading] = useState(true);
|
||||||
const searchTimeoutRef = useRef<NodeJS.Timeout | null>(null);
|
const searchTimeoutRef = useRef<NodeJS.Timeout | null>(null);
|
||||||
|
const [selectedFile, setSelectedFile] = useState<string | null>(null);
|
||||||
|
const [selectedFileFullText, setSelectedFileFullText] = useState<string | null>(null);
|
||||||
|
const [isDeleting, setIsDeleting] = useState(false);
|
||||||
|
const [uploadedFiles, setUploadedFiles] = useState<string[]>([]);
|
||||||
|
const [selectedFiles, setSelectedFiles] = useState<string[]>([]);
|
||||||
|
const [filteredFiles, setFilteredFiles] = useState<string[]>([]);
|
||||||
|
|
||||||
|
const { toast } = useToast();
|
||||||
|
|
||||||
const isMobileWidth = useIsMobileWidth();
|
const isMobileWidth = useIsMobileWidth();
|
||||||
|
|
||||||
useEffect(() => {
|
|
||||||
setExampleQuery(
|
|
||||||
naturalLanguageSearchQueryExamples[
|
|
||||||
Math.floor(Math.random() * naturalLanguageSearchQueryExamples.length)
|
|
||||||
],
|
|
||||||
);
|
|
||||||
}, []);
|
|
||||||
|
|
||||||
function search() {
|
function search() {
|
||||||
if (searchResultsLoading || !searchQuery.trim()) return;
|
if (searchResultsLoading || !searchQuery.trim()) return;
|
||||||
|
|
||||||
|
setSearchResultsLoading(true);
|
||||||
|
|
||||||
const apiUrl = `/api/search?q=${encodeURIComponent(searchQuery)}&client=web`;
|
const apiUrl = `/api/search?q=${encodeURIComponent(searchQuery)}&client=web`;
|
||||||
fetch(apiUrl, {
|
fetch(apiUrl, {
|
||||||
method: "GET",
|
method: "GET",
|
||||||
@@ -205,8 +367,69 @@ export default function Search() {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Delete several files in one request to `DELETE /api/content/files`.
 *
 * Targets `selectedFiles` when any are selected, otherwise every entry in
 * `filteredFiles`; does nothing when both are empty. On success the deleted
 * names are removed from `uploadedFiles` and the selection is cleared. On
 * failure the error is logged and a destructive toast is shown, matching
 * `handleDelete`.
 */
const deleteSelected = async () => {
    // NOTE(review): with no selection this falls back to deleting everything
    // in filteredFiles — confirm that is the intended behavior.
    const filesToDelete = selectedFiles.length > 0 ? selectedFiles : filteredFiles;

    if (filesToDelete.length === 0) {
        return;
    }

    try {
        const response = await fetch("/api/content/files", {
            method: "DELETE",
            headers: {
                "Content-Type": "application/json",
            },
            body: JSON.stringify({ files: filesToDelete }),
        });

        if (!response.ok) throw new Error("Failed to delete files");

        // Drop the deleted names from the uploadedFiles state
        setUploadedFiles((prevFiles) =>
            prevFiles.filter((file) => !filesToDelete.includes(file)),
        );

        // Reset selectedFiles
        setSelectedFiles([]);
    } catch (error) {
        console.error("Error deleting files:", error);
        toast({
            title: "Error deleting files",
            description: "Failed to delete the selected files",
            variant: "destructive",
        });
    }
};
|
||||||
|
|
||||||
|
/**
 * Load the list of indexed files from `/api/content/all` into `files`,
 * ordered alphabetically by `file_name`.
 *
 * A non-array response body is ignored. On failure the `error` state is set
 * and the cause is logged. `fileObjectsLoading` is cleared in every case.
 */
const fetchFiles = async () => {
    try {
        const response = await fetch("/api/content/all");
        if (!response.ok) throw new Error("Failed to fetch files");

        const filesList = await response.json();
        if (Array.isArray(filesList)) {
            // Sort on a copy with an explicit comparator: the default sort
            // compares objects as "[object Object]" strings and therefore
            // leaves them unordered.
            const fileObjects: FileObject[] = filesList;
            setFiles([...fileObjects].sort((a, b) => a.file_name.localeCompare(b.file_name)));
        }
    } catch (err) {
        setError("Failed to load files");
        console.error("Error fetching files:", err);
    } finally {
        setFileObjectsLoading(false);
    }
};
|
||||||
|
|
||||||
|
/**
 * Fetch one file from `/api/content/file` and store its `raw_text` in
 * `selectedFileFullText`.
 *
 * @param fileName - Name of the file to load; URL-encoded before being placed
 *                   in the query string.
 *
 * On failure the `error` state is set and the cause is logged.
 */
const fetchSpecificFile = async (fileName: string) => {
    try {
        // File names can contain "/", "&", "#", spaces, etc.; without encoding
        // they would corrupt or truncate the query string.
        const response = await fetch(
            `/api/content/file?file_name=${encodeURIComponent(fileName)}`,
        );
        if (!response.ok) throw new Error("Failed to fetch file");

        const file = await response.json();
        setSelectedFileFullText(file.raw_text);
    } catch (err) {
        setError("Failed to load file");
        console.error("Error fetching file:", err);
    }
};
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (!searchQuery.trim()) {
|
if (!searchQuery.trim()) {
|
||||||
|
setSearchResults(null);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -229,6 +452,48 @@ export default function Search() {
|
|||||||
};
|
};
|
||||||
}, [searchQuery]);
|
}, [searchQuery]);
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
if (selectedFile) {
|
||||||
|
fetchSpecificFile(selectedFile);
|
||||||
|
}
|
||||||
|
}, [selectedFile]);
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
fetchFiles();
|
||||||
|
}, []);
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
if (uploadedFiles.length > 0) {
|
||||||
|
fetchFiles();
|
||||||
|
}
|
||||||
|
}, [uploadedFiles]);
|
||||||
|
|
||||||
|
/**
 * Delete a single file via `DELETE /api/content/file` and refresh the list.
 *
 * @param fileName - Name of the file to delete; URL-encoded before being
 *                   placed in the query string.
 *
 * Shows a toast for both success and failure. `isDeleting` is true for the
 * duration of the request.
 */
const handleDelete = async (fileName: string) => {
    setIsDeleting(true);
    try {
        // File names can contain "/", "&", "#", spaces, etc.; without encoding
        // they would corrupt or truncate the query string.
        const response = await fetch(
            `/api/content/file?filename=${encodeURIComponent(fileName)}`,
            {
                method: "DELETE",
            },
        );
        if (!response.ok) throw new Error("Failed to delete file");
        toast({
            title: "File deleted",
            description: `File ${fileName} has been deleted`,
            variant: "default",
        });

        // Refresh files list
        fetchFiles();
    } catch (error) {
        // Keep the cause visible in the console; the toast is user-facing only.
        console.error("Error deleting file:", error);
        toast({
            title: "Error deleting file",
            description: `Failed to delete file ${fileName}`,
            variant: "destructive",
        });
    } finally {
        setIsDeleting(false);
    }
};
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<SidebarProvider>
|
<SidebarProvider>
|
||||||
<AppSidebar conversationId={""} />
|
<AppSidebar conversationId={""} />
|
||||||
@@ -251,20 +516,34 @@ export default function Search() {
|
|||||||
<div className="flex justify-between items-center border-2 border-muted p-1 gap-1 rounded-lg">
|
<div className="flex justify-between items-center border-2 border-muted p-1 gap-1 rounded-lg">
|
||||||
<Input
|
<Input
|
||||||
autoFocus={true}
|
autoFocus={true}
|
||||||
className="border-none pl-4"
|
className="border-none pl-4 focus-visible:ring-transparent focus-visible:ring-offset-transparent"
|
||||||
onChange={(e) => setSearchQuery(e.currentTarget.value)}
|
onChange={(e) => setSearchQuery(e.currentTarget.value)}
|
||||||
onKeyDown={(e) => e.key === "Enter" && search()}
|
onKeyDown={(e) => e.key === "Enter" && search()}
|
||||||
type="search"
|
type="search"
|
||||||
placeholder="Search Documents"
|
placeholder="Search Documents"
|
||||||
/>
|
/>
|
||||||
<button
|
<Button
|
||||||
className="px-2 gap-2 inline-flex items-center rounded border-l border-gray-300 hover:text-gray-500"
|
className="px-2 gap-2 inline-flex rounded-none items-center border-l border-gray-300 hover:text-gray-500"
|
||||||
|
variant={"ghost"}
|
||||||
onClick={() => search()}
|
onClick={() => search()}
|
||||||
>
|
>
|
||||||
<MagnifyingGlass className="h-4 w-4" />
|
<MagnifyingGlass className="h-4 w-4" />
|
||||||
<span>Find</span>
|
<span>Find</span>
|
||||||
</button>
|
</Button>
|
||||||
</div>
|
</div>
|
||||||
|
<UploadFiles
|
||||||
|
onClose={() => {}}
|
||||||
|
setUploadedFiles={setUploadedFiles}
|
||||||
|
/>
|
||||||
|
{searchResultsLoading && (
|
||||||
|
<div className="mt-4 flex items-center justify-center">
|
||||||
|
<InlineLoading
|
||||||
|
className="mt-4"
|
||||||
|
message={"Searching"}
|
||||||
|
iconClassName="h-5 w-5"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
{focusSearchResult && (
|
{focusSearchResult && (
|
||||||
<div className="mt-4">
|
<div className="mt-4">
|
||||||
<Button
|
<Button
|
||||||
@@ -279,6 +558,7 @@ export default function Search() {
|
|||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
{!focusSearchResult &&
|
{!focusSearchResult &&
|
||||||
|
!searchResultsLoading &&
|
||||||
searchResults &&
|
searchResults &&
|
||||||
searchResults.length > 0 && (
|
searchResults.length > 0 && (
|
||||||
<div className="mt-4 max-w-[92vw] break-all">
|
<div className="mt-4 max-w-[92vw] break-all">
|
||||||
@@ -297,23 +577,149 @@ export default function Search() {
|
|||||||
</ScrollArea>
|
</ScrollArea>
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
{searchResults == null && (
|
{searchResults === null && (
|
||||||
<Card className="flex flex-col items-center justify-center border-none shadow-none">
|
<div className="w-full mt-4">
|
||||||
<CardHeader className="flex flex-col items-center justify-center">
|
{fileObjectsLoading && (
|
||||||
<CardDescription className="border-muted-foreground border w-fit rounded-lg mb-2 text-center text-lg p-4">
|
<div className="mt-4 flex items-center justify-center">
|
||||||
<FileMagnifyingGlass
|
<InlineLoading
|
||||||
weight="fill"
|
className="mt-4"
|
||||||
className="text-muted-foreground h-10 w-10"
|
message={"Loading"}
|
||||||
|
iconClassName="h-5 w-5"
|
||||||
/>
|
/>
|
||||||
</CardDescription>
|
</div>
|
||||||
<CardTitle className="text-center">
|
)}
|
||||||
Search across your documents
|
{error && <div className="text-red-500">{error}</div>}
|
||||||
</CardTitle>
|
|
||||||
</CardHeader>
|
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4">
|
||||||
<CardContent className="text-muted-foreground items-center justify-center text-center flex">
|
{files.map((file, index) => (
|
||||||
<Lightbulb className="inline mr-2" /> {exampleQuery}
|
<Card
|
||||||
</CardContent>
|
key={index}
|
||||||
</Card>
|
className="animate-fade-in-up bg-secondary h-52"
|
||||||
|
>
|
||||||
|
<CardHeader className="p-2">
|
||||||
|
<CardTitle
|
||||||
|
className="flex items-center gap-2"
|
||||||
|
title={file.file_name}
|
||||||
|
>
|
||||||
|
<div className="text-sm font-medium truncate hover:text-clip hover:whitespace-normal">
|
||||||
|
{file.file_name.split("/").pop()}
|
||||||
|
</div>
|
||||||
|
<DropdownMenu>
|
||||||
|
<DropdownMenuTrigger>
|
||||||
|
<Button variant={"ghost"}>
|
||||||
|
<DotsThreeVertical className="h-4 w-4" />
|
||||||
|
</Button>
|
||||||
|
</DropdownMenuTrigger>
|
||||||
|
<DropdownMenuContent className="flex flex-col gap-0 w-fit">
|
||||||
|
<DropdownMenuItem className="p-0">
|
||||||
|
<AlertDialog>
|
||||||
|
<AlertDialogTrigger>
|
||||||
|
<Button
|
||||||
|
variant={
|
||||||
|
"ghost"
|
||||||
|
}
|
||||||
|
className="flex items-center gap-2 p-1 text-sm"
|
||||||
|
>
|
||||||
|
<Trash className="h-4 w-4" />
|
||||||
|
<span className="text-xs">
|
||||||
|
Delete
|
||||||
|
</span>
|
||||||
|
</Button>
|
||||||
|
</AlertDialogTrigger>
|
||||||
|
<AlertDialogContent>
|
||||||
|
<AlertDialogHeader>
|
||||||
|
<AlertDialogTitle>
|
||||||
|
Delete File
|
||||||
|
</AlertDialogTitle>
|
||||||
|
</AlertDialogHeader>
|
||||||
|
<AlertDialogDescription>
|
||||||
|
Are you sure you
|
||||||
|
want to delete
|
||||||
|
this file?
|
||||||
|
</AlertDialogDescription>
|
||||||
|
<AlertDialogFooter>
|
||||||
|
<AlertDialogCancel>
|
||||||
|
Cancel
|
||||||
|
</AlertDialogCancel>
|
||||||
|
<AlertDialogAction
|
||||||
|
onClick={() =>
|
||||||
|
handleDelete(
|
||||||
|
file.file_name,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
>
|
||||||
|
{isDeleting
|
||||||
|
? "Deleting..."
|
||||||
|
: "Delete"}
|
||||||
|
</AlertDialogAction>
|
||||||
|
</AlertDialogFooter>
|
||||||
|
</AlertDialogContent>
|
||||||
|
</AlertDialog>
|
||||||
|
</DropdownMenuItem>
|
||||||
|
<DropdownMenuItem className="p-0">
|
||||||
|
<Dialog>
|
||||||
|
<DialogTrigger>
|
||||||
|
<Button
|
||||||
|
variant={
|
||||||
|
"ghost"
|
||||||
|
}
|
||||||
|
className="flex items-center gap-2 p-1 text-sm"
|
||||||
|
onClick={() => {
|
||||||
|
setSelectedFileFullText(
|
||||||
|
null,
|
||||||
|
);
|
||||||
|
setSelectedFile(
|
||||||
|
file.file_name,
|
||||||
|
);
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
<ArrowsOutSimple className="h-4 w-4" />
|
||||||
|
<span className="text-xs">
|
||||||
|
View Full
|
||||||
|
Text
|
||||||
|
</span>
|
||||||
|
</Button>
|
||||||
|
</DialogTrigger>
|
||||||
|
<DialogContent>
|
||||||
|
<DialogHeader>
|
||||||
|
<DialogTitle>
|
||||||
|
{file.file_name
|
||||||
|
.split(
|
||||||
|
"/",
|
||||||
|
)
|
||||||
|
.pop()}
|
||||||
|
</DialogTitle>
|
||||||
|
</DialogHeader>
|
||||||
|
<ScrollArea className="h-[50vh]">
|
||||||
|
<p className="whitespace-pre-wrap break-words text-sm font-normal">
|
||||||
|
{
|
||||||
|
selectedFileFullText
|
||||||
|
}
|
||||||
|
</p>
|
||||||
|
</ScrollArea>
|
||||||
|
</DialogContent>
|
||||||
|
</Dialog>
|
||||||
|
</DropdownMenuItem>
|
||||||
|
</DropdownMenuContent>
|
||||||
|
</DropdownMenu>
|
||||||
|
</CardTitle>
|
||||||
|
</CardHeader>
|
||||||
|
<CardContent className="p-2">
|
||||||
|
<ScrollArea className="h-24">
|
||||||
|
<p className="whitespace-pre-wrap break-words text-sm font-normal text-muted-foreground p-2 rounded-lg bg-background">
|
||||||
|
{file.raw_text.slice(0, 100)}...
|
||||||
|
</p>
|
||||||
|
</ScrollArea>
|
||||||
|
</CardContent>
|
||||||
|
<CardFooter className="flex justify-end gap-2 p-2">
|
||||||
|
<div className="text-muted-foreground text-xs">
|
||||||
|
{formatDateTime(file.updated_at)}
|
||||||
|
</div>
|
||||||
|
</CardFooter>
|
||||||
|
</Card>
|
||||||
|
))}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
)}
|
)}
|
||||||
{searchResults && searchResults.length === 0 && (
|
{searchResults && searchResults.length === 0 && (
|
||||||
<Card className="flex flex-col items-center justify-center border-none shadow-none">
|
<Card className="flex flex-col items-center justify-center border-none shadow-none">
|
||||||
|
|||||||
@@ -0,0 +1,49 @@
|
|||||||
|
from django.core.management.base import BaseCommand
|
||||||
|
from django.db.models import Exists, OuterRef
|
||||||
|
|
||||||
|
from khoj.database.models import Entry, FileObject
|
||||||
|
|
||||||
|
|
||||||
|
class Command(BaseCommand):
    """Management command that removes FileObjects with no associated Entry.

    Runs as a dry run by default and only reports how many FileObjects would
    be removed. Pass ``--apply`` to actually delete them, in batches.
    """

    help = "Deletes FileObjects that have no associated Entries"

    def add_arguments(self, parser):
        """Register the ``--apply`` flag that switches from dry run to deletion."""
        parser.add_argument(
            "--apply",
            action="store_true",
            help="Actually perform the deletion. Without this flag, only shows what would be deleted.",
        )

    def handle(self, *args, **options):
        """Report orphaned FileObjects and, with ``--apply``, delete them."""
        apply_changes = options["apply"]

        # Find FileObjects with no related entries using subquery
        orphaned_files = FileObject.objects.annotate(
            has_entries=Exists(Entry.objects.filter(file_object=OuterRef("pk")))
        ).filter(has_entries=False)

        total_orphaned = orphaned_files.count()
        mode = "DELETE" if apply_changes else "DRY RUN"
        self.stdout.write(f"[{mode}] Found {total_orphaned} orphaned FileObjects")

        if total_orphaned == 0:
            self.stdout.write("No orphaned FileObjects to process")
            return

        if not apply_changes:
            # Nothing is removed in a dry run, so re-reading the first batch in
            # a loop would never terminate; the count is all there is to report.
            self.stdout.write(self.style.SUCCESS(f"Would delete {total_orphaned} orphaned FileObjects"))
            return

        # Process in batches of 1000
        batch_size = 1000
        processed = 0

        while True:
            # delete() is not allowed on a sliced queryset, so materialize the
            # primary keys of the next batch and delete by pk instead.
            batch_ids = list(orphaned_files.values_list("pk", flat=True)[:batch_size])
            if not batch_ids:
                break

            _, deleted_by_model = FileObject.objects.filter(pk__in=batch_ids).delete()
            # The total returned by delete() includes cascaded rows; count only
            # the FileObjects themselves.
            deleted = deleted_by_model.get(FileObject._meta.label, 0)
            if deleted == 0:
                # Nothing was removed for a non-empty batch; stop rather than
                # re-reading the same rows forever.
                break

            processed += deleted
            self.stdout.write(f"Deleted {processed}/{total_orphaned} orphaned FileObjects")

        self.stdout.write(self.style.SUCCESS(f"Deleted {processed} orphaned FileObjects"))
|
||||||
75
src/khoj/database/migrations/0079_entry_file_object.py
Normal file
75
src/khoj/database/migrations/0079_entry_file_object.py
Normal file
@@ -0,0 +1,75 @@
|
|||||||
|
# Generated by Django 5.0.10 on 2025-01-10 18:28
|
||||||
|
|
||||||
|
import django.db.models.deletion
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
def migrate_entry_objects(apps, schema_editor):
    """Backfill ``Entry.file_object`` for existing entries.

    An entry is linked to the FileObject whose ``(user_id, file_name)`` equals the
    entry's ``(user_id, file_path)``. Entries without a matching FileObject are
    left with a null ``file_object``.

    Args:
        apps: Historical app registry supplied by the migration framework.
        schema_editor: Schema editor; its connection alias selects the database.
    """
    Entry = apps.get_model("database", "Entry")
    FileObject = apps.get_model("database", "FileObject")
    db_alias = schema_editor.connection.alias

    # Create lookup dictionary of all file objects. Only the key columns and the
    # primary key are fetched, so the raw_text of every file is not loaded.
    file_object_ids = {
        (user_id, file_name): file_object_id
        for file_object_id, user_id, file_name in FileObject.objects.using(db_alias)
        .values_list("id", "user_id", "file_name")
        .iterator()
    }

    # Process entries in chunks of 1000
    chunk_size = 1000
    processed = 0
    last_entry_id = None

    while True:
        # Page through unlinked entries by ascending id. This visits every entry
        # exactly once without carrying a growing exclusion list in the query.
        pending = Entry.objects.using(db_alias).filter(file_object__isnull=True)
        if last_entry_id is not None:
            pending = pending.filter(id__gt=last_entry_id)
        entries = list(pending.order_by("id").only("id", "user", "file_path")[:chunk_size])

        if not entries:
            break

        last_entry_id = entries[-1].id

        entries_to_update = []
        for entry in entries:
            file_object_id = file_object_ids.get((entry.user_id, entry.file_path))
            if file_object_id is not None:
                entry.file_object_id = file_object_id
                entries_to_update.append(entry)

        if entries_to_update:
            Entry.objects.using(db_alias).bulk_update(entries_to_update, ["file_object"], batch_size=chunk_size)

        processed += len(entries)
        print(f"Processed {processed} entries")
|
||||||
|
|
||||||
|
|
||||||
|
def reverse_migration(apps, schema_editor):
    """Reverse step for the data backfill: intentionally does nothing."""
    return None
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Add the nullable ``Entry.file_object`` foreign key, then backfill it."""

    dependencies = [("database", "0078_khojuser_email_verification_code_expiry")]

    operations = [
        # Schema change first, so the data step below has a column to populate.
        migrations.AddField(
            model_name="entry",
            name="file_object",
            field=models.ForeignKey(
                to="database.fileobject",
                on_delete=django.db.models.deletion.CASCADE,
                null=True,
                blank=True,
                default=None,
            ),
        ),
        # Data step: link existing entries to their file objects.
        migrations.RunPython(migrate_entry_objects, reverse_code=reverse_migration),
    ]
|
||||||
@@ -326,6 +326,7 @@ class ProcessLock(DbBaseModel):
|
|||||||
INDEX_CONTENT = "index_content"
|
INDEX_CONTENT = "index_content"
|
||||||
SCHEDULED_JOB = "scheduled_job"
|
SCHEDULED_JOB = "scheduled_job"
|
||||||
SCHEDULE_LEADER = "schedule_leader"
|
SCHEDULE_LEADER = "schedule_leader"
|
||||||
|
APPLY_MIGRATIONS = "apply_migrations"
|
||||||
|
|
||||||
# We need to make sure that some operations are thread-safe. To do so, add locks for potentially shared operations.
|
# We need to make sure that some operations are thread-safe. To do so, add locks for potentially shared operations.
|
||||||
# For example, we need to make sure that only one process is updating the embeddings at a time.
|
# For example, we need to make sure that only one process is updating the embeddings at a time.
|
||||||
@@ -658,6 +659,14 @@ class ReflectiveQuestion(DbBaseModel):
|
|||||||
user = models.ForeignKey(KhojUser, on_delete=models.CASCADE, default=None, null=True, blank=True)
|
user = models.ForeignKey(KhojUser, on_delete=models.CASCADE, default=None, null=True, blank=True)
|
||||||
|
|
||||||
|
|
||||||
|
class FileObject(DbBaseModel):
|
||||||
|
# Contains the full text of a file that has associated Entry objects
|
||||||
|
file_name = models.CharField(max_length=400, default=None, null=True, blank=True)
|
||||||
|
raw_text = models.TextField()
|
||||||
|
user = models.ForeignKey(KhojUser, on_delete=models.CASCADE, default=None, null=True, blank=True)
|
||||||
|
agent = models.ForeignKey(Agent, on_delete=models.CASCADE, default=None, null=True, blank=True)
|
||||||
|
|
||||||
|
|
||||||
class Entry(DbBaseModel):
|
class Entry(DbBaseModel):
|
||||||
class EntryType(models.TextChoices):
|
class EntryType(models.TextChoices):
|
||||||
IMAGE = "image"
|
IMAGE = "image"
|
||||||
@@ -689,20 +698,13 @@ class Entry(DbBaseModel):
|
|||||||
hashed_value = models.CharField(max_length=100)
|
hashed_value = models.CharField(max_length=100)
|
||||||
corpus_id = models.UUIDField(default=uuid.uuid4, editable=False)
|
corpus_id = models.UUIDField(default=uuid.uuid4, editable=False)
|
||||||
search_model = models.ForeignKey(SearchModelConfig, on_delete=models.SET_NULL, default=None, null=True, blank=True)
|
search_model = models.ForeignKey(SearchModelConfig, on_delete=models.SET_NULL, default=None, null=True, blank=True)
|
||||||
|
file_object = models.ForeignKey(FileObject, on_delete=models.CASCADE, default=None, null=True, blank=True)
|
||||||
|
|
||||||
def save(self, *args, **kwargs):
|
def save(self, *args, **kwargs):
|
||||||
if self.user and self.agent:
|
if self.user and self.agent:
|
||||||
raise ValidationError("An Entry cannot be associated with both a user and an agent.")
|
raise ValidationError("An Entry cannot be associated with both a user and an agent.")
|
||||||
|
|
||||||
|
|
||||||
class FileObject(DbBaseModel):
|
|
||||||
# Same as Entry but raw will be a much larger string
|
|
||||||
file_name = models.CharField(max_length=400, default=None, null=True, blank=True)
|
|
||||||
raw_text = models.TextField()
|
|
||||||
user = models.ForeignKey(KhojUser, on_delete=models.CASCADE, default=None, null=True, blank=True)
|
|
||||||
agent = models.ForeignKey(Agent, on_delete=models.CASCADE, default=None, null=True, blank=True)
|
|
||||||
|
|
||||||
|
|
||||||
class EntryDates(DbBaseModel):
|
class EntryDates(DbBaseModel):
|
||||||
date = models.DateField()
|
date = models.DateField()
|
||||||
entry = models.ForeignKey(Entry, on_delete=models.CASCADE, related_name="embeddings_dates")
|
entry = models.ForeignKey(Entry, on_delete=models.CASCADE, related_name="embeddings_dates")
|
||||||
|
|||||||
@@ -152,8 +152,22 @@ class TextToEntries(ABC):
|
|||||||
with timer("Generated embeddings for entries to add to database in", logger):
|
with timer("Generated embeddings for entries to add to database in", logger):
|
||||||
entries_to_process = [hash_to_current_entries[hashed_val] for hashed_val in hashes_to_process]
|
entries_to_process = [hash_to_current_entries[hashed_val] for hashed_val in hashes_to_process]
|
||||||
data_to_embed = [getattr(entry, key) for entry in entries_to_process]
|
data_to_embed = [getattr(entry, key) for entry in entries_to_process]
|
||||||
|
modified_files = {entry.file for entry in entries_to_process}
|
||||||
embeddings += self.embeddings_model[model.name].embed_documents(data_to_embed)
|
embeddings += self.embeddings_model[model.name].embed_documents(data_to_embed)
|
||||||
|
|
||||||
|
file_to_file_object_map = {}
|
||||||
|
if file_to_text_map and modified_files:
|
||||||
|
with timer("Indexed text of modified file in", logger):
|
||||||
|
# create or update text of each updated file indexed on DB
|
||||||
|
for modified_file in modified_files:
|
||||||
|
raw_text = file_to_text_map[modified_file]
|
||||||
|
file_object = FileObjectAdapters.get_file_object_by_name(user, modified_file)
|
||||||
|
if file_object:
|
||||||
|
FileObjectAdapters.update_raw_text(file_object, raw_text)
|
||||||
|
else:
|
||||||
|
file_object = FileObjectAdapters.create_file_object(user, modified_file, raw_text)
|
||||||
|
file_to_file_object_map[modified_file] = file_object
|
||||||
|
|
||||||
added_entries: list[DbEntry] = []
|
added_entries: list[DbEntry] = []
|
||||||
with timer("Added entries to database in", logger):
|
with timer("Added entries to database in", logger):
|
||||||
num_items = len(hashes_to_process)
|
num_items = len(hashes_to_process)
|
||||||
@@ -165,6 +179,7 @@ class TextToEntries(ABC):
|
|||||||
batch_embeddings_to_create: List[DbEntry] = []
|
batch_embeddings_to_create: List[DbEntry] = []
|
||||||
for entry_hash, new_entry in entry_batch:
|
for entry_hash, new_entry in entry_batch:
|
||||||
entry = hash_to_current_entries[entry_hash]
|
entry = hash_to_current_entries[entry_hash]
|
||||||
|
file_object = file_to_file_object_map.get(entry.file, None)
|
||||||
batch_embeddings_to_create.append(
|
batch_embeddings_to_create.append(
|
||||||
DbEntry(
|
DbEntry(
|
||||||
user=user,
|
user=user,
|
||||||
@@ -178,6 +193,7 @@ class TextToEntries(ABC):
|
|||||||
hashed_value=entry_hash,
|
hashed_value=entry_hash,
|
||||||
corpus_id=entry.corpus_id,
|
corpus_id=entry.corpus_id,
|
||||||
search_model=model,
|
search_model=model,
|
||||||
|
file_object=file_object,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
@@ -190,19 +206,6 @@ class TextToEntries(ABC):
|
|||||||
logger.error(f"Error adding entries to database:\n{batch_indexing_error}\n---\n{e}", exc_info=True)
|
logger.error(f"Error adding entries to database:\n{batch_indexing_error}\n---\n{e}", exc_info=True)
|
||||||
logger.debug(f"Added {len(added_entries)} {file_type} entries to database")
|
logger.debug(f"Added {len(added_entries)} {file_type} entries to database")
|
||||||
|
|
||||||
if file_to_text_map:
|
|
||||||
with timer("Indexed text of modified file in", logger):
|
|
||||||
# get the set of modified files from added_entries
|
|
||||||
modified_files = {entry.file_path for entry in added_entries}
|
|
||||||
# create or update text of each updated file indexed on DB
|
|
||||||
for modified_file in modified_files:
|
|
||||||
raw_text = file_to_text_map[modified_file]
|
|
||||||
file_object = FileObjectAdapters.get_file_object_by_name(user, modified_file)
|
|
||||||
if file_object:
|
|
||||||
FileObjectAdapters.update_raw_text(file_object, raw_text)
|
|
||||||
else:
|
|
||||||
FileObjectAdapters.create_file_object(user, modified_file, raw_text)
|
|
||||||
|
|
||||||
new_dates = []
|
new_dates = []
|
||||||
with timer("Indexed dates from added entries in", logger):
|
with timer("Indexed dates from added entries in", logger):
|
||||||
for added_entry in added_entries:
|
for added_entry in added_entries:
|
||||||
|
|||||||
@@ -22,6 +22,7 @@ from starlette.authentication import requires
|
|||||||
from khoj.database import adapters
|
from khoj.database import adapters
|
||||||
from khoj.database.adapters import (
|
from khoj.database.adapters import (
|
||||||
EntryAdapters,
|
EntryAdapters,
|
||||||
|
FileObjectAdapters,
|
||||||
get_user_github_config,
|
get_user_github_config,
|
||||||
get_user_notion_config,
|
get_user_notion_config,
|
||||||
)
|
)
|
||||||
@@ -270,6 +271,8 @@ async def delete_content_files(
|
|||||||
|
|
||||||
await EntryAdapters.adelete_entry_by_file(user, filename)
|
await EntryAdapters.adelete_entry_by_file(user, filename)
|
||||||
|
|
||||||
|
await FileObjectAdapters.adelete_file_object_by_name(user, filename)
|
||||||
|
|
||||||
return {"status": "ok"}
|
return {"status": "ok"}
|
||||||
|
|
||||||
|
|
||||||
@@ -294,6 +297,8 @@ async def delete_content_file(
|
|||||||
)
|
)
|
||||||
|
|
||||||
deleted_count = await EntryAdapters.adelete_entries_by_filenames(user, files.files)
|
deleted_count = await EntryAdapters.adelete_entries_by_filenames(user, files.files)
|
||||||
|
for file in files.files:
|
||||||
|
await FileObjectAdapters.adelete_file_object_by_name(user, file)
|
||||||
|
|
||||||
return {"status": "ok", "deleted_count": deleted_count}
|
return {"status": "ok", "deleted_count": deleted_count}
|
||||||
|
|
||||||
@@ -325,6 +330,65 @@ def get_content_types(request: Request, client: Optional[str] = None):
|
|||||||
return list(configured_content_types & all_content_types)
|
return list(configured_content_types & all_content_types)
|
||||||
|
|
||||||
|
|
||||||
|
@api_content.get("/all", response_model=Dict[str, str])
@requires(["authenticated"])
async def get_all_content(request: Request, client: Optional[str] = None, truncated: Optional[bool] = True):
    """Return the requesting user's file objects as a JSON array.

    Each item carries ``file_name``, ``raw_text`` and ``updated_at``. When
    ``truncated`` is truthy, ``raw_text`` is cut to its first 1000 characters.
    """
    user = request.user.object

    update_telemetry_state(request=request, telemetry_type="api", api="get_all_filenames", client=client)

    file_objects = await FileObjectAdapters.aget_all_file_objects(user)

    # Build one serializable record per file object.
    files_data = [
        {
            "file_name": file_object.file_name,
            "raw_text": file_object.raw_text[:1000] if truncated else file_object.raw_text,
            "updated_at": str(file_object.updated_at),
        }
        for file_object in file_objects
    ]

    payload = json.dumps(files_data)
    return Response(content=payload, media_type="application/json", status_code=200)
|
||||||
|
|
||||||
|
|
||||||
|
@api_content.get("/file", response_model=Dict[str, str])
@requires(["authenticated"])
async def get_file_object(
    request: Request,
    file_name: str,
    client: Optional[str] = None,
):
    """Return the id, name and full raw text of one of the user's file objects.

    Responds with a 404 JSON error when the user has no file object with the
    given ``file_name``; otherwise responds with a 200 JSON object holding
    ``id``, ``file_name`` and ``raw_text``.
    """
    user = request.user.object

    file_objects = await FileObjectAdapters.aget_file_objects_by_name(user, file_name)
    # Guard before indexing: an empty result must produce the 404 below rather
    # than an IndexError.
    file_object = file_objects[0] if file_objects else None
    if not file_object:
        return Response(
            content=json.dumps({"error": "File not found"}),
            media_type="application/json",
            status_code=404,
        )

    update_telemetry_state(
        request=request,
        telemetry_type="api",
        api="get_file",
        client=client,
    )

    return Response(
        content=json.dumps(
            {"id": file_object.id, "file_name": file_object.file_name, "raw_text": file_object.raw_text}
        ),
        media_type="application/json",
        status_code=200,
    )
|
||||||
|
|
||||||
|
|
||||||
@api_content.get("/{content_source}", response_model=List[str])
|
@api_content.get("/{content_source}", response_model=List[str])
|
||||||
@requires(["authenticated"])
|
@requires(["authenticated"])
|
||||||
async def get_content_source(
|
async def get_content_source(
|
||||||
|
|||||||
Reference in New Issue
Block a user