mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-02 21:19:12 +00:00
Initial commit: add a dedicated page for managing the knowledge base
- One current issue in the Khoj application is that managing the files being referenced as the user's knowledge base is slightly opaque and difficult to access - Add a migration for associating the fileobjects directly with the Entry objects, making it easier to get data via foreign key - Add the new page that shows all indexed files in the search view, also allowing you to upload new docs directly from that page - Support new APIs for getting / deleting files
This commit is contained in:
@@ -94,3 +94,33 @@ export function useDebounce<T>(value: T, delay: number): T {
|
||||
|
||||
return debouncedValue;
|
||||
}
|
||||
|
||||
/**
 * Format an ISO-8601 timestamp for display.
 *
 * Timestamps from the last 24 hours are rendered relative to now
 * ("just now", "5 minutes ago", "3 hours ago"). Anything older, or clearly in
 * the future, is rendered as a full en-US date and time with a short
 * time-zone name.
 *
 * @param isoString - Timestamp string accepted by the `Date` constructor.
 * @returns The formatted label, or `isoString` unchanged when it cannot be
 *          parsed or formatted.
 */
export const formatDateTime = (isoString: string): string => {
    const date = new Date(isoString);
    const timestamp = date.getTime();

    // `new Date(...)` never throws on bad input; it produces an invalid date
    // whose time value is NaN. Detect that directly instead of relying on
    // Intl throwing later on.
    if (Number.isNaN(timestamp)) return isoString;

    const diffInMinutes = Math.floor((Date.now() - timestamp) / 60000);

    // Show relative time for recent dates. Timestamps up to a minute ahead of
    // the local clock are treated as "just now" to tolerate clock skew; dates
    // further in the future fall through to the absolute format.
    if (diffInMinutes >= -1) {
        if (diffInMinutes < 1) return "just now";
        if (diffInMinutes === 1) return "1 minute ago";
        if (diffInMinutes < 60) return `${diffInMinutes} minutes ago`;
        if (diffInMinutes < 120) return "1 hour ago";
        if (diffInMinutes < 1440) return `${Math.floor(diffInMinutes / 60)} hours ago`;
    }

    try {
        // For older dates, show full formatted date
        const formatter = new Intl.DateTimeFormat("en-US", {
            month: "long",
            day: "numeric",
            year: "numeric",
            hour: "numeric",
            minute: "2-digit",
            hour12: true,
            timeZoneName: "short",
        });

        return formatter.format(date);
    } catch (error) {
        console.error("Error formatting date:", error);
        return isoString;
    }
};
|
||||
|
||||
@@ -17,7 +17,7 @@ import {
|
||||
KhojSearchLogo,
|
||||
} from "../logo/khojLogo";
|
||||
import { Gear } from "@phosphor-icons/react/dist/ssr";
|
||||
import { Plus } from "@phosphor-icons/react";
|
||||
import { Book, Plus } from "@phosphor-icons/react";
|
||||
import { useEffect, useState } from "react";
|
||||
import AllConversations from "../allConversations/allConversations";
|
||||
import FooterMenu from "../navMenu/navMenu";
|
||||
@@ -26,6 +26,7 @@ import { useIsMobileWidth } from "@/app/common/utils";
|
||||
import { UserPlusIcon } from "lucide-react";
|
||||
import { useAuthenticatedData } from "@/app/common/auth";
|
||||
import LoginPrompt from "../loginPrompt/loginPrompt";
|
||||
import { url } from "inspector";
|
||||
|
||||
// Menu items.
|
||||
const items = [
|
||||
@@ -54,6 +55,11 @@ const items = [
|
||||
url: "/settings",
|
||||
icon: Gear,
|
||||
},
|
||||
{
|
||||
title: "Knowledge Base",
|
||||
url: "/knowledge",
|
||||
icon: Book,
|
||||
},
|
||||
];
|
||||
|
||||
const SIDEBAR_KEYBOARD_SHORTCUT = "b";
|
||||
|
||||
93
src/interface/web/app/knowledge/page.tsx
Normal file
93
src/interface/web/app/knowledge/page.tsx
Normal file
@@ -0,0 +1,93 @@
|
||||
"use client";
|
||||
|
||||
import { useState, useEffect } from "react";
|
||||
import { SidebarInset, SidebarProvider, SidebarTrigger } from "@/components/ui/sidebar";
|
||||
import { AppSidebar } from "../components/appSidebar/appSidebar";
|
||||
import { Separator } from "@/components/ui/separator";
|
||||
import { KhojLogoType } from "../components/logo/khojLogo";
|
||||
import { Card, CardHeader, CardTitle, CardContent } from "@/components/ui/card";
|
||||
import { useIsMobileWidth } from "../common/utils";
|
||||
import { InlineLoading } from "../components/loading/loading";
|
||||
|
||||
interface FileObject {
|
||||
file_name: string;
|
||||
raw_text: string;
|
||||
}
|
||||
|
||||
/**
 * Knowledge base page: fetches every indexed file from `/api/content/all`
 * once on mount and renders each as a card showing the file's base name and
 * a short preview of its text.
 */
export default function KnowledgeBase() {
    const [files, setFiles] = useState<FileObject[]>([]);
    const [loading, setLoading] = useState(true);
    const [error, setError] = useState<string | null>(null);
    const isMobileWidth = useIsMobileWidth();

    useEffect(() => {
        // Guards against setting state after the component has unmounted.
        let cancelled = false;

        const fetchFiles = async () => {
            try {
                const response = await fetch("/api/content/all");
                if (!response.ok) throw new Error("Failed to fetch files");

                const filesList = await response.json();
                if (!cancelled && Array.isArray(filesList)) {
                    // The default comparator stringifies each object to
                    // "[object Object]", which leaves the list unsorted, so
                    // compare by file name explicitly. Copy first to avoid
                    // mutating the parsed response.
                    const sortedFiles = [...filesList].sort((a: FileObject, b: FileObject) =>
                        (a.file_name ?? "").localeCompare(b.file_name ?? ""),
                    );
                    setFiles(sortedFiles);
                }
            } catch (error) {
                if (!cancelled) setError("Failed to load files");
                console.error("Error fetching files:", error);
            } finally {
                if (!cancelled) setLoading(false);
            }
        };

        fetchFiles();

        return () => {
            cancelled = true;
        };
    }, []);

    return (
        <SidebarProvider>
            <AppSidebar conversationId={""} />
            <SidebarInset>
                <header className="flex h-16 shrink-0 items-center gap-2 border-b px-4">
                    <SidebarTrigger className="-ml-1" />
                    <Separator orientation="vertical" className="mr-2 h-4" />
                    {isMobileWidth ? (
                        <a className="p-0 no-underline" href="/">
                            <KhojLogoType className="h-auto w-16" />
                        </a>
                    ) : (
                        <h2 className="text-lg">Knowledge Base</h2>
                    )}
                </header>
                <main>
                    <div className="md:w-3/4 sm:w-full mx-auto pt-6 md:pt-8">
                        {loading && (
                            <div className="mt-4 flex items-center justify-center">
                                <InlineLoading
                                    className="mt-4"
                                    message={"Loading"}
                                    iconClassName="h-5 w-5"
                                />
                            </div>
                        )}
                        {error && <div className="text-red-500">{error}</div>}

                        <div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4">
                            {files.map((file, index) => {
                                // The backing model allows a null file name, so
                                // guard before splitting the path.
                                const displayName = (file.file_name ?? "").split("/").pop();
                                const rawText = file.raw_text ?? "";
                                // Only add an ellipsis when the text was truncated.
                                const preview =
                                    rawText.length > 100 ? `${rawText.slice(0, 100)}...` : rawText;

                                return (
                                    <Card key={index}>
                                        <CardHeader>
                                            <CardTitle className="text-sm font-medium">
                                                {displayName}
                                            </CardTitle>
                                        </CardHeader>
                                        <CardContent>
                                            <p className="text-sm text-muted-foreground">
                                                {preview}
                                            </p>
                                        </CardContent>
                                    </Card>
                                );
                            })}
                        </div>
                    </div>
                </main>
            </SidebarInset>
        </SidebarProvider>
    );
}
|
||||
@@ -2,6 +2,7 @@ import type { Metadata } from "next";
|
||||
|
||||
import "../globals.css";
|
||||
import { ContentSecurityPolicy } from "../common/layoutHelper";
|
||||
import { Toaster } from "@/components/ui/toaster";
|
||||
|
||||
export const metadata: Metadata = {
|
||||
title: "Khoj AI - Search",
|
||||
@@ -35,7 +36,10 @@ export default function RootLayout({
|
||||
return (
|
||||
<html>
|
||||
<ContentSecurityPolicy />
|
||||
<body>{children}</body>
|
||||
<body>
|
||||
{children}
|
||||
<Toaster />
|
||||
</body>
|
||||
</html>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -24,16 +24,52 @@ import {
|
||||
MagnifyingGlass,
|
||||
NoteBlank,
|
||||
NotionLogo,
|
||||
Eye,
|
||||
Trash,
|
||||
ArrowsOutSimple,
|
||||
DotsThreeVertical,
|
||||
Waveform,
|
||||
Plus,
|
||||
} from "@phosphor-icons/react";
|
||||
import { Button } from "@/components/ui/button";
|
||||
import Link from "next/link";
|
||||
import { getIconFromFilename } from "../common/iconUtils";
|
||||
import { useIsMobileWidth } from "../common/utils";
|
||||
import { formatDateTime, useIsMobileWidth } from "../common/utils";
|
||||
import { SidebarInset, SidebarProvider, SidebarTrigger } from "@/components/ui/sidebar";
|
||||
import { AppSidebar } from "../components/appSidebar/appSidebar";
|
||||
import { Separator } from "@/components/ui/separator";
|
||||
import { KhojLogoType } from "../components/logo/khojLogo";
|
||||
|
||||
import { InlineLoading } from "../components/loading/loading";
|
||||
import {
|
||||
AlertDialog,
|
||||
AlertDialogContent,
|
||||
AlertDialogDescription,
|
||||
AlertDialogFooter,
|
||||
AlertDialogHeader,
|
||||
AlertDialogTitle,
|
||||
AlertDialogCancel,
|
||||
AlertDialogAction,
|
||||
AlertDialogTrigger,
|
||||
} from "@/components/ui/alert-dialog";
|
||||
import {
|
||||
Dialog,
|
||||
DialogContent,
|
||||
DialogHeader,
|
||||
DialogTitle,
|
||||
DialogTrigger,
|
||||
} from "@/components/ui/dialog";
|
||||
import { useToast } from "@/components/ui/use-toast";
|
||||
import { Scroll } from "lucide-react";
|
||||
import {
|
||||
DropdownMenu,
|
||||
DropdownMenuContent,
|
||||
DropdownMenuItem,
|
||||
DropdownMenuLabel,
|
||||
DropdownMenuTrigger,
|
||||
} from "@/components/ui/dropdown-menu";
|
||||
import { uploadDataForIndexing } from "../common/chatFunctions";
|
||||
import { CommandDialog } from "@/components/ui/command";
|
||||
import { Progress } from "@/components/ui/progress";
|
||||
interface AdditionalData {
|
||||
file: string;
|
||||
source: string;
|
||||
@@ -49,6 +85,12 @@ interface SearchResult {
|
||||
"corpus-id": string;
|
||||
}
|
||||
|
||||
interface FileObject {
|
||||
file_name: string;
|
||||
raw_text: string;
|
||||
updated_at: string;
|
||||
}
|
||||
|
||||
function getNoteTypeIcon(source: string) {
|
||||
if (source === "notion") {
|
||||
return <NotionLogo className="text-muted-foreground" />;
|
||||
@@ -92,7 +134,7 @@ function Note(props: NoteResultProps) {
|
||||
const fileIcon = getIconFromFilename(fileName || ".txt", "h-4 w-4 inline mr-2");
|
||||
|
||||
return (
|
||||
<Card className="bg-secondary h-full shadow-sm rounded-lg border border-muted mb-4">
|
||||
<Card className="bg-secondary h-full shadow-sm rounded-lg border border-muted mb-4 animate-fade-in-up">
|
||||
<CardHeader>
|
||||
<CardTitle className="inline-flex gap-2">
|
||||
{getNoteTypeIcon(note.additional.source)}
|
||||
@@ -139,7 +181,7 @@ function focusNote(note: SearchResult) {
|
||||
const fileIcon = getIconFromFilename(fileName || ".txt", "h-4 w-4 inline mr-2");
|
||||
|
||||
return (
|
||||
<Card className="bg-secondary h-full shadow-sm rounded-lg bg-gradient-to-b from-background to-slate-50 dark:to-gray-950 border border-muted mb-4">
|
||||
<Card className="bg-secondary h-full shadow-sm rounded-lg border border-muted mb-4">
|
||||
<CardHeader>
|
||||
<CardTitle>{fileName}</CardTitle>
|
||||
</CardHeader>
|
||||
@@ -167,27 +209,147 @@ function focusNote(note: SearchResult) {
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Drop zone for uploading files to be indexed.
 *
 * Files can be dropped onto the zone or picked through a hidden file input
 * that opens when the zone is clicked. The upload itself is delegated to
 * `uploadDataForIndexing`, which reports progress and results through the
 * setters passed to it.
 *
 * Props:
 * - `onClose`: accepted for interface compatibility; not used by this component.
 * - `setUploadedFiles`: forwarded to `uploadDataForIndexing`.
 */
const UploadFiles: React.FC<{
    onClose: () => void;
    setUploadedFiles: (files: string[]) => void;
}> = ({ setUploadedFiles }) => {
    const [isDragAndDropping, setIsDragAndDropping] = useState(false);

    const [warning, setWarning] = useState<string | null>(null);
    const [error, setError] = useState<string | null>(null);
    const [uploading, setUploading] = useState(false);
    const [progressValue, setProgressValue] = useState(0);
    const fileInputRef = useRef<HTMLInputElement>(null);

    useEffect(() => {
        if (!uploading) {
            setProgressValue(0);
            return;
        }

        // No real progress signal is read here, so the bar advances by a
        // random 1-5 points every 800ms and is capped at 100.
        const interval = setInterval(() => {
            setProgressValue((prev) => {
                const increment = Math.floor(Math.random() * 5) + 1;
                return Math.min(prev + increment, 100);
            });
        }, 800);

        return () => clearInterval(interval);
    }, [uploading]);

    function handleDragOver(event: React.DragEvent<HTMLDivElement>) {
        // preventDefault is required for the element to accept a drop.
        event.preventDefault();
        setIsDragAndDropping(true);
    }

    function handleDragLeave(event: React.DragEvent<HTMLDivElement>) {
        event.preventDefault();
        setIsDragAndDropping(false);
    }

    function handleDragAndDropFiles(event: React.DragEvent<HTMLDivElement>) {
        event.preventDefault();
        setIsDragAndDropping(false);

        uploadFiles(event.dataTransfer.files);
    }

    function openFileInput() {
        fileInputRef.current?.click();
    }

    function handleFileChange(event: React.ChangeEvent<HTMLInputElement>) {
        uploadFiles(event.target.files);
    }

    function uploadFiles(files: FileList | null) {
        // Dragging non-file content or cancelling the picker yields no files.
        if (!files || files.length === 0) return;

        uploadDataForIndexing(files, setWarning, setUploading, setError, setUploadedFiles);
    }

    return (
        <div
            className={`flex flex-col h-full`}
            onDragOver={handleDragOver}
            onDragLeave={handleDragLeave}
            onDrop={handleDragAndDropFiles}
            onClick={openFileInput}
        >
            <input
                type="file"
                multiple
                ref={fileInputRef}
                style={{ display: "none" }}
                onChange={handleFileChange}
            />
            <div className="flex-none p-4">
                {uploading && (
                    <Progress
                        indicatorColor="bg-slate-500"
                        className="w-full h-2 rounded-full"
                        value={progressValue}
                    />
                )}
                {/* Show what the uploader reported instead of dropping it silently. */}
                {warning && <div className="text-yellow-600">{warning}</div>}
                {error && <div className="text-red-500">{error}</div>}
            </div>
            <div
                className={`flex-none p-4 bg-secondary border-b ${isDragAndDropping ? "animate-pulse" : ""} rounded-lg`}
            >
                <div className="flex items-center justify-center w-full h-32 border-2 border-dashed border-gray-300 rounded-lg">
                    {isDragAndDropping ? (
                        <div className="flex items-center justify-center w-full h-full">
                            <Waveform className="h-6 w-6 mr-2" />
                            <span>Drop files to upload</span>
                        </div>
                    ) : (
                        <div className="flex items-center justify-center w-full h-full">
                            <Plus className="h-6 w-6 mr-2" />
                            <span>Drag and drop files here</span>
                        </div>
                    )}
                </div>
            </div>
        </div>
    );
};
|
||||
|
||||
export default function Search() {
|
||||
const [searchQuery, setSearchQuery] = useState("");
|
||||
const [searchResults, setSearchResults] = useState<SearchResult[] | null>(null);
|
||||
const [searchResultsLoading, setSearchResultsLoading] = useState(false);
|
||||
const [focusSearchResult, setFocusSearchResult] = useState<SearchResult | null>(null);
|
||||
const [exampleQuery, setExampleQuery] = useState("");
|
||||
const [files, setFiles] = useState<FileObject[]>([]);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
const [fileObjectsLoading, setFileObjectsLoading] = useState(true);
|
||||
const searchTimeoutRef = useRef<NodeJS.Timeout | null>(null);
|
||||
const [selectedFile, setSelectedFile] = useState<string | null>(null);
|
||||
const [selectedFileFullText, setSelectedFileFullText] = useState<string | null>(null);
|
||||
const [isDeleting, setIsDeleting] = useState(false);
|
||||
const [uploadedFiles, setUploadedFiles] = useState<string[]>([]);
|
||||
const [selectedFiles, setSelectedFiles] = useState<string[]>([]);
|
||||
const [filteredFiles, setFilteredFiles] = useState<string[]>([]);
|
||||
|
||||
const { toast } = useToast();
|
||||
|
||||
const isMobileWidth = useIsMobileWidth();
|
||||
|
||||
useEffect(() => {
|
||||
setExampleQuery(
|
||||
naturalLanguageSearchQueryExamples[
|
||||
Math.floor(Math.random() * naturalLanguageSearchQueryExamples.length)
|
||||
],
|
||||
);
|
||||
}, []);
|
||||
|
||||
function search() {
|
||||
if (searchResultsLoading || !searchQuery.trim()) return;
|
||||
|
||||
setSearchResultsLoading(true);
|
||||
|
||||
const apiUrl = `/api/search?q=${encodeURIComponent(searchQuery)}&client=web`;
|
||||
fetch(apiUrl, {
|
||||
method: "GET",
|
||||
@@ -205,8 +367,69 @@ export default function Search() {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Bulk-delete files through `DELETE /api/content/files`.
 *
 * Deletes the explicitly selected files, or every filtered file when nothing
 * is selected. On success the deleted names are removed from `uploadedFiles`
 * and the selection is cleared; on failure the error is logged and a
 * destructive toast is shown.
 */
const deleteSelected = async () => {
    const filesToDelete = selectedFiles.length > 0 ? selectedFiles : filteredFiles;

    if (filesToDelete.length === 0) {
        return;
    }

    try {
        const response = await fetch("/api/content/files", {
            method: "DELETE",
            headers: {
                "Content-Type": "application/json",
            },
            body: JSON.stringify({ files: filesToDelete }),
        });

        if (!response.ok) throw new Error("Failed to delete files");

        // Drop the deleted names from the uploadedFiles state
        setUploadedFiles((prevFiles) =>
            prevFiles.filter((file) => !filesToDelete.includes(file)),
        );

        // Reset selectedFiles
        setSelectedFiles([]);
    } catch (error) {
        console.error("Error deleting files:", error);
        // Tell the user as well, matching the single-file delete handler.
        toast({
            title: "Error deleting files",
            description: `Failed to delete ${filesToDelete.length} file(s)`,
            variant: "destructive",
        });
    }
};
|
||||
|
||||
/**
 * Load every indexed file from `/api/content/all` into `files`, sorted by
 * file name. Sets the page-level error message on failure and always clears
 * the loading flag.
 */
const fetchFiles = async () => {
    try {
        const response = await fetch("/api/content/all");
        if (!response.ok) throw new Error("Failed to fetch files");

        const filesList = await response.json();
        if (Array.isArray(filesList)) {
            // The default comparator stringifies each object to
            // "[object Object]", which leaves the list unsorted, so compare
            // by file name explicitly. Copy first to avoid mutating the
            // parsed response.
            const sortedFiles = [...filesList].sort((a: FileObject, b: FileObject) =>
                (a.file_name ?? "").localeCompare(b.file_name ?? ""),
            );
            setFiles(sortedFiles);
        }
    } catch (error) {
        setError("Failed to load files");
        console.error("Error fetching files:", error);
    } finally {
        setFileObjectsLoading(false);
    }
};
|
||||
|
||||
/**
 * Fetch one file from `/api/content/file` and store its `raw_text` as the
 * full text shown in the viewer dialog.
 *
 * @param fileName - Name of the file to load; it is URL-encoded before being
 *                   placed in the query string.
 */
const fetchSpecificFile = async (fileName: string) => {
    try {
        // File names are paths and may contain "/", "&", "#", "?" or spaces,
        // any of which would corrupt an unencoded query string.
        const response = await fetch(
            `/api/content/file?file_name=${encodeURIComponent(fileName)}`,
        );
        if (!response.ok) throw new Error("Failed to fetch file");

        const file = await response.json();
        setSelectedFileFullText(file.raw_text);
    } catch (error) {
        setError("Failed to load file");
        console.error("Error fetching file:", error);
    }
};
|
||||
|
||||
useEffect(() => {
|
||||
if (!searchQuery.trim()) {
|
||||
setSearchResults(null);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -229,6 +452,48 @@ export default function Search() {
|
||||
};
|
||||
}, [searchQuery]);
|
||||
|
||||
useEffect(() => {
|
||||
if (selectedFile) {
|
||||
fetchSpecificFile(selectedFile);
|
||||
}
|
||||
}, [selectedFile]);
|
||||
|
||||
useEffect(() => {
|
||||
fetchFiles();
|
||||
}, []);
|
||||
|
||||
useEffect(() => {
|
||||
if (uploadedFiles.length > 0) {
|
||||
fetchFiles();
|
||||
}
|
||||
}, [uploadedFiles]);
|
||||
|
||||
/**
 * Delete a single file through `DELETE /api/content/file`, show a toast with
 * the outcome, and refresh the file list on success.
 *
 * @param fileName - Name of the file to delete; it is URL-encoded before
 *                   being placed in the query string.
 */
const handleDelete = async (fileName: string) => {
    setIsDeleting(true);
    try {
        // File names are paths and may contain "/", "&", "#", "?" or spaces,
        // any of which would corrupt an unencoded query string.
        const response = await fetch(
            `/api/content/file?filename=${encodeURIComponent(fileName)}`,
            {
                method: "DELETE",
            },
        );
        if (!response.ok) throw new Error("Failed to delete file");
        toast({
            title: "File deleted",
            description: `File ${fileName} has been deleted`,
            variant: "default",
        });

        // Refresh files list
        fetchFiles();
    } catch (error) {
        console.error("Error deleting file:", error);
        toast({
            title: "Error deleting file",
            description: `Failed to delete file ${fileName}`,
            variant: "destructive",
        });
    } finally {
        setIsDeleting(false);
    }
};
|
||||
|
||||
return (
|
||||
<SidebarProvider>
|
||||
<AppSidebar conversationId={""} />
|
||||
@@ -251,20 +516,34 @@ export default function Search() {
|
||||
<div className="flex justify-between items-center border-2 border-muted p-1 gap-1 rounded-lg">
|
||||
<Input
|
||||
autoFocus={true}
|
||||
className="border-none pl-4"
|
||||
className="border-none pl-4 focus-visible:ring-transparent focus-visible:ring-offset-transparent"
|
||||
onChange={(e) => setSearchQuery(e.currentTarget.value)}
|
||||
onKeyDown={(e) => e.key === "Enter" && search()}
|
||||
type="search"
|
||||
placeholder="Search Documents"
|
||||
/>
|
||||
<button
|
||||
className="px-2 gap-2 inline-flex items-center rounded border-l border-gray-300 hover:text-gray-500"
|
||||
<Button
|
||||
className="px-2 gap-2 inline-flex rounded-none items-center border-l border-gray-300 hover:text-gray-500"
|
||||
variant={"ghost"}
|
||||
onClick={() => search()}
|
||||
>
|
||||
<MagnifyingGlass className="h-4 w-4" />
|
||||
<span>Find</span>
|
||||
</button>
|
||||
</Button>
|
||||
</div>
|
||||
<UploadFiles
|
||||
onClose={() => {}}
|
||||
setUploadedFiles={setUploadedFiles}
|
||||
/>
|
||||
{searchResultsLoading && (
|
||||
<div className="mt-4 flex items-center justify-center">
|
||||
<InlineLoading
|
||||
className="mt-4"
|
||||
message={"Searching"}
|
||||
iconClassName="h-5 w-5"
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
{focusSearchResult && (
|
||||
<div className="mt-4">
|
||||
<Button
|
||||
@@ -279,6 +558,7 @@ export default function Search() {
|
||||
</div>
|
||||
)}
|
||||
{!focusSearchResult &&
|
||||
!searchResultsLoading &&
|
||||
searchResults &&
|
||||
searchResults.length > 0 && (
|
||||
<div className="mt-4 max-w-[92vw] break-all">
|
||||
@@ -297,23 +577,149 @@ export default function Search() {
|
||||
</ScrollArea>
|
||||
</div>
|
||||
)}
|
||||
{searchResults == null && (
|
||||
<Card className="flex flex-col items-center justify-center border-none shadow-none">
|
||||
<CardHeader className="flex flex-col items-center justify-center">
|
||||
<CardDescription className="border-muted-foreground border w-fit rounded-lg mb-2 text-center text-lg p-4">
|
||||
<FileMagnifyingGlass
|
||||
weight="fill"
|
||||
className="text-muted-foreground h-10 w-10"
|
||||
{searchResults === null && (
|
||||
<div className="w-full mt-4">
|
||||
{fileObjectsLoading && (
|
||||
<div className="mt-4 flex items-center justify-center">
|
||||
<InlineLoading
|
||||
className="mt-4"
|
||||
message={"Loading"}
|
||||
iconClassName="h-5 w-5"
|
||||
/>
|
||||
</CardDescription>
|
||||
<CardTitle className="text-center">
|
||||
Search across your documents
|
||||
</CardTitle>
|
||||
</CardHeader>
|
||||
<CardContent className="text-muted-foreground items-center justify-center text-center flex">
|
||||
<Lightbulb className="inline mr-2" /> {exampleQuery}
|
||||
</CardContent>
|
||||
</Card>
|
||||
</div>
|
||||
)}
|
||||
{error && <div className="text-red-500">{error}</div>}
|
||||
|
||||
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4">
|
||||
{files.map((file, index) => (
|
||||
<Card
|
||||
key={index}
|
||||
className="animate-fade-in-up bg-secondary h-52"
|
||||
>
|
||||
<CardHeader className="p-2">
|
||||
<CardTitle
|
||||
className="flex items-center gap-2"
|
||||
title={file.file_name}
|
||||
>
|
||||
<div className="text-sm font-medium truncate hover:text-clip hover:whitespace-normal">
|
||||
{file.file_name.split("/").pop()}
|
||||
</div>
|
||||
<DropdownMenu>
|
||||
<DropdownMenuTrigger>
|
||||
<Button variant={"ghost"}>
|
||||
<DotsThreeVertical className="h-4 w-4" />
|
||||
</Button>
|
||||
</DropdownMenuTrigger>
|
||||
<DropdownMenuContent className="flex flex-col gap-0 w-fit">
|
||||
<DropdownMenuItem className="p-0">
|
||||
<AlertDialog>
|
||||
<AlertDialogTrigger>
|
||||
<Button
|
||||
variant={
|
||||
"ghost"
|
||||
}
|
||||
className="flex items-center gap-2 p-1 text-sm"
|
||||
>
|
||||
<Trash className="h-4 w-4" />
|
||||
<span className="text-xs">
|
||||
Delete
|
||||
</span>
|
||||
</Button>
|
||||
</AlertDialogTrigger>
|
||||
<AlertDialogContent>
|
||||
<AlertDialogHeader>
|
||||
<AlertDialogTitle>
|
||||
Delete File
|
||||
</AlertDialogTitle>
|
||||
</AlertDialogHeader>
|
||||
<AlertDialogDescription>
|
||||
Are you sure you
|
||||
want to delete
|
||||
this file?
|
||||
</AlertDialogDescription>
|
||||
<AlertDialogFooter>
|
||||
<AlertDialogCancel>
|
||||
Cancel
|
||||
</AlertDialogCancel>
|
||||
<AlertDialogAction
|
||||
onClick={() =>
|
||||
handleDelete(
|
||||
file.file_name,
|
||||
)
|
||||
}
|
||||
>
|
||||
{isDeleting
|
||||
? "Deleting..."
|
||||
: "Delete"}
|
||||
</AlertDialogAction>
|
||||
</AlertDialogFooter>
|
||||
</AlertDialogContent>
|
||||
</AlertDialog>
|
||||
</DropdownMenuItem>
|
||||
<DropdownMenuItem className="p-0">
|
||||
<Dialog>
|
||||
<DialogTrigger>
|
||||
<Button
|
||||
variant={
|
||||
"ghost"
|
||||
}
|
||||
className="flex items-center gap-2 p-1 text-sm"
|
||||
onClick={() => {
|
||||
setSelectedFileFullText(
|
||||
null,
|
||||
);
|
||||
setSelectedFile(
|
||||
file.file_name,
|
||||
);
|
||||
}}
|
||||
>
|
||||
<ArrowsOutSimple className="h-4 w-4" />
|
||||
<span className="text-xs">
|
||||
View Full
|
||||
Text
|
||||
</span>
|
||||
</Button>
|
||||
</DialogTrigger>
|
||||
<DialogContent>
|
||||
<DialogHeader>
|
||||
<DialogTitle>
|
||||
{file.file_name
|
||||
.split(
|
||||
"/",
|
||||
)
|
||||
.pop()}
|
||||
</DialogTitle>
|
||||
</DialogHeader>
|
||||
<ScrollArea className="h-[50vh]">
|
||||
<p className="whitespace-pre-wrap break-words text-sm font-normal">
|
||||
{
|
||||
selectedFileFullText
|
||||
}
|
||||
</p>
|
||||
</ScrollArea>
|
||||
</DialogContent>
|
||||
</Dialog>
|
||||
</DropdownMenuItem>
|
||||
</DropdownMenuContent>
|
||||
</DropdownMenu>
|
||||
</CardTitle>
|
||||
</CardHeader>
|
||||
<CardContent className="p-2">
|
||||
<ScrollArea className="h-24">
|
||||
<p className="whitespace-pre-wrap break-words text-sm font-normal text-muted-foreground p-2 rounded-lg bg-background">
|
||||
{file.raw_text.slice(0, 100)}...
|
||||
</p>
|
||||
</ScrollArea>
|
||||
</CardContent>
|
||||
<CardFooter className="flex justify-end gap-2 p-2">
|
||||
<div className="text-muted-foreground text-xs">
|
||||
{formatDateTime(file.updated_at)}
|
||||
</div>
|
||||
</CardFooter>
|
||||
</Card>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
{searchResults && searchResults.length === 0 && (
|
||||
<Card className="flex flex-col items-center justify-center border-none shadow-none">
|
||||
|
||||
@@ -0,0 +1,49 @@
|
||||
from django.core.management.base import BaseCommand
|
||||
from django.db.models import Exists, OuterRef
|
||||
|
||||
from khoj.database.models import Entry, FileObject
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Management command that removes FileObjects no Entry refers to.

    Runs as a dry run by default and only reports what would be deleted.
    Pass ``--apply`` to actually delete.
    """

    help = "Deletes FileObjects that have no associated Entries"

    def add_arguments(self, parser):
        """Register the ``--apply`` flag that switches from dry run to deletion."""
        parser.add_argument(
            "--apply",
            action="store_true",
            help="Actually perform the deletion. Without this flag, only shows what would be deleted.",
        )

    def handle(self, *args, **options):
        """Find orphaned FileObjects and delete (or just count) them in batches."""
        apply_changes = options["apply"]

        # Find FileObjects with no related entries using subquery
        orphaned_files = FileObject.objects.annotate(
            has_entries=Exists(Entry.objects.filter(file_object=OuterRef("pk")))
        ).filter(has_entries=False)

        total_orphaned = orphaned_files.count()
        mode = "DELETE" if apply_changes else "DRY RUN"
        self.stdout.write(f"[{mode}] Found {total_orphaned} orphaned FileObjects")

        if total_orphaned == 0:
            self.stdout.write("No orphaned FileObjects to process")
            return

        # Process in batches of 1000
        batch_size = 1000
        processed = 0
        last_pk = None

        while True:
            # Walk the orphans in primary-key order and resume after the last
            # key seen. Re-reading the first slice each time would never end
            # in a dry run, because nothing is removed between iterations.
            remaining = orphaned_files.order_by("pk")
            if last_pk is not None:
                remaining = remaining.filter(pk__gt=last_pk)

            batch_ids = list(remaining.values_list("pk", flat=True)[:batch_size])
            if not batch_ids:
                break
            last_pk = batch_ids[-1]

            if apply_changes:
                # Django refuses to delete a sliced queryset, so delete by
                # primary key. Keeping the orphan filter means a file that
                # gained an Entry since the batch was read is left alone.
                _, deleted_per_model = orphaned_files.filter(pk__in=batch_ids).delete()
                processed += deleted_per_model.get(FileObject._meta.label, 0)
                self.stdout.write(f"Deleted {processed}/{total_orphaned} orphaned FileObjects")
            else:
                processed += len(batch_ids)
                self.stdout.write(f"Would delete {processed}/{total_orphaned} orphaned FileObjects")

        action = "Deleted" if apply_changes else "Would delete"
        self.stdout.write(self.style.SUCCESS(f"{action} {processed} orphaned FileObjects"))
|
||||
75
src/khoj/database/migrations/0079_entry_file_object.py
Normal file
75
src/khoj/database/migrations/0079_entry_file_object.py
Normal file
@@ -0,0 +1,75 @@
|
||||
# Generated by Django 5.0.10 on 2025-01-10 18:28
|
||||
|
||||
import django.db.models.deletion
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
def migrate_entry_objects(apps, schema_editor):
    """Backfill ``Entry.file_object`` for entries that do not have one yet.

    An entry is linked to the FileObject that has the same user and whose
    ``file_name`` equals the entry's ``file_path``. Entries without a matching
    FileObject are left unlinked.

    Args:
        apps: Historical app registry supplied by the migration framework.
        schema_editor: Schema editor; only its connection alias is used.
    """
    Entry = apps.get_model("database", "Entry")
    FileObject = apps.get_model("database", "FileObject")
    db_alias = schema_editor.connection.alias

    # Create lookup dictionary of all file objects. Only the key columns are
    # fetched, so the full text of every file (raw_text) is not loaded into
    # memory. Ordering by id makes the choice among duplicate
    # (user, file_name) pairs deterministic: the highest id wins.
    file_object_id_map = {
        (user_id, file_name): file_object_id
        for file_object_id, user_id, file_name in FileObject.objects.using(db_alias)
        .order_by("id")
        .values_list("id", "user_id", "file_name")
    }

    # Process entries in chunks of 1000
    chunk_size = 1000
    processed = 0
    last_entry_id = None

    while True:
        # Page through entries by primary key rather than excluding an
        # ever-growing set of already-seen ids, which makes each query larger
        # than the last.
        pending = Entry.objects.using(db_alias).filter(file_object__isnull=True).order_by("id")
        if last_entry_id is not None:
            pending = pending.filter(id__gt=last_entry_id)

        entries = list(pending.only("id", "user", "file_path")[:chunk_size])
        if not entries:
            break

        last_entry_id = entries[-1].id

        entries_to_update = []
        for entry in entries:
            file_object_id = file_object_id_map.get((entry.user_id, entry.file_path))
            if file_object_id is not None:
                entry.file_object_id = file_object_id
                entries_to_update.append(entry)

        if entries_to_update:
            Entry.objects.using(db_alias).bulk_update(entries_to_update, ["file_object"], batch_size=chunk_size)

        processed += len(entries)
        print(f"Processed {processed} entries")
|
||||
|
||||
|
||||
def reverse_migration(apps, schema_editor):
    """Reverse step for the data backfill: intentionally does nothing."""
    return None
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Add the nullable ``Entry.file_object`` foreign key, then backfill it."""

    dependencies = [("database", "0078_khojuser_email_verification_code_expiry")]

    operations = [
        # Schema step: optional link from an Entry to its FileObject.
        migrations.AddField(
            model_name="entry",
            name="file_object",
            field=models.ForeignKey(
                to="database.fileobject",
                on_delete=django.db.models.deletion.CASCADE,
                null=True,
                blank=True,
                default=None,
            ),
        ),
        # Data step: populate the new column; the reverse step is a no-op.
        migrations.RunPython(migrate_entry_objects, reverse_migration),
    ]
|
||||
@@ -326,6 +326,7 @@ class ProcessLock(DbBaseModel):
|
||||
INDEX_CONTENT = "index_content"
|
||||
SCHEDULED_JOB = "scheduled_job"
|
||||
SCHEDULE_LEADER = "schedule_leader"
|
||||
APPLY_MIGRATIONS = "apply_migrations"
|
||||
|
||||
# We need to make sure that some operations are thread-safe. To do so, add locks for potentially shared operations.
|
||||
# For example, we need to make sure that only one process is updating the embeddings at a time.
|
||||
@@ -658,6 +659,14 @@ class ReflectiveQuestion(DbBaseModel):
|
||||
user = models.ForeignKey(KhojUser, on_delete=models.CASCADE, default=None, null=True, blank=True)
|
||||
|
||||
|
||||
class FileObject(DbBaseModel):
|
||||
# Contains the full text of a file that has associated Entry objects
|
||||
file_name = models.CharField(max_length=400, default=None, null=True, blank=True)
|
||||
raw_text = models.TextField()
|
||||
user = models.ForeignKey(KhojUser, on_delete=models.CASCADE, default=None, null=True, blank=True)
|
||||
agent = models.ForeignKey(Agent, on_delete=models.CASCADE, default=None, null=True, blank=True)
|
||||
|
||||
|
||||
class Entry(DbBaseModel):
|
||||
class EntryType(models.TextChoices):
|
||||
IMAGE = "image"
|
||||
@@ -689,20 +698,13 @@ class Entry(DbBaseModel):
|
||||
hashed_value = models.CharField(max_length=100)
|
||||
corpus_id = models.UUIDField(default=uuid.uuid4, editable=False)
|
||||
search_model = models.ForeignKey(SearchModelConfig, on_delete=models.SET_NULL, default=None, null=True, blank=True)
|
||||
file_object = models.ForeignKey(FileObject, on_delete=models.CASCADE, default=None, null=True, blank=True)
|
||||
|
||||
def save(self, *args, **kwargs):
|
||||
if self.user and self.agent:
|
||||
raise ValidationError("An Entry cannot be associated with both a user and an agent.")
|
||||
|
||||
|
||||
class FileObject(DbBaseModel):
|
||||
# Same as Entry but raw will be a much larger string
|
||||
file_name = models.CharField(max_length=400, default=None, null=True, blank=True)
|
||||
raw_text = models.TextField()
|
||||
user = models.ForeignKey(KhojUser, on_delete=models.CASCADE, default=None, null=True, blank=True)
|
||||
agent = models.ForeignKey(Agent, on_delete=models.CASCADE, default=None, null=True, blank=True)
|
||||
|
||||
|
||||
class EntryDates(DbBaseModel):
|
||||
date = models.DateField()
|
||||
entry = models.ForeignKey(Entry, on_delete=models.CASCADE, related_name="embeddings_dates")
|
||||
|
||||
@@ -152,8 +152,22 @@ class TextToEntries(ABC):
|
||||
with timer("Generated embeddings for entries to add to database in", logger):
|
||||
entries_to_process = [hash_to_current_entries[hashed_val] for hashed_val in hashes_to_process]
|
||||
data_to_embed = [getattr(entry, key) for entry in entries_to_process]
|
||||
modified_files = {entry.file for entry in entries_to_process}
|
||||
embeddings += self.embeddings_model[model.name].embed_documents(data_to_embed)
|
||||
|
||||
file_to_file_object_map = {}
|
||||
if file_to_text_map and modified_files:
|
||||
with timer("Indexed text of modified file in", logger):
|
||||
# create or update text of each updated file indexed on DB
|
||||
for modified_file in modified_files:
|
||||
raw_text = file_to_text_map[modified_file]
|
||||
file_object = FileObjectAdapters.get_file_object_by_name(user, modified_file)
|
||||
if file_object:
|
||||
FileObjectAdapters.update_raw_text(file_object, raw_text)
|
||||
else:
|
||||
file_object = FileObjectAdapters.create_file_object(user, modified_file, raw_text)
|
||||
file_to_file_object_map[modified_file] = file_object
|
||||
|
||||
added_entries: list[DbEntry] = []
|
||||
with timer("Added entries to database in", logger):
|
||||
num_items = len(hashes_to_process)
|
||||
@@ -165,6 +179,7 @@ class TextToEntries(ABC):
|
||||
batch_embeddings_to_create: List[DbEntry] = []
|
||||
for entry_hash, new_entry in entry_batch:
|
||||
entry = hash_to_current_entries[entry_hash]
|
||||
file_object = file_to_file_object_map.get(entry.file, None)
|
||||
batch_embeddings_to_create.append(
|
||||
DbEntry(
|
||||
user=user,
|
||||
@@ -178,6 +193,7 @@ class TextToEntries(ABC):
|
||||
hashed_value=entry_hash,
|
||||
corpus_id=entry.corpus_id,
|
||||
search_model=model,
|
||||
file_object=file_object,
|
||||
)
|
||||
)
|
||||
try:
|
||||
@@ -190,19 +206,6 @@ class TextToEntries(ABC):
|
||||
logger.error(f"Error adding entries to database:\n{batch_indexing_error}\n---\n{e}", exc_info=True)
|
||||
logger.debug(f"Added {len(added_entries)} {file_type} entries to database")
|
||||
|
||||
if file_to_text_map:
|
||||
with timer("Indexed text of modified file in", logger):
|
||||
# get the set of modified files from added_entries
|
||||
modified_files = {entry.file_path for entry in added_entries}
|
||||
# create or update text of each updated file indexed on DB
|
||||
for modified_file in modified_files:
|
||||
raw_text = file_to_text_map[modified_file]
|
||||
file_object = FileObjectAdapters.get_file_object_by_name(user, modified_file)
|
||||
if file_object:
|
||||
FileObjectAdapters.update_raw_text(file_object, raw_text)
|
||||
else:
|
||||
FileObjectAdapters.create_file_object(user, modified_file, raw_text)
|
||||
|
||||
new_dates = []
|
||||
with timer("Indexed dates from added entries in", logger):
|
||||
for added_entry in added_entries:
|
||||
|
||||
@@ -22,6 +22,7 @@ from starlette.authentication import requires
|
||||
from khoj.database import adapters
|
||||
from khoj.database.adapters import (
|
||||
EntryAdapters,
|
||||
FileObjectAdapters,
|
||||
get_user_github_config,
|
||||
get_user_notion_config,
|
||||
)
|
||||
@@ -270,6 +271,8 @@ async def delete_content_files(
|
||||
|
||||
await EntryAdapters.adelete_entry_by_file(user, filename)
|
||||
|
||||
await FileObjectAdapters.adelete_file_object_by_name(user, filename)
|
||||
|
||||
return {"status": "ok"}
|
||||
|
||||
|
||||
@@ -294,6 +297,8 @@ async def delete_content_file(
|
||||
)
|
||||
|
||||
deleted_count = await EntryAdapters.adelete_entries_by_filenames(user, files.files)
|
||||
for file in files.files:
|
||||
await FileObjectAdapters.adelete_file_object_by_name(user, file)
|
||||
|
||||
return {"status": "ok", "deleted_count": deleted_count}
|
||||
|
||||
@@ -325,6 +330,65 @@ def get_content_types(request: Request, client: Optional[str] = None):
|
||||
return list(configured_content_types & all_content_types)
|
||||
|
||||
|
||||
@api_content.get("/all", response_model=List[Dict[str, str]])
@requires(["authenticated"])
async def get_all_content(request: Request, client: Optional[str] = None, truncated: Optional[bool] = True):
    """
    List every file object stored for the authenticated user.

    Args:
        request: Incoming request; the user is read from `request.user.object`.
        client: Optional client identifier, forwarded to telemetry only.
        truncated: When truthy (the default), `raw_text` is cut to its first
            1000 characters; otherwise the full text is returned.

    Returns:
        A JSON response whose body is an array of objects with the keys
        `file_name`, `raw_text` and `updated_at` (stringified timestamp).
    """
    user = request.user.object

    update_telemetry_state(
        request=request,
        telemetry_type="api",
        api="get_all_filenames",
        client=client,
    )

    file_objects = await FileObjectAdapters.aget_all_file_objects(user)
    files_data = [
        {
            "file_name": file_object.file_name,
            "raw_text": file_object.raw_text[:1000] if truncated else file_object.raw_text,
            "updated_at": str(file_object.updated_at),
        }
        for file_object in file_objects
    ]

    # The payload is serialized by hand and returned as a Response, so the
    # response_model above only documents the shape (an array of objects).
    return Response(content=json.dumps(files_data), media_type="application/json", status_code=200)
|
||||
|
||||
|
||||
@api_content.get("/file", response_model=Dict[str, str])
@requires(["authenticated"])
async def get_file_object(
    request: Request,
    file_name: str,
    client: Optional[str] = None,
):
    """
    Return one of the authenticated user's file objects, looked up by name.

    Args:
        request: Incoming request; the user is read from `request.user.object`.
        file_name: Name of the file to fetch.
        client: Optional client identifier, forwarded to telemetry only.

    Returns:
        A 200 JSON response with `id`, `file_name` and `raw_text` of the first
        matching file object, or a 404 JSON response `{"error": "File not found"}`
        when no file object matches.
    """
    user = request.user.object

    file_objects = await FileObjectAdapters.aget_file_objects_by_name(user, file_name)
    try:
        file_object = file_objects[0]
    except IndexError:
        # No match: fall through to the 404 below instead of failing the
        # request with an unhandled IndexError.
        file_object = None

    if not file_object:
        return Response(
            content=json.dumps({"error": "File not found"}),
            media_type="application/json",
            status_code=404,
        )

    # Telemetry is recorded only for successful lookups.
    update_telemetry_state(
        request=request,
        telemetry_type="api",
        api="get_file",
        client=client,
    )

    return Response(
        content=json.dumps(
            {"id": file_object.id, "file_name": file_object.file_name, "raw_text": file_object.raw_text}
        ),
        media_type="application/json",
        status_code=200,
    )
|
||||
|
||||
|
||||
@api_content.get("/{content_source}", response_model=List[str])
|
||||
@requires(["authenticated"])
|
||||
async def get_content_source(
|
||||
|
||||
Reference in New Issue
Block a user