Speed up Docker image builds using multi-stage parallel pipelines (#987)

## Objective
Improve the build speed and reduce the size of the Khoj Docker images

## Changes
### Improve docker image build speeds
  - Decouple web app and server build steps
  - Build the web app and server in parallel
  - Cache docker layers for reuse across dockerize github workflow runs
    - Split Docker build layers for improved cacheability (e.g. separate `yarn install` and `yarn build` steps)
### Reduce size of khoj docker images 
  - Use an up-to-date `.dockerignore` to exclude unnecessary directories
  - Do not install CUDA Python packages for CPU builds
### Improve web app builds
  - Use consistent mechanism to get fonts for web app
  - Make tailwind extensions production instead of dev dependencies
  - Make next.js create production builds for the web app (via `NODE_ENV=production` env var)
This commit is contained in:
Debanjum
2024-11-24 21:49:46 -08:00
committed by GitHub
13 changed files with 112 additions and 82 deletions

View File

@@ -1,10 +1,11 @@
.git/ .*
.pytest_cache/ **/__pycache__/
.vscode/ *.egg-info/
.venv/ documentation/
docs/
tests/ tests/
build/ build/
dist/ dist/
scripts/ scripts/
*.egg-info/ src/interface/
src/telemetry/
!src/interface/web

View File

@@ -73,7 +73,7 @@ jobs:
run: rm -rf /opt/hostedtoolcache run: rm -rf /opt/hostedtoolcache
- name: 📦 Build and Push Docker Image - name: 📦 Build and Push Docker Image
uses: docker/build-push-action@v2 uses: docker/build-push-action@v4
if: (matrix.image == 'local' && github.event_name == 'workflow_dispatch') && github.event.inputs.khoj == 'true' || (matrix.image == 'local' && github.event_name == 'push') if: (matrix.image == 'local' && github.event_name == 'workflow_dispatch') && github.event.inputs.khoj == 'true' || (matrix.image == 'local' && github.event_name == 'push')
with: with:
context: . context: .
@@ -86,9 +86,11 @@ jobs:
build-args: | build-args: |
VERSION=${{ steps.hatch.outputs.version }} VERSION=${{ steps.hatch.outputs.version }}
PORT=42110 PORT=42110
cache-from: type=gha,scope=${{ matrix.image }}
cache-to: type=gha,mode=max,scope=${{ matrix.image }}
- name: 📦️⛅️ Build and Push Cloud Docker Image - name: 📦️⛅️ Build and Push Cloud Docker Image
uses: docker/build-push-action@v2 uses: docker/build-push-action@v4
if: (matrix.image == 'cloud' && github.event_name == 'workflow_dispatch') && github.event.inputs.khoj-cloud == 'true' || (matrix.image == 'cloud' && github.event_name == 'push') if: (matrix.image == 'cloud' && github.event_name == 'workflow_dispatch') && github.event.inputs.khoj-cloud == 'true' || (matrix.image == 'cloud' && github.event_name == 'push')
with: with:
context: . context: .
@@ -101,3 +103,5 @@ jobs:
build-args: | build-args: |
VERSION=${{ steps.hatch.outputs.version }} VERSION=${{ steps.hatch.outputs.version }}
PORT=42110 PORT=42110
cache-from: type=gha,scope=${{ matrix.image }}
cache-to: type=gha,mode=max,scope=${{ matrix.image }}

View File

@@ -1,5 +1,5 @@
# syntax=docker/dockerfile:1 # syntax=docker/dockerfile:1
FROM ubuntu:jammy FROM ubuntu:jammy AS base
LABEL homepage="https://khoj.dev" LABEL homepage="https://khoj.dev"
LABEL repository="https://github.com/khoj-ai/khoj" LABEL repository="https://github.com/khoj-ai/khoj"
LABEL org.opencontainers.image.source="https://github.com/khoj-ai/khoj" LABEL org.opencontainers.image.source="https://github.com/khoj-ai/khoj"
@@ -10,44 +10,54 @@ RUN apt update -y && apt -y install \
python3-pip \ python3-pip \
swig \ swig \
curl \ curl \
# Required by llama-cpp-python pre-built wheels. See #1628
musl-dev \
# Required by RapidOCR # Required by RapidOCR
libgl1 \ libgl1 \
libglx-mesa0 \ libglx-mesa0 \
libglib2.0-0 && \ libglib2.0-0 \
# Required by Next.js Web app
curl -sL https://deb.nodesource.com/setup_20.x | bash - && \
curl -sS https://dl.yarnpkg.com/debian/pubkey.gpg | apt-key add - && \
echo "deb https://dl.yarnpkg.com/debian/ stable main" | tee /etc/apt/sources.list.d/yarn.list && \
apt update -y && apt -y --no-install-recommends install nodejs yarn && \
apt clean && rm -rf /var/lib/apt/lists/* && \
# Required by llama-cpp-python pre-built wheels. See #1628 # Required by llama-cpp-python pre-built wheels. See #1628
ln -s /usr/lib/x86_64-linux-musl/libc.so /lib/libc.musl-x86_64.so.1 musl-dev && \
ln -s /usr/lib/x86_64-linux-musl/libc.so /lib/libc.musl-x86_64.so.1 && \
# Clean up
apt clean && rm -rf /var/lib/apt/lists/*
# Install Application # Build Server
FROM base AS server-deps
WORKDIR /app WORKDIR /app
COPY pyproject.toml . COPY pyproject.toml .
COPY README.md . COPY README.md .
ARG VERSION=0.0.0 ARG VERSION=0.0.0
ENV PIP_EXTRA_INDEX_URL=https://abetlen.github.io/llama-cpp-python/whl/cpu # use the pre-built llama-cpp-python, torch cpu wheel
ENV PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu https://abetlen.github.io/llama-cpp-python/whl/cpu"
# avoid downloading unused cuda specific python packages
ENV CUDA_VISIBLE_DEVICES=""
RUN sed -i "s/dynamic = \\[\"version\"\\]/version = \"$VERSION\"/" pyproject.toml && \ RUN sed -i "s/dynamic = \\[\"version\"\\]/version = \"$VERSION\"/" pyproject.toml && \
pip install --no-cache-dir . pip install --no-cache-dir .
# Copy Source Code # Build Web App
COPY . . FROM node:20-alpine AS web-app
# Set build optimization env vars
# Set the PYTHONPATH environment variable in order for it to find the Django app. ENV NODE_ENV=production
ENV PYTHONPATH=/app/src:$PYTHONPATH ENV NEXT_TELEMETRY_DISABLED=1
# Go to the directory src/interface/web and export the built Next.js assets
WORKDIR /app/src/interface/web WORKDIR /app/src/interface/web
RUN bash -c "yarn install --frozen-lockfile && yarn ciexport && yarn cache clean" # Install dependencies first (cache layer)
COPY src/interface/web/package.json src/interface/web/yarn.lock ./
RUN yarn install --frozen-lockfile
# Copy source and build
COPY src/interface/web/. ./
RUN yarn build
# Merge the Server and Web App into a Single Image
FROM base
ENV PYTHONPATH=/app/src
WORKDIR /app WORKDIR /app
COPY --from=server-deps /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages
COPY --from=web-app /app/src/interface/web/out ./src/khoj/interface/built
COPY . .
RUN cd src && python3 khoj/manage.py collectstatic --noinput
# Run the Application # Run the Application
# There are more arguments required for the application to run, # There are more arguments required for the application to run,
# but these should be passed in through the docker-compose.yml file. # but those should be passed in through the docker-compose.yml file.
ARG PORT ARG PORT
EXPOSE ${PORT} EXPOSE ${PORT}
ENTRYPOINT ["python3", "src/khoj/main.py"] ENTRYPOINT ["python3", "src/khoj/main.py"]

View File

@@ -1,5 +1,5 @@
# syntax=docker/dockerfile:1 # syntax=docker/dockerfile:1
FROM ubuntu:jammy FROM ubuntu:jammy AS base
LABEL homepage="https://khoj.dev" LABEL homepage="https://khoj.dev"
LABEL repository="https://github.com/khoj-ai/khoj" LABEL repository="https://github.com/khoj-ai/khoj"
LABEL org.opencontainers.image.source="https://github.com/khoj-ai/khoj" LABEL org.opencontainers.image.source="https://github.com/khoj-ai/khoj"
@@ -16,38 +16,48 @@ RUN apt update -y && apt -y install \
curl \ curl \
# Required by llama-cpp-python pre-built wheels. See #1628 # Required by llama-cpp-python pre-built wheels. See #1628
musl-dev && \ musl-dev && \
# Required by Next.js Web app ln -s /usr/lib/x86_64-linux-musl/libc.so /lib/libc.musl-x86_64.so.1 && \
curl -sL https://deb.nodesource.com/setup_20.x | bash - && \ # Clean up
curl -sS https://dl.yarnpkg.com/debian/pubkey.gpg | apt-key add - && \ apt clean && rm -rf /var/lib/apt/lists/*
echo "deb https://dl.yarnpkg.com/debian/ stable main" | tee /etc/apt/sources.list.d/yarn.list && \
apt update -y && apt -y --no-install-recommends install nodejs yarn && \
apt clean && rm -rf /var/lib/apt/lists/* && \
# Required by llama-cpp-python pre-built wheels. See #1628
ln -s /usr/lib/x86_64-linux-musl/libc.so /lib/libc.musl-x86_64.so.1
# Install Application # Build Server
FROM base AS server-deps
WORKDIR /app WORKDIR /app
COPY pyproject.toml . COPY pyproject.toml .
COPY README.md . COPY README.md .
ARG VERSION=0.0.0 ARG VERSION=0.0.0
ENV PIP_EXTRA_INDEX_URL=https://abetlen.github.io/llama-cpp-python/whl/cpu # use the pre-built llama-cpp-python, torch cpu wheel
ENV PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu https://abetlen.github.io/llama-cpp-python/whl/cpu"
# avoid downloading unused cuda specific python packages
ENV CUDA_VISIBLE_DEVICES=""
RUN sed -i "s/dynamic = \\[\"version\"\\]/version = \"$VERSION\"/" pyproject.toml && \ RUN sed -i "s/dynamic = \\[\"version\"\\]/version = \"$VERSION\"/" pyproject.toml && \
pip install --no-cache-dir -e .[prod] pip install --no-cache-dir .[prod]
# Copy Source Code # Build Web App
COPY . . FROM node:20-alpine AS web-app
# Set build optimization env vars
# Set the PYTHONPATH environment variable in order for it to find the Django app. ENV NODE_ENV=production
ENV PYTHONPATH=/app/src:$PYTHONPATH ENV NEXT_TELEMETRY_DISABLED=1
# Go to the directory src/interface/web and export the built Next.js assets
WORKDIR /app/src/interface/web WORKDIR /app/src/interface/web
RUN bash -c "yarn install --frozen-lockfile && yarn ciexport && yarn cache clean" # Install dependencies first (cache layer)
COPY src/interface/web/package.json src/interface/web/yarn.lock ./
RUN yarn install --frozen-lockfile
# Copy source and build
COPY src/interface/web/. ./
RUN yarn build
# Merge the Server and Web App into a Single Image
FROM base
ENV PYTHONPATH=/app/src
WORKDIR /app WORKDIR /app
COPY --from=server-deps /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages
COPY --from=web-app /app/src/interface/web/out ./src/khoj/interface/built
COPY . .
RUN cd src && python3 khoj/manage.py collectstatic --noinput
# Run the Application # Run the Application
# There are more arguments required for the application to run, # There are more arguments required for the application to run,
# but these should be passed in through the docker-compose.yml file. # but those should be passed in through the docker-compose.yml file.
ARG PORT ARG PORT
EXPOSE ${PORT} EXPOSE ${PORT}
ENTRYPOINT ["gunicorn", "-c", "gunicorn-config.py", "src.khoj.main:app"] ENTRYPOINT ["gunicorn", "-c", "gunicorn-config.py", "src.khoj.main:app"]

View File

@@ -1,9 +1,7 @@
import type { Metadata } from "next"; import type { Metadata } from "next";
import { Noto_Sans } from "next/font/google"; import { noto_sans, noto_sans_arabic } from "@/app/fonts";
import "../globals.css"; import "../globals.css";
const inter = Noto_Sans({ subsets: ["latin"] });
export const metadata: Metadata = { export const metadata: Metadata = {
title: "Khoj AI - Agents", title: "Khoj AI - Agents",
description: "Find a specialized agent that can help you address more specific needs.", description: "Find a specialized agent that can help you address more specific needs.",
@@ -33,7 +31,7 @@ export default function RootLayout({
children: React.ReactNode; children: React.ReactNode;
}>) { }>) {
return ( return (
<html lang="en"> <html lang="en" className={`${noto_sans.variable} ${noto_sans_arabic.variable}`}>
<meta <meta
httpEquiv="Content-Security-Policy" httpEquiv="Content-Security-Policy"
content="default-src 'self' https://assets.khoj.dev; content="default-src 'self' https://assets.khoj.dev;
@@ -46,7 +44,7 @@ export default function RootLayout({
child-src 'none'; child-src 'none';
object-src 'none';" object-src 'none';"
></meta> ></meta>
<body className={inter.className}>{children}</body> <body>{children}</body>
</html> </html>
); );
} }

View File

@@ -1,9 +1,7 @@
import type { Metadata } from "next"; import type { Metadata } from "next";
import { Noto_Sans } from "next/font/google"; import { noto_sans, noto_sans_arabic } from "@/app/fonts";
import "../globals.css"; import "../globals.css";
const inter = Noto_Sans({ subsets: ["latin"] });
export const metadata: Metadata = { export const metadata: Metadata = {
title: "Khoj AI - Chat", title: "Khoj AI - Chat",
description: description:
@@ -34,7 +32,7 @@ export default function RootLayout({
children: React.ReactNode; children: React.ReactNode;
}>) { }>) {
return ( return (
<html lang="en"> <html lang="en" className={`${noto_sans.variable} ${noto_sans_arabic.variable}`}>
<meta <meta
httpEquiv="Content-Security-Policy" httpEquiv="Content-Security-Policy"
content="default-src 'self' https://assets.khoj.dev; content="default-src 'self' https://assets.khoj.dev;
@@ -47,7 +45,7 @@ export default function RootLayout({
child-src 'none'; child-src 'none';
object-src 'none';" object-src 'none';"
></meta> ></meta>
<body className={inter.className}> <body>
{children} {children}
<script <script
dangerouslySetInnerHTML={{ dangerouslySetInnerHTML={{

View File

@@ -0,0 +1,13 @@
import { Noto_Sans, Noto_Sans_Arabic } from "next/font/google";
// Shared Noto Sans font instance for the web app, loaded through `next/font/google`.
// It is exposed via the `--font-noto-sans` CSS custom property: layouts put
// `noto_sans.variable` on the `<html>` element, and the stylesheet / Tailwind config
// read it back with `var(--font-noto-sans)`. Keep the variable name in sync with them.
export const noto_sans = Noto_Sans({
// Script subsets requested for this font.
subsets: ["latin", "latin-ext", "cyrillic", "cyrillic-ext", "devanagari", "vietnamese"],
display: "swap",
variable: "--font-noto-sans",
});
// Shared Noto Sans Arabic font instance, loaded through `next/font/google`.
// It is exposed via the `--font-noto-sans-arabic` CSS custom property: layouts put
// `noto_sans_arabic.variable` on the `<html>` element, and the stylesheet / Tailwind
// config read it back with `var(--font-noto-sans-arabic)`.
export const noto_sans_arabic = Noto_Sans_Arabic({
// Only the Arabic subset is requested for this font.
subsets: ["arabic"],
display: "swap",
variable: "--font-noto-sans-arabic",
});

View File

@@ -1,7 +1,6 @@
@tailwind base; @tailwind base;
@tailwind components; @tailwind components;
@tailwind utilities; @tailwind utilities;
@import url("https://fonts.googleapis.com/css2?family=Noto+Sans+Arabic:wght@100..900&family=Noto+Sans:ital,wght@0,100..900;1,100..900&display=swap");
@layer base { @layer base {
:root { :root {
@@ -25,7 +24,7 @@
--input: 220 13% 91%; --input: 220 13% 91%;
--ring: 24.6 95% 53.1%; --ring: 24.6 95% 53.1%;
--radius: 0.5rem; --radius: 0.5rem;
--font-family: "Noto Sans", "Noto Sans Arabic", sans-serif !important; --font-family: var(--font-noto-sans), var(--font-noto-sans-arabic), sans-serif !important;
/* Khoj Custom Colors */ /* Khoj Custom Colors */
--frosted-background-color: 20 13% 95%; --frosted-background-color: 20 13% 95%;
@@ -188,7 +187,7 @@
--border: 0 0% 9%; --border: 0 0% 9%;
--input: 0 0% 9%; --input: 0 0% 9%;
--ring: 20.5 90.2% 48.2%; --ring: 20.5 90.2% 48.2%;
--font-family: "Noto Sans", "Noto Sans Arabic", sans-serif !important; --font-family: var(--font-noto-sans), var(--font-noto-sans-arabic), sans-serif !important;
/* Imported from highlight.js */ /* Imported from highlight.js */
pre code.hljs { pre code.hljs {

View File

@@ -1,9 +1,7 @@
import type { Metadata } from "next"; import type { Metadata } from "next";
import { Noto_Sans } from "next/font/google"; import { noto_sans, noto_sans_arabic } from "@/app/fonts";
import "./globals.css"; import "./globals.css";
const inter = Noto_Sans({ subsets: ["latin"] });
export const metadata: Metadata = { export const metadata: Metadata = {
title: "Khoj AI - Home", title: "Khoj AI - Home",
description: "Your Second Brain.", description: "Your Second Brain.",
@@ -39,7 +37,7 @@ export default function RootLayout({
children: React.ReactNode; children: React.ReactNode;
}>) { }>) {
return ( return (
<html lang="en"> <html lang="en" className={`${noto_sans.variable} ${noto_sans_arabic.variable}`}>
<meta <meta
httpEquiv="Content-Security-Policy" httpEquiv="Content-Security-Policy"
content="default-src 'self' https://assets.khoj.dev; content="default-src 'self' https://assets.khoj.dev;
@@ -52,7 +50,7 @@ export default function RootLayout({
child-src 'none'; child-src 'none';
object-src 'none';" object-src 'none';"
></meta> ></meta>
<body className={inter.className}>{children}</body> <body>{children}</body>
</html> </html>
); );
} }

View File

@@ -1,10 +1,8 @@
import type { Metadata } from "next"; import type { Metadata } from "next";
import { Noto_Sans } from "next/font/google"; import { noto_sans, noto_sans_arabic } from "@/app/fonts";
import "../globals.css"; import "../globals.css";
import { Toaster } from "@/components/ui/toaster"; import { Toaster } from "@/components/ui/toaster";
const inter = Noto_Sans({ subsets: ["latin"] });
export const metadata: Metadata = { export const metadata: Metadata = {
title: "Khoj AI - Settings", title: "Khoj AI - Settings",
description: "Configure Khoj to get personalized, deeper assistance.", description: "Configure Khoj to get personalized, deeper assistance.",
@@ -34,7 +32,7 @@ export default function RootLayout({
children: React.ReactNode; children: React.ReactNode;
}>) { }>) {
return ( return (
<html lang="en"> <html lang="en" className={`${noto_sans.variable} ${noto_sans_arabic.variable}`}>
<meta <meta
httpEquiv="Content-Security-Policy" httpEquiv="Content-Security-Policy"
content="default-src 'self' https://assets.khoj.dev; content="default-src 'self' https://assets.khoj.dev;
@@ -46,7 +44,7 @@ export default function RootLayout({
child-src 'none'; child-src 'none';
object-src 'none';" object-src 'none';"
></meta> ></meta>
<body className={inter.className}> <body>
{children} {children}
<Toaster /> <Toaster />
</body> </body>

View File

@@ -1,9 +1,7 @@
import type { Metadata } from "next"; import type { Metadata } from "next";
import { Noto_Sans } from "next/font/google"; import { noto_sans, noto_sans_arabic } from "@/app/fonts";
import "../../globals.css"; import "../../globals.css";
const inter = Noto_Sans({ subsets: ["latin"] });
export const metadata: Metadata = { export const metadata: Metadata = {
title: "Khoj AI - Chat", title: "Khoj AI - Chat",
description: "Use this page to view a chat with Khoj AI.", description: "Use this page to view a chat with Khoj AI.",
@@ -15,7 +13,7 @@ export default function RootLayout({
children: React.ReactNode; children: React.ReactNode;
}>) { }>) {
return ( return (
<html lang="en"> <html lang="en" className={`${noto_sans.variable} ${noto_sans_arabic.variable}`}>
<meta <meta
httpEquiv="Content-Security-Policy" httpEquiv="Content-Security-Policy"
content="default-src 'self' https://assets.khoj.dev; content="default-src 'self' https://assets.khoj.dev;
@@ -27,7 +25,7 @@ export default function RootLayout({
child-src 'none'; child-src 'none';
object-src 'none';" object-src 'none';"
></meta> ></meta>
<body className={inter.className}> <body>
{children} {children}
<script <script
dangerouslySetInnerHTML={{ dangerouslySetInnerHTML={{

View File

@@ -62,6 +62,9 @@
"react-hook-form": "^7.52.1", "react-hook-form": "^7.52.1",
"shadcn-ui": "^0.8.0", "shadcn-ui": "^0.8.0",
"swr": "^2.2.5", "swr": "^2.2.5",
"tailwind-merge": "^2.3.0",
"tailwindcss": "^3.4.6",
"tailwindcss-animate": "^1.0.7",
"typescript": "^5", "typescript": "^5",
"vaul": "^0.9.1", "vaul": "^0.9.1",
"zod": "^3.23.8" "zod": "^3.23.8"
@@ -82,9 +85,6 @@
"lint-staged": "^15.2.7", "lint-staged": "^15.2.7",
"nodemon": "^3.1.3", "nodemon": "^3.1.3",
"prettier": "3.3.3", "prettier": "3.3.3",
"tailwind-merge": "^2.3.0",
"tailwindcss": "^3.4.6",
"tailwindcss-animate": "^1.0.7",
"typescript": "^5" "typescript": "^5"
}, },
"prettier": { "prettier": {

View File

@@ -55,6 +55,9 @@ const config = {
}, },
}, },
extend: { extend: {
fontFamily: {
sans: ["var(--font-noto-sans)", "var(--font-noto-sans-arabic)"],
},
colors: { colors: {
border: "hsl(var(--border))", border: "hsl(var(--border))",
input: "hsl(var(--input))", input: "hsl(var(--input))",