Speed up Docker image builds using multi-stage parallel pipelines (#987)

## Objective
Improve build speed and size of khoj docker images

## Changes
### Improve docker image build speeds
  - Decouple web app and server build steps
  - Build the web app and server in parallel
  - Cache docker layers for reuse across dockerize github workflow runs
    - Split Docker build layers for improved cacheability (e.g separate `yarn install` and `yarn build` steps)
### Reduce size of khoj docker images 
  - Use an up-to-date `.dockerignore` to exclude unnecessary directories
  - Do not installing cuda python packages for cpu builds
### Improve web app builds
  - Use consistent mechanism to get fonts for web app
  - Make tailwind extensions production instead of dev dependencies
  - Make next.js create production builds for the web app (via `NODE_ENV=production` env var)
This commit is contained in:
Debanjum
2024-11-24 21:49:46 -08:00
committed by GitHub
13 changed files with 112 additions and 82 deletions

View File

@@ -1,10 +1,11 @@
.git/
.pytest_cache/
.vscode/
.venv/
docs/
.*
**/__pycache__/
*.egg-info/
documentation/
tests/
build/
dist/
scripts/
*.egg-info/
src/interface/
src/telemetry/
!src/interface/web

View File

@@ -73,7 +73,7 @@ jobs:
run: rm -rf /opt/hostedtoolcache
- name: 📦 Build and Push Docker Image
uses: docker/build-push-action@v2
uses: docker/build-push-action@v4
if: (matrix.image == 'local' && github.event_name == 'workflow_dispatch') && github.event.inputs.khoj == 'true' || (matrix.image == 'local' && github.event_name == 'push')
with:
context: .
@@ -86,9 +86,11 @@ jobs:
build-args: |
VERSION=${{ steps.hatch.outputs.version }}
PORT=42110
cache-from: type=gha,scope=${{ matrix.image }}
cache-to: type=gha,mode=max,scope=${{ matrix.image }}
- name: 📦️⛅️ Build and Push Cloud Docker Image
uses: docker/build-push-action@v2
uses: docker/build-push-action@v4
if: (matrix.image == 'cloud' && github.event_name == 'workflow_dispatch') && github.event.inputs.khoj-cloud == 'true' || (matrix.image == 'cloud' && github.event_name == 'push')
with:
context: .
@@ -101,3 +103,5 @@ jobs:
build-args: |
VERSION=${{ steps.hatch.outputs.version }}
PORT=42110
cache-from: type=gha,scope=${{ matrix.image }}
cache-to: type=gha,mode=max,scope=${{ matrix.image }}

View File

@@ -1,5 +1,5 @@
# syntax=docker/dockerfile:1
FROM ubuntu:jammy
FROM ubuntu:jammy AS base
LABEL homepage="https://khoj.dev"
LABEL repository="https://github.com/khoj-ai/khoj"
LABEL org.opencontainers.image.source="https://github.com/khoj-ai/khoj"
@@ -10,44 +10,54 @@ RUN apt update -y && apt -y install \
python3-pip \
swig \
curl \
# Required by llama-cpp-python pre-built wheels. See #1628
musl-dev \
# Required by RapidOCR
libgl1 \
libglx-mesa0 \
libglib2.0-0 && \
# Required by Next.js Web app
curl -sL https://deb.nodesource.com/setup_20.x | bash - && \
curl -sS https://dl.yarnpkg.com/debian/pubkey.gpg | apt-key add - && \
echo "deb https://dl.yarnpkg.com/debian/ stable main" | tee /etc/apt/sources.list.d/yarn.list && \
apt update -y && apt -y --no-install-recommends install nodejs yarn && \
apt clean && rm -rf /var/lib/apt/lists/* && \
libglib2.0-0 \
# Required by llama-cpp-python pre-built wheels. See #1628
ln -s /usr/lib/x86_64-linux-musl/libc.so /lib/libc.musl-x86_64.so.1
musl-dev && \
ln -s /usr/lib/x86_64-linux-musl/libc.so /lib/libc.musl-x86_64.so.1 && \
# Clean up
apt clean && rm -rf /var/lib/apt/lists/*
# Install Application
# Build Server
FROM base AS server-deps
WORKDIR /app
COPY pyproject.toml .
COPY README.md .
ARG VERSION=0.0.0
ENV PIP_EXTRA_INDEX_URL=https://abetlen.github.io/llama-cpp-python/whl/cpu
# use the pre-built llama-cpp-python, torch cpu wheel
ENV PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu https://abetlen.github.io/llama-cpp-python/whl/cpu"
# avoid downloading unused cuda specific python packages
ENV CUDA_VISIBLE_DEVICES=""
RUN sed -i "s/dynamic = \\[\"version\"\\]/version = \"$VERSION\"/" pyproject.toml && \
pip install --no-cache-dir .
# Copy Source Code
COPY . .
# Set the PYTHONPATH environment variable in order for it to find the Django app.
ENV PYTHONPATH=/app/src:$PYTHONPATH
# Go to the directory src/interface/web and export the built Next.js assets
# Build Web App
FROM node:20-alpine AS web-app
# Set build optimization env vars
ENV NODE_ENV=production
ENV NEXT_TELEMETRY_DISABLED=1
WORKDIR /app/src/interface/web
RUN bash -c "yarn install --frozen-lockfile && yarn ciexport && yarn cache clean"
# Install dependencies first (cache layer)
COPY src/interface/web/package.json src/interface/web/yarn.lock ./
RUN yarn install --frozen-lockfile
# Copy source and build
COPY src/interface/web/. ./
RUN yarn build
# Merge the Server and Web App into a Single Image
FROM base
ENV PYTHONPATH=/app/src
WORKDIR /app
COPY --from=server-deps /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages
COPY --from=web-app /app/src/interface/web/out ./src/khoj/interface/built
COPY . .
RUN cd src && python3 khoj/manage.py collectstatic --noinput
# Run the Application
# There are more arguments required for the application to run,
# but these should be passed in through the docker-compose.yml file.
# but those should be passed in through the docker-compose.yml file.
ARG PORT
EXPOSE ${PORT}
ENTRYPOINT ["python3", "src/khoj/main.py"]

View File

@@ -1,5 +1,5 @@
# syntax=docker/dockerfile:1
FROM ubuntu:jammy
FROM ubuntu:jammy AS base
LABEL homepage="https://khoj.dev"
LABEL repository="https://github.com/khoj-ai/khoj"
LABEL org.opencontainers.image.source="https://github.com/khoj-ai/khoj"
@@ -16,38 +16,48 @@ RUN apt update -y && apt -y install \
curl \
# Required by llama-cpp-python pre-built wheels. See #1628
musl-dev && \
# Required by Next.js Web app
curl -sL https://deb.nodesource.com/setup_20.x | bash - && \
curl -sS https://dl.yarnpkg.com/debian/pubkey.gpg | apt-key add - && \
echo "deb https://dl.yarnpkg.com/debian/ stable main" | tee /etc/apt/sources.list.d/yarn.list && \
apt update -y && apt -y --no-install-recommends install nodejs yarn && \
apt clean && rm -rf /var/lib/apt/lists/* && \
# Required by llama-cpp-python pre-built wheels. See #1628
ln -s /usr/lib/x86_64-linux-musl/libc.so /lib/libc.musl-x86_64.so.1
ln -s /usr/lib/x86_64-linux-musl/libc.so /lib/libc.musl-x86_64.so.1 && \
# Clean up
apt clean && rm -rf /var/lib/apt/lists/*
# Install Application
# Build Server
FROM base AS server-deps
WORKDIR /app
COPY pyproject.toml .
COPY README.md .
ARG VERSION=0.0.0
ENV PIP_EXTRA_INDEX_URL=https://abetlen.github.io/llama-cpp-python/whl/cpu
# use the pre-built llama-cpp-python, torch cpu wheel
ENV PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu https://abetlen.github.io/llama-cpp-python/whl/cpu"
# avoid downloading unused cuda specific python packages
ENV CUDA_VISIBLE_DEVICES=""
RUN sed -i "s/dynamic = \\[\"version\"\\]/version = \"$VERSION\"/" pyproject.toml && \
pip install --no-cache-dir -e .[prod]
pip install --no-cache-dir .[prod]
# Copy Source Code
COPY . .
# Set the PYTHONPATH environment variable in order for it to find the Django app.
ENV PYTHONPATH=/app/src:$PYTHONPATH
# Go to the directory src/interface/web and export the built Next.js assets
# Build Web App
FROM node:20-alpine AS web-app
# Set build optimization env vars
ENV NODE_ENV=production
ENV NEXT_TELEMETRY_DISABLED=1
WORKDIR /app/src/interface/web
RUN bash -c "yarn install --frozen-lockfile && yarn ciexport && yarn cache clean"
# Install dependencies first (cache layer)
COPY src/interface/web/package.json src/interface/web/yarn.lock ./
RUN yarn install --frozen-lockfile
# Copy source and build
COPY src/interface/web/. ./
RUN yarn build
# Merge the Server and Web App into a Single Image
FROM base
ENV PYTHONPATH=/app/src
WORKDIR /app
COPY --from=server-deps /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages
COPY --from=web-app /app/src/interface/web/out ./src/khoj/interface/built
COPY . .
RUN cd src && python3 khoj/manage.py collectstatic --noinput
# Run the Application
# There are more arguments required for the application to run,
# but these should be passed in through the docker-compose.yml file.
# but those should be passed in through the docker-compose.yml file.
ARG PORT
EXPOSE ${PORT}
ENTRYPOINT ["gunicorn", "-c", "gunicorn-config.py", "src.khoj.main:app"]

View File

@@ -1,9 +1,7 @@
import type { Metadata } from "next";
import { Noto_Sans } from "next/font/google";
import { noto_sans, noto_sans_arabic } from "@/app/fonts";
import "../globals.css";
const inter = Noto_Sans({ subsets: ["latin"] });
export const metadata: Metadata = {
title: "Khoj AI - Agents",
description: "Find a specialized agent that can help you address more specific needs.",
@@ -33,7 +31,7 @@ export default function RootLayout({
children: React.ReactNode;
}>) {
return (
<html lang="en">
<html lang="en" className={`${noto_sans.variable} ${noto_sans_arabic.variable}`}>
<meta
httpEquiv="Content-Security-Policy"
content="default-src 'self' https://assets.khoj.dev;
@@ -46,7 +44,7 @@ export default function RootLayout({
child-src 'none';
object-src 'none';"
></meta>
<body className={inter.className}>{children}</body>
<body>{children}</body>
</html>
);
}

View File

@@ -1,9 +1,7 @@
import type { Metadata } from "next";
import { Noto_Sans } from "next/font/google";
import { noto_sans, noto_sans_arabic } from "@/app/fonts";
import "../globals.css";
const inter = Noto_Sans({ subsets: ["latin"] });
export const metadata: Metadata = {
title: "Khoj AI - Chat",
description:
@@ -34,7 +32,7 @@ export default function RootLayout({
children: React.ReactNode;
}>) {
return (
<html lang="en">
<html lang="en" className={`${noto_sans.variable} ${noto_sans_arabic.variable}`}>
<meta
httpEquiv="Content-Security-Policy"
content="default-src 'self' https://assets.khoj.dev;
@@ -47,7 +45,7 @@ export default function RootLayout({
child-src 'none';
object-src 'none';"
></meta>
<body className={inter.className}>
<body>
{children}
<script
dangerouslySetInnerHTML={{

View File

@@ -0,0 +1,13 @@
import { Noto_Sans, Noto_Sans_Arabic } from "next/font/google";
export const noto_sans = Noto_Sans({
subsets: ["latin", "latin-ext", "cyrillic", "cyrillic-ext", "devanagari", "vietnamese"],
display: "swap",
variable: "--font-noto-sans",
});
export const noto_sans_arabic = Noto_Sans_Arabic({
subsets: ["arabic"],
display: "swap",
variable: "--font-noto-sans-arabic",
});

View File

@@ -1,7 +1,6 @@
@tailwind base;
@tailwind components;
@tailwind utilities;
@import url("https://fonts.googleapis.com/css2?family=Noto+Sans+Arabic:wght@100..900&family=Noto+Sans:ital,wght@0,100..900;1,100..900&display=swap");
@layer base {
:root {
@@ -25,7 +24,7 @@
--input: 220 13% 91%;
--ring: 24.6 95% 53.1%;
--radius: 0.5rem;
--font-family: "Noto Sans", "Noto Sans Arabic", sans-serif !important;
--font-family: var(--font-noto-sans), var(--font-noto-sans-arabic), sans-serif !important;
/* Khoj Custom Colors */
--frosted-background-color: 20 13% 95%;
@@ -188,7 +187,7 @@
--border: 0 0% 9%;
--input: 0 0% 9%;
--ring: 20.5 90.2% 48.2%;
--font-family: "Noto Sans", "Noto Sans Arabic", sans-serif !important;
--font-family: var(--font-noto-sans), var(--font-noto-sans-arabic), sans-serif !important;
/* Imported from highlight.js */
pre code.hljs {

View File

@@ -1,9 +1,7 @@
import type { Metadata } from "next";
import { Noto_Sans } from "next/font/google";
import { noto_sans, noto_sans_arabic } from "@/app/fonts";
import "./globals.css";
const inter = Noto_Sans({ subsets: ["latin"] });
export const metadata: Metadata = {
title: "Khoj AI - Home",
description: "Your Second Brain.",
@@ -39,7 +37,7 @@ export default function RootLayout({
children: React.ReactNode;
}>) {
return (
<html lang="en">
<html lang="en" className={`${noto_sans.variable} ${noto_sans_arabic.variable}`}>
<meta
httpEquiv="Content-Security-Policy"
content="default-src 'self' https://assets.khoj.dev;
@@ -52,7 +50,7 @@ export default function RootLayout({
child-src 'none';
object-src 'none';"
></meta>
<body className={inter.className}>{children}</body>
<body>{children}</body>
</html>
);
}

View File

@@ -1,10 +1,8 @@
import type { Metadata } from "next";
import { Noto_Sans } from "next/font/google";
import { noto_sans, noto_sans_arabic } from "@/app/fonts";
import "../globals.css";
import { Toaster } from "@/components/ui/toaster";
const inter = Noto_Sans({ subsets: ["latin"] });
export const metadata: Metadata = {
title: "Khoj AI - Settings",
description: "Configure Khoj to get personalized, deeper assistance.",
@@ -34,7 +32,7 @@ export default function RootLayout({
children: React.ReactNode;
}>) {
return (
<html lang="en">
<html lang="en" className={`${noto_sans.variable} ${noto_sans_arabic.variable}`}>
<meta
httpEquiv="Content-Security-Policy"
content="default-src 'self' https://assets.khoj.dev;
@@ -46,7 +44,7 @@ export default function RootLayout({
child-src 'none';
object-src 'none';"
></meta>
<body className={inter.className}>
<body>
{children}
<Toaster />
</body>

View File

@@ -1,9 +1,7 @@
import type { Metadata } from "next";
import { Noto_Sans } from "next/font/google";
import { noto_sans, noto_sans_arabic } from "@/app/fonts";
import "../../globals.css";
const inter = Noto_Sans({ subsets: ["latin"] });
export const metadata: Metadata = {
title: "Khoj AI - Chat",
description: "Use this page to view a chat with Khoj AI.",
@@ -15,7 +13,7 @@ export default function RootLayout({
children: React.ReactNode;
}>) {
return (
<html lang="en">
<html lang="en" className={`${noto_sans.variable} ${noto_sans_arabic.variable}`}>
<meta
httpEquiv="Content-Security-Policy"
content="default-src 'self' https://assets.khoj.dev;
@@ -27,7 +25,7 @@ export default function RootLayout({
child-src 'none';
object-src 'none';"
></meta>
<body className={inter.className}>
<body>
{children}
<script
dangerouslySetInnerHTML={{

View File

@@ -62,6 +62,9 @@
"react-hook-form": "^7.52.1",
"shadcn-ui": "^0.8.0",
"swr": "^2.2.5",
"tailwind-merge": "^2.3.0",
"tailwindcss": "^3.4.6",
"tailwindcss-animate": "^1.0.7",
"typescript": "^5",
"vaul": "^0.9.1",
"zod": "^3.23.8"
@@ -82,9 +85,6 @@
"lint-staged": "^15.2.7",
"nodemon": "^3.1.3",
"prettier": "3.3.3",
"tailwind-merge": "^2.3.0",
"tailwindcss": "^3.4.6",
"tailwindcss-animate": "^1.0.7",
"typescript": "^5"
},
"prettier": {

View File

@@ -55,6 +55,9 @@ const config = {
},
},
extend: {
fontFamily: {
sans: ["var(--font-noto-sans)", "var(--font-noto-sans-arabic)"],
},
colors: {
border: "hsl(var(--border))",
input: "hsl(var(--input))",