feat(agents): enhance Dockerfile templates with comprehensive documentation

dwayn · dwayn · commit cc0689e24ff3 · 2025-08-11T19:34:49.000-05:00
- Add multi-stage build optimization guidance with examples
  - Add system package documentation for common use cases
  - Add detailed PyAudio installation guide with sounddevice alternative
  - Add troubleshooting sections for some common build/runtime issues
  - Improve comments explaining each Dockerfile command and best practices
  - Fix pip Dockerfile order (USER after chown for proper permissions)
  - Add Node.js native module compilation guidance
diff --git a/pkg/agentfs/examples/node.Dockerfile b/pkg/agentfs/examples/node.Dockerfile
@@ -1,26 +1,126 @@
-# This is an example Dockerfile that builds a minimal container for running LK Agents
-# syntax=docker/dockerfile:1
+# syntax=docker/dockerfile:1
+# This Dockerfile creates a production-ready container for a LiveKit Node.js agent
+# It uses a multi-stage build to minimize the final image size
+
+# === MULTI-STAGE BUILD STRUCTURE ===
+# Stage 1 (base): Sets up Node.js environment with pnpm
+# Stage 2 (build): Installs dependencies and builds the application
+# Stage 3 (final): Copies /app and the CA certificates from the build stage
+#
+# Benefits: apt package lists and any build tools added to the build stage stay out of the final image
+# Final image contains: everything under /app (project files, compiled JS, node_modules) plus CA certificates
+
 FROM node:20-slim AS base
 
+# Set the working directory where our application will live
 WORKDIR /app
 
+# Install pnpm globally for faster, more efficient package management
+# pnpm uses a content-addressable storage for packages, saving disk space
 RUN npm install -g pnpm@9.7.0
 
-# Throw away build stage to reduce size of final image
+# === BUILD STAGE ===
+# This stage is discarded after building, keeping the final image small
 FROM base AS build
 
+# Install CA certificates for HTTPS connections during package installation
+# --no-install-recommends keeps the image smaller by avoiding suggested packages
 RUN apt-get update -qq && apt-get install --no-install-recommends -y ca-certificates
+
+# Copy all application files into the build container
+# --link creates a separate layer that can be reused if files haven't changed
 COPY --link . .
 
+# Install dependencies using pnpm
+# --frozen-lockfile ensures exact versions from pnpm-lock.yaml are used
+# This provides reproducible builds across different environments
 RUN pnpm install --frozen-lockfile
+
+# Build the TypeScript application
+# This compiles TypeScript to JavaScript and prepares for production
 RUN pnpm run build
 
+# === FINAL PRODUCTION STAGE ===
+# Start from the base image without build tools
 FROM base
+
+# Copy the built application from the build stage
+# This includes node_modules and compiled JavaScript files
 COPY --from=build /app /app
+
+# Copy SSL certificates for HTTPS connections at runtime
 COPY --from=build /etc/ssl/certs /etc/ssl/certs
 
-# Start the server by default, this can be overwritten at runtime
+# Expose the healthcheck port
+# EXPOSE is documentation only: it records the port for operators and tooling but does not publish it
 EXPOSE 8081
 
+# Run the application
+# The "start" command tells the agent to connect to LiveKit and begin waiting for jobs
+# Modify the path if your entry point is different (e.g., ./dist/index.js)
 CMD [ "node", "./dist/agent.js", "start" ]
 
+# === COMMON CUSTOMIZATIONS ===
+#
+# 1. Using npm or yarn instead of pnpm:
+#    Replace pnpm commands with npm or yarn equivalents:
+#    - npm: RUN npm ci (instead of pnpm install --frozen-lockfile)
+#    - yarn: RUN yarn install --frozen-lockfile (Yarn 2+: yarn install --immutable)
+#
+# 2. Installing system dependencies for native modules:
+#    Some Node.js packages require system libraries. Add before COPY in build stage:
+#    
+#    # For packages with native C++ addons:
+#    RUN apt-get update -qq && apt-get install --no-install-recommends -y \
+#        ca-certificates \
+#        python3 \
+#        make \
+#        g++ \
+#        && rm -rf /var/lib/apt/lists/*
+#
+# 3. Different entry point locations:
+#    - If using src/index.js: CMD ["node", "./src/index.js", "start"]
+#    - If using dist/main.js: CMD ["node", "./dist/main.js", "start"]
+#    - For development: CMD ["npm", "run", "dev"]
+#
+# 4. Environment variables:
+#    Set Node.js environment for production:
+#    ENV NODE_ENV=production
+#
+# 5. Running as non-root user (recommended for security):
+#    Add before the final CMD:
+#    RUN adduser --disabled-password --gecos "" --uid 10001 appuser
+#    USER appuser
+#
+# === TROUBLESHOOTING COMMON ISSUES ===
+#
+# 1. "Module not found" errors:
+#    - Ensure all dependencies are in package.json
+#    - Check that build output is in the expected location
+#    - Verify node_modules are copied correctly
+#
+# 2. "EACCES: permission denied" errors:
+#    - Add a non-root user (see example above)
+#    - Ensure files have correct permissions
+#
+# 3. Large image sizes:
+#    - Use node:20-alpine instead of node:20-slim for smaller base (Alpine uses apk, not apt-get)
+#    - Ensure .dockerignore excludes unnecessary files
+#    - Consider pruning dev dependencies after build: pnpm prune --prod (npm: npm prune --omit=dev)
+#
+# 4. Slow builds:
+#    - Use Docker BuildKit: DOCKER_BUILDKIT=1 docker build
+#    - Order COPY commands from least to most frequently changed
+#    - Copy package.json and lock file before source code for better caching
+#
+# 5. Native module compilation issues:
+#    - Install build tools in the build stage (see customization #2)
+#    - For node-gyp: apt-get install python3 make g++
+#    - Consider using prebuilt binaries when available
+#
+# 6. Runtime connection issues:
+#    - Verify the agent can reach the LiveKit server
+#    - Check that required environment variables are set
+#    - Ensure the healthcheck endpoint (8081) is accessible
+#
+# For more help: https://docs.livekit.io/agents/
diff --git a/pkg/agentfs/examples/python.pip.Dockerfile b/pkg/agentfs/examples/python.pip.Dockerfile
@@ -1,5 +1,22 @@
-# This is an example Dockerfile that builds a minimal container for running LK Agents
-# syntax=docker/dockerfile:1
+# syntax=docker/dockerfile:1
+# This Dockerfile creates a production-ready container for a LiveKit agent using pip
+#
+# === MULTI-STAGE BUILD OPTIMIZATION ===
+# For smaller production images, consider using a multi-stage build:
+# Stage 1: Build dependencies and compile packages
+# Stage 2: Copy only the compiled packages to a clean runtime image
+#
+# Example multi-stage build structure:
+# FROM python:3.11-slim AS builder
+# [install build tools, then pip install --user as appuser so packages land in ~/.local]
+# FROM python:3.11-slim AS runtime
+# COPY --from=builder /home/appuser/.local /home/appuser/.local
+# [runtime setup only]
+#
+# Benefits: 30-50% smaller final image size
+# Trade-offs: Longer build time, more complex debugging
+# Use when: Image size is critical (e.g., serverless, edge deployment)
+
 ARG PYTHON_VERSION=3.11.6
 FROM python:${PYTHON_VERSION}-slim
 
@@ -21,32 +38,182 @@ RUN adduser \
     --uid "${UID}" \
     appuser
 
-
-# Install gcc and other build dependencies.
+# Install build dependencies required for Python packages with native extensions
+#
+# Common system packages you might need (uncomment and modify as needed):
+#
+# === Core Build Tools ===
+# - gcc/g++: C/C++ compilers for building packages with native extensions
+# - python3-dev: Python development headers needed for compilation
+# - build-essential: Essential build tools (includes gcc, make, etc.)
+# - pkg-config: Tool for managing library compilation/linking flags
+#
+# === Audio Processing ===
+# For audio agents (pyaudio, soundfile, librosa):
+# - libasound2-dev: ALSA development headers
+# - libportaudio2: Cross-platform audio I/O library
+# - libsndfile1-dev: Library for reading/writing audio files
+# - ffmpeg: Audio/video processing (for format conversion)
+#
+# === Computer Vision ===
+# For image/video processing (opencv, pillow):
+# - libopencv-dev: OpenCV development headers
+# - libjpeg-dev: JPEG image format support
+# - libpng-dev: PNG image format support
+# - libwebp-dev: WebP image format support
+# - libtiff5-dev: TIFF image format support
+#
+# === Machine Learning ===
+# For ML/AI packages (scipy, numpy, scikit-learn):
+# - libblas-dev: Basic Linear Algebra Subprograms
+# - liblapack-dev: Linear Algebra Package
+# - libatlas-base-dev: Automatically Tuned Linear Algebra Software
+# - gfortran: Fortran compiler (needed for some numerical libraries)
+#
+# === Database & Networking ===
+# - libpq-dev: PostgreSQL development headers
+# - default-libmysqlclient-dev: MySQL/MariaDB development headers (Debian name; libmysqlclient-dev on Ubuntu)
+# - libssl-dev: SSL/TLS support for cryptographic packages
+# - libffi-dev: Foreign Function Interface library for cffi
+# - libcurl4-openssl-dev: HTTP client library
+#
+# === Examples for Common Use Cases ===
+#
+# Audio Processing Agent:
+# RUN apt-get update && \
+#     apt-get install -y \
+#     gcc python3-dev \
+#     libasound2-dev libportaudio2 libsndfile1-dev \
+#     ffmpeg \
+#     && rm -rf /var/lib/apt/lists/*
+#
+# Computer Vision Agent:
+# RUN apt-get update && \
+#     apt-get install -y \
+#     gcc python3-dev \
+#     libopencv-dev libjpeg-dev libpng-dev libwebp-dev \
+#     && rm -rf /var/lib/apt/lists/*
+#
+# Machine Learning Agent:
+# RUN apt-get update && \
+#     apt-get install -y \
+#     gcc g++ gfortran python3-dev \
+#     libblas-dev liblapack-dev libatlas-base-dev \
+#     && rm -rf /var/lib/apt/lists/*
+#
+# === SPECIAL CASE: PyAudio Installation Guide ===
+# PyAudio is a common audio library that requires special attention.
+# If you need PyAudio, use this complete setup:
+#
+# RUN apt-get update && \
+#     apt-get install -y \
+#     gcc python3-dev \
+#     libasound2-dev libportaudio2 portaudio19-dev \
+#     && rm -rf /var/lib/apt/lists/*
+#
+# Important notes for PyAudio:
+# 1. Install system packages BEFORE installing Python packages
+# 2. Use portaudio19-dev (not just libportaudio2) for full compatibility
+# 3. Some systems may also need: libportaudiocpp0 libportaudio0
+# 4. For production use, consider using sounddevice instead of pyaudio
+#    (sounddevice is more modern and has better error handling)
+#
+# Alternative for audio: Use soundfile + sounddevice instead of pyaudio:
+# RUN apt-get update && \
+#     apt-get install -y \
+#     gcc python3-dev \
+#     libasound2-dev libsndfile1-dev \
+#     && rm -rf /var/lib/apt/lists/*
+#
+# Then in your requirements: soundfile sounddevice (instead of pyaudio)
+#
+# Minimal setup (enough for pure-Python packages and many packages with C extensions):
 RUN apt-get update && \
     apt-get install -y \
     gcc \
     python3-dev \
     && rm -rf /var/lib/apt/lists/*
 
-USER appuser
-
-RUN mkdir -p /home/appuser/.cache
-RUN chown -R appuser /home/appuser/.cache
-
+# Set the working directory to the user's home directory
+# This is where our application code will live
 WORKDIR /home/appuser
 
+# Copy requirements first for better Docker layer caching
+# If requirements don't change, Docker can reuse the pip install layer
 COPY requirements.txt .
-RUN python -m pip install --user --no-cache-dir -r requirements.txt
 
+# Install Python dependencies as root (needed for system-wide packages)
+# --no-cache-dir reduces image size by not caching pip downloads
+RUN python -m pip install --no-cache-dir -r requirements.txt
+
+# Copy all application files into the container
+# This includes source code, configuration files, etc.
+# (Excludes files specified in .dockerignore)
 COPY . .
 
-# ensure that any dependent models are downloaded at build-time
+# Change ownership of all app files to the non-privileged user
+# This ensures the application can read/write files as needed
+RUN chown -R appuser:appuser /home/appuser
+
+# Switch to the non-privileged user for all subsequent operations
+# This improves security by not running as root
+USER appuser
+
+# Create a cache directory for the user
+# Tools that follow the ~/.cache convention (such as pip) store cached downloads here
+RUN mkdir -p /home/appuser/.cache
+
+# Pre-download any ML models or files the agent needs
+# This ensures the container is ready to run immediately without downloading
+# dependencies at runtime, which improves startup time and reliability
 RUN python "$PROGRAM_MAIN" download-files
 
-# expose healthcheck port
+# Expose the healthcheck port
+# EXPOSE is documentation only: it records the port for operators and tooling but does not publish it
 EXPOSE 8081
 
-# Run the application.
+# Run the application
+# The "start" command tells the worker to connect to LiveKit and begin waiting for jobs
 CMD ["python", "$PROGRAM_MAIN", "start"]
 
+# === TROUBLESHOOTING COMMON BUILD ISSUES ===
+#
+# 1. "Package not found" or compilation errors:
+#    - Check that required system packages are installed (see examples above)
+#    - Ensure packages are installed BEFORE Python package installation
+#    - For C extensions: you need gcc and python3-dev
+#
+# 2. "Permission denied" errors:
+#    - Verify USER appuser comes after chown commands
+#    - Check that working directory is /home/appuser
+#    - Make sure all files are owned by appuser:appuser
+#
+# 3. "requirements.txt not found" or install errors:
+#    - Ensure requirements.txt is in your project root
+#    - Check that requirements.txt is not in .dockerignore
+#    - Pin package versions for reproducible builds (e.g., flask==2.3.0)
+#
+# 4. Large image sizes:
+#    - Consider multi-stage builds for production
+#    - Remove unnecessary packages after installation
+#    - Use .dockerignore to exclude large files
+#    - Consider switching to Alpine Linux for smaller base image (uses apk instead of apt-get; some packages may need compiling from source)
+#
+# 5. Slow builds:
+#    - Consider switching to UV for faster dependency resolution
+#    - Use Docker BuildKit for better caching: DOCKER_BUILDKIT=1 docker build
+#    - Order Dockerfile commands from least to most frequently changed
+#    - Copy requirements.txt before other files for better layer caching
+#
+# 6. Runtime issues:
+#    - Check healthcheck endpoint (port 8081)
+#    - Verify environment variables are set
+#    - Ensure agent can connect to LiveKit server
+#    - Check that required models/files are downloaded
+#
+# 7. Audio/video issues:
+#    - Install required system libraries (see PyAudio guide above)
+#    - Test with minimal audio setup first
+#    - Consider using sounddevice instead of pyaudio
+#
+# For more help: https://docs.livekit.io/agents/
diff --git a/pkg/agentfs/examples/python.uv.Dockerfile b/pkg/agentfs/examples/python.uv.Dockerfile