diff --git a/Dockerfile b/Dockerfile index f4f465f775..11fff9a797 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,125 +1,151 @@ -FROM ghcr.io/astral-sh/uv:0.8.6-python3.12-bookworm-slim - -ENV HOST=0.0.0.0 -ENV PORT=8000 -# Enable bytecode compilation -ENV UV_COMPILE_BYTECODE=1 -# Copy from the cache instead of linking since it's a mounted volume -ENV UV_LINK_MODE=copy - -# Install required apt packages (this now creates ALL cache directories) -COPY scripts/install-packages.sh . -RUN chmod +x install-packages.sh && \ - ./install-packages.sh && \ - rm install-packages.sh - -COPY scripts/auto-update.sh ./auto-update.sh -RUN chmod +x auto-update.sh && \ - ./auto-update.sh && \ - rm auto-update.sh - -# Create the apiuser with specific UID/GID -RUN groupadd -g 1001 apiuser && \ - useradd -m -u 1001 -g apiuser apiuser - -# Ensure all required directories exist and copy pre-cached files -RUN mkdir -p /home/apiuser/.cache/uv /home/apiuser/.cache/deno /home/apiuser/.cache/s3 /home/apiuser/.cache/tmp && \ - mkdir -p /home/apiuser/.local/lib/node_modules && \ - cp -r /opt/deno-cache/* /home/apiuser/.cache/deno/ 2>/dev/null || true && \ - cp -r /opt/node_modules/* /home/apiuser/.local/lib/node_modules/ 2>/dev/null || true && \ - rm -rf /opt/deno-cache /opt/node_modules - -# Set environment variables for Python and package managers -ENV PYTHONUSERBASE="/home/apiuser/.local" -ENV UV_CACHE_DIR="/home/apiuser/.cache/uv" -ENV PYTHONPATH=/home/apiuser/.local:$PYTHONPATH -ENV PATH=/home/apiuser/.local/bin:$PATH - -# Set deno environment variables to use user-owned directories -ENV DENO_DIR="/home/apiuser/.cache/deno" -ENV NODE_MODULES_DIR="/home/apiuser/.local/lib/node_modules" +# syntax=docker/dockerfile:1.7 + +######################### +# Builder (has compilers, pip/uv tooling, etc.) +######################### +FROM cgr.dev/tracecat.com/python:3.12-dev AS builder + +# We’ll work as root for installs, then return to nonroot for runtime. +USER root + +ENV HOST=0.0.0.0 \ + PORT=8000 \ + UV_COMPILE_BYTECODE=1 \ + UV_LINK_MODE=copy \ + PYTHONUSERBASE="/home/nonroot/.local" \ + UV_CACHE_DIR="/home/nonroot/.cache/uv" \ + PYTHONPATH=/home/nonroot/.local:$PYTHONPATH \ + PATH=/home/nonroot/.local/bin:$PATH \ + DENO_DIR="/home/nonroot/.cache/deno" \ + NODE_MODULES_DIR="/home/nonroot/.local/lib/node_modules" \ + TMPDIR="/home/nonroot/.cache/tmp" \ + TEMP="/home/nonroot/.cache/tmp" \ + TMP="/home/nonroot/.cache/tmp" \ + UV_PYTHON=/usr/bin/python UV_PYTHON_DOWNLOADS=never -# Set temporary directory environment variables for apiuser -ENV TMPDIR="/home/apiuser/.cache/tmp" -ENV TEMP="/home/apiuser/.cache/tmp" -ENV TMP="/home/apiuser/.cache/tmp" - -# Temp directory is now created above with other cache directories - -# Set the working directory inside the container WORKDIR /app -# Install the project's dependencies using the lockfile and settings -# WIHTOUT installing the project for better caching +# (Optional) System packages, Deno, Node caches, etc. +# Keep your script idempotent and non-interactive. 
+COPY scripts/install-packages.sh /tmp/install-packages.sh +RUN chmod +x /tmp/install-packages.sh && \ + /tmp/install-packages.sh && \ + rm -f /tmp/install-packages.sh + +# Prepare user-owned dirs and copy any pre-cached bits +RUN mkdir -p /home/nonroot/.cache/uv \ + /home/nonroot/.cache/deno \ + /home/nonroot/.cache/s3 \ + /home/nonroot/.cache/tmp \ + /home/nonroot/.local/lib/node_modules +# If your script staged caches under /opt, move them into the user-owned locations +# (These globs are best-effort; ignore if missing) +RUN cp -r /opt/deno-cache/* /home/nonroot/.cache/deno/ 2>/dev/null || true && \ + cp -r /opt/node_modules/* /home/nonroot/.local/lib/node_modules/ 2>/dev/null || true && \ + rm -rf /opt/deno-cache /opt/node_modules + +# ---------- Dependency layer (best cacheability) ---------- +# Bind-mount the lockfile and pyproject for uv to resolve deps without copying the whole tree. RUN --mount=type=cache,target=/root/.cache/uv \ - --mount=type=bind,source=uv.lock,target=uv.lock \ - --mount=type=bind,source=pyproject.toml,target=pyproject.toml \ - --mount=type=bind,source=packages,target=packages \ - uv sync --locked --no-install-project --no-dev --no-editable - -# Then, add the rest of the project source code and install it -# Installing separately from its dependencies allows optimal layer caching -# Copy the application files into the container and set ownership -COPY --chown=apiuser:apiuser ./tracecat /app/tracecat -COPY --chown=apiuser:apiuser ./packages/tracecat-registry /app/packages/tracecat-registry -COPY --chown=apiuser:apiuser ./packages/tracecat-ee /app/packages/tracecat-ee -COPY --chown=apiuser:apiuser ./pyproject.toml /app/pyproject.toml -COPY --chown=apiuser:apiuser ./uv.lock /app/uv.lock -COPY --chown=apiuser:apiuser ./.python-version /app/.python-version -COPY --chown=apiuser:apiuser ./README.md /app/README.md -COPY --chown=apiuser:apiuser ./LICENSE /app/LICENSE -COPY --chown=apiuser:apiuser ./alembic.ini /app/alembic.ini -COPY --chown=apiuser:apiuser ./alembic /app/alembic - -# Copy the entrypoint script and health check script -COPY --chown=apiuser:apiuser scripts/entrypoint.sh /app/entrypoint.sh -COPY scripts/check_tmp.py /usr/local/bin/check_tmp.py -RUN chmod +x /app/entrypoint.sh && chmod +x /usr/local/bin/check_tmp.py - -# Install the project with EE features + --mount=type=bind,source=uv.lock,target=uv.lock,readonly \ + --mount=type=bind,source=pyproject.toml,target=pyproject.toml,readonly \ + --mount=type=bind,source=packages,target=packages,readonly \ + uv sync --locked --no-install-project --no-dev --no-editable --python /usr/bin/python + +# ---------- App source & installation ---------- +# Copy only what you need for installation; keep ownership consistent +COPY --chown=nonroot:nonroot ./tracecat /app/tracecat +COPY --chown=nonroot:nonroot ./packages/tracecat-registry /app/packages/tracecat-registry +COPY --chown=nonroot:nonroot ./packages/tracecat-ee /app/packages/tracecat-ee +COPY --chown=nonroot:nonroot ./pyproject.toml ./uv.lock ./.python-version /app/ +COPY --chown=nonroot:nonroot ./README.md ./LICENSE ./alembic.ini /app/ +COPY --chown=nonroot:nonroot ./alembic /app/alembic + +# Entrypoints & helpers +COPY --chown=nonroot:nonroot scripts/entrypoint.py /app/entrypoint.py +COPY --chown=root:root scripts/check_tmp.py /usr/local/bin/check_tmp.py +RUN chmod +x /usr/local/bin/check_tmp.py + +# Install the project (prod extras but no dev) RUN --mount=type=cache,target=/root/.cache/uv \ uv sync --locked --no-dev --no-editable -# Place executables in the 
environment at the front of the path +# Ensure the venv is first on PATH and provide a uv shim for tools that expect it in PATH ENV PATH="/app/.venv/bin:$PATH" +RUN mkdir -p /home/nonroot/.local/bin && \ + ln -sf "$(command -v uv)" /home/nonroot/.local/bin/uv && \ + chown -R nonroot:nonroot /home/nonroot /app + +# Quick sanity checks (optional; stays in builder layer) +RUN python -V && uv --version + +######################### +# Final (minimal runtime) +######################### +FROM cgr.dev/tracecat.com/python:3.12 AS final + +# Re-declare runtime ENV (metadata doesn’t come from COPY) +ENV HOST=0.0.0.0 \ + PORT=8000 \ + PYTHONUSERBASE="/home/nonroot/.local" \ + PYTHONPATH=/home/nonroot/.local:$PYTHONPATH \ + PATH="/app/.venv/bin:/home/nonroot/.local/bin:$PATH" \ + DENO_DIR="/home/nonroot/.cache/deno" \ + NODE_MODULES_DIR="/home/nonroot/.local/lib/node_modules" \ + VIRTUAL_ENV=/app/.venv -# Ensure uv binary is available where Ray expects it -RUN mkdir -p /home/apiuser/.local/bin && \ - ln -s $(which uv) /home/apiuser/.local/bin/uv && \ - chown -R apiuser:apiuser /home/apiuser/.local/bin - -# Fix ownership of all apiuser directories after root operations -# This ensures apiuser can access all necessary files and directories -RUN chown -R apiuser:apiuser /home/apiuser /app/.scripts - -# Ensure apiuser owns everything in /app -RUN chown -R apiuser:apiuser /app/.venv - -# Verify permissions are correctly set before switching users -RUN ls -la /home/apiuser/ && \ - ls -la /home/apiuser/.cache/ && \ - ls -ld /home/apiuser/.cache/uv && \ - echo "UV cache directory permissions: $(stat -c '%a %U:%G' /home/apiuser/.cache/uv)" && \ - echo "Permission verification complete" - -# Change to the non-root user -USER apiuser - -# Verify apiuser can access required directories and binaries -RUN deno --version && \ - python3 -c "import os; print(f'DENO_DIR accessible: {os.access(os.environ[\"DENO_DIR\"], os.R_OK | os.W_OK)}')" && \ - python3 -c "import os; print(f'UV_CACHE_DIR accessible: {os.access(os.environ[\"UV_CACHE_DIR\"], os.R_OK | os.W_OK)}')" && \ - python3 -c "import os, tempfile; f=tempfile.NamedTemporaryFile(dir=os.environ['UV_CACHE_DIR'], delete=True); print(f'UV_CACHE_DIR write test: SUCCESS - {f.name}')" && \ - python3 -c "import os; print(f'NODE_MODULES_DIR accessible: {os.access(os.environ[\"NODE_MODULES_DIR\"], os.R_OK | os.W_OK)}')" && \ - python3 -c "import os; print(f'/app/.scripts accessible: {os.access(\"/app/.scripts\", os.R_OK | os.W_OK)}')" && \ - python3 -c "import os; print(f'S3 cache accessible: {os.access(\"/home/apiuser/.cache/s3\", os.R_OK | os.W_OK)}')" && \ - python3 -c "import tempfile; print(f'Temp dir: {tempfile.gettempdir()}')" && \ - python3 -c "import os; print(f'Entrypoint executable: {os.access(\"/app/entrypoint.sh\", os.R_OK | os.X_OK)}')" && \ - ls -la /app/entrypoint.sh && \ - echo "User access verification complete" - -EXPOSE $PORT +WORKDIR /app -ENTRYPOINT ["/app/entrypoint.sh"] +# Copy only what’s needed to run: +# - the virtualenv +# - your app code +# - entrypoint +# - user caches/locals that runtime expects +COPY --chown=nonroot:nonroot --from=builder /app /app +COPY --chown=nonroot:nonroot --from=builder /home/nonroot/.local /home/nonroot/.local +COPY --chown=nonroot:nonroot --from=builder /home/nonroot/.cache/deno /home/nonroot/.cache/deno +COPY --chown=nonroot:nonroot --from=builder /home/nonroot/.cache/s3 /home/nonroot/.cache/s3 +COPY --from=builder /usr/local/bin/deno /usr/local/bin/deno +COPY --from=builder /usr/local/bin/check_tmp.py 
/usr/local/bin/check_tmp.py + +# Remove unused aiohttp +RUN ["/app/.venv/bin/python", "-c", "\ +import pathlib, shutil; \ +base = pathlib.Path('/app/.venv/lib/python3.12/site-packages/ray/_private/runtime_env/agent/thirdparty_files'); \ +paths = list(base.glob('aiohttp-*')); \ +[ (print('Removing', p), (shutil.rmtree(p) if p.is_dir() else p.unlink())) for p in paths ] \ +"] + +# Deno exists and is runnable +RUN ["/usr/local/bin/deno", "--version"] + +# Python-only verification (permissions + write test) +RUN ["/usr/bin/python", "-c", "\ +import os, tempfile, sys\n\ +\n\ +def check_path(p, want_exec=False):\n\ + ok_r = os.access(p, os.R_OK)\n\ + ok_w = os.access(p, os.W_OK)\n\ + ok_x = os.access(p, os.X_OK)\n\ + print(f'{p}: R={ok_r} W={ok_w}' + (f' X={ok_x}' if want_exec else ''))\n\ + return ok_r and (ok_w or not want_exec) and (ok_x if want_exec else True)\n\ +\n\ +d = os.environ.get('DENO_DIR','/home/nonroot/.cache/deno')\n\ +n = os.environ.get('NODE_MODULES_DIR','/home/nonroot/.local/lib/node_modules')\n\ +ok = True\n\ +print('Checking cache dirs and entrypoint…')\n\ +ok &= check_path(d)\n\ +ok &= check_path(n)\n\ +ok &= check_path('/app/.scripts')\n\ +ok &= check_path('/home/nonroot/.cache/s3')\n\ +# entrypoint is launched via python, so it doesn't need the +x bit; we just need R\n\ +ok &= check_path('/app/entrypoint.py', want_exec=False)\n\ +sys.exit(0 if ok else 1)\n\ +"] + +# Chainguard images default to nonroot; be explicit: +USER nonroot -CMD ["sh", "-c", "python3 -m uvicorn tracecat.api.app:app --host $HOST --port $PORT"] +EXPOSE $PORT +ENTRYPOINT ["/app/.venv/bin/python", "/app/entrypoint.py"] +CMD [] \ No newline at end of file diff --git a/docker-compose.local.yml b/docker-compose.local.yml index 9482eea6bd..2b4552f944 100644 --- a/docker-compose.local.yml +++ b/docker-compose.local.yml @@ -120,7 +120,7 @@ services: SENTRY_DSN: ${SENTRY_DSN} volumes: - ${TRACECAT__LOCAL_REPOSITORY_PATH}:/app/local_registry - command: ["python", "tracecat/dsl/worker.py"] + entrypoint: ["/app/.venv/bin/python", "/app/tracecat/dsl/worker.py"] depends_on: - api - temporal @@ -164,7 +164,6 @@ services: - ${TRACECAT__LOCAL_REPOSITORY_PATH}:/app/local_registry command: [ - "python", "-m", "uvicorn", "tracecat.api.executor:app", @@ -174,7 +173,7 @@ services: "8000", ] healthcheck: - test: ["CMD-SHELL", "check_tmp.py"] + test: ["CMD", "/app/.venv/bin/python", "/usr/local/bin/check_tmp.py"] interval: 30s timeout: 5s retries: 3 diff --git a/scripts/entrypoint.py b/scripts/entrypoint.py new file mode 100644 index 0000000000..9167952a10 --- /dev/null +++ b/scripts/entrypoint.py @@ -0,0 +1,50 @@ +#!/usr/bin/python +import os +import sys +import subprocess + +TRUTHY = {"1", "true", "yes", "on", "y", "t"} + +def as_bool(val: str) -> bool: + return val.lower() in TRUTHY + +def run_migrations() -> bool: + print("Running database migrations...") + try: + # Use the same interpreter; no shell needed. + subprocess.run( + [sys.executable, "-m", "alembic", "upgrade", "head"], + check=True, + ) + except subprocess.CalledProcessError as e: + print("Migration failed!", flush=True) + return False + print("Migrations completed successfully.", flush=True) + return True + +def main(): + # Only run migrations when explicitly requested + run_flag = os.getenv("RUN_MIGRATIONS", "false") + if as_bool(run_flag): + if not run_migrations(): + print("Exiting due to migration failure", flush=True) + sys.exit(1) + + # If args were provided, exec them with the current Python + # This mirrors `exec "$@"` from the Bash script. 
+ args = sys.argv[1:] + if args: + os.execv(sys.executable, [sys.executable] + args) + + # Otherwise, run uvicorn with HOST/PORT from env + host = os.getenv("HOST", "0.0.0.0") + port = os.getenv("PORT", "8000") + os.execv( + sys.executable, + [sys.executable, "-m", "uvicorn", "tracecat.api.app:app", + "--host", host, "--port", str(port)], + ) + +if __name__ == "__main__": + main() + diff --git a/scripts/install-packages.sh b/scripts/install-packages.sh index f3340bcc7e..c6bc75f42a 100644 --- a/scripts/install-packages.sh +++ b/scripts/install-packages.sh @@ -1,118 +1,93 @@ -#!/bin/bash - -# Bash "strict mode", to help catch problems and bugs in the shell -# script. Every bash script you write should include this. See -# http://redsymbol.net/articles/unofficial-bash-strict-mode/ for details. +#!/usr/bin/env bash +# install-packages.sh set -euo pipefail -export DEBIAN_FRONTEND=noninteractive -export DENO_VERSION=2.3.5 -export PYODIDE_VERSION=0.27.6 - -# Detect architecture -ARCH=$(uname -m) -case ${ARCH} in - x86_64) - DENO_ARCH="x86_64-unknown-linux-gnu" - ;; - aarch64|arm64) - DENO_ARCH="aarch64-unknown-linux-gnu" - ;; - *) - echo "Unsupported architecture: ${ARCH}" - exit 1 - ;; -esac +# ---- Versions (override via build args/env if needed) ---- +: "${DENO_VERSION:=2.3.5}" +: "${PYODIDE_VERSION:=0.27.6}" -# Update package lists -apt-get update - -# Install base packages including curl (needed for kubectl installation) -apt-get install -y \ - acl \ - git \ - xmlsec1 \ - libmagic1 \ - curl \ - apt-transport-https \ - ca-certificates \ - gnupg \ - unzip - -# Verify curl is installed and in PATH -which curl || { echo "ERROR: curl not found after installation"; exit 1; } -echo "curl version: $(curl --version | head -n 1)" +# ---- Arch detection for Deno artifact ---- +ARCH="$(uname -m)" +case "$ARCH" in + x86_64) DENO_ARCH="x86_64-unknown-linux-gnu" ;; + aarch64|arm64) DENO_ARCH="aarch64-unknown-linux-gnu" ;; + *) echo "Unsupported architecture: ${ARCH}" >&2; exit 1 ;; +esac -# Install Deno -echo "Installing Deno v${DENO_VERSION} for architecture ${ARCH}..." DENO_ZIP="deno-${DENO_ARCH}.zip" +BASE_URL="https://github.com/denoland/deno/releases/download/v${DENO_VERSION}" +CHECKSUM_URL="${BASE_URL}/${DENO_ZIP}.sha256sum" +DENO_URL="${BASE_URL}/${DENO_ZIP}" + +# ---- Cleanup on exit ---- +cleanup() { + rm -f "${DENO_ZIP}.partial" || true +} +trap cleanup EXIT + +# ---- wget helpers (quiet, with a few retries) ---- +if ! wget --version >/dev/null 2>&1; then + echo "ERROR: wget not installed." 
>&2 + exit 1 +fi + +wget_stdout() { wget -q --tries=3 --timeout=20 -O- "$1"; } +wget_to() { wget -q --tries=3 --timeout=20 "$1" -O "$2"; } + +echo "Installing Deno v${DENO_VERSION} for ${ARCH} …" +echo "Fetching checksum: ${CHECKSUM_URL}" +DENO_SHA256="$(wget_stdout "${CHECKSUM_URL}" | awk '{print $1}' || true)" + +echo "Downloading: ${DENO_URL}" +# Download to a temp name to avoid half-written files if interrupted +wget_to "${DENO_URL}" "${DENO_ZIP}.partial" +mv "${DENO_ZIP}.partial" "${DENO_ZIP}" -# Fetch the SHA256 checksum from the official release -CHECKSUM_URL="https://github.com/denoland/deno/releases/download/v${DENO_VERSION}/${DENO_ZIP}.sha256sum" -echo "Fetching SHA256 checksum from ${CHECKSUM_URL}" -DENO_SHA256=$(curl -sSL "${CHECKSUM_URL}" | awk '{print $1}') - -if [ -z "${DENO_SHA256}" ]; then - echo "WARNING: Failed to fetch SHA256 checksum, skipping verification" - curl -fsSL "https://github.com/denoland/deno/releases/download/v${DENO_VERSION}/${DENO_ZIP}" -o "${DENO_ZIP}" -else - echo "Using SHA256 checksum: ${DENO_SHA256}" - curl -fsSL "https://github.com/denoland/deno/releases/download/v${DENO_VERSION}/${DENO_ZIP}" -o "${DENO_ZIP}" - echo "${DENO_SHA256} ${DENO_ZIP}" | sha256sum -c - +# Require checksum verification +if [[ -z "${DENO_SHA256}" ]]; then + echo "ERROR: checksum not provided; refusing to install unverified binary." >&2 + exit 1 fi -# Install deno +# Verify checksum +if ! echo "${DENO_SHA256} ${DENO_ZIP}" | sha256sum -c -; then + echo "ERROR: checksum verification failed." >&2 + rm -f "${DENO_ZIP}" + exit 1 +fi + +# Install Deno unzip -o "${DENO_ZIP}" -d /usr/local/bin/ -rm "${DENO_ZIP}" chmod +x /usr/local/bin/deno +rm -f "${DENO_ZIP}" -# Verify deno installation -if ! deno --version; then - echo "ERROR: Failed to install deno" - exit 1 +# Verify install +if ! deno --version >/dev/null 2>&1; then + echo "ERROR: Deno failed to install." >&2 + exit 1 fi -echo "Deno installed successfully" +echo "Deno installed successfully." -# Pre-cache pyodide and dependencies using deno cache -echo "Pre-caching Pyodide v${PYODIDE_VERSION}..." 
+# ---- Pre-cache Pyodide with Deno (builder layer only) ---- +echo "Pre-caching Pyodide v${PYODIDE_VERSION} …" -# Create ALL cache directories that apiuser will need -# This consolidates directory creation in one place -# Note: Permissions will be set in Dockerfile after user creation +# Create runtime dirs that the final image expects (ownership fixed later) mkdir -p \ - /home/apiuser/.cache/deno \ - /home/apiuser/.cache/uv \ - /home/apiuser/.cache/pyodide-packages \ - /home/apiuser/.cache/s3 \ - /home/apiuser/.local \ - /home/apiuser/.local/lib/node_modules \ - /app/.scripts - -# Set DENO_DIR for caching during build (use root-owned location) + /home/nonroot/.cache/deno \ + /home/nonroot/.cache/uv \ + /home/nonroot/.cache/pyodide-packages \ + /home/nonroot/.cache/s3 \ + /home/nonroot/.cache/tmp \ + /home/nonroot/.local/lib/node_modules \ + /app/.scripts + +# Use a root-owned build cache for Deno in the builder layer export DENO_DIR="/opt/deno-cache" -mkdir -p "$DENO_DIR" +mkdir -p "${DENO_DIR}" -# Use deno cache to download pyodide module and its dependencies -# This runs as root and creates root-owned cache that will be copied later -# Note: node_modules will be created automatically in the current directory -cd /opt +# Place node_modules under /opt so we can selectively COPY to final if needed +pushd /opt >/dev/null deno cache --node-modules-dir=auto "npm:pyodide@${PYODIDE_VERSION}" +popd >/dev/null -echo "Deno and Pyodide installation complete" - -# Apply security updates -apt-get -y upgrade - -# Remove install only dependencies -apt-get purge -y curl gnupg apt-transport-https unzip -apt-get autoremove -y - -# Check if git is installed by checking the version -if ! git --version &> /dev/null; then - echo "ERROR: Failed to install git" - exit 1 -fi - -# Clean up -apt-get clean -rm -rf /var/lib/apt/lists/* +echo "Deno + Pyodide setup complete." \ No newline at end of file
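Usage sketch (for reviewers; not part of the diff). The image tag and env values below are illustrative assumptions. BuildKit is required for the `# syntax=docker/dockerfile:1.7` directive and the `--mount=type=cache`/`type=bind` steps, and the new entrypoint.py either starts the API by default or exec's whatever arguments a service passes, matching the compose changes above:

# Build (BuildKit is needed for the syntax directive and the cache/bind mounts)
DOCKER_BUILDKIT=1 docker build -t tracecat-api:local .

# Default: entrypoint.py execs `python -m uvicorn tracecat.api.app:app --host $HOST --port $PORT`
docker run --rm -p 8000:8000 tracecat-api:local

# Opt in to Alembic migrations before the server starts (RUN_MIGRATIONS defaults to false)
docker run --rm -e RUN_MIGRATIONS=true -p 8000:8000 tracecat-api:local

# Extra args are exec'd with the venv Python, as the executor service does in docker-compose.local.yml
docker run --rm -p 8000:8000 tracecat-api:local -m uvicorn tracecat.api.executor:app --host 0.0.0.0 --port 8000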