inkeep · nick-inkeep · Mar 25, 2026
diff --git a/.ai-dev/Dockerfile b/.ai-dev/Dockerfile
@@ -7,6 +7,7 @@ RUN apt-get update && apt-get install -y \
     ca-certificates \
     sudo \
     jq \
+    tmux \
     && rm -rf /var/lib/apt/lists/*
 
 # Install GitHub CLI
@@ -35,7 +36,7 @@ WORKDIR /home/agent
 RUN curl -fsSL https://claude.ai/install.sh | bash
 
 # Add Claude Code to PATH
-ENV PATH="/home/agent/.claude/bin:$PATH"
+ENV PATH="/home/agent/.local/bin:${PATH}"
 
 WORKDIR /workspace
 

diff --git a/.ai-dev/README.md b/.ai-dev/README.md
@@ -44,6 +44,8 @@ docker compose -p ai-dev-auth -f .ai-dev/docker-compose.yml exec sandbox bash
 docker compose -p ai-dev-auth -f .ai-dev/docker-compose.yml down
 ```
 
+For parallel `/ship` instances working on different features, see [Running parallel instances](#running-parallel-instances) below.
+
 ## When to use Docker vs host execution
 
 | Scenario | Recommended | Why |
@@ -73,18 +75,30 @@ When passed, Ralph runs `ralph.sh` inside the Docker sandbox instead of on the h
 ## Prerequisites
 
 - Docker and Docker Compose
-- The `/ralph` skill installed (for Phase 1-2 on host)
-- `ANTHROPIC_API_KEY` set in your environment
+- `ANTHROPIC_API_KEY` or Claude Code OAuth login (see auth setup below)
 
 ## Quick start
 
 ### One-time setup
 
 ```bash
 cd .ai-dev
-cp .env.example .env
-# Edit .env — add your ANTHROPIC_API_KEY
 
+# Auth — choose one:
+# Option A: API key
+echo "ANTHROPIC_API_KEY=sk-ant-..." > .env
+
+# Option B: Extract OAuth token from macOS Keychain (if logged into Claude Code)
+TOKEN=$(security find-generic-password -s "Claude Code-credentials" -w 2>/dev/null | \
+  python3 -c 'import sys,json; print(json.loads(sys.stdin.read())["claudeAiOauth"]["accessToken"])' 2>/dev/null)
+echo "CLAUDE_CODE_OAUTH_TOKEN=$TOKEN" > .env
+
+# Plugin setup — REQUIRED for local marketplace plugins (/ship, /implement, etc.)
+# Local directory marketplaces are symlinks that break inside Docker.
+# Copy them to make them real directories:
+cp -r ~/team-skills ~/.claude/plugins/marketplaces/inkeep-team-skills 2>/dev/null || true
+
+# Build
 docker compose build
 ```
 
@@ -180,6 +194,82 @@ claude
 
 Note: Interactive spec work is less convenient inside Docker (no browser tools, no macOS computer use). Pattern A is preferred for most workflows.
 
+#### Pattern D: Headless /ship (fully autonomous)
+
+Launch `/ship` inside a detachable tmux session for fully autonomous execution. The process persists independently of the `docker exec` connection.
+
+```bash
+# Create feature branch + spec on host, then launch headless:
+docker compose exec -d sandbox tmux new-session -d -s ship \
+  'cd /workspace && mkdir -p tmp/ship && { claude -p "/eng:ship specs/my-feature/SPEC.md" \
+    --dangerously-skip-permissions --max-turns 150 \
+    --output-format stream-json --verbose; \
+    echo $? > /workspace/tmp/ship/exit-code; } 2>&1 \
+    | tee /workspace/tmp/ship/stream.jsonl'
+```
+
+**Gotchas:**
+- `--output-format stream-json` **requires** the `--verbose` flag when used with `-p` (Claude Code errors without it)
+- Always use `claude` (not a full path) — the Dockerfile `ENV PATH` fix ensures it's found in all contexts
+- `tmp/ship/state.json` is the primary monitoring channel (readable from host via bind mount)
+
+**Monitoring:**
+
+```bash
+# Run from the repo root. Attach to the tmux session for live output:
+docker compose -f .ai-dev/docker-compose.yml exec sandbox tmux attach -t ship
+
+# Check if ship is still running:
+docker compose -f .ai-dev/docker-compose.yml exec sandbox tmux has-session -t ship 2>/dev/null && echo "Running" || echo "Done"
+
+# Read current phase from host:
+jq -r '.currentPhase // "waiting"' tmp/ship/state.json
+
+# Monitor loop (run on host):
+while true; do
+  echo "=== $(date '+%H:%M:%S') ==="
+  jq -r '.currentPhase // "waiting"' tmp/ship/state.json 2>/dev/null
+  docker compose -f .ai-dev/docker-compose.yml exec sandbox \
+    tmux has-session -t ship 2>/dev/null \
+    && echo "Status: running" \
+    || echo "Status: DONE (exit: $(cat tmp/ship/exit-code 2>/dev/null))"
+  docker stats --no-stream --format "Memory: {{.MemUsage}}" \
+    "$(docker compose -f .ai-dev/docker-compose.yml ps -q sandbox)" 2>/dev/null
+  echo "---"
+  sleep 30
+done
+```
+
+### Running parallel instances
+
+Git worktrees don't work with Docker bind mounts — `.git/worktrees/` references break when only the worktree is mounted. Use full repo copies instead.
+
+```bash
+# Create isolated copies
+cp -r ~/agents ~/agents-a && cd ~/agents-a && git checkout -b feat/task-a
+cp -r ~/agents ~/agents-b && cd ~/agents-b && git checkout -b feat/task-b
+
+# Launch with separate project names
+# Instance A (the compose file mounts its own repo copy, ~/agents-a, as the workspace):
+docker compose -p ship-a -f ~/agents-a/.ai-dev/docker-compose.yml up -d
+
+# Instance B (override workspace volume):
+docker compose -p ship-b \
+  -f ~/agents/.ai-dev/docker-compose.yml \
+  -f <(cat <<'EOF'
+services:
+  sandbox:
+    volumes:
+      - ~/agents-b:/workspace
+      - claude-data:/home/agent/.claude
+      - squid-certs:/certs:ro
+      - ${HOME}/.claude/plugins:/host-plugins:ro
+EOF
+) up -d
+```
+
+Each instance gets its own containers, volumes, and network. Use the same `-p <name>` for all follow-up commands (`exec`, `logs`, `down`).
+
 ## What the sandbox can and cannot access
 
 ### Network (controlled by squid.conf)
@@ -189,19 +279,25 @@ Note: Interactive spec work is less convenient inside Docker (no browser tools,
 | `*.anthropic.com` | Full | Claude API calls |
 | `*.claude.com` | Full | Claude Code authentication |
 | `registry.npmjs.org` | Full | pnpm install |
+| `*.sentry.io` | Full | Claude Code error reporting (startup hangs without it) |
+| `*.statsig.com` | Full | Claude Code feature flags / telemetry |
+| `*.googleapis.com` | Full | Google Fonts (Next.js build), Claude Code updates |
+| `*.gstatic.com` | Full | Font file CDN |
 | `*.inkeep.com` | Full | Organization services |
 | `api.github.com` | Full | GitHub API (PR, reviews, CI) |
-| `github.com/inkeep/*` | Path-restricted | Git push/pull (org repos only) |
-| `*.githubusercontent.com/inkeep/*` | Path-restricted | GitHub raw content |
+| `github.com/(inkeep\|anthropics)/*` | Path-restricted | Git push/pull (org + Anthropic repos) |
+| `*.githubusercontent.com/(inkeep\|anthropics)/*` | Path-restricted | GitHub raw content + security.json |
 | Everything else | **Blocked** | |
 
+**Note on web tools:** Claude Code's `WebSearch` tool works inside the container (it's server-side, routes through `api.anthropic.com`). `WebFetch` is client-side — it makes direct HTTP requests from the container, so it's blocked for non-allowed domains. This means agents can search the web but can't fetch arbitrary URLs.
+
 ### Filesystem
 
 | Path | Container access | Notes |
 |------|-----------------|-------|
 | `/workspace/` | Read-write | Bind mount of repo root — same files, same `.git` |
 | `/home/agent/.claude/` | Read-write | Docker volume — persists Claude auth across restarts |
-| `/host-plugins/` | Read-only | Host's `~/.claude/plugins/` — copied to container on startup by entrypoint |
+| `/host-plugins/` | Read-only | Host's `~/.claude/plugins/` — used by `CLAUDE_CODE_PLUGIN_SEED_DIR` for plugin discovery at runtime (no copy) |
 | Everything else | Container-only | Lost when container is removed |
 
 ### What the container CANNOT do
@@ -244,7 +340,7 @@ Edit `docker-compose.yml`:
 deploy:
   resources:
     limits:
-      memory: 16G  # Default: 14G
+      memory: 24G  # Default: 20G
 ```
 
 ### Pushing from inside the container
@@ -254,10 +350,9 @@ To enable git push and PR creation from inside the container:
 1. Set `GITHUB_TOKEN` in `.env`
 2. The `gh` CLI and git are pre-installed in the container
 3. The GitHub API is already in the allowlist
+4. The entrypoint auto-configures a git credential helper and SSH→HTTPS rewrite when `GITHUB_TOKEN` is set
 
-**Note:** `gh` CLI reads `GITHUB_TOKEN` from the environment and works out of the box. For `git push` to authenticate, you would also need a git credential helper configured — this is not set up by default. A future enhancement could add `git config --global credential.helper '!f() { echo "password=$GITHUB_TOKEN"; }; f'` to `entrypoint.sh`.
-
-This changes the trust model — the container can now push code and create PRs on your behalf.
+Both `gh` and `git push` work out of the box when the token is set. This changes the trust model — the container can push code and create PRs on your behalf.
 
 ## ralph.sh
 
@@ -295,8 +390,9 @@ Run `.claude/ralph.sh --help` for CLI options.
 
 The container runs `entrypoint.sh` on startup, which:
 
-1. **Copies host plugins** — If `~/.claude/plugins/` is mounted at `/host-plugins/`, copies them into the container's `~/.claude/plugins/` so skills and hooks are available inside Docker.
-2. **Configures sandbox** — Sets `enableWeakerNestedSandbox: true` in Claude Code's settings. Claude Code's bubblewrap sandbox cannot run in unprivileged Docker; this flag tells it to use a weaker sandbox and rely on the Docker container + Squid proxy as the security boundary.
+1. **Configures sandbox** — Sets `enableWeakerNestedSandbox: true` in Claude Code's settings. Claude Code's bubblewrap sandbox cannot run in unprivileged Docker; this flag tells it to use a weaker sandbox and rely on the Docker container + Squid proxy as the security boundary.
+2. **Enables plugins** — Reads `installed_plugins.json` from the `CLAUDE_CODE_PLUGIN_SEED_DIR` mount and sets `enabledPlugins` in `settings.json`. The seed directory mechanism handles path resolution at runtime (no copy needed), but plugins must still be explicitly enabled.
+3. **Configures git** — Sets `safe.directory /workspace` (required for bind-mounted repos with UID mismatch) and, if `GITHUB_TOKEN` is set, configures a credential helper and SSH→HTTPS rewrite for git push through the proxy.
 
 ## Container image
 
@@ -351,21 +447,26 @@ Run `pnpm install` on the host before starting Docker. The container accesses `n
 
 If a story requires a NEW dependency, the container can install it (npm registry is in the allowlist). But run `pnpm install` on the host afterward to ensure the lockfile is consistent.
 
+### `pnpm test --run` doubles the `--run` flag
+
+If the package.json script already includes `vitest --run`, passing `--run` again causes an error. Use `pnpm vitest --run` or `npx vitest --run` directly instead.
+
+### `docker compose` commands fail
+
+All `docker compose` commands must be run from the `.ai-dev/` directory or use `-f .ai-dev/docker-compose.yml`.
+
 ## Future work
 
-The current sandbox is **execution-only** — ralph.sh iterates inside Docker, host handles everything else (spec, push, PR, review). A future upgrade could enable full autonomous operation inside the container:
+The sandbox now supports fully autonomous `/ship` execution inside Docker, including headless mode, plugin loading, git push, and parallel instances. Remaining enhancements:
 
 | Item | What it enables | Trigger to revisit |
 |------|----------------|-------------------|
-| Git credential helper in entrypoint | `git push` from inside the container | Want to push/PR from Docker instead of host |
+| Lean seed directory builder script | Mounts only referenced plugin versions instead of full `~/.claude/plugins/` | Container startup slow due to large host plugin directory |
+| Parallel automation script | Automates repo copy + compose launch for N parallel instances | Running 3+ parallel `/ship` instances regularly |
+| `NODE_OPTIONS=--max-old-space-size` tuning | Better OOM diagnostics and control | OOM still occurs at 20GB |
 | `CLAUDE_SPECS_DIR=/workspace/.claude/specs` | `/spec` output persists via bind mount | Want to run `/spec` inside the container |
-| `gh` auth config (`GH_TOKEN` export) | `gh pr create`, `gh api` from inside | Want full `/ship` review loop inside Docker |
-| Git URL HTTPS rewrite (`insteadOf ssh`) | Prevents SSH attempts through the proxy | If tools default to SSH and silently fail |
-| Health checks in docker-compose | Proxy readiness before sandbox starts | Intermittent startup failures |
 | Convenience start script | Pre-flight checks (token set, plugins exist) | Team onboarding friction |
 
-See `~/.claude/specs/docker-sandbox-upgrade/SPEC.md` for the full analysis that drove these decisions.
-
 ## Security notes
 
 - **SSL inspection**: The proxy performs MITM on HTTPS traffic for URL path filtering. All traffic is decrypted by the proxy.

diff --git a/.ai-dev/docker-compose.yml b/.ai-dev/docker-compose.yml
@@ -10,6 +10,12 @@ services:
       - external-net
       - internal-net
     restart: unless-stopped
+    healthcheck:
+      test: ["CMD-SHELL", "bash -c 'echo > /dev/tcp/127.0.0.1/3128'"]
+      interval: 5s
+      timeout: 3s
+      retries: 5
+      start_period: 10s
     entrypoint: ["/bin/bash", "-c", "cp /etc/squid/ssl/squid-ca.crt /certs/ && chmod 644 /certs/squid-ca.crt && exec squid -N"]
 
   sandbox:
@@ -25,10 +31,14 @@ services:
       - NODE_EXTRA_CA_CERTS=/certs/squid-ca.crt
       - REQUESTS_CA_BUNDLE=/certs/squid-ca.crt
       - SSL_CERT_FILE=/certs/squid-ca.crt
-      # API key — required for Claude Code
+      # Auth — at least one must be set. API key is simplest.
+      # OAuth token is for Max subscription users without an API key.
       - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
+      - CLAUDE_CODE_OAUTH_TOKEN=${CLAUDE_CODE_OAUTH_TOKEN:-}
       # GitHub token — optional, only if pushing/creating PRs from inside container
       - GITHUB_TOKEN=${GITHUB_TOKEN:-}
+      - CLAUDE_PROJECT_DIR=/workspace
+      - CLAUDE_CODE_PLUGIN_SEED_DIR=/host-plugins
     volumes:
       - ..:/workspace
       - claude-data:/home/agent/.claude
@@ -38,13 +48,14 @@ services:
     networks:
       - internal-net
     depends_on:
-      - proxy
+      proxy:
+        condition: service_healthy
     stdin_open: true
     tty: true
     deploy:
       resources:
         limits:
-          memory: 14G
+          memory: 20G
 
 networks:
   internal-net:

diff --git a/.ai-dev/entrypoint.sh b/.ai-dev/entrypoint.sh
@@ -1,12 +1,6 @@
 #!/bin/bash
 set -e
 
-# Copy plugins from host mount (if available)
-if [ -d "/host-plugins" ] && [ "$(ls -A /host-plugins 2>/dev/null)" ]; then
-    mkdir -p /home/agent/.claude/plugins
-    cp -r /host-plugins/* /home/agent/.claude/plugins/
-fi
-
 # Enable weaker nested sandbox for Docker environment.
 # Claude Code's bubblewrap sandbox cannot run in unprivileged Docker containers.
 # Our security boundary is the Docker container + Squid proxy network jail.
@@ -17,6 +11,37 @@ fi
 if command -v jq &>/dev/null; then
     tmp=$(jq '.sandbox.enableWeakerNestedSandbox = true' "$SETTINGS_FILE")
     echo "$tmp" > "$SETTINGS_FILE"
+
+    # Enable plugins discovered in the seed directory.
+    # CLAUDE_CODE_PLUGIN_SEED_DIR handles path resolution at runtime, but
+    # enabledPlugins must still be set in settings.json for Claude Code to
+    # actually load them. (Known issue: https://github.com/anthropics/claude-code/issues/20661)
+    if [ -d "${CLAUDE_CODE_PLUGIN_SEED_DIR:-}" ]; then
+        INSTALLED_FILE="${CLAUDE_CODE_PLUGIN_SEED_DIR}/installed_plugins.json"
+        if [ -f "$INSTALLED_FILE" ]; then
+            while IFS= read -r key; do  # one key per line: no word-splitting or globbing
+                tmp=$(jq --arg k "$key" '.enabledPlugins[$k] = true' "$SETTINGS_FILE")
+                echo "$tmp" > "$SETTINGS_FILE"
+            done < <(jq -r '.plugins | keys[]' "$INSTALLED_FILE" 2>/dev/null)  # best-effort: unreadable file enables nothing
+        fi
+    fi
 fi
 
+# Git config — run from HOME to avoid issues when /workspace is a git worktree
+# whose .git file references a host path that doesn't exist in the container.
+(
+    cd /home/agent
+
+    # safe.directory — bind-mounted /workspace has different ownership than
+    # the container's agent user. Without this, git refuses to operate.
+    git config --global --add safe.directory /workspace
+
+    # Credential helper scoped to https://github.com so GITHUB_TOKEN is never
+    # offered to any other HTTPS remote; SSH GitHub remotes are rewritten to HTTPS.
+    if [ -n "${GITHUB_TOKEN:-}" ]; then
+        git config --global credential.https://github.com.helper '!f() { echo "username=x-access-token"; echo "password=$GITHUB_TOKEN"; }; f'
+        git config --global url."https://github.com/".insteadOf "git@github.com:"
+    fi
+)
+
 exec "$@"
diff --git a/.ai-dev/squid.conf b/.ai-dev/squid.conf
@@ -25,6 +25,15 @@ acl claude dstdomain .claude.com
 # npm registry — required for pnpm install
 acl npm_registry dstdomain registry.npmjs.org
 
+# Telemetry — Claude Code sends telemetry to Sentry and Statsig.
+# Without these, startup can hang waiting for telemetry endpoints.
+# Ref: https://code.claude.com/docs/en/network-config
+acl sentry dstdomain .sentry.io
+acl statsig dstdomain statsig.anthropic.com .statsig.com
+
+# Google services — fonts for Next.js build, storage for Claude updates
+acl google dstdomain .googleapis.com .gstatic.com
+
 # Organization domains
 acl inkeep_domains dstdomain .inkeep.com
 
@@ -33,11 +42,11 @@ acl github_api dstdomain api.github.com
 
 # GitHub git operations — path-restricted to organization repos
 acl github dstdomain github.com
-acl github_org_path urlpath_regex ^/inkeep(/|$)
+acl github_org_path urlpath_regex ^/(inkeep|anthropics)(/|$)
 
 # GitHub raw content — path-restricted to organization repos
 acl githubusercontent dstdomain .githubusercontent.com
-acl githubusercontent_org urlpath_regex ^/inkeep(/|$)
+acl githubusercontent_org urlpath_regex ^/(inkeep|anthropics)(/|$)
 
 # === Access Rules ===
 acl SSL_ports port 443
@@ -50,6 +59,9 @@ http_access deny !Safe_ports
 http_access allow anthropic
 http_access allow claude
 http_access allow npm_registry
+http_access allow sentry
+http_access allow statsig
+http_access allow google
 http_access allow inkeep_domains
 http_access allow github_api
 http_access allow github github_org_path

diff --git a/.npmrc b/.npmrc
@@ -0,0 +1,7 @@
+supportedArchitectures[os][]=current
+supportedArchitectures[os][]=linux
+supportedArchitectures[cpu][]=current
+supportedArchitectures[cpu][]=arm64
+supportedArchitectures[libc][]=current
+supportedArchitectures[libc][]=glibc
+supportedArchitectures[libc][]=musl