diff --git a/.gitignore b/.gitignore
index f7474787..8baa390d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,6 @@
 node_modules
 .env
 mise.local.toml
+
+dist/
+packages/michael/michael
\ No newline at end of file
diff --git a/.mise/config.toml b/.mise/config.toml
index 6ae80d27..f0905494 100644
--- a/.mise/config.toml
+++ b/.mise/config.toml
@@ -1,6 +1,7 @@
 [tools]
 node = "25.4.0"
 pnpm = "10.28.1"
+go = "1.24"
 restic = "0.18.0"
 gh = "2.25.0"
 
@@ -17,7 +18,7 @@ depends = ["*:build"]
 
 [tasks."common:build"]
 description = "Build all common packages"
-depends = ["common:*:build"]
+depends = ["install:deps", "common:*:build"]
 
 [tasks.test]
 description = "Run all unit tests"
@@ -71,3 +72,8 @@ OIDC_CLIENT_SECRET = "client secret"
 OIDC_ALLOW_INSECURE = true
 OIDC_REDIRECT_URI = 'http://localhost:5173/api/auth/oidc/callback'
 OIDC_LOGOUT_REDIRECT_URI = 'http://localhost:5173'
+LOG_LEVEL = "debug"
+OTLP_METRICS_ENDPOINT = "localhost:8428"
+OTLP_METRICS_URL_PATH = "/opentelemetry/v1/metrics"
+OTLP_LOGS_ENDPOINT = "localhost:9428"
+OTLP_LOGS_URL_PATH = "/insert/opentelemetry/v1/logs"
diff --git a/.mise/tasks/bench b/.mise/tasks/bench
new file mode 100755
index 00000000..db664a80
--- /dev/null
+++ b/.mise/tasks/bench
@@ -0,0 +1,711 @@
+#!/usr/bin/env bash
+#MISE description="Benchmark restic backends (restic-api, michael, or both)"
+#MISE depends=["docker:start", "restic-api:build", "michael:build"]
+#MISE dir="{{config_root}}"
+#
+# Benchmark restic-compatible backends using the real restic CLI.
+#
+# Supports restic-api (Node.js/NestJS) and michael (Go), with a comparison
+# mode (the default) that runs both sequentially and prints a side-by-side table.
+#
+# Prerequisites:
+#   - MinIO running (mise docker:start)
+#   - restic, node, perl installed (mise install)
+#   - For the default comparison mode: pnpm deps + go (mise install:deps)
+#
+# Usage:
+#   # Benchmark both backends and compare (default):
+#   mise bench
+#
+#   # Benchmark a single backend:
+#   mise bench -- --backend restic-api
+#   mise bench -- --backend michael
+#
+# Options:
+#   --backend NAME       Benchmark only one backend: restic-api | michael
+#   --small-files N      Number of small files (default: 50)
+#   --medium-files N     Number of medium files (default: 50)
+#   --large-files N      Number of large files (default: 20)
+#   --large-size-mb N    Size of each large file (default: 100)
+#   --no-cleanup         Keep S3 buckets after run
+#
+set -euo pipefail
+
+REPO_ROOT="$PWD"
+
+# ─── Configuration ──────────────────────────────────────────────────────────────
+
+API_PORT="${RESTIC_API_PORT:-3010}"
+API_HOST="${RESTIC_API_HOST:-localhost}"
+export JWT_SECRET="${JWT_SECRET:-cca13c34b450a77c1d4b9ecd25dff6aebc6d7417afdb31864f5943c59abd03a1}"
+
+BACKEND=""
+COMPARE=true
+SMALL_COUNT=50
+SMALL_SIZE_KB=100
+MEDIUM_COUNT=50
+MEDIUM_SIZE_MB=20
+LARGE_COUNT=20
+LARGE_SIZE_MB=100
+DO_CLEANUP=true
+
+# Parse options; ${2:?} turns a missing option value into a clear error
+# instead of an "unbound variable" failure under `set -u`.
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --backend) BACKEND="${2:?--backend requires a value}"; COMPARE=false; shift 2 ;;
+    --small-files) SMALL_COUNT="${2:?--small-files requires a value}"; shift 2 ;;
+    --medium-files) MEDIUM_COUNT="${2:?--medium-files requires a value}"; shift 2 ;;
+    --large-files) LARGE_COUNT="${2:?--large-files requires a value}"; shift 2 ;;
+    --large-size-mb) LARGE_SIZE_MB="${2:?--large-size-mb requires a value}"; shift 2 ;;
+    --no-cleanup) DO_CLEANUP=false; shift ;;
+    *) echo "Unknown option: $1"; exit 1 ;;
+  esac
+done
+
+BENCH_DIR=$(mktemp -d)
+MEM_PID=""
+API_PID=""
+MANAGED_PID=""
+REPOS=()
+
+# ─── Formatting ─────────────────────────────────────────────────────────────────
+
+bold() { printf '\033[1m%s\033[0m' "$*"; }
+dim() { printf '\033[2m%s\033[0m' "$*"; }
+green() { printf '\033[32m%s\033[0m' "$*"; }
+red() { printf '\033[31m%s\033[0m' "$*"; }
+
+# ─── Helpers ────────────────────────────────────────────────────────────────────
+
+# EXIT trap: stop the monitor and backend, drop benchmark buckets, remove temp data.
+on_exit() {
+  stop_mem_monitor
+  stop_backend
+  if [[ "$DO_CLEANUP" == true ]] && (( ${#REPOS[@]} > 0 )); then
+    cleanup_repos
+  fi
+  rm -rf "$BENCH_DIR"
+}
+trap on_exit EXIT
+
+# Print the current wall-clock time in milliseconds.
+now_ms() {
+  perl -MTime::HiRes -e 'printf "%.0f\n", Time::HiRes::time() * 1000'
+}
+
+# elapsed_s START_MS END_MS: print the difference in seconds (3 decimals).
+elapsed_s() {
+  perl -e 'printf "%.3f", ($ARGV[1] - $ARGV[0]) / 1000' "$1" "$2"
+}
+
+# throughput_mbs BYTES SECS: print MiB/s (1 decimal), or "inf" when SECS is not positive.
+throughput_mbs() {
+  local bytes="$1" secs="$2"
+  perl -e 'my ($n, $s) = @ARGV; if ($s > 0) { printf "%.1f", $n / 1048576 / $s } else { print "inf" }' "$bytes" "$secs"
+}
+
+# human_size BYTES: print BYTES in the largest fitting unit (B, KB, MB, GB).
+human_size() {
+  perl -e '
+    my $b = $ARGV[0];
+    if ($b >= 1073741824) { printf "%.1f GB", $b/1073741824 }
+    elsif ($b >= 1048576) { printf "%.1f MB", $b/1048576 }
+    elsif ($b >= 1024) { printf "%.1f KB", $b/1024 }
+    else { printf "%d B", $b }
+  ' "$1"
+}
+
+# make_jwt REPO: print an HS256 JWT signed with $JWT_SECRET whose payload names
+# REPO as its repository and expires two hours from now.
+make_jwt() {
+  local repo="$1"
+  node --no-warnings -e "
+    const c = require('node:crypto');
+    const h = Buffer.from(JSON.stringify({alg:'HS256',typ:'JWT'})).toString('base64url');
+    const p = Buffer.from(JSON.stringify({
+      user: c.randomUUID(),
+      repository: process.argv[1],
+      writeOnce: false,
+      iat: Math.floor(Date.now()/1000),
+      exp: Math.floor(Date.now()/1000) + 7200
+    })).toString('base64url');
+    const s = c.createHmac('sha256', process.env.JWT_SECRET)
+      .update(h+'.'+p).digest('base64url');
+    process.stdout.write(h+'.'+p+'.'+s);
+  " "$repo"
+}
+
+# Pick a fresh random repository id, record it in REPOS, and export
+# RESTIC_REPOSITORY / RESTIC_PASSWORD so the restic CLI targets it.
+init_repo() {
+  local repo
+  repo=$(node --no-warnings -e "process.stdout.write(require('node:crypto').randomUUID())")
+  local token
+  token=$(make_jwt "$repo")
+  REPOS+=("$repo")
+
+  export RESTIC_REPOSITORY="rest:http://_:${token}@${API_HOST}:${API_PORT}/${repo}"
+  export RESTIC_PASSWORD="bench"
+}
+
+# Best-effort removal of the S3 bucket named after every repository in REPOS.
+# A failure here never fails the benchmark run.
+cleanup_repos() {
+  echo ""
+  echo " $(dim "Cleaning up ${#REPOS[@]} benchmark repositories...")"
+  (cd "$REPO_ROOT/packages/restic-api" && node --no-warnings -e "
+    const { S3Client, ListObjectsV2Command, DeleteObjectsCommand, DeleteBucketCommand }
+      = require('@aws-sdk/client-s3');
+    const client = new S3Client({
+      credentials: {
+        accessKeyId: process.env.S3_ACCESS_KEY_ID || 'minio',
+        secretAccessKey: process.env.S3_SECRET_ACCESS_KEY || 'miniominio',
+      },
+      region: process.env.S3_REGION || 'minio',
+      endpoint: process.env.S3_ENDPOINT || 'http://localhost:9000',
+      forcePathStyle: true,
+    });
+    async function nuke(Bucket) {
+      try {
+        // A listing returns at most 1000 keys, so list and delete in rounds
+        // until the bucket is empty; only an empty bucket can be deleted.
+        for (;;) {
+          const { Contents } = await client.send(new ListObjectsV2Command({ Bucket }));
+          if ((Contents?.length ?? 0) === 0) break;
+          const { Errors } = await client.send(new DeleteObjectsCommand({
+            Bucket, Delete: { Objects: Contents.map(({ Key }) => ({ Key })) }
+          }));
+          // Per-key failures do not throw; bail out instead of looping forever.
+          if (Errors?.length) throw new Error(Errors[0].Message);
+        }
+        await client.send(new DeleteBucketCommand({ Bucket }));
+      } catch (err) {
+        // Best effort: a repository that never got a bucket is not worth reporting.
+        if (err.name === 'NoSuchBucket') return;
+        console.error(' (cleanup of ' + Bucket + ' failed: ' + err.message + ')');
+      }
+    }
+    Promise.all(process.argv.slice(1).map(nuke)).then(() => process.exit(0));
+  " "${REPOS[@]}") || true
+}
+
+# ─── Results I/O ────────────────────────────────────────────────────────────────
+
+# save_result KEY VALUE: append KEY=VALUE to the current backend's results file.
+save_result() {
+  echo "$1=$2" >> "$BENCH_DIR/results/${CURRENT_BACKEND}.txt"
+}
+
+# read_result KEY FILE: print the most recent value stored for KEY in FILE.
+read_result() {
+  local key="$1" file="$2"
+  grep "^${key}=" "$file" 2>/dev/null | tail -1 | cut -d= -f2-
+}
+
+# ─── Memory monitoring ──────────────────────────────────────────────────────────
+
+# Find the process listening on $API_PORT and sample its RSS every 0.5 s in the
+# background, keeping the highest value seen in $BENCH_DIR/.mem_peak.
+start_mem_monitor() {
+  API_PID=$(lsof -ti ":$API_PORT" -sTCP:LISTEN 2>/dev/null | head -1 || true)
+  if [[ -z "$API_PID" ]]; then
+    echo " $(dim '(could not find API process — memory stats unavailable)')"
+    return
+  fi
+  local peak_file="$BENCH_DIR/.mem_peak"
+  echo "0" > "$peak_file"
+  (
+    while kill -0 "$API_PID" 2>/dev/null; do
+      rss_kb=$(ps -o rss= -p "$API_PID" 2>/dev/null | tr -d ' ' || echo 0)
+      peak=$(cat "$peak_file")
+      if (( rss_kb > peak )); then echo "$rss_kb" > "$peak_file"; fi
+      sleep 0.5
+    done
+  ) &
+  MEM_PID=$!
+}
+
+# Stop the background sampler (if any) and forget the monitored process.
+stop_mem_monitor() {
+  if [[ -n "${MEM_PID:-}" ]]; then
+    kill "$MEM_PID" 2>/dev/null || true
+    # Reap it, as stop_backend does for the backend process.
+    wait "$MEM_PID" 2>/dev/null || true
+  fi
+  MEM_PID=""
+  API_PID=""
+}
+
+# Print the monitored process's current RSS in KB (0 when nothing is monitored).
+mem_rss_kb() {
+  [[ -z "${API_PID:-}" ]] && echo 0 && return
+  ps -o rss= -p "$API_PID" 2>/dev/null | tr -d ' ' || echo 0
+}
+
+# Print the peak RSS in KB recorded by the sampler (0 when none was recorded).
+mem_peak_kb() {
+  local peak_file="$BENCH_DIR/.mem_peak"
+  [[ -f "$peak_file" ]] && cat "$peak_file" || echo 0
+}
+
+# fmt_mem_mb KB: print KB as megabytes with one decimal.
+fmt_mem_mb() {
+  perl -e 'printf "%.1f MB", $ARGV[0] / 1024' "$1"
+}
+
+# ─── Backend lifecycle ──────────────────────────────────────────────────────────
+
+# start_backend NAME: free $API_PORT, launch restic-api or michael in the background
+# (output in $BENCH_DIR/backend.log) and wait up to ~30 s for the port to open.
+start_backend() {
+  local name="$1"
+  echo " $(dim "Starting ${name}...")"
+
+  # Ensure port is free before starting
+  lsof -ti ":$API_PORT" -sTCP:LISTEN 2>/dev/null | xargs kill 2>/dev/null || true
+  local elapsed=0
+  while nc -z "$API_HOST" "$API_PORT" 2>/dev/null; do
+    sleep 0.2
+    elapsed=$((elapsed + 1))
+    if (( elapsed > 50 )); then
+      echo "ERROR: port $API_PORT still in use after 10 seconds"
+      exit 1
+    fi
+  done
+
+  case "$name" in
+    restic-api)
+      (cd "$REPO_ROOT" && exec pnpm --filter restic-api start 2>&1) > "$BENCH_DIR/backend.log" 2>&1 &
+      MANAGED_PID=$!
+      ;;
+    michael)
+      (cd "$REPO_ROOT/packages/michael" && \
+        OTLP_METRICS_ENDPOINT=localhost:8428 \
+        OTLP_METRICS_URL_PATH=/opentelemetry/api/v1/push \
+        exec go run . 2>&1) > "$BENCH_DIR/backend.log" 2>&1 &
+      MANAGED_PID=$!
+      ;;
+    *)
+      echo "ERROR: unknown backend '$name' (expected restic-api or michael)"
+      exit 1
+      ;;
+  esac
+
+  local elapsed=0
+  while ! nc -z "$API_HOST" "$API_PORT" 2>/dev/null; do
+    sleep 0.2
+    elapsed=$((elapsed + 1))
+    if (( elapsed > 150 )); then
+      echo "ERROR: $name did not start within 30 seconds"
+      echo "--- backend log ---"
+      cat "$BENCH_DIR/backend.log"
+      exit 1
+    fi
+  done
+  echo " $(dim "${name} ready on :${API_PORT}")"
+}
+
+# Stop the managed backend plus any listener left on $API_PORT, then wait up to ~10 s for the port to free.
+stop_backend() {
+  if [[ -n "${MANAGED_PID:-}" ]]; then
+    kill "$MANAGED_PID" 2>/dev/null || true
+    wait "$MANAGED_PID" 2>/dev/null || true
+    MANAGED_PID=""
+  fi
+  # Also kill anything still on the port (child processes)
+  lsof -ti ":$API_PORT" -sTCP:LISTEN 2>/dev/null | xargs kill 2>/dev/null || true
+
+  # Wait for port to free up
+  local elapsed=0
+  while nc -z "$API_HOST" "$API_PORT" 2>/dev/null; do
+    sleep 0.2
+    elapsed=$((elapsed + 1))
+    if (( elapsed > 50 )); then break; fi
+  done
+}
+
+# ─── Prerequisites ──────────────────────────────────────────────────────────────
+
+# Exit with an error listing every required tool that is not on PATH (pnpm and go only in comparison mode).
+check_prereqs() {
+  local missing=()
+  command -v restic >/dev/null 2>&1 || missing+=("restic")
+  command -v node >/dev/null 2>&1 || missing+=("node")
+  command -v perl >/dev/null 2>&1 || missing+=("perl")
+
+  if [[ "$COMPARE" == true ]]; then
+    command -v pnpm >/dev/null 2>&1 || missing+=("pnpm")
+    command -v go >/dev/null 2>&1 || missing+=("go")
+  fi
+
+  if (( ${#missing[@]} > 0 )); then
+    echo "ERROR: missing required tools: ${missing[*]}"
+    exit 1
+  fi
+}
+
+# Exit with an error unless something answers HTTP on $API_HOST:$API_PORT.
+check_api_reachable() {
+  local http_code
+  # curl already prints "000" via -w when the connection fails, so the fallback
+  # must replace the value rather than append a second "000" to it.
+  http_code=$(curl -s -o /dev/null -w "%{http_code}" --connect-timeout 3 \
+    "http://${API_HOST}:${API_PORT}/" 2>/dev/null) || http_code="000"
+  if [[ "$http_code" == "000" ]]; then
+    echo "ERROR: backend not reachable at http://${API_HOST}:${API_PORT}"
+    echo " Start it with: mise restic-api:dev or mise michael:dev"
+    exit 1
+  fi
+}
+
+# ─── Data generation ────────────────────────────────────────────────────────────
+
+# _generate_files DIR COUNT BLOCK_SIZE BLOCKS: fill DIR with COUNT files
+# (f_1 .. f_COUNT) of BLOCKS x BLOCK_SIZE random bytes each.
+_generate_files() {
+  local dir="$1" count="$2" block_size="$3" blocks="$4" i
+  mkdir -p "$dir"
+  # Arithmetic loop rather than `seq 1 N`: BSD seq counts down for N=0 ("1 0"),
+  # which would create two files when zero were requested.
+  for (( i = 1; i <= count; i++ )); do
+    dd if=/dev/urandom of="$dir/f_$i" bs="$block_size" count="$blocks" 2>/dev/null
+  done
+}
+
+# Create the small/medium/large random data sets under $BENCH_DIR/data and print
+# a summary of their sizes.
+generate_data() {
+  echo "$(bold 'Generating test data...')"
+
+  _generate_files "$BENCH_DIR/data/small" "$SMALL_COUNT" 1024 "$SMALL_SIZE_KB"
+  echo " small: ${SMALL_COUNT} files x ${SMALL_SIZE_KB} KB"
+
+  _generate_files "$BENCH_DIR/data/medium" "$MEDIUM_COUNT" 1048576 "$MEDIUM_SIZE_MB"
+  echo " medium: ${MEDIUM_COUNT} files x ${MEDIUM_SIZE_MB} MB"
+
+  _generate_files "$BENCH_DIR/data/large" "$LARGE_COUNT" 1048576 "$LARGE_SIZE_MB"
+  echo " large: ${LARGE_COUNT} files x ${LARGE_SIZE_MB} MB"
+
+  local total_bytes=$(( SMALL_COUNT * SMALL_SIZE_KB * 1024 + MEDIUM_COUNT * MEDIUM_SIZE_MB * 1048576 + LARGE_COUNT * LARGE_SIZE_MB * 1048576 ))
+  local peak_bytes=$(( total_bytes * 3 ))
+  echo ""
+  echo " $(bold "total: $(human_size $total_bytes)") $(dim "(~$(human_size $peak_bytes) peak disk + S3)")"
+  echo ""
+}
+
+# ─── Throughput scenario ────────────────────────────────────────────────────────
+
+# run_throughput