|
| 1 | +#!/usr/bin/env bash |
| 2 | +set -euo pipefail |
| 3 | + |
| 4 | +# Memory Profiling for Benchmarks (cargo-instruments) |
| 5 | +# |
| 6 | +# Runs the `etl-benchmarks` crate's benchmark under Apple Instruments for |
| 7 | +# memory profiling. Defaults to the "Allocations" template and the |
| 8 | +# `table_copies` bench target. |
| 9 | +# |
| 10 | +# Prerequisites: |
| 11 | +# - macOS with full Xcode installed (xctrace is not part of the Command Line Tools alone) |
| 12 | +# - cargo-instruments (install: `cargo install cargo-instruments`) |
| 13 | +# - Postgres reachable per your env (same as other bench scripts) |
| 14 | +# |
| 15 | +# Environment Variables: |
| 16 | +# BENCH_NAME Bench target name. Default: table_copies |
| 17 | +# PACKAGE Cargo package. Default: etl-benchmarks |
| 18 | +# TEMPLATE Instruments template (Allocations|Leaks|VM Tracker|Time Profiler). Default: Allocations |
| 19 | +# OPEN_TRACE Open Instruments UI after run (true|false). Default: false |
| 20 | +# RUN_LABEL Run name label for trace. Default: auto timestamped |
| 21 | +# TRACE_DIR Output directory for traces. Default: target/instruments |
| 22 | +# LOG_TARGET Benchmark logs target (terminal|file). Default: terminal |
| 23 | +# DESTINATION Destination (null|big-query). Default: null |
| 24 | +# |
| 25 | +# Database connection (same defaults as benchmark.sh / prepare_tpcc.sh): |
| 26 | +# POSTGRES_USER Default: postgres |
| 27 | +# POSTGRES_PASSWORD Default: postgres |
| 28 | +# POSTGRES_DB Default: bench |
| 29 | +# POSTGRES_PORT Default: 5430 |
| 30 | +# POSTGRES_HOST Default: localhost |
| 31 | +# Benchmark params: |
| 32 | +# PUBLICATION_NAME Default: bench_pub |
| 33 | +# BATCH_MAX_SIZE Default: 1000000 |
| 34 | +# BATCH_MAX_FILL_MS Default: 10000 |
| 35 | +# MAX_TABLE_SYNC_WORKERS Default: 8 |
| 36 | +# BQ_PROJECT_ID, BQ_DATASET_ID, BQ_SA_KEY_FILE (if DESTINATION=big-query) |
| 37 | +# |
| 38 | +# Examples: |
| 39 | +# # Profile allocations for the default bench (null destination) |
| 40 | +# ./etl-benchmarks/scripts/mem_profile.sh |
| 41 | +# |
| 42 | +# # Open the Instruments UI afterwards |
| 43 | +# OPEN_TRACE=true ./etl-benchmarks/scripts/mem_profile.sh |
| 44 | +# |
| 45 | +# # Use the Leaks template instead of Allocations |
| 46 | +# TEMPLATE="Leaks" ./etl-benchmarks/scripts/mem_profile.sh |
| 47 | +# |
| 48 | +# # Profile BigQuery destination |
| 49 | +# DESTINATION=big-query \ |
| 50 | +# BQ_PROJECT_ID=my-project \ |
| 51 | +# BQ_DATASET_ID=my_dataset \ |
| 52 | +# BQ_SA_KEY_FILE=/path/to/sa-key.json \ |
| 53 | +# ./etl-benchmarks/scripts/mem_profile.sh |
| 54 | + |
# --- Checks ---
# Apple Instruments only exists on macOS, so refuse to run anywhere else.
case "$(uname -s)" in
  Darwin) ;;
  *)
    echo "❌ This script requires macOS (Apple Instruments)." >&2
    exit 1
    ;;
esac

# The cargo subcommand must be installed and on PATH.
command -v cargo-instruments >/dev/null 2>&1 || {
  echo "❌ cargo-instruments not found. Install with: cargo install cargo-instruments" >&2
  exit 1
}

# xctrace ships with full Xcode, not with the Command Line Tools alone.
if xcrun --find xctrace >/dev/null 2>&1; then
  :
else
  cat >&2 << 'EOF'
❌ xctrace not found.

Apple's xctrace is part of full Xcode (v12+). To install and make it available:
 1) Install Xcode from the App Store (not only Command Line Tools).
 2) Point the developer dir to Xcode:
 sudo xcode-select -s /Applications/Xcode.app/Contents/Developer
 3) Run first-launch setup and accept the license:
 sudo xcodebuild -runFirstLaunch
 4) Verify:
 xcrun --find xctrace

After installing, rerun this script.
EOF
  exit 1
fi

# --- Config ---
# Each setting keeps the caller's value when set and non-empty, otherwise the
# default. RUN_LABEL is derived from TEMPLATE, so TEMPLATE must be resolved first.
: "${BENCH_NAME:=table_copies}"
: "${PACKAGE:=etl-benchmarks}"
: "${TEMPLATE:=Allocations}"
: "${OPEN_TRACE:=false}"
: "${TRACE_DIR:=target/instruments}"
: "${RUN_LABEL:=etl-benchmarks-${TEMPLATE// /-}-$(date +%Y%m%d%H%M%S)}"

# Database defaults (the POSTGRES_* variables are defaulted in place as well)
: "${POSTGRES_USER:=postgres}"
: "${POSTGRES_PASSWORD:=postgres}"
: "${POSTGRES_DB:=bench}"
: "${POSTGRES_PORT:=5430}"
: "${POSTGRES_HOST:=localhost}"
DB_USER="${POSTGRES_USER}"
DB_PASSWORD="${POSTGRES_PASSWORD}"
DB_NAME="${POSTGRES_DB}"
DB_PORT="${POSTGRES_PORT}"
DB_HOST="${POSTGRES_HOST}"

# Benchmark defaults
: "${PUBLICATION_NAME:=bench_pub}"
: "${BATCH_MAX_SIZE:=1000000}"
: "${BATCH_MAX_FILL_MS:=10000}"
: "${MAX_TABLE_SYNC_WORKERS:=8}"
: "${LOG_TARGET:=terminal}"
: "${DESTINATION:=null}"

# Reject unsupported enum-like settings before doing any work.
case "${DESTINATION}" in
  null | big-query) ;;
  *)
    echo "❌ Invalid DESTINATION='${DESTINATION}'. Supported: null, big-query" >&2
    exit 1
    ;;
esac
case "${LOG_TARGET}" in
  terminal | file) ;;
  *)
    echo "❌ Invalid LOG_TARGET='${LOG_TARGET}'. Supported: terminal, file" >&2
    exit 1
    ;;
esac

# The BigQuery destination needs its three settings and a readable key file.
if [[ "${DESTINATION}" == "big-query" ]]; then
  : "${BQ_PROJECT_ID:?❌ BQ_PROJECT_ID is required for DESTINATION=big-query}"
  : "${BQ_DATASET_ID:?❌ BQ_DATASET_ID is required for DESTINATION=big-query}"
  : "${BQ_SA_KEY_FILE:?❌ BQ_SA_KEY_FILE is required for DESTINATION=big-query}"
  [[ -f "${BQ_SA_KEY_FILE}" ]] || {
    echo "❌ BigQuery SA key file not found: ${BQ_SA_KEY_FILE}" >&2
    exit 1
  }
fi

# Summary of the effective configuration.
printf '%s\n' \
  "🧪 Memory profiling with cargo-instruments" \
  " Template: ${TEMPLATE}" \
  " Package: ${PACKAGE}" \
  " Bench: ${BENCH_NAME}" \
  " Label: ${RUN_LABEL}" \
  " Trace dir:${TRACE_DIR}" \
  " Open UI: ${OPEN_TRACE}" \
  " Dest: ${DESTINATION}"
| 137 | + |
#######################################
# Build the argument tail handed to the bench binary (everything after `--`).
# Globals (read):
#   LOG_TARGET, DB_HOST, DB_PORT, DB_NAME, DB_USER, DB_PASSWORD,
#   PUBLICATION_NAME, BATCH_MAX_SIZE, BATCH_MAX_FILL_MS,
#   MAX_TABLE_SYNC_WORKERS, DESTINATION, and BQ_PROJECT_ID / BQ_DATASET_ID /
#   BQ_SA_KEY_FILE when DESTINATION=big-query.
# Outputs:
#   stdout: every argument shell-quoted with printf '%q', each followed by a
#           space, so the caller can decode the list without losing word
#           boundaries.
#   stderr: progress messages and errors (psql's own errors included).
# Exits:
#   1 when psql is missing, the OID query fails, or no matching tables exist.
#   The function is meant to run inside a command substitution, where `exit`
#   ends only that subshell and the caller sees the non-zero status.
#######################################
build_bench_args() {
  local args=("--log-target" "${LOG_TARGET}")
  args+=("run" "--host" "${DB_HOST}" "--port" "${DB_PORT}" "--database" "${DB_NAME}" "--username" "${DB_USER}")
  if [[ -n "${DB_PASSWORD}" ]]; then
    args+=("--password" "${DB_PASSWORD}")
  fi
  args+=("--publication-name" "${PUBLICATION_NAME}" "--batch-max-size" "${BATCH_MAX_SIZE}" "--batch-max-fill-ms" "${BATCH_MAX_FILL_MS}" "--max-table-sync-workers" "${MAX_TABLE_SYNC_WORKERS}")

  # For table_copies we require explicit table ids; fetch via psql like benchmark.sh
  if ! command -v psql >/dev/null 2>&1; then
    echo "❌ psql not found; required to query table IDs." >&2
    exit 1
  fi
  echo "🔍 Fetching TPC-C table OIDs..." >&2
  local oids
  # psql's stderr is deliberately left visible: a refused connection or bad
  # credentials must not be mistaken for "tables do not exist".
  if ! oids=$(PGPASSWORD="${DB_PASSWORD}" psql -h "${DB_HOST}" -U "${DB_USER}" -p "${DB_PORT}" -d "${DB_NAME}" -tAc "
    select string_agg(oid::text, ',')
    from pg_class
    where relname in ('customer','district','item','new_order','order_line','orders','stock','warehouse')
      and relkind = 'r';
  "); then
    echo "❌ psql query for table IDs failed (see error above). Check the POSTGRES_* connection settings." >&2
    exit 1
  fi
  # string_agg yields NULL (an empty line with -tA) when no table matches.
  if [[ -z "${oids}" ]]; then
    echo "❌ Could not retrieve table IDs. Ensure TPC-C tables exist. Run etl-benchmarks/scripts/prepare_tpcc.sh first." >&2
    exit 1
  fi
  echo "✅ Table OIDs: ${oids}" >&2
  args+=("--table-ids" "${oids}")

  args+=("--destination" "${DESTINATION}")
  if [[ "${DESTINATION}" == "big-query" ]]; then
    args+=("--bq-project-id" "${BQ_PROJECT_ID}" "--bq-dataset-id" "${BQ_DATASET_ID}" "--bq-sa-key-file" "${BQ_SA_KEY_FILE}")
  fi
  printf '%q ' "${args[@]}"
}
| 173 | + |
# --- Run ---
# Profile the bench's run phase under Instruments and store the trace at
# ${TRACE_DIR}/${RUN_LABEL}.trace.
echo "🚀 Launching cargo instruments (${TEMPLATE})…"
mkdir -p "${TRACE_DIR}"

# Use explicit .trace path to encode label in filename
TRACE_PATH="${TRACE_DIR}/${RUN_LABEL}.trace"

INSTR_ARGS=(cargo instruments -t "${TEMPLATE}" --package "${PACKAGE}" --bench "${BENCH_NAME}" --output "${TRACE_PATH}")
# cargo-instruments opens the trace by default; add --no-open when OPEN_TRACE=false
if [[ "${OPEN_TRACE}" != "true" ]]; then
  INSTR_ARGS+=(--no-open)
fi

# build_bench_args prints its arguments shell-quoted (printf '%q'). eval is
# used only to decode that quoted list back into an array; the command itself
# is then run as an array, so values containing spaces (e.g. TEMPLATE="VM
# Tracker", or a TRACE_DIR with spaces) stay single arguments and are never
# re-parsed by the shell.
BENCH_TAIL=$(build_bench_args)
BENCH_ARGS=()
eval "BENCH_ARGS=(${BENCH_TAIL})"

# Log a copy-pasteable form of the command.
# NOTE(review): this includes --password when DB_PASSWORD is non-empty, as the
# previous log line did — consider masking it if logs are shared.
printf '$ '
printf '%q ' "${INSTR_ARGS[@]}" -- "${BENCH_ARGS[@]}"
printf '\n'

"${INSTR_ARGS[@]}" -- "${BENCH_ARGS[@]}"

echo "✨ Trace saved to: ${TRACE_PATH}"
0 commit comments