Skip to content

Commit cb1edcc

Browse files
authored
feat(profiling): Add script to perform memory profiling (#343)
1 parent 8b4c98b commit cb1edcc

File tree

3 files changed

+195
-1
lines changed

3 files changed

+195
-1
lines changed

etl-benchmarks/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,3 +26,6 @@ tracing = { workspace = true, default-features = true }
2626
[[bench]]
2727
name = "table_copies"
2828
harness = false
29+
30+
[profile.bench]
31+
debug = true

etl-benchmarks/benches/table_copies.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ enum Commands {
110110
#[arg(long)]
111111
bq_max_staleness_mins: Option<u16>,
112112
/// BigQuery maximum concurrent streams (optional)
113-
#[arg(long, default_value = "1")]
113+
#[arg(long, default_value = "32")]
114114
bq_max_concurrent_streams: usize,
115115
},
116116
/// Prepare the benchmark environment by cleaning up replication slots
Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
#!/usr/bin/env bash
2+
set -euo pipefail
3+
4+
# Memory Profiling for Benchmarks (cargo-instruments)
5+
#
6+
# Runs the `etl-benchmarks` crate's benchmark under Apple Instruments for
7+
# memory profiling. Defaults to the "Allocations" template and the
8+
# `table_copies` bench target.
9+
#
10+
# Prerequisites:
11+
# - macOS with full Xcode installed (xctrace is not shipped with the Command Line Tools alone)
12+
# - cargo-instruments (install: `cargo install cargo-instruments`)
13+
# - Postgres reachable per your env (same as other bench scripts)
14+
#
15+
# Environment Variables:
16+
# BENCH_NAME Bench target name. Default: table_copies
17+
# PACKAGE Cargo package. Default: etl-benchmarks
18+
# TEMPLATE Instruments template (Allocations|Leaks|VM Tracker|Time Profiler). Default: Allocations
19+
# OPEN_TRACE Open Instruments UI after run (true|false). Default: false
20+
# RUN_LABEL Run name label for trace. Default: auto timestamped
21+
# TRACE_DIR Output directory for traces. Default: target/instruments
22+
# LOG_TARGET Benchmark logs target (terminal|file). Default: terminal
23+
# DESTINATION Destination (null|big-query). Default: null
24+
#
25+
# Database connection (same defaults as benchmark.sh / prepare_tpcc.sh):
26+
# POSTGRES_USER Default: postgres
27+
# POSTGRES_PASSWORD Default: postgres
28+
# POSTGRES_DB Default: bench
29+
# POSTGRES_PORT Default: 5430
30+
# POSTGRES_HOST Default: localhost
31+
# Benchmark params:
32+
# PUBLICATION_NAME Default: bench_pub
33+
# BATCH_MAX_SIZE Default: 1000000
34+
# BATCH_MAX_FILL_MS Default: 10000
35+
# MAX_TABLE_SYNC_WORKERS Default: 8
36+
# BQ_PROJECT_ID, BQ_DATASET_ID, BQ_SA_KEY_FILE (if DESTINATION=big-query)
37+
#
38+
# Examples:
39+
# # Profile allocations for the default bench (null destination)
40+
# ./etl-benchmarks/scripts/mem_profile.sh
41+
#
42+
# # Open the Instruments UI afterwards
43+
# OPEN_TRACE=true ./etl-benchmarks/scripts/mem_profile.sh
44+
#
45+
# # Use the Leaks template instead of Allocations
46+
# TEMPLATE="Leaks" ./etl-benchmarks/scripts/mem_profile.sh
47+
#
48+
# # Profile BigQuery destination
49+
# DESTINATION=big-query \
50+
# BQ_PROJECT_ID=my-project \
51+
# BQ_DATASET_ID=my_dataset \
52+
# BQ_SA_KEY_FILE=/path/to/sa-key.json \
53+
# ./etl-benchmarks/scripts/mem_profile.sh
54+
55+
# --- Checks ---
# Apple Instruments only exists on macOS, so bail out on any other kernel.
case "$(uname -s)" in
  Darwin) ;;
  *)
    echo "❌ This script requires macOS (Apple Instruments)." >&2
    exit 1
    ;;
esac

# The cargo subcommand must be installed and on PATH.
command -v cargo-instruments >/dev/null 2>&1 || {
  echo "❌ cargo-instruments not found. Install with: cargo install cargo-instruments" >&2
  exit 1
}

# xctrace must be resolvable through xcrun; print setup steps when it is not.
xcrun --find xctrace >/dev/null 2>&1 || {
  cat >&2 << 'EOF'
❌ xctrace not found.

Apple's xctrace is part of full Xcode (v12+). To install and make it available:
1) Install Xcode from the App Store (not only Command Line Tools).
2) Point the developer dir to Xcode:
sudo xcode-select -s /Applications/Xcode.app/Contents/Developer
3) Run first-launch setup and accept the license:
sudo xcodebuild -runFirstLaunch
4) Verify:
xcrun --find xctrace

After installing, rerun this script.
EOF
  exit 1
}
85+
86+
# --- Config ---
# Profiling knobs: keep any non-empty value supplied by the caller, otherwise
# fall back to the default listed in the header.
: "${BENCH_NAME:=table_copies}"
: "${PACKAGE:=etl-benchmarks}"
: "${TEMPLATE:=Allocations}"
: "${OPEN_TRACE:=false}"
: "${TRACE_DIR:=target/instruments}"
# Default label: template name (spaces replaced by dashes) plus a timestamp.
if [[ -z "${RUN_LABEL:-}" ]]; then
  RUN_LABEL="etl-benchmarks-${TEMPLATE// /-}-$(date +%Y%m%d%H%M%S)"
fi

# Database defaults
# The POSTGRES_* variables receive the default too; DB_* mirror them.
: "${POSTGRES_USER:=postgres}"
: "${POSTGRES_PASSWORD:=postgres}"
: "${POSTGRES_DB:=bench}"
: "${POSTGRES_PORT:=5430}"
: "${POSTGRES_HOST:=localhost}"
DB_USER="${POSTGRES_USER}"
DB_PASSWORD="${POSTGRES_PASSWORD}"
DB_NAME="${POSTGRES_DB}"
DB_PORT="${POSTGRES_PORT}"
DB_HOST="${POSTGRES_HOST}"

# Benchmark defaults
: "${PUBLICATION_NAME:=bench_pub}"
: "${BATCH_MAX_SIZE:=1000000}"
: "${BATCH_MAX_FILL_MS:=10000}"
: "${MAX_TABLE_SYNC_WORKERS:=8}"
: "${LOG_TARGET:=terminal}"
: "${DESTINATION:=null}"
108+
109+
# Validate destination
# Only the two destinations named in the error message are accepted.
case "${DESTINATION}" in
  null | big-query) ;;
  *)
    echo "❌ Invalid DESTINATION='${DESTINATION}'. Supported: null, big-query" >&2
    exit 1
    ;;
esac
# Same treatment for the log target.
case "${LOG_TARGET}" in
  terminal | file) ;;
  *)
    echo "❌ Invalid LOG_TARGET='${LOG_TARGET}'. Supported: terminal, file" >&2
    exit 1
    ;;
esac

# BigQuery runs need all three BQ_* settings plus a key file that exists.
[[ "${DESTINATION}" != "big-query" ]] || {
  : "${BQ_PROJECT_ID:?❌ BQ_PROJECT_ID is required for DESTINATION=big-query}"
  : "${BQ_DATASET_ID:?❌ BQ_DATASET_ID is required for DESTINATION=big-query}"
  : "${BQ_SA_KEY_FILE:?❌ BQ_SA_KEY_FILE is required for DESTINATION=big-query}"
  [[ -f "${BQ_SA_KEY_FILE}" ]] || {
    echo "❌ BigQuery SA key file not found: ${BQ_SA_KEY_FILE}" >&2
    exit 1
  }
}
128+
129+
# Summarize the effective settings on stdout, one line per setting.
printf '%s\n' \
  "🧪 Memory profiling with cargo-instruments" \
  "   Template: ${TEMPLATE}" \
  "   Package:  ${PACKAGE}" \
  "   Bench:    ${BENCH_NAME}" \
  "   Label:    ${RUN_LABEL}" \
  "   Trace dir:${TRACE_DIR}" \
  "   Open UI:  ${OPEN_TRACE}" \
  "   Dest:     ${DESTINATION}"
137+
138+
#######################################
# Build common bench arg tail: the arguments handed to the bench binary
# after `--`.
# Globals (read):
#   LOG_TARGET, DB_HOST, DB_PORT, DB_NAME, DB_USER, DB_PASSWORD,
#   PUBLICATION_NAME, BATCH_MAX_SIZE, BATCH_MAX_FILL_MS,
#   MAX_TABLE_SYNC_WORKERS, DESTINATION, and (only when
#   DESTINATION=big-query) BQ_PROJECT_ID, BQ_DATASET_ID, BQ_SA_KEY_FILE.
# Outputs:
#   stdout - every argument escaped with printf %q and followed by a space,
#            so the caller can re-parse the string without losing word
#            boundaries.
#   stderr - progress and error messages.
# Exits:
#   1 when psql is missing, the OID query fails, or no table matches.
#   The script calls this inside $(...), so the exit ends that subshell.
#######################################
build_bench_args() {
  local args=("--log-target" "${LOG_TARGET}")
  args+=("run" "--host" "${DB_HOST}" "--port" "${DB_PORT}" "--database" "${DB_NAME}" "--username" "${DB_USER}")
  # The password flag is omitted entirely when no password is configured.
  if [[ -n "${DB_PASSWORD}" ]]; then
    args+=("--password" "${DB_PASSWORD}")
  fi
  args+=("--publication-name" "${PUBLICATION_NAME}" "--batch-max-size" "${BATCH_MAX_SIZE}" "--batch-max-fill-ms" "${BATCH_MAX_FILL_MS}" "--max-table-sync-workers" "${MAX_TABLE_SYNC_WORKERS}")

  # For table_copies we require explicit table ids; fetch via psql like benchmark.sh
  if ! command -v psql >/dev/null 2>&1; then
    echo "❌ psql not found; required to query table IDs." >&2
    exit 1
  fi
  echo "🔍 Fetching TPC-C table OIDs..." >&2

  local query="
select string_agg(oid::text, ',')
from pg_class
where relname in ('customer','district','item','new_order','order_line','orders','stock','warehouse')
and relkind = 'r';
"
  local oids
  # -X skips psqlrc so user startup settings cannot leak into the captured
  # output. psql's stderr is left visible on purpose: a connection or auth
  # failure must not be reported as "tables missing".
  if ! oids=$(PGPASSWORD="${DB_PASSWORD}" psql -X -h "${DB_HOST}" -U "${DB_USER}" -p "${DB_PORT}" -d "${DB_NAME}" -tAc "${query}"); then
    echo "❌ psql query failed (see the error above). Check the POSTGRES_* connection settings." >&2
    exit 1
  fi
  # string_agg over zero rows yields NULL, which psql -tA prints as empty.
  if [[ -z "${oids}" ]]; then
    echo "❌ Could not retrieve table IDs. Ensure TPC-C tables exist. Run etl-benchmarks/scripts/prepare_tpcc.sh first." >&2
    exit 1
  fi
  echo "✅ Table OIDs: ${oids}" >&2
  args+=("--table-ids" "${oids}")

  args+=("--destination" "${DESTINATION}")
  if [[ "${DESTINATION}" == "big-query" ]]; then
    args+=("--bq-project-id" "${BQ_PROJECT_ID}" "--bq-dataset-id" "${BQ_DATASET_ID}" "--bq-sa-key-file" "${BQ_SA_KEY_FILE}")
  fi
  printf '%q ' "${args[@]}"
}
173+
174+
# Run Instruments on the bench's run phase
echo "🚀 Launching cargo instruments (${TEMPLATE})…"
mkdir -p "${TRACE_DIR}"

# Use explicit .trace path to encode label in filename
TRACE_PATH="${TRACE_DIR}/${RUN_LABEL}.trace"

INSTR_ARGS=(cargo instruments -t "${TEMPLATE}" --package "${PACKAGE}" --bench "${BENCH_NAME}" --output "${TRACE_PATH}")
# cargo-instruments opens the trace by default; add --no-open when OPEN_TRACE=false
if [[ "${OPEN_TRACE}" != "true" ]]; then
  INSTR_ARGS+=(--no-open)
fi

# build_bench_args prints its arguments %q-escaped; under `set -e` a failure
# inside it aborts the script at this assignment.
BENCH_TAIL=$(build_bench_args)

# Rebuild the bench arguments as an array. eval is confined to this one
# assignment and only sees %q-escaped text, so word boundaries survive and
# nothing is glob-expanded or executed.
BENCH_ARGS=()
eval "BENCH_ARGS=(${BENCH_TAIL})"

# Log a copy-pasteable command line. Escaping INSTR_ARGS keeps multi-word
# values such as TEMPLATE="VM Tracker" readable as a single argument.
# NOTE(review): this line also prints the --password value — confirm that
# is acceptable for your terminal/log setup.
echo "$ $(printf '%q ' "${INSTR_ARGS[@]}")-- ${BENCH_TAIL}"

# Run the command directly from the arrays so arguments containing spaces
# are passed through intact.
"${INSTR_ARGS[@]}" -- "${BENCH_ARGS[@]}"

echo "✨ Trace saved to: ${TRACE_PATH}"

0 commit comments

Comments
 (0)