Skip to content

Commit 18a91de

Browse files
committed
feat: Enhance benchmark infrastructure with profiling and parallelism matrix
Changes to benchmark-docker.sh:
- Use GUIDES_PROFILING=1 for accurate memory metrics via memory_get_peak_usage()
- Add 4th parameter for parallel mode: sequential, auto, or a number (e.g., 16)
- Include parallel_mode in result filenames and JSON output
- Memory is now sourced from PHP profiling instead of /usr/bin/time (/usr/bin/time remains the fallback when no profiling output is found)

New benchmark-main.sh:
- Dedicated script for benchmarking the main branch with the official container
- Uses ghcr.io/typo3-documentation/render-guides:latest
- Memory via /usr/bin/time (PHP profiling is not available in the official image)

New run-full-matrix.sh:
- Runs the complete benchmark matrix:
  * main branch (official container)
  * feature branch: sequential (--parallel-workers=-1)
  * feature branch: auto (--parallel-workers=0)
  * feature branch: 16 workers (--parallel-workers=16)
- Supports small, large, changelog, or 'all' doc types

Usage examples:
  ./benchmark/benchmark-docker.sh cold 3 large sequential
  ./benchmark/benchmark-docker.sh cold 3 large auto
  ./benchmark/benchmark-docker.sh cold 3 large 16
  ./benchmark/run-full-matrix.sh large
1 parent 1772643 commit 18a91de

File tree

3 files changed

+426
-20
lines changed

3 files changed

+426
-20
lines changed

benchmark/benchmark-docker.sh

Lines changed: 59 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,11 @@
22
#
33
# Run benchmarks inside Docker container for reproducibility
44
#
5-
# Usage: ./benchmark/benchmark-docker.sh [scenario] [runs] [docs-type]
5+
# Usage: ./benchmark/benchmark-docker.sh [scenario] [runs] [docs-type] [parallel-mode]
66
#
77
# Scenarios: cold, warm, partial, all
8-
# Docs: small (Documentation-rendertest), large (TYPO3CMS-Reference-CoreApi)
8+
# Docs: small (Documentation-rendertest), large (TYPO3CMS-Reference-CoreApi), changelog
9+
# Parallel modes: auto (default), sequential (aliases: seq, none), or a worker count (e.g., 16)
910
#
1011

1112
set -euo pipefail
@@ -17,6 +18,24 @@ RESULTS_DIR="$SCRIPT_DIR/results"
1718
SCENARIO="${1:-cold}"
1819
RUNS="${2:-3}"
1920
DOCS_TYPE="${3:-small}"
21+
PARALLEL_MODE="${4:-auto}"

# Convert parallel mode to the --parallel-workers value and a short label.
#   auto                 -> workers 0   label "auto"
#   sequential|seq|none  -> workers -1  label "sequential"
#   <integer>            -> workers <integer>, label "p<integer>"
# Anything else is rejected up front: the value ends up in the result
# filename, the JSON report and the guides command line, so a typo or a
# value containing "/" or quotes would otherwise corrupt those outputs
# instead of failing fast.
case "$PARALLEL_MODE" in
    auto)
        PARALLEL_WORKERS="0"
        PARALLEL_LABEL="auto"
        ;;
    sequential|seq|none)
        PARALLEL_WORKERS="-1"
        PARALLEL_LABEL="sequential"
        ;;
    *)
        # Must be an integer worker count (optional leading minus is kept
        # for backward compatibility with explicit "-1").
        if [[ ! "$PARALLEL_MODE" =~ ^-?[0-9]+$ ]]; then
            printf 'Invalid parallel mode: %s (expected auto, sequential, or an integer worker count)\n' \
                "$PARALLEL_MODE" >&2
            exit 2
        fi
        PARALLEL_WORKERS="$PARALLEL_MODE"
        PARALLEL_LABEL="p${PARALLEL_MODE}"
        ;;
esac
2039

2140
BRANCH=$(cd "$PROJECT_DIR" && git rev-parse --abbrev-ref HEAD 2>/dev/null | sed 's/\//_/g' || echo "unknown")
2241
COMMIT=$(cd "$PROJECT_DIR" && git rev-parse --short HEAD 2>/dev/null || echo "unknown")
@@ -82,21 +101,22 @@ clean_caches() {
82101
# Clean shared cache directory (Twig cache, inventory cache, etc.)
83102
rm -rf /tmp/typo3-guides-benchmark-cache/* 2>/dev/null || true
84103
# Use docker to clean root-owned files from previous runs
85-
docker run --rm -v /tmp:/tmp alpine sh -c "rm -rf /tmp/typo3-guides-* /tmp/benchmark-output /tmp/benchmark-log*" 2>/dev/null || true
104+
docker run --rm -v /tmp:/tmp alpine sh -c "rm -rf /tmp/typo3-guides-* /tmp/benchmark-output /tmp/benchmark-log* /tmp/benchmark-profiling*" 2>/dev/null || true
86105
# Remove incremental rendering cache from docs directory (if stored there)
87106
rm -f "$PROJECT_DIR/$DOCS_INPUT/_build_meta.json" 2>/dev/null || true
88107
# Remove .cache directory used by incremental rendering
89108
rm -rf "$PROJECT_DIR/.cache" 2>/dev/null || true
90109
}
91110

92-
# Run single benchmark with CPU and memory metrics via /usr/bin/time inside container
111+
# Run single benchmark with profiling for accurate memory metrics
93112
# Pass "fresh" as second arg to force clean output directory
94113
run_benchmark_simple() {
95114
local run_num=$1
96115
local fresh_output="${2:-no}"
97116
local output_dir="/tmp/benchmark-output"
98117
local log_file="/tmp/benchmark-log-$run_num.txt"
99118
local time_file="/tmp/benchmark-time-$run_num.txt"
119+
local profiling_file="/tmp/benchmark-profiling-$run_num.json"
100120

101121
# Only clean output dir if fresh is requested (cold scenario)
102122
if [ "$fresh_output" = "fresh" ]; then
@@ -110,37 +130,52 @@ run_benchmark_simple() {
110130
config_arg="--config=$DOCS_INPUT"
111131
fi
112132

113-
# Run container with /usr/bin/time -v INSIDE the container for accurate metrics
114-
# Note: project mounted read-write so incremental rendering cache can be written
115133
# Mount shared /tmp for Twig cache persistence between warm runs
116134
local shared_tmp="/tmp/typo3-guides-benchmark-cache"
117135
mkdir -p "$shared_tmp"
118136

119-
# Run time inside container, output time stats to stderr which we capture
137+
# Run with:
138+
# - /usr/bin/time -v for wall time and CPU%
139+
# - GUIDES_PROFILING=1 for PHP-reported memory via memory_get_peak_usage()
140+
# - GUIDES_PROFILING_OUTPUT for the profiling JSON path (/tmp/profiling.json in the container, i.e. $shared_tmp/profiling.json on the host)
120141
docker run --rm \
121142
--user "$(id -u):$(id -g)" \
122143
-v "$PROJECT_DIR:/project" \
123144
-v "$output_dir:/output" \
124145
-v "$shared_tmp:/tmp" \
146+
-e GUIDES_PROFILING=1 \
147+
-e GUIDES_PROFILING_OUTPUT="/tmp/profiling.json" \
125148
--entrypoint /usr/bin/time \
126149
"$IMAGE_TAG" \
127-
-v php /opt/guides/vendor/bin/guides --no-progress $config_arg --output=/output "$DOCS_INPUT" \
150+
-v php /opt/guides/vendor/bin/guides --no-progress $config_arg --output=/output --parallel-workers="$PARALLEL_WORKERS" "$DOCS_INPUT" \
128151
> "$log_file" 2> "$time_file"
129152
local docker_exit=$?
130153

131-
# Parse /usr/bin/time output for metrics (GNU time format)
132-
local elapsed user_time sys_time peak_memory_kb cpu_percent
154+
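# Copy profiling output from the container's /tmp (mounted from $shared_tmp on the host).
# Best-effort: a missing file is ignored here and triggers the /usr/bin/time fallback below.
# NOTE(review): $shared_tmp persists across warm runs, so a profiling.json left by an
# earlier run could be picked up if this run writes none — confirm it is removed beforehand.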
155+
cp "$shared_tmp/profiling.json" "$profiling_file" 2>/dev/null || true
156+
157+
# Parse /usr/bin/time output for wall time and CPU%
158+
local elapsed user_time sys_time cpu_percent
133159
elapsed=$(grep "Elapsed (wall clock)" "$time_file" | sed 's/.*: //' | awk -F: '{if (NF==3) print $1*3600+$2*60+$3; else if (NF==2) print $1*60+$2; else print $1}')
134160
user_time=$(grep "User time" "$time_file" | awk '{print $NF}')
135161
sys_time=$(grep "System time" "$time_file" | awk '{print $NF}')
136-
peak_memory_kb=$(grep "Maximum resident set size" "$time_file" | awk '{print $NF}')
137162
cpu_percent=$(grep "Percent of CPU" "$time_file" | sed 's/.*: //' | tr -d '%')
138163

139-
# Convert to MB and calculate totals
140-
local peak_memory_mb cpu_time
141-
peak_memory_mb=$(echo "scale=1; ${peak_memory_kb:-0} / 1024" | bc)
164+
local cpu_time
142165
cpu_time=$(echo "scale=2; ${user_time:-0} + ${sys_time:-0}" | bc)
143166

167+
# Get memory from PHP profiling (accurate memory_get_peak_usage)
168+
local peak_memory_mb
169+
if [ -f "$profiling_file" ]; then
170+
peak_memory_mb=$(jq -r '.memory_mb.peak // 0' "$profiling_file" 2>/dev/null || echo "0")
171+
else
172+
# Fallback to /usr/bin/time if profiling not available
173+
local peak_memory_kb
174+
peak_memory_kb=$(grep "Maximum resident set size" "$time_file" | awk '{print $NF}')
175+
peak_memory_mb=$(echo "scale=1; ${peak_memory_kb:-0} / 1024" | bc)
176+
log_warn "Profiling output not found, using /usr/bin/time for memory (less accurate)"
177+
fi
178+
144179
# Count output files
145180
local file_count
146181
file_count=$(find "$output_dir" -name "*.html" 2>/dev/null | wc -l | tr -d ' ')
@@ -159,7 +194,7 @@ run_scenario() {
159194
local memories=()
160195
local files=0
161196

162-
log_info "Running scenario: $scenario ($RUNS runs, docs: $DOCS_TYPE)"
197+
log_info "Running scenario: $scenario ($RUNS runs, docs: $DOCS_TYPE, parallel: $PARALLEL_LABEL)"
163198

164199
case "$scenario" in
165200
cold)
@@ -254,16 +289,18 @@ run_scenario() {
254289
local cpu_pct_avg=$(echo "scale=0; $cpu_pct_sum / ${#cpu_percents[@]}" | bc)
255290
local mem_avg=$(echo "scale=1; $mem_sum / ${#memories[@]}" | bc)
256291

257-
# Save to JSON
292+
# Save to JSON - include parallel mode in filename
258293
mkdir -p "$RESULTS_DIR"
259-
local result_file="$RESULTS_DIR/${BRANCH}_${scenario}_${DOCS_TYPE}_${TIMESTAMP}.json"
294+
local result_file="$RESULTS_DIR/${BRANCH}_${PARALLEL_LABEL}_${scenario}_${DOCS_TYPE}_${TIMESTAMP}.json"
260295

261296
cat > "$result_file" << EOF
262297
{
263298
"branch": "$BRANCH",
264299
"commit": "$COMMIT",
265300
"scenario": "$scenario",
266301
"docs_type": "$DOCS_TYPE",
302+
"parallel_mode": "$PARALLEL_LABEL",
303+
"parallel_workers": "$PARALLEL_WORKERS",
267304
"timestamp": "$TIMESTAMP",
268305
"runs": $RUNS,
269306
"metrics": {
@@ -281,7 +318,8 @@ run_scenario() {
281318
"memory": {
282319
"avg_mb": $mem_avg,
283320
"min_mb": $mem_min,
284-
"max_mb": $mem_max
321+
"max_mb": $mem_max,
322+
"source": "php_profiling"
285323
},
286324
"files_rendered": $files
287325
},
@@ -296,10 +334,10 @@ EOF
296334

297335
# Print summary
298336
echo ""
299-
echo "=== $scenario Summary ==="
337+
echo "=== $scenario Summary (parallel: $PARALLEL_LABEL) ==="
300338
echo " Wall Time: ${time_avg}s (min: ${time_min}s, max: ${time_max}s)"
301339
echo " CPU Time: ${cpu_avg}s (~${cpu_pct_avg}% utilization)"
302-
echo " Memory: ${mem_avg}MB peak"
340+
echo " Memory: ${mem_avg}MB peak (from PHP profiling)"
303341
echo " Files: $files"
304342
echo ""
305343
}
@@ -309,6 +347,7 @@ echo "============================================"
309347
echo "Benchmark: $SCENARIO"
310348
echo "Branch: $BRANCH ($COMMIT)"
311349
echo "Docs: $DOCS_TYPE ($DOCS_INPUT)"
350+
echo "Parallel: $PARALLEL_LABEL (--parallel-workers=$PARALLEL_WORKERS)"
312351
echo "Runs: $RUNS"
313352
echo "============================================"
314353
echo ""

0 commit comments

Comments
 (0)