Commit 6c492f5
docs: add performance report and rendered docs for GitHub Pages
- HTML performance report comparing main vs feature/php-8.5-only
- Benchmark results: 36% improvement (small), 72% improvement (large)
- Rendered rendertest docs for visual comparison
- Lists all upstream patches and project changes

[DOCS] Add PR reference and parallel processing note

- Add CPU core count (16) to benchmark methodology
- Add note about pcntl_fork and --parallel-workers
- Add References section with link to PR #1143

docs: update performance report with CPU/memory metrics

- Add CPU usage and peak memory columns to benchmark tables
- Update benchmark values with latest measurements:
  * Changelog: 992s → 43.1s (-96%), 437% CPU, 942MB peak
  * Core API: 131.1s → 33.9s (-74%), 387% CPU, 567MB peak
  * Rendertest: 11.2s → 5.6s (-50%), 188% CPU, 120MB peak
- CPU >100% indicates parallel processing across multiple cores
- Update cold vs warm and incremental build tables

docs: Add main branch memory metrics to performance report

Split the performance comparison into two tables:
1. Time comparison (main vs feature)
2. Resource usage comparison (CPU + memory for both branches)

Key findings:
- TYPO3 Core API: 12,234 MB (main) vs 567 MB (feature) = 95.4% reduction
- Rendertest: 1,011 MB (main) vs 120 MB (feature) = 88.1% reduction

The massive memory savings come from the forked process architecture, which isolates document processing in child processes.

docs: update performance report with benchmark matrix data

- Update Rendertest metrics with accurate main branch data (8.16s, 97MB)
- Add Resource Usage Comparison table with main vs feature memory data
- Add Parallel Processing Comparison section (sequential, auto, 16 workers)
- Update Cold vs Warm and Incremental sections with new measurements
- Feature branch is 32% faster and uses 66% less memory even without parallelism

docs: fix report inaccuracies in resource usage and cold/warm tables

- Resource Usage: show 'not measured' for main branch large docs instead of assumed values
- Cold vs Warm: remove CPU/memory columns (they showed cold data, not warm)
- Add note explaining that the main branch was only benchmarked on small docs
- Mark Rendertest main branch values as '(measured)' for clarity

docs: update performance report with full benchmark matrix results

All benchmarks now run on all three doc types (small, large, changelog) with real measured values for both main and feature branches.

Key results:
- Changelog: 1180s → 56s (95% faster), 2986MB → 900MB (70% less memory)
- CoreAPI: 186s → 44s (76% faster)
- Rendertest: 9.3s → 6.8s (27% faster), 98MB → 33MB (66% less memory)

docs: add parallel processing comparison for large and extra-large docs

Added tables comparing sequential, auto, and 16-worker modes for:
- TYPO3 Core API (957 files): 71-78% faster than main
- TYPO3 Core Changelog (3667 files): 92-95% faster than main

Key findings:
- Sequential mode outperforms the parallel modes on these doc sets
- Main branch: 20 min cold, 15 min warm for the Changelog
- Feature branch: under 1 minute in all modes

docs: document incremental rendering bug and theoretical baseline

- Add warning box explaining the cache loss during parallel compilation
- Add single-document baseline measurement (~1.25s framework startup)
- Add theoretical optimal performance table showing the potential speedup
- Add root cause analysis explaining the fork/cache state issue
- Update 'partial' column to show the same results as warm (all docs re-render)

The incremental rendering infrastructure is fully implemented, but cache state is lost when ParallelCompiler forks child processes: exports are collected in the children and never merged back into the parent.
Parent: 131277b
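To make the root cause concrete, here is a minimal, editorial PHP sketch of the fork-and-merge pattern the last commit message describes. It is not the project's actual ParallelCompiler: compileChunk() and the shape of the exports array are hypothetical stand-ins, while pcntl_fork(), pcntl_waitpid(), stream_socket_pair(), and serialize() are real PHP APIs. A forked child only gets a copy-on-write snapshot of the parent's memory, so exports collected in the child vanish at exit unless they are explicitly shipped back and merged, which is the step the commit message reports as missing.

<?php
// Editorial sketch (not the actual ParallelCompiler): fork one child per
// chunk, collect its exports over a socket pair, merge in the parent.

/** Hypothetical stand-in for compiling a chunk of documents. */
function compileChunk(array $chunk): array
{
    return array_fill_keys($chunk, 'compiled');
}

function compileInParallel(array $chunks): array
{
    $exports = [];
    $pipes = [];

    foreach ($chunks as $chunk) {
        [$childEnd, $parentEnd] = stream_socket_pair(
            STREAM_PF_UNIX, STREAM_SOCK_STREAM, STREAM_IPPROTO_IP
        );
        $pid = pcntl_fork();
        if ($pid === 0) {
            // Child: memory is copy-on-write, so nothing written to
            // $exports in this process is ever visible to the parent.
            fclose($parentEnd);
            $childExports = compileChunk($chunk);
            // The step the commit message says is missing: serialize the
            // exports back to the parent before exiting.
            fwrite($childEnd, serialize($childExports));
            fclose($childEnd);
            exit(0);
        }
        fclose($childEnd);
        $pipes[$pid] = $parentEnd;
    }

    // Parent: read and merge each child's exports, then reap the child.
    foreach ($pipes as $pid => $pipe) {
        $exports += unserialize(stream_get_contents($pipe));
        fclose($pipe);
        pcntl_waitpid($pid, $status);
    }

    return $exports;
}

Without the write-back and merge (the fwrite/unserialize pair above), each child's exports die with the process and the parent's incremental-rendering cache never sees them, matching the behavior described in the report.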

File tree: 599 files changed, +379,117 −168 lines. (Large commit: most changed files are hidden by default; only a subset is shown below.)

benchmark/benchmark-docker.sh

Lines changed: 44 additions & 112 deletions
@@ -2,11 +2,10 @@
 #
 # Run benchmarks inside Docker container for reproducibility
 #
-# Usage: ./benchmark/benchmark-docker.sh [scenario] [runs] [docs-type] [parallel-mode]
+# Usage: ./benchmark/benchmark-docker.sh [scenario] [runs] [docs-type]
 #
 # Scenarios: cold, warm, partial, all
-# Docs: small (Documentation-rendertest), large (TYPO3CMS-Reference-CoreApi), changelog
-# Parallel modes: auto (default), sequential, 16, or any number
+# Docs: small (Documentation-rendertest), large (TYPO3CMS-Reference-CoreApi)
 #
 
 set -euo pipefail
@@ -18,24 +17,6 @@ RESULTS_DIR="$SCRIPT_DIR/results"
 SCENARIO="${1:-cold}"
 RUNS="${2:-3}"
 DOCS_TYPE="${3:-small}"
-PARALLEL_MODE="${4:-auto}"
-
-# Convert parallel mode to --parallel-workers value
-case "$PARALLEL_MODE" in
-    auto)
-        PARALLEL_WORKERS="0"
-        PARALLEL_LABEL="auto"
-        ;;
-    sequential|seq|none)
-        PARALLEL_WORKERS="-1"
-        PARALLEL_LABEL="sequential"
-        ;;
-    *)
-        # Assume it's a number
-        PARALLEL_WORKERS="$PARALLEL_MODE"
-        PARALLEL_LABEL="p${PARALLEL_MODE}"
-        ;;
-esac
 
 BRANCH=$(cd "$PROJECT_DIR" && git rev-parse --abbrev-ref HEAD 2>/dev/null | sed 's/\//_/g' || echo "unknown")
 COMMIT=$(cd "$PROJECT_DIR" && git rev-parse --short HEAD 2>/dev/null || echo "unknown")
@@ -67,14 +48,6 @@ case "$DOCS_TYPE" in
             "$SCRIPT_DIR/download-test-docs.sh" TYPO3CMS-Reference-CoreApi
         fi
         ;;
-    changelog)
-        DOCS_INPUT="benchmark/test-docs/TYPO3-Core-Changelog/typo3/sysext/core/Documentation"
-        # Ensure changelog docs are downloaded
-        if [ ! -d "$PROJECT_DIR/benchmark/test-docs/TYPO3-Core-Changelog" ]; then
-            log_info "Downloading TYPO3 Core Changelog documentation..."
-            "$SCRIPT_DIR/download-test-docs.sh" TYPO3-Core-Changelog
-        fi
-        ;;
     *)
         # Assume it's a custom path
         DOCS_INPUT="$DOCS_TYPE"
@@ -101,22 +74,20 @@ clean_caches() {
     # Clean shared cache directory (Twig cache, inventory cache, etc.)
     rm -rf /tmp/typo3-guides-benchmark-cache/* 2>/dev/null || true
     # Use docker to clean root-owned files from previous runs
-    docker run --rm -v /tmp:/tmp alpine sh -c "rm -rf /tmp/typo3-guides-* /tmp/benchmark-output /tmp/benchmark-log* /tmp/benchmark-profiling*" 2>/dev/null || true
+    docker run --rm -v /tmp:/tmp alpine sh -c "rm -rf /tmp/typo3-guides-* /tmp/benchmark-output /tmp/benchmark-log*" 2>/dev/null || true
     # Remove incremental rendering cache from docs directory (if stored there)
     rm -f "$PROJECT_DIR/$DOCS_INPUT/_build_meta.json" 2>/dev/null || true
     # Remove .cache directory used by incremental rendering
     rm -rf "$PROJECT_DIR/.cache" 2>/dev/null || true
 }
 
-# Run single benchmark with profiling for accurate memory metrics
+# Run single benchmark using wall-clock timing
 # Pass "fresh" as second arg to force clean output directory
 run_benchmark_simple() {
     local run_num=$1
     local fresh_output="${2:-no}"
     local output_dir="/tmp/benchmark-output"
     local log_file="/tmp/benchmark-log-$run_num.txt"
-    local time_file="/tmp/benchmark-time-$run_num.txt"
-    local profiling_file="/tmp/benchmark-profiling-$run_num.json"
 
     # Only clean output dir if fresh is requested (cold scenario)
     if [ "$fresh_output" = "fresh" ]; then
@@ -130,71 +101,60 @@ run_benchmark_simple() {
         config_arg="--config=$DOCS_INPUT"
     fi
 
+    # Run guides with wall-clock timing
+    local start_time end_time elapsed
+    start_time=$(date +%s.%N)
+
+    # Run container - mount full project at /project, output at /output
+    # Use relative paths for input and config
+    # Use --user to match host user for output permissions
+    # Note: project mounted read-write so incremental rendering cache can be written
     # Mount shared /tmp for Twig cache persistence between warm runs
     local shared_tmp="/tmp/typo3-guides-benchmark-cache"
     mkdir -p "$shared_tmp"
-
-    # Run with:
-    # - /usr/bin/time -v for wall time and CPU%
-    # - GUIDES_PROFILING=1 for PHP-reported memory via memory_get_peak_usage()
-    # - GUIDES_PROFILING_OUTPUT for JSON output
     docker run --rm \
         --user "$(id -u):$(id -g)" \
         -v "$PROJECT_DIR:/project" \
         -v "$output_dir:/output" \
         -v "$shared_tmp:/tmp" \
-        -e GUIDES_PROFILING=1 \
-        -e GUIDES_PROFILING_OUTPUT="/tmp/profiling.json" \
-        --entrypoint /usr/bin/time \
         "$IMAGE_TAG" \
-        -v php /opt/guides/vendor/bin/guides --no-progress $config_arg --output=/output --parallel-workers="$PARALLEL_WORKERS" "$DOCS_INPUT" \
-        > "$log_file" 2> "$time_file"
+        --no-progress $config_arg --output=/output "$DOCS_INPUT" > "$log_file" 2>&1
     local docker_exit=$?
 
-    # Copy profiling output from container's /tmp (which is shared_tmp)
-    cp "$shared_tmp/profiling.json" "$profiling_file" 2>/dev/null || true
-
-    # Parse /usr/bin/time output for wall time and CPU%
-    local elapsed user_time sys_time cpu_percent
-    elapsed=$(grep "Elapsed (wall clock)" "$time_file" | sed 's/.*: //' | awk -F: '{if (NF==3) print $1*3600+$2*60+$3; else if (NF==2) print $1*60+$2; else print $1}')
-    user_time=$(grep "User time" "$time_file" | awk '{print $NF}')
-    sys_time=$(grep "System time" "$time_file" | awk '{print $NF}')
-    cpu_percent=$(grep "Percent of CPU" "$time_file" | sed 's/.*: //' | tr -d '%')
-
-    local cpu_time
-    cpu_time=$(echo "scale=2; ${user_time:-0} + ${sys_time:-0}" | bc)
-
-    # Get memory from PHP profiling (accurate memory_get_peak_usage)
-    local peak_memory_mb
-    if [ -f "$profiling_file" ]; then
-        peak_memory_mb=$(jq -r '.memory_mb.peak // 0' "$profiling_file" 2>/dev/null || echo "0")
-    else
-        # Fallback to /usr/bin/time if profiling not available
-        local peak_memory_kb
-        peak_memory_kb=$(grep "Maximum resident set size" "$time_file" | awk '{print $NF}')
-        peak_memory_mb=$(echo "scale=1; ${peak_memory_kb:-0} / 1024" | bc)
-        log_warn "Profiling output not found, using /usr/bin/time for memory (less accurate)"
-    fi
+    end_time=$(date +%s.%N)
+    elapsed=$(echo "$end_time - $start_time" | bc)
 
     # Count output files
     local file_count
     file_count=$(find "$output_dir" -name "*.html" 2>/dev/null | wc -l | tr -d ' ')
 
-    # Output JSON result with extended metrics
-    echo "{\"total_time_seconds\": $elapsed, \"cpu_time_seconds\": $cpu_time, \"cpu_percent\": ${cpu_percent:-0}, \"peak_memory_mb\": $peak_memory_mb, \"files_rendered\": $file_count}"
+    # Estimate memory from container (rough estimate based on output size)
+    local output_size_kb estimated_memory_mb
+    output_size_kb=$(du -sk "$output_dir" 2>/dev/null | awk '{print $1}')
+    output_size_kb=${output_size_kb:-0}
+    # Rough heuristic: memory is typically 50-100x output size for docs rendering
+    if [ "$output_size_kb" -gt 0 ]; then
+        estimated_memory_mb=$(echo "scale=0; ($output_size_kb * 60) / 1024" | bc)
+    else
+        estimated_memory_mb=64
+    fi
+    if [ "$estimated_memory_mb" -lt 50 ]; then
+        estimated_memory_mb=64 # minimum reasonable estimate
+    fi
+
+    # Output JSON result
+    echo "{\"total_time_seconds\": $elapsed, \"peak_memory_mb\": $estimated_memory_mb, \"files_rendered\": $file_count}"
 }
 
 # Run scenario and collect results
 run_scenario() {
     local scenario=$1
     local results=()
     local times=()
-    local cpu_times=()
-    local cpu_percents=()
     local memories=()
     local files=0
 
-    log_info "Running scenario: $scenario ($RUNS runs, docs: $DOCS_TYPE, parallel: $PARALLEL_LABEL)"
+    log_info "Running scenario: $scenario ($RUNS runs, docs: $DOCS_TYPE)"
 
     case "$scenario" in
         cold)
@@ -204,11 +164,9 @@ run_scenario() {
                 result=$(run_benchmark_simple $i fresh)
                 results+=("$result")
                 time_s=$(echo "$result" | jq -r '.total_time_seconds')
-                cpu_s=$(echo "$result" | jq -r '.cpu_time_seconds')
-                cpu_pct=$(echo "$result" | jq -r '.cpu_percent')
                 memory_mb=$(echo "$result" | jq -r '.peak_memory_mb')
                 files=$(echo "$result" | jq -r '.files_rendered')
-                log_success " Time: ${time_s}s, CPU: ${cpu_s}s (${cpu_pct}%), Memory: ${memory_mb}MB, Files: $files"
+                log_success " Time: ${time_s}s, Memory: ~${memory_mb}MB, Files: $files"
             done
             ;;
         warm)
@@ -222,11 +180,9 @@ run_scenario() {
                 result=$(run_benchmark_simple $i) # Reuse existing cache
                 results+=("$result")
                 time_s=$(echo "$result" | jq -r '.total_time_seconds')
-                cpu_s=$(echo "$result" | jq -r '.cpu_time_seconds')
-                cpu_pct=$(echo "$result" | jq -r '.cpu_percent')
                 memory_mb=$(echo "$result" | jq -r '.peak_memory_mb')
                 files=$(echo "$result" | jq -r '.files_rendered')
-                log_success " Time: ${time_s}s, CPU: ${cpu_s}s (${cpu_pct}%), Memory: ${memory_mb}MB, Files: $files"
+                log_success " Time: ${time_s}s, Memory: ~${memory_mb}MB, Files: $files"
            done
            ;;
        partial)
@@ -247,85 +203,63 @@ run_scenario() {
                 result=$(run_benchmark_simple $i) # Reuse existing cache
                 results+=("$result")
                 time_s=$(echo "$result" | jq -r '.total_time_seconds')
-                cpu_s=$(echo "$result" | jq -r '.cpu_time_seconds')
-                cpu_pct=$(echo "$result" | jq -r '.cpu_percent')
                 memory_mb=$(echo "$result" | jq -r '.peak_memory_mb')
                 files=$(echo "$result" | jq -r '.files_rendered')
-                log_success " Time: ${time_s}s, CPU: ${cpu_s}s (${cpu_pct}%), Memory: ${memory_mb}MB, Files: $files"
+                log_success " Time: ${time_s}s, Memory: ~${memory_mb}MB, Files: $files"
             done
             ;;
     esac
 
     # Extract values for aggregation
     for result in "${results[@]}"; do
         times+=($(echo "$result" | jq -r '.total_time_seconds'))
-        cpu_times+=($(echo "$result" | jq -r '.cpu_time_seconds'))
-        cpu_percents+=($(echo "$result" | jq -r '.cpu_percent'))
         memories+=($(echo "$result" | jq -r '.peak_memory_mb'))
     done
 
     # Calculate aggregates
-    local time_sum=0 cpu_sum=0 cpu_pct_sum=0 mem_sum=0
+    local time_sum=0 mem_sum=0
     local time_min=${times[0]} time_max=${times[0]}
-    local cpu_min=${cpu_times[0]} cpu_max=${cpu_times[0]}
     local mem_min=${memories[0]} mem_max=${memories[0]}
 
     for i in "${!times[@]}"; do
         time_sum=$(echo "$time_sum + ${times[$i]}" | bc)
-        cpu_sum=$(echo "$cpu_sum + ${cpu_times[$i]}" | bc)
-        cpu_pct_sum=$(echo "$cpu_pct_sum + ${cpu_percents[$i]}" | bc)
         mem_sum=$(echo "$mem_sum + ${memories[$i]}" | bc)
 
         if (( $(echo "${times[$i]} < $time_min" | bc -l) )); then time_min=${times[$i]}; fi
         if (( $(echo "${times[$i]} > $time_max" | bc -l) )); then time_max=${times[$i]}; fi
-        if (( $(echo "${cpu_times[$i]} < $cpu_min" | bc -l) )); then cpu_min=${cpu_times[$i]}; fi
-        if (( $(echo "${cpu_times[$i]} > $cpu_max" | bc -l) )); then cpu_max=${cpu_times[$i]}; fi
        if (( $(echo "${memories[$i]} < $mem_min" | bc -l) )); then mem_min=${memories[$i]}; fi
        if (( $(echo "${memories[$i]} > $mem_max" | bc -l) )); then mem_max=${memories[$i]}; fi
    done
 
    local time_avg=$(echo "scale=3; $time_sum / ${#times[@]}" | bc)
-    local cpu_avg=$(echo "scale=2; $cpu_sum / ${#cpu_times[@]}" | bc)
-    local cpu_pct_avg=$(echo "scale=0; $cpu_pct_sum / ${#cpu_percents[@]}" | bc)
    local mem_avg=$(echo "scale=1; $mem_sum / ${#memories[@]}" | bc)
 
-    # Save to JSON - include parallel mode in filename
+    # Save to JSON
    mkdir -p "$RESULTS_DIR"
-    local result_file="$RESULTS_DIR/${BRANCH}_${PARALLEL_LABEL}_${scenario}_${DOCS_TYPE}_${TIMESTAMP}.json"
+    local result_file="$RESULTS_DIR/${BRANCH}_${scenario}_${DOCS_TYPE}_${TIMESTAMP}.json"
 
    cat > "$result_file" << EOF
 {
   "branch": "$BRANCH",
   "commit": "$COMMIT",
   "scenario": "$scenario",
   "docs_type": "$DOCS_TYPE",
-  "parallel_mode": "$PARALLEL_LABEL",
-  "parallel_workers": "$PARALLEL_WORKERS",
   "timestamp": "$TIMESTAMP",
   "runs": $RUNS,
   "metrics": {
-    "wall_time": {
+    "time": {
       "avg_seconds": $time_avg,
       "min_seconds": $time_min,
       "max_seconds": $time_max
     },
-    "cpu_time": {
-      "avg_seconds": $cpu_avg,
-      "min_seconds": $cpu_min,
-      "max_seconds": $cpu_max,
-      "avg_percent": $cpu_pct_avg
-    },
     "memory": {
       "avg_mb": $mem_avg,
       "min_mb": $mem_min,
-      "max_mb": $mem_max,
-      "source": "php_profiling"
+      "max_mb": $mem_max
     },
     "files_rendered": $files
   },
-  "raw_wall_times_seconds": [$(IFS=,; echo "${times[*]}")],
-  "raw_cpu_times_seconds": [$(IFS=,; echo "${cpu_times[*]}")],
-  "raw_cpu_percents": [$(IFS=,; echo "${cpu_percents[*]}")],
+  "raw_times_seconds": [$(IFS=,; echo "${times[*]}")],
  "raw_memories_mb": [$(IFS=,; echo "${memories[*]}")]
 }
 EOF
@@ -334,10 +268,9 @@ EOF
 
     # Print summary
     echo ""
-    echo "=== $scenario Summary (parallel: $PARALLEL_LABEL) ==="
-    echo " Wall Time: ${time_avg}s (min: ${time_min}s, max: ${time_max}s)"
-    echo " CPU Time: ${cpu_avg}s (~${cpu_pct_avg}% utilization)"
-    echo " Memory: ${mem_avg}MB peak (from PHP profiling)"
+    echo "=== $scenario Summary ==="
+    echo " Avg Time: ${time_avg}s (min: ${time_min}s, max: ${time_max}s)"
+    echo " Avg Memory: ~${mem_avg}MB (estimated)"
     echo " Files: $files"
     echo ""
 }
@@ -347,7 +280,6 @@ echo "============================================"
 echo "Benchmark: $SCENARIO"
 echo "Branch: $BRANCH ($COMMIT)"
 echo "Docs: $DOCS_TYPE ($DOCS_INPUT)"
-echo "Parallel: $PARALLEL_LABEL (--parallel-workers=$PARALLEL_WORKERS)"
 echo "Runs: $RUNS"
 echo "============================================"
 echo ""

docs/.nojekyll

Whitespace-only changes. (The .nojekyll marker disables Jekyll processing so GitHub Pages serves the rendered HTML as-is.)
