refactor(benchmark): split k6 metrics into throughput and latency outputs

tobyhede · tobyhede · commit 72128361181f · 2026-01-21T16:02:25.000+11:00
Store throughput and latency as separate benchmark-action results so each is compared in the correct direction for regression detection:
- Throughput (customBiggerIsBetter): rate in iter/s
- Latency (customSmallerIsBetter): p95 and p99 in ms

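Adds p99 metric and configures summaryTrendStats for k6 so that p(99) is reported (it is not among k6's default trend stats).
Also renames the emitted metrics to `<script>_rate`, `<script>_p95` and `<script>_p99`, and raises the CI benchmark duration from 30s to 60s.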
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
@@ -58,18 +58,33 @@ jobs:
           RUST_BACKTRACE: "1"
         run: mise run k6:benchmark:continuous
 
-      # Store k6 benchmark result
-      - name: Store k6 benchmark result
+      # Store k6 throughput benchmark (higher is better)
+      - name: Store k6 throughput benchmark
         uses: benchmark-action/github-action-benchmark@v1
         with:
+          name: 'k6 Throughput'
           tool: 'customBiggerIsBetter'
-          output-file-path: tests/benchmark/results/k6-output.json
+          output-file-path: tests/benchmark/results/k6-throughput.json
           github-token: ${{ secrets.GITHUB_TOKEN }}
           fail-on-alert: true
           comment-on-alert: true
           summary-always: true
           auto-push: true
-          benchmark-data-dir-path: docs/k6
+          benchmark-data-dir-path: docs/k6/throughput
+
+      # Store k6 latency benchmark (lower is better)
+      - name: Store k6 latency benchmark
+        uses: benchmark-action/github-action-benchmark@v1
+        with:
+          name: 'k6 Latency'
+          tool: 'customSmallerIsBetter'
+          output-file-path: tests/benchmark/results/k6-latency.json
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          fail-on-alert: true
+          comment-on-alert: true
+          summary-always: true
+          auto-push: true
+          benchmark-data-dir-path: docs/k6/latency
 
       # Download previous benchmark result from cache (if exists)
       - name: Download previous benchmark data
diff --git a/tests/benchmark/k6/scripts/lib/config.js b/tests/benchmark/k6/scripts/lib/config.js
@@ -35,6 +35,7 @@ export function getDefaultOptions(thresholds = {}) {
         duration: __ENV.K6_DURATION || '30s',
       },
     },
+    summaryTrendStats: ['min', 'avg', 'med', 'p(90)', 'p(95)', 'p(99)', 'max'],
     thresholds: {
       'iteration_duration': ['p(95)<500'],
       ...thresholds,
diff --git a/tests/benchmark/k6/scripts/lib/summary.js b/tests/benchmark/k6/scripts/lib/summary.js
@@ -1,5 +1,5 @@
 // benchmark-action compatible output formatter
-// Outputs JSON array for github-action-benchmark
+// Outputs separate JSON files for throughput (bigger is better) and latency (smaller is better)
 //
 // Usage in scripts:
 //   import { createSummaryHandler } from './lib/summary.js';
@@ -15,22 +15,37 @@ export function createSummaryHandler(scriptName) {
       ? data.metrics.iteration_duration.values['p(95)']
       : 0;
 
-    const output = [
+    const p99Duration = data.metrics.iteration_duration
+      ? data.metrics.iteration_duration.values['p(99)']
+      : 0;
+
+    // Throughput metrics (customBiggerIsBetter)
+    const throughputOutput = [
       {
-        name: `${scriptName}_iterations_per_second`,
-        unit: 'Number',
+        name: `${scriptName}_rate`,
+        unit: 'iter/s',
         value: Math.round(iterationsPerSecond * 100) / 100,
       },
+    ];
+
+    // Latency metrics (customSmallerIsBetter)
+    const latencyOutput = [
       {
-        name: `${scriptName}_p95_ms`,
+        name: `${scriptName}_p95`,
         unit: 'ms',
         value: Math.round(p95Duration * 100) / 100,
       },
+      {
+        name: `${scriptName}_p99`,
+        unit: 'ms',
+        value: Math.round(p99Duration * 100) / 100,
+      },
     ];
 
     return {
       'stdout': textSummary(data),
-      [`results/k6/${scriptName}-output.json`]: JSON.stringify(output, null, 2),
+      [`results/k6/${scriptName}-throughput.json`]: JSON.stringify(throughputOutput, null, 2),
+      [`results/k6/${scriptName}-latency.json`]: JSON.stringify(latencyOutput, null, 2),
     };
   };
 }
@@ -48,8 +63,11 @@ function textSummary(data) {
 
   if (data.metrics.iteration_duration) {
     const dur = data.metrics.iteration_duration.values;
-    lines.push(`duration p95: ${dur['p(95)'].toFixed(2)}ms`);
+    lines.push(`duration min: ${dur.min.toFixed(2)}ms`);
     lines.push(`duration avg: ${dur.avg.toFixed(2)}ms`);
+    lines.push(`duration p95: ${dur['p(95)'].toFixed(2)}ms`);
+    lines.push(`duration p99: ${dur['p(99)'].toFixed(2)}ms`);
+    lines.push(`duration max: ${dur.max.toFixed(2)}ms`);
   }
 
   lines.push('');
diff --git a/tests/benchmark/mise.toml b/tests/benchmark/mise.toml
@@ -230,16 +230,17 @@ echo '# k6 CI Benchmark: jsonb-ste-vec-insert via proxy'
 echo '###############################################'
 echo
 
-mise run k6_run --script=jsonb-ste-vec-insert --target=proxy --vus=10 --duration=30s
+mise run k6_run --script=jsonb-ste-vec-insert --target=proxy --vus=10 --duration=60s
 
 echo
 echo '###############################################'
 echo '# k6 CI Benchmark: jsonb-large-payload via proxy'
 echo '###############################################'
 echo
 
-mise run k6_run --script=jsonb-large-payload --target=proxy --vus=10 --duration=30s
+mise run k6_run --script=jsonb-large-payload --target=proxy --vus=10 --duration=60s
 
-# Merge outputs for benchmark-action
-jq -s 'add' results/k6/jsonb-ste-vec-insert-output.json results/k6/jsonb-large-payload-output.json > results/k6-output.json
+# Merge outputs for benchmark-action (separate files for throughput vs latency)
+jq -s 'add' results/k6/*-throughput.json > results/k6-throughput.json
+jq -s 'add' results/k6/*-latency.json > results/k6-latency.json
 """