Performance release gate (#9068)

bric3 · ddyurchenko · web-flow · commit 4e4c286275f5 · 2025-07-22T10:33:21.000Z
* chore(ci): Basic slo breach prototype * chore(ci): PR review * chore: Collect reports from benchmarks * tweak: Include benchmarkType in file name * tweak: Store artifacts that were used for checking regression, so we can debug * chore(ci): Tweak thresholds * chore(ci): Tweak to recommended thresholds See https://github.com/DataDog/dd-trace-java/pull/9068/files#r2210474360 * chore(ci): Another tweak to recommended thresholds See https://github.com/DataDog/dd-trace-java/pull/9068/files#r2210474360 * chore(ci): Revert hack to run the release gate (it needed the all macrobenchmarks) --------- Co-authored-by: Dmytro Yurchenko <dmytro.yurchenko@datadoghq.com> Co-authored-by: Dmytro Yurchenko <88330911+ddyurchenko@users.noreply.github.com>
diff --git a/.gitlab/benchmarks/bp-runner.fail-on-breach.yml b/.gitlab/benchmarks/bp-runner.fail-on-breach.yml
@@ -0,0 +1,48 @@
+# Thresholds set based on guidance in https://datadoghq.atlassian.net/wiki/x/LgI1LgE#How-to-choose-thresholds-for-pre-release-gates%3F
+
+experiments:
+  - name: Run SLO breach check
+    steps:
+      - name: SLO breach check
+        run: fail_on_breach
+        # https://datadoghq.atlassian.net/wiki/x/LgI1LgE#How-to-choose-a-warning-range-for-pre-release-gates%3F
+        warning_range: 10
+        # File spec
+        #   https://datadoghq.atlassian.net/wiki/x/LgI1LgE#Specification
+        # Measurements
+        #   https://benchmarking.us1.prod.dog/trends?projectId=4&branch=master&trendsTab=per_scenario
+        scenarios:
+          # Note that the thresholds here are chosen based on the confidence interval with a 10% adjustment.
+
+          # Standard macrobenchmarks
+          # https://benchmarking.us1.prod.dog/trends?projectId=4&branch=master&trendsTab=per_scenario&scenario=normal_operation%2Fonly-tracing&trendsType=scenario
+          - name: normal_operation/only-tracing
+            thresholds:
+              - agg_http_req_duration_p50 < 2.6 ms
+              - agg_http_req_duration_p99 < 8.5 ms
+          # https://benchmarking.us1.prod.dog/trends?projectId=4&branch=master&trendsTab=per_scenario&scenario=normal_operation%2Fotel-latest&trendsType=scenario
+          - name: normal_operation/otel-latest
+            thresholds:
+              - agg_http_req_duration_p50 < 2.5 ms
+              - agg_http_req_duration_p99 < 10 ms
+
+          # https://benchmarking.us1.prod.dog/trends?projectId=4&branch=master&trendsTab=per_scenario&scenario=high_load%2Fonly-tracing&trendsType=scenario
+          - name: high_load/only-tracing
+            thresholds:
+              - throughput > 1100.0 op/s
+          # https://benchmarking.us1.prod.dog/trends?projectId=4&branch=master&trendsTab=per_scenario&scenario=high_load%2Fotel-latest&trendsType=scenario
+          - name: high_load/otel-latest
+            thresholds:
+              - throughput > 1100.0 op/s
+
+          # Startup macrobenchmarks
+          # https://benchmarking.us1.prod.dog/trends?projectId=4&branch=master&trendsTab=per_scenario&scenario=startup%3Apetclinic%3Atracing%3AGlobalTracer&trendsType=scenario
+          # https://benchmarking.us1.prod.dog/trends?projectId=4&branch=master&trendsTab=per_scenario&scenario=startup%3Apetclinic%3Aappsec%3AGlobalTracer&trendsType=scenario
+          # https://benchmarking.us1.prod.dog/trends?projectId=4&branch=master&trendsTab=per_scenario&scenario=startup%3Apetclinic%3Aiast%3AGlobalTracer&trendsType=scenario
+          - name: "startup:petclinic:(tracing|appsec|iast):GlobalTracer"  # presumably a regex: one entry for the three scenarios linked above - TODO confirm
+            thresholds:
+              - execution_time < 280 ms
+          # https://benchmarking.us1.prod.dog/trends?projectId=4&branch=master&trendsTab=per_scenario&scenario=startup%3Apetclinic%3Aprofiling%3AGlobalTracer&trendsType=scenario
+          - name: "startup:petclinic:profiling:GlobalTracer"
+            thresholds:
+              - execution_time < 420 ms
diff --git a/.gitlab/macrobenchmarks.yml b/.gitlab/macrobenchmarks.yml
@@ -1,3 +1,8 @@
+include:  # presumably provides the .notify-slo-breaches template that the notify-slo-breaches job extends - TODO confirm
+  project: 'DataDog/benchmarking-platform-tools'
+  file: 'images/templates/gitlab/notify-slo-breaches.template.yml'
+  ref: '925e0a3e7dd628885f6fc69cdaea5c8cc9e212bc'  # looks like a full commit SHA, i.e. the template is pinned to one revision
+
 .macrobenchmarks:
   stage: macrobenchmarks
   rules:
@@ -68,3 +73,66 @@ otel-latest:
     BP_BENCHMARKS_CONFIGURATION: otel-latest
     TRACER_OPTS: -javaagent:/app/otel-java-agent.jar -Ddd.env=otel-latest -Ddd.service=bp-java-petclinic
     JAVA_OPTS: -javaagent:/app/memcheck/stability-testing-memwatch.jar -Xmx128M
+
+
+check-slo-breaches:  # Collects benchmark reports into platform/artifacts/ and runs the bp-runner SLO breach check
+  stage: macrobenchmarks
+  interruptible: true
+  tags: ["arch:amd64"]
+  image: registry.ddbuild.io/images/benchmarking-platform-tools-ubuntu:latest
+  when: on_success
+  needs:
+    - job: baseline
+      artifacts: true
+    - job: only-tracing
+      artifacts: true
+    - job: otel-latest
+      artifacts: true
+    - job: benchmarks-startup
+      artifacts: true
+    - job: benchmarks-load
+      artifacts: true
+    - job: benchmarks-dacapo
+      artifacts: true
+  script:
+    # macrobenchmarks are located here, files are already in "converted" format
+    - export ARTIFACTS_DIR="$(pwd)/platform/artifacts/" && mkdir -p "${ARTIFACTS_DIR}"
+
+    # Copy the reports of the benchmarks-* jobs into ARTIFACTS_DIR as <type>-<benchmarkType>-<prefix>.converted.json
+    - |
+      export BENCHMARKS_ARTIFACTS_DIR="$(pwd)/reports" && mkdir -p "${BENCHMARKS_ARTIFACTS_DIR}"
+      for benchmarkType in startup load dacapo; do
+          find "$BENCHMARKS_ARTIFACTS_DIR/$benchmarkType" -name "benchmark-baseline.json" -o -name "benchmark-candidate.json" | while IFS= read -r file; do # IFS=/-r: take each path verbatim
+            relpath="${file#"$BENCHMARKS_ARTIFACTS_DIR/$benchmarkType/"}" # Quoted so the prefix is stripped literally, not as a glob pattern
+            prefix="${relpath%/benchmark-*}" # Remove the trailing /benchmark-(baseline|candidate).json
+            prefix="${prefix#./}" # Remove any leading ./
+            prefix="${prefix//\//-}" # Replace / with -
+            case "$file" in
+              *benchmark-baseline.json) type="baseline" ;;
+              *benchmark-candidate.json) type="candidate" ;;
+            esac
+            echo "Moving $file to $ARTIFACTS_DIR/${type}-${benchmarkType}-${prefix}.converted.json"
+            cp "$file" "$ARTIFACTS_DIR/${type}-${benchmarkType}-${prefix}.converted.json"
+          done
+      done
+    - ls -lah "$ARTIFACTS_DIR"
+    - bp-runner .gitlab/benchmarks/bp-runner.fail-on-breach.yml
+  artifacts:
+    name: "artifacts"
+    when: always
+    paths:
+      - platform/artifacts/
+    expire_in: 1 week
+  variables:
+    UPSTREAM_PROJECT_ID: $CI_PROJECT_ID # The ID of the current project. This ID is unique across all projects on the GitLab instance.
+    UPSTREAM_PROJECT_NAME: $CI_PROJECT_NAME # "dd-trace-java"
+    UPSTREAM_BRANCH: $CI_COMMIT_REF_NAME # The branch or tag name for which project is built.
+    UPSTREAM_COMMIT_SHA: $CI_COMMIT_SHA # The commit revision the project is built for.
+
+notify-slo-breaches:
+  extends: .notify-slo-breaches  # not defined in this diff; presumably comes from the notify-slo-breaches.template.yml include - TODO confirm
+  stage: macrobenchmarks
+  needs: ["check-slo-breaches"]
+  when: always  # run even when check-slo-breaches fails
+  variables:
+    CHANNEL: "apm-release-platform"  # presumably the channel the template notifies - TODO confirm