shayancoin · shayancoin · Oct 17, 2025 · Oct 16, 2025 · Oct 16, 2025 · Oct 17, 2025
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -102,3 +102,76 @@ jobs:
 
     - name: Build
       run: npm run build
+
+  # Boots the dev stack with Docker Compose, waits for the API and frontend to
+  # answer over HTTP, then runs the frontend `perf:budget` script against it.
+  # Runs only after the frontend-tests job has succeeded.
+  performance-budget:
+    runs-on: ubuntu-latest
+    needs:
+      - frontend-tests
+    env:
+      PERF_BUDGET_HEADLESS: 'true'
+    steps:
+      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+
+      - name: Set up Node.js
+        uses: actions/setup-node@0a44ba78451273a1ed8ac2fee4e347c72dfd377f
+        with:
+          node-version: '20'
+          cache: 'npm'
+          cache-dependency-path: ./frontend/package-lock.json
+
+      - name: Install dependencies
+        working-directory: ./frontend
+        run: npm ci
+
+      - name: Start application stack
+        run: |
+          docker compose -f docker-compose.dev.yml up -d --build
+
+      - name: Wait for API
+        run: |
+          # Up to 60 attempts, 2 seconds apart. Exit non-zero when the API
+          # never answers so the budget run does not start against a dead stack.
+          for i in {1..60}; do
+            if curl -sf http://localhost:8000/healthcheck; then
+              exit 0
+            fi
+            sleep 2
+          done
+          echo "API did not become ready in time" >&2
+          exit 1
+
+      - name: Wait for Frontend
+        run: |
+          # Same polling scheme as the API wait; fails the step on timeout.
+          for i in {1..60}; do
+            if curl -sf http://localhost:3000/models/manifest.json; then
+              exit 0
+            fi
+            sleep 2
+          done
+          echo "Frontend did not become ready in time" >&2
+          exit 1
+
+      - name: Run performance budget checks
+        working-directory: ./frontend
+        env:
+          # Relative to ./frontend, i.e. <repo root>/test-results/perf, which is
+          # the path the upload step below collects.
+          PERF_BUDGET_OUTPUT_DIR: ../test-results/perf
+        run: npm run perf:budget
+
+      # always(): upload the report even when an earlier step failed.
+      - name: Upload performance budget report
+        if: always()
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
+        with:
+          name: perf-budget
+          path: test-results/perf
+
+      # always(): tear the stack down regardless of the job outcome.
+      - name: Shutdown stack
+        if: always()
+        run: |
+          docker compose -f docker-compose.dev.yml down
-          for i in {1..60}; do curl -sf http://localhost:8000/healthcheck && break || sleep 2; done
-
-      - name: Wait for Frontend
-        run: |
-          for i in {1..60}; do curl -sf http://localhost:3000/models/manifest.json && break || sleep 2; done
-
-      - name: Run performance budget checks
-        working-directory: ./frontend
-        env:
-          PERF_BUDGET_OUTPUT_DIR: ../test-results/perf
-        run: npm run perf:budget
-
-      - name: Upload performance budget report
-        if: always()
-        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
-        with:
-          name: perf-budget
-          path: test-results/perf
-
-      - name: Shutdown stack
-        if: always()
-        run: |
-          docker compose -f docker-compose.dev.yml down
+      - name: Wait for API
+        run: |
+          for i in {1..60}; do
+            if curl -sf http://localhost:8000/healthcheck; then
+              exit 0
+            fi
+            sleep 2
+          done
+          echo "API did not become ready in time" >&2
+          exit 1
+
+      - name: Wait for Frontend
+        run: |
+          for i in {1..60}; do
+            if curl -sf http://localhost:3000/models/manifest.json; then
+              exit 0
+            fi
+            sleep 2
+          done
+          echo "Frontend did not become ready in time" >&2
+          exit 1
-          for i in {1..60}; do curl -sf http://localhost:8000/healthcheck && break || sleep 2; done
-
-      - name: Wait for Frontend
-        run: |
-          for i in {1..60}; do curl -sf http://localhost:3000/models/manifest.json && break || sleep 2; done
-
-      - name: Run performance budget checks
-        working-directory: ./frontend
-        env:
-          PERF_BUDGET_OUTPUT_DIR: ../test-results/perf
-        run: npm run perf:budget
-
-      - name: Upload performance budget report
-        if: always()
-        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
-        with:
-          name: perf-budget
-          path: test-results/perf
-
-      - name: Shutdown stack
-        if: always()
-        run: |
-          docker compose -f docker-compose.dev.yml down
+      - name: Wait for API
+        run: |
+          for i in {1..60}; do
+            if curl -sf http://localhost:8000/healthcheck; then
+              exit 0
+            fi
+            sleep 2
+          done
+          echo "API did not become ready in time" >&2
+          exit 1
+
+      - name: Wait for Frontend
+        run: |
+          for i in {1..60}; do
+            if curl -sf http://localhost:3000/models/manifest.json; then
+              exit 0
+            fi
+            sleep 2
+          done
+          echo "Frontend did not become ready in time" >&2
+          exit 1
+
+  # Runs the observability budget checker script after the performance-budget
+  # job has succeeded. Prometheus and Tempo endpoints and bearer tokens are
+  # passed to the script through environment variables sourced from secrets.
+  observability-budgets:
+    runs-on: ubuntu-latest
+    needs: performance-budget
+    steps:
+      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8
+
+      - name: Set up Python
+        uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c
+        with:
+          python-version: '3.12'
+
+      # NOTE(review): pyyaml is presumably needed by the checker to read the
+      # --config file; confirm against tools/ci/check_observability_budgets.py.
+      - name: Install dependencies
+        run: pip install pyyaml
+
+      - name: Check observability budgets
+        env:
+          PROMETHEUS_URL: ${{ secrets.PROMETHEUS_URL }}
+          PROMETHEUS_BEARER_TOKEN: ${{ secrets.PROMETHEUS_BEARER_TOKEN }}
+          TEMPO_URL: ${{ secrets.TEMPO_URL }}
+          TEMPO_BEARER_TOKEN: ${{ secrets.TEMPO_BEARER_TOKEN }}
+        run: python tools/ci/check_observability_budgets.py --config observability-budgets.yml
diff --git a/docs/release-checklist.md b/docs/release-checklist.md
@@ -0,0 +1,39 @@
+# Release Checklist
+
+This checklist ties together continuous integration signal, Grafana alerting, and the on-call rotation so that preview deployments are gated on healthy performance and reliability metrics.
+
+## 1. Verify CI Observability Gates
+
+- Check the **performance-budget** job in GitHub Actions CI. This job runs the Playwright-based budget defined in `perf-budget.yml` and publishes a JUnit report that Grafana can ingest. If it fails, fix the regression before proceeding.
+- Confirm that the **observability-budgets** job has passed. It queries Prometheus and Tempo spanmetrics using `observability-budgets.yml` and fails when P95 latency or error-rate thresholds are exceeded compared to the previous day.
+- Export any new failure signatures into the on-call runbook.
+
+## 2. Review Grafana Dashboards
+
+- Open the "Configurator Experience" dashboard and confirm the panels for:
+  - `ci_perf_budget_value` vs `ci_perf_budget_threshold` (pushed from the Playwright budget run).
+  - Prometheus latency and error-rate panels that use the same queries as the CI job.
+- Ensure alert rules are configured to page the on-call engineer whenever the CI metrics breach thresholds for two consecutive runs or when runtime metrics cross the defined budgets.
+
+## 3. Coordinate On-call Notifications
+
+- Tag the current on-call engineer in the release Slack channel with a summary of CI and Grafana status.
+- Verify PagerDuty (or the configured paging tool) has matching alerts for the Grafana rules referenced above.
+- Record the acknowledgement in the release ticket.
+
+## 4. Gate Preview Environments
+
+- Do not promote a preview environment until:
+  - All CI jobs, including `performance-budget` and `observability-budgets`, pass.
+  - Grafana dashboards show no active alerts for the release window.
+  - The on-call engineer confirms readiness.
+- If any alert is firing, pause the release and create an incident in the on-call tracking tool.
+
+## 5. Final Release Sign-off
+
+- Update the release ticket with links to:
+  - The successful CI run.
+  - Grafana dashboard screenshots showing green status.
+  - PagerDuty acknowledgement (or equivalent) from the on-call engineer.
+- Archive the Grafana dashboard snapshot for auditability.
+- Communicate the release completion to stakeholders.
diff --git a/frontend/package-lock.json b/frontend/package-lock.json
diff --git a/frontend/package.json b/frontend/package.json
@@ -13,7 +13,8 @@
     "assets:validate": "python ../scripts/glb_validate.py public/models/*.glb --fail-on-warning",
     "assets:manifest": "python ../scripts/gen_glb_manifest.py > public/models/manifest.json",
     "assets:all": "npm run assets:gen && npm run assets:pack && npm run assets:validate && npm run assets:manifest",
-    "test:manifest": "vitest run --reporter=dot"
+    "test:manifest": "vitest run --reporter=dot",
+    "perf:budget": "node ./tools/perf/run-perf-budget.js"
   },
   "dependencies": {
     "@chakra-ui/icons": "^2.1.1",
@@ -52,7 +53,9 @@
     "ts-jest": "^29.2.5",
     "ts-node": "^10.9.2",
     "typescript": "^5",
-    "vitest": "^1.6.0"
+    "vitest": "^1.6.0",
+    "js-yaml": "^4.1.0",
+    "xmlbuilder2": "^4.0.0"
   },
   "jest": {
     "setupFilesAfterEnv": [