Benchmark #76
name: Benchmark

permissions:
  contents: read

on:
  workflow_dispatch:

jobs:
  benchmark:
    name: ${{ matrix.workload.kind }} (${{ matrix.backend.id }}, ${{ matrix.workload.display }})
    runs-on: ${{ matrix.workload.runner }}
    timeout-minutes: ${{ matrix.workload.timeout }}
    strategy:
      fail-fast: false
      matrix:
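        # A single memory-store backend crossed with several workload scenarios;
        # each backend/workload combination runs as its own matrix job.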
        backend:
          - id: memory
            compose_file: compose.prometheus-memory-store.yml
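        # Workloads range from a small batch run up to heavy, deep-trace rollouts; each
        # entry pins its own runner pool, store worker count, and benchmark arguments.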
        workload:
          - id: scenario-minimal-scale
            display: Minimal production scale
            kind: scenario
            store_workers: 4
            runner:
              - self-hosted
              - 1ES.Pool=agl-runner-cpu
            timeout: 120
            args: >-
              --mode batch
              --total-tasks 4096
              --batch-size 256
              --n-runners 32
              --max-rounds 6
              --sleep-seconds 0.5
          - id: scenario-medium-scale
            display: Medium production scale
            kind: scenario
            store_workers: 16
            runner:
              - self-hosted
              - 1ES.Pool=agl-runner-cpu
            timeout: 120
            args: >-
              --mode batch
              --total-tasks 10000
              --batch-size 1000
              --n-runners 100
              --max-rounds 10
              --sleep-seconds 0.1
          - id: scenario-midhigh-scale
            display: Mid-high production scale
            kind: scenario
            store_workers: 24
            runner:
              - self-hosted
              - 1ES.Pool=agl-runner-cpu
            timeout: 120
            args: >-
              --mode batch
              --total-tasks 20000
              --batch-size 2048
              --n-runners 300
              --max-rounds 6
              --sleep-seconds 0.1
          - id: scenario-large-batch
            display: Large batch waves
            kind: scenario
            store_workers: 32
            runner:
              - self-hosted
              - 1ES.Pool=agl-runner-cpu-high
            timeout: 120
            args: >-
              --mode batch
              --total-tasks 50000
              --batch-size 8192
              --n-runners 1000
              --max-rounds 3
              --sleep-seconds 0.1
          - id: scenario-long-queues
            display: Long rollout queues
            kind: scenario
            store_workers: 32
            runner:
              - self-hosted
              - 1ES.Pool=agl-runner-cpu
            timeout: 120
            args: >-
              --mode batch_partial
              --total-tasks 50000
              --batch-size 1024
              --n-runners 256
              --remaining-tasks 4096
              --max-rounds 4
              --sleep-seconds 0.1
          - id: scenario-high-concurrency
            display: High-throughput concurrent requests
            kind: scenario
            store_workers: 32
            runner:
              - self-hosted
              - 1ES.Pool=agl-runner-cpu-high
            timeout: 120
            args: >-
              --mode single
              --total-tasks 50000
              --concurrency 2048
              --n-runners 256
              --max-rounds 2
              --sleep-seconds 0.1
          - id: scenario-heavy-traces
            display: Heavy rollouts with deep traces
            kind: scenario
            store_workers: 64
            runner:
              - self-hosted
              - 1ES.Pool=agl-runner-cpu-high
            timeout: 120
            args: >-
              --mode batch_partial
              --total-tasks 10000
              --batch-size 1024
              --remaining-tasks 256
              --n-runners 512
              --max-rounds 20
              --sleep-seconds 1.0
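    # Shared service endpoints plus artifact and log names derived from the
    # workload/backend pair, so parallel jobs write to distinct paths.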
    env:
      STORE_URL: http://localhost:4747
      STORE_API_URL: http://localhost:4747
      PROM_URL: http://localhost:9090
      WORKLOAD_KIND: ${{ matrix.workload.kind }}
      WORKLOAD_ID: ${{ matrix.workload.id }}
      BACKEND_ID: ${{ matrix.backend.id }}
      ARTIFACT_DIR: ${{ format('artifacts/{0}-{1}', matrix.workload.id, matrix.backend.id) }}
      COMPOSE_FILE: ${{ matrix.backend.compose_file }}
      AGL_STORE_N_WORKERS: ${{ matrix.workload.store_workers }}
      ANALYSIS_FILE: ${{ format('analysis-{0}.log', matrix.workload.id) }}
      SUMMARY_FILE: ${{ format('summary-{0}.log', matrix.workload.id) }}
      PROM_ARCHIVE_BASENAME: ${{ format('prometheus-{0}-{1}', matrix.workload.id, matrix.backend.id) }}
      ARTIFACT_NAME: ${{ format('{0}-{1}', matrix.workload.id, matrix.backend.id) }}
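    # Step flow: set up Python and uv, reset and launch the Docker stack, wait for the
    # store to become healthy, run the workload, then collect logs and upload artifacts.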
    steps:
      - uses: actions/checkout@v4
      - uses: astral-sh/setup-uv@v7
        with:
          enable-cache: true
          python-version: '3.12'
      - name: Sync dependencies
        run: uv sync --frozen --extra mongo --group core-stable --group dev
      - name: Install Legacy Agent-lightning
        run: uv pip install agentlightning==0.2.2
      - name: Check disk space
        run: df -h
      - name: Reset benchmark data directories
        run: |
          set -euo pipefail
          cd docker
          rm -rf data
          bash setup.sh
      - name: Launch ${{ matrix.backend.id }} Prometheus stack
        run: |
          set -euo pipefail
          cd docker
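          # Tear down any stack left over from a previous run before starting fresh.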
          docker compose -f "$COMPOSE_FILE" down -v || true
          docker compose -f "$COMPOSE_FILE" up -d --quiet-pull
      - name: Wait for store readiness
        run: |
          set -euo pipefail
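          # Poll the store health endpoint once per second for up to 60 seconds.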
          for attempt in {1..60}; do
            if curl -fsS "$STORE_API_URL/health" >/dev/null 2>&1; then
              exit 0
            fi
            sleep 1
          done
          echo "Store did not become ready in time" >&2
          # show logs for debugging
          cd docker && docker compose -f "$COMPOSE_FILE" logs app
          exit 1
      - name: Prepare artifact directory
        run: mkdir -p "$ARTIFACT_DIR"
      - name: Record workload start
        run: echo "BENCHMARK_START=$(date -u +%FT%TZ)" >> "$GITHUB_ENV"
      - name: (Scenario) Run ${{ matrix.workload.display }} workload
        if: ${{ matrix.workload.kind == 'scenario' }}
        run: |
          set -euo pipefail
          source .venv/bin/activate
          cd tests
          rm -rf types
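          # Run the benchmark client against the store with this scenario's argument
          # set; the store URL comes from the job environment, the args from the matrix.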
          python -m benchmark.benchmark_store \
            --store-url "$STORE_URL" \
            ${{ matrix.workload.args }}
      - name: Record workload end
        if: ${{ always() }}
        run: echo "BENCHMARK_END=$(date -u +%FT%TZ)" >> "$GITHUB_ENV"
      - name: Collect docker logs
        if: ${{ always() }}
        run: |
          set -euo pipefail
          mkdir -p "$ARTIFACT_DIR"
          cd docker
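          # Capture one log file per compose service; log collection must not fail the job.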
          readarray -t services < <(docker compose -f "$COMPOSE_FILE" config --services)
          if [ "${#services[@]}" -eq 0 ]; then
            echo "No services defined in compose file."
            exit 0
          fi
          for service in "${services[@]}"; do
            docker compose -f "$COMPOSE_FILE" logs "$service" > "../$ARTIFACT_DIR/docker-${service}-${WORKLOAD_ID}-${BACKEND_ID}.log" || true
          done
      - name: Upload workload artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ env.ARTIFACT_NAME }}
          path: ${{ env.ARTIFACT_DIR }}
          if-no-files-found: error