# Workflow file captured from GitHub Actions run "Benchmark #76".
# Manually-triggered store benchmark: runs one job per (backend, workload)
# matrix combination against a dockerized store + Prometheus stack.
name: Benchmark

permissions:
  contents: read

on:
  workflow_dispatch:

jobs:
  benchmark:
    name: ${{ matrix.workload.kind }} (${{ matrix.backend.id }}, ${{ matrix.workload.display }})
    # Runner labels, job timeout, and store sizing all come from the workload entry.
    runs-on: ${{ matrix.workload.runner }}
    timeout-minutes: ${{ matrix.workload.timeout }}
    strategy:
      # Let every matrix cell finish even if a sibling workload fails.
      fail-fast: false
      matrix:
        backend:
          - id: memory
            compose_file: compose.prometheus-memory-store.yml
        workload:
          # Each workload defines: benchmark CLI args, store worker count,
          # runner pool labels, and a per-job timeout (minutes).
          - id: scenario-minimal-scale
            display: Minimal production scale
            kind: scenario
            store_workers: 4
            runner:
              - self-hosted
              - 1ES.Pool=agl-runner-cpu
            timeout: 120
            args: >-
              --mode batch
              --total-tasks 4096
              --batch-size 256
              --n-runners 32
              --max-rounds 6
              --sleep-seconds 0.5
          - id: scenario-medium-scale
            display: Medium production scale
            kind: scenario
            store_workers: 16
            runner:
              - self-hosted
              - 1ES.Pool=agl-runner-cpu
            timeout: 120
            args: >-
              --mode batch
              --total-tasks 10000
              --batch-size 1000
              --n-runners 100
              --max-rounds 10
              --sleep-seconds 0.1
          - id: scenario-midhigh-scale
            display: Mid-high production scale
            kind: scenario
            store_workers: 24
            runner:
              - self-hosted
              - 1ES.Pool=agl-runner-cpu
            timeout: 120
            args: >-
              --mode batch
              --total-tasks 20000
              --batch-size 2048
              --n-runners 300
              --max-rounds 6
              --sleep-seconds 0.1
          - id: scenario-large-batch
            display: Large batch waves
            kind: scenario
            store_workers: 32
            runner:
              - self-hosted
              - 1ES.Pool=agl-runner-cpu-high
            timeout: 120
            args: >-
              --mode batch
              --total-tasks 50000
              --batch-size 8192
              --n-runners 1000
              --max-rounds 3
              --sleep-seconds 0.1
          - id: scenario-long-queues
            display: Long rollout queues
            kind: scenario
            store_workers: 32
            runner:
              - self-hosted
              - 1ES.Pool=agl-runner-cpu
            timeout: 120
            args: >-
              --mode batch_partial
              --total-tasks 50000
              --batch-size 1024
              --n-runners 256
              --remaining-tasks 4096
              --max-rounds 4
              --sleep-seconds 0.1
          - id: scenario-high-concurrency
            display: High-throughput concurrent requests
            kind: scenario
            store_workers: 32
            runner:
              - self-hosted
              - 1ES.Pool=agl-runner-cpu-high
            timeout: 120
            args: >-
              --mode single
              --total-tasks 50000
              --concurrency 2048
              --n-runners 256
              --max-rounds 2
              --sleep-seconds 0.1
          - id: scenario-heavy-traces
            display: Heavy rollouts with deep traces
            kind: scenario
            store_workers: 64
            runner:
              - self-hosted
              - 1ES.Pool=agl-runner-cpu-high
            timeout: 120
            args: >-
              --mode batch_partial
              --total-tasks 10000
              --batch-size 1024
              --remaining-tasks 256
              --n-runners 512
              --max-rounds 20
              --sleep-seconds 1.0
    env:
      # Store and Prometheus endpoints exposed by the docker compose stack.
      STORE_URL: http://localhost:4747
      STORE_API_URL: http://localhost:4747
      PROM_URL: http://localhost:9090
      WORKLOAD_KIND: ${{ matrix.workload.kind }}
      WORKLOAD_ID: ${{ matrix.workload.id }}
      BACKEND_ID: ${{ matrix.backend.id }}
      ARTIFACT_DIR: ${{ format('artifacts/{0}-{1}', matrix.workload.id, matrix.backend.id) }}
      COMPOSE_FILE: ${{ matrix.backend.compose_file }}
      AGL_STORE_N_WORKERS: ${{ matrix.workload.store_workers }}
      ANALYSIS_FILE: ${{ format('analysis-{0}.log', matrix.workload.id) }}
      SUMMARY_FILE: ${{ format('summary-{0}.log', matrix.workload.id) }}
      PROM_ARCHIVE_BASENAME: ${{ format('prometheus-{0}-{1}', matrix.workload.id, matrix.backend.id) }}
      ARTIFACT_NAME: ${{ format('{0}-{1}', matrix.workload.id, matrix.backend.id) }}
    steps:
      - uses: actions/checkout@v4
      - uses: astral-sh/setup-uv@v7
        with:
          enable-cache: true
          python-version: '3.12'
      - name: Sync dependencies
        run: uv sync --frozen --extra mongo --group core-stable --group dev
      - name: Install Legacy Agent-lightning
        run: uv pip install agentlightning==0.2.2
      - name: Check disk space
        run: df -h
      - name: Reset benchmark data directories
        run: |
          set -euo pipefail
          cd docker
          rm -rf data
          bash setup.sh
      - name: Launch ${{ matrix.backend.id }} Prometheus stack
        run: |
          set -euo pipefail
          cd docker
          docker compose -f "$COMPOSE_FILE" down -v || true
          docker compose -f "$COMPOSE_FILE" up -d --quiet-pull
      - name: Wait for store readiness
        # Polls the health endpoint for up to 60 seconds; dumps app logs on failure.
        run: |
          set -euo pipefail
          for attempt in {1..60}; do
            if curl -fsS "$STORE_API_URL/health" >/dev/null 2>&1; then
              exit 0
            fi
            sleep 1
          done
          echo "Store did not become ready in time" >&2
          # show logs for debugging
          cd docker && docker compose -f "$COMPOSE_FILE" logs app
          exit 1
      - name: Prepare artifact directory
        run: mkdir -p "$ARTIFACT_DIR"
      - name: Record workload start
        run: echo "BENCHMARK_START=$(date -u +%FT%TZ)" >> "$GITHUB_ENV"
      - name: (Scenario) Run ${{ matrix.workload.display }} workload
        if: ${{ matrix.workload.kind == 'scenario' }}
        run: |
          set -euo pipefail
          source .venv/bin/activate
          cd tests
          rm -rf types
          python -m benchmark.benchmark_store \
            --store-url "$STORE_URL" \
            ${{ matrix.workload.args }}
      - name: Record workload end
        if: ${{ always() }}
        run: echo "BENCHMARK_END=$(date -u +%FT%TZ)" >> "$GITHUB_ENV"
      - name: Collect docker logs
        # Best-effort per-service log capture; runs even when the benchmark fails.
        if: ${{ always() }}
        run: |
          set -euo pipefail
          mkdir -p "$ARTIFACT_DIR"
          cd docker
          readarray -t services < <(docker compose -f "$COMPOSE_FILE" config --services)
          if [ "${#services[@]}" -eq 0 ]; then
            echo "No services defined in compose file."
            exit 0
          fi
          for service in "${services[@]}"; do
            docker compose -f "$COMPOSE_FILE" logs "$service" > "../$ARTIFACT_DIR/docker-${service}-${WORKLOAD_ID}-${BACKEND_ID}.log" || true
          done
      - name: Upload workload artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ env.ARTIFACT_NAME }}
          path: ${{ env.ARTIFACT_DIR }}
          if-no-files-found: error