# Workflow file captured from GitHub Actions run "Benchmark #76".
# Manually-triggered store benchmark: runs one job per (backend, workload)
# matrix combination against a dockerized store + Prometheus stack.
name: Benchmark

permissions:
  contents: read

on:
  workflow_dispatch:

jobs:
  benchmark:
    name: ${{ matrix.workload.kind }} (${{ matrix.backend.id }}, ${{ matrix.workload.display }})
    # Runner labels, job timeout, and store sizing all come from the workload entry.
    runs-on: ${{ matrix.workload.runner }}
    timeout-minutes: ${{ matrix.workload.timeout }}
    strategy:
      # Let every matrix cell finish even if a sibling workload fails.
      fail-fast: false
      matrix:
        backend:
          - id: memory
            compose_file: compose.prometheus-memory-store.yml
        workload:
          # Each workload defines: benchmark CLI args, store worker count,
          # runner pool labels, and a per-job timeout (minutes).
          - id: scenario-minimal-scale
            display: Minimal production scale
            kind: scenario
            store_workers: 4
            runner:
              - self-hosted
              - 1ES.Pool=agl-runner-cpu
            timeout: 120
            args: >-
              --mode batch
              --total-tasks 4096
              --batch-size 256
              --n-runners 32
              --max-rounds 6
              --sleep-seconds 0.5
          - id: scenario-medium-scale
            display: Medium production scale
            kind: scenario
            store_workers: 16
            runner:
              - self-hosted
              - 1ES.Pool=agl-runner-cpu
            timeout: 120
            args: >-
              --mode batch
              --total-tasks 10000
              --batch-size 1000
              --n-runners 100
              --max-rounds 10
              --sleep-seconds 0.1
          - id: scenario-midhigh-scale
            display: Mid-high production scale
            kind: scenario
            store_workers: 24
            runner:
              - self-hosted
              - 1ES.Pool=agl-runner-cpu
            timeout: 120
            args: >-
              --mode batch
              --total-tasks 20000
              --batch-size 2048
              --n-runners 300
              --max-rounds 6
              --sleep-seconds 0.1
          - id: scenario-large-batch
            display: Large batch waves
            kind: scenario
            store_workers: 32
            runner:
              - self-hosted
              - 1ES.Pool=agl-runner-cpu-high
            timeout: 120
            args: >-
              --mode batch
              --total-tasks 50000
              --batch-size 8192
              --n-runners 1000
              --max-rounds 3
              --sleep-seconds 0.1
          - id: scenario-long-queues
            display: Long rollout queues
            kind: scenario
            store_workers: 32
            runner:
              - self-hosted
              - 1ES.Pool=agl-runner-cpu
            timeout: 120
            args: >-
              --mode batch_partial
              --total-tasks 50000
              --batch-size 1024
              --n-runners 256
              --remaining-tasks 4096
              --max-rounds 4
              --sleep-seconds 0.1
          - id: scenario-high-concurrency
            display: High-throughput concurrent requests
            kind: scenario
            store_workers: 32
            runner:
              - self-hosted
              - 1ES.Pool=agl-runner-cpu-high
            timeout: 120
            args: >-
              --mode single
              --total-tasks 50000
              --concurrency 2048
              --n-runners 256
              --max-rounds 2
              --sleep-seconds 0.1
          - id: scenario-heavy-traces
            display: Heavy rollouts with deep traces
            kind: scenario
            store_workers: 64
            runner:
              - self-hosted
              - 1ES.Pool=agl-runner-cpu-high
            timeout: 120
            args: >-
              --mode batch_partial
              --total-tasks 10000
              --batch-size 1024
              --remaining-tasks 256
              --n-runners 512
              --max-rounds 20
              --sleep-seconds 1.0
    env:
      # Store and Prometheus endpoints exposed by the docker compose stack.
      STORE_URL: http://localhost:4747
      STORE_API_URL: http://localhost:4747
      PROM_URL: http://localhost:9090
      WORKLOAD_KIND: ${{ matrix.workload.kind }}
      WORKLOAD_ID: ${{ matrix.workload.id }}
      BACKEND_ID: ${{ matrix.backend.id }}
      ARTIFACT_DIR: ${{ format('artifacts/{0}-{1}', matrix.workload.id, matrix.backend.id) }}
      COMPOSE_FILE: ${{ matrix.backend.compose_file }}
      AGL_STORE_N_WORKERS: ${{ matrix.workload.store_workers }}
      ANALYSIS_FILE: ${{ format('analysis-{0}.log', matrix.workload.id) }}
      SUMMARY_FILE: ${{ format('summary-{0}.log', matrix.workload.id) }}
      PROM_ARCHIVE_BASENAME: ${{ format('prometheus-{0}-{1}', matrix.workload.id, matrix.backend.id) }}
      ARTIFACT_NAME: ${{ format('{0}-{1}', matrix.workload.id, matrix.backend.id) }}
    steps:
      - uses: actions/checkout@v4
      - uses: astral-sh/setup-uv@v7
        with:
          enable-cache: true
          python-version: '3.12'
      - name: Sync dependencies
        run: uv sync --frozen --extra mongo --group core-stable --group dev
      - name: Install Legacy Agent-lightning
        run: uv pip install agentlightning==0.2.2
      - name: Check disk space
        run: df -h
      - name: Reset benchmark data directories
        run: |
          set -euo pipefail
          cd docker
          rm -rf data
          bash setup.sh
      - name: Launch ${{ matrix.backend.id }} Prometheus stack
        run: |
          set -euo pipefail
          cd docker
          docker compose -f "$COMPOSE_FILE" down -v || true
          docker compose -f "$COMPOSE_FILE" up -d --quiet-pull
      - name: Wait for store readiness
        # Polls the health endpoint for up to 60 seconds; dumps app logs on failure.
        run: |
          set -euo pipefail
          for attempt in {1..60}; do
            if curl -fsS "$STORE_API_URL/health" >/dev/null 2>&1; then
              exit 0
            fi
            sleep 1
          done
          echo "Store did not become ready in time" >&2
          # show logs for debugging
          cd docker && docker compose -f "$COMPOSE_FILE" logs app
          exit 1
      - name: Prepare artifact directory
        run: mkdir -p "$ARTIFACT_DIR"
      - name: Record workload start
        run: echo "BENCHMARK_START=$(date -u +%FT%TZ)" >> "$GITHUB_ENV"
      - name: (Scenario) Run ${{ matrix.workload.display }} workload
        if: ${{ matrix.workload.kind == 'scenario' }}
        run: |
          set -euo pipefail
          source .venv/bin/activate
          cd tests
          rm -rf types
          python -m benchmark.benchmark_store \
            --store-url "$STORE_URL" \
            ${{ matrix.workload.args }}
      - name: Record workload end
        if: ${{ always() }}
        run: echo "BENCHMARK_END=$(date -u +%FT%TZ)" >> "$GITHUB_ENV"
      - name: Collect docker logs
        # Best-effort per-service log capture; runs even when the benchmark fails.
        if: ${{ always() }}
        run: |
          set -euo pipefail
          mkdir -p "$ARTIFACT_DIR"
          cd docker
          readarray -t services < <(docker compose -f "$COMPOSE_FILE" config --services)
          if [ "${#services[@]}" -eq 0 ]; then
            echo "No services defined in compose file."
            exit 0
          fi
          for service in "${services[@]}"; do
            docker compose -f "$COMPOSE_FILE" logs "$service" > "../$ARTIFACT_DIR/docker-${service}-${WORKLOAD_ID}-${BACKEND_ID}.log" || true
          done
      - name: Upload workload artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ env.ARTIFACT_NAME }}
          path: ${{ env.ARTIFACT_DIR }}
          if-no-files-found: error