# Benchmark #94
# NOTE: GitHub's web viewer flagged this file as containing hidden or
# bidirectional Unicode text; review it in an editor that reveals hidden
# Unicode characters before trusting a visual diff.
# Benchmark workflow: runs store benchmark scenarios against multiple storage
# backends (in-memory and MongoDB) behind a Prometheus stack, optionally
# routing OTLP traces through Kafka via an OTel Collector, then collects
# analysis output, docker logs, and archived Prometheus data as artifacts.
name: Benchmark

permissions:
  contents: read

on:
  workflow_dispatch:
  schedule:
    # Every Monday and Thursday at 3 AM UTC+8
    - cron: '0 19 * * 0,3'

jobs:
  benchmark:
    name: ${{ matrix.workload.kind }} (${{ matrix.backend.id }}, ${{ matrix.workload.display }}, ${{ matrix.trace_sink }})
    runs-on: ${{ matrix.workload.runner }}
    timeout-minutes: ${{ matrix.workload.timeout }}
    strategy:
      fail-fast: false
      matrix:
        backend:
          - id: memory
            compose_file: compose.prometheus-memory-store.yml
          - id: mongo
            compose_file: compose.prometheus-mongo-store.yml
        # trace_sink: [store, kafka]
        trace_sink: [kafka]
        workload:
          - id: scenario-minimal-scale
            display: Minimal production scale
            kind: scenario
            store_workers: 4
            runner:
              - self-hosted
              - 1ES.Pool=agl-runner-cpu
            timeout: 45
            args: >-
              --mode batch
              --total-tasks 4096
              --batch-size 256
              --n-runners 32
              --max-rounds 6
              --sleep-seconds 0.5
          - id: scenario-medium-scale
            display: Medium production scale
            kind: scenario
            store_workers: 16
            runner:
              - self-hosted
              - 1ES.Pool=agl-runner-cpu
            timeout: 45
            args: >-
              --mode batch
              --total-tasks 10000
              --batch-size 1000
              --n-runners 100
              --max-rounds 10
              --sleep-seconds 0.1
          - id: scenario-midhigh-scale
            display: Mid-high production scale
            kind: scenario
            store_workers: 24
            runner:
              - self-hosted
              - 1ES.Pool=agl-runner-cpu
            timeout: 60
            args: >-
              --mode batch
              --total-tasks 20000
              --batch-size 2048
              --n-runners 300
              --max-rounds 6
              --sleep-seconds 0.1
          # - id: scenario-large-batch
          #   display: Large batch waves
          #   kind: scenario
          #   store_workers: 96
          #   runner:
          #     - self-hosted
          #     - 1ES.Pool=agl-runner-cpu-high
          #   timeout: 120
          #   args: >-
          #     --mode batch
          #     --total-tasks 50000
          #     --batch-size 8192
          #     --n-runners 1000
          #     --max-rounds 3
          #     --sleep-seconds 0.1
          - id: scenario-long-queues
            display: Long rollout queues
            kind: scenario
            store_workers: 48
            runner:
              - self-hosted
              - 1ES.Pool=agl-runner-cpu
            timeout: 120
            args: >-
              --mode batch_partial
              --total-tasks 50000
              --batch-size 1024
              --n-runners 256
              --remaining-tasks 4096
              --max-rounds 4
              --sleep-seconds 0.1
          # - id: scenario-high-concurrency
          #   display: High-throughput concurrent requests
          #   kind: scenario
          #   store_workers: 96
          #   runner:
          #     - self-hosted
          #     - 1ES.Pool=agl-runner-cpu-high
          #   timeout: 120
          #   args: >-
          #     --mode single
          #     --total-tasks 50000
          #     --concurrency 2048
          #     --n-runners 256
          #     --max-rounds 2
          #     --sleep-seconds 0.1
          # - id: scenario-heavy-traces
          #   display: Heavy rollouts with deep traces
          #   kind: scenario
          #   store_workers: 96
          #   runner:
          #     - self-hosted
          #     - 1ES.Pool=agl-runner-cpu-high
          #   timeout: 60
          #   args: >-
          #     --mode batch_partial
          #     --total-tasks 10000
          #     --batch-size 1024
          #     --remaining-tasks 256
          #     --n-runners 512
          #     --max-rounds 20
          #     --sleep-seconds 1.0
          # - id: micro-worker
          #   display: Update worker
          #   kind: micro
          #   store_workers: 8
          #   runner: ubuntu-latest
          #   timeout: 30
          #   cli: worker
          # - id: micro-dequeue-empty
          #   display: Dequeue empty
          #   kind: micro
          #   store_workers: 8
          #   runner: ubuntu-latest
          #   timeout: 30
          #   cli: dequeue-empty
          # - id: micro-rollout
          #   display: Rollout + span
          #   kind: micro
          #   store_workers: 8
          #   runner: ubuntu-latest
          #   timeout: 30
          #   cli: rollout
          # - id: micro-dequeue-update-attempt
          #   display: Dequeue + update attempt
          #   kind: micro
          #   store_workers: 8
          #   runner: ubuntu-latest
          #   timeout: 30
          #   cli: dequeue-update-attempt
          # - id: micro-dequeue-only
          #   display: Dequeue only
          #   kind: micro
          #   store_workers: 8
          #   runner: ubuntu-latest
          #   timeout: 30
          #   cli: dequeue-only
          # - id: micro-metrics
          #   display: Multi-metric fan-out
          #   kind: micro
          #   store_workers: 8
          #   runner: ubuntu-latest
          #   timeout: 15
          #   cli: metrics
    env:
      PYTHONUNBUFFERED: "1"
      STORE_URL: http://localhost:4747
      STORE_API_URL: http://localhost:4747/v1/agl
      PROM_URL: http://localhost:9090
      GITHUB_ACTIONS_TIMEOUT_MINUTES: ${{ matrix.workload.timeout }}
      WORKLOAD_KIND: ${{ matrix.workload.kind }}
      WORKLOAD_ID: ${{ matrix.workload.id }}
      BACKEND_ID: ${{ matrix.backend.id }}
      TRACE_SINK_ID: ${{ matrix.trace_sink }}
      ARTIFACT_DIR: ${{ format('artifacts/{0}-{1}-{2}', matrix.workload.id, matrix.backend.id, matrix.trace_sink) }}
      COMPOSE_FILE: ${{ matrix.backend.compose_file }}
      AGL_STORE_N_WORKERS: ${{ matrix.workload.store_workers }}
      ANALYSIS_FILE: ${{ format('analysis-{0}-{1}.log', matrix.workload.id, matrix.trace_sink) }}
      SUMMARY_FILE: ${{ format('summary-{0}-{1}.log', matrix.workload.id, matrix.trace_sink) }}
      PROM_ARCHIVE_BASENAME: ${{ format('prometheus-{0}-{1}', matrix.workload.id, matrix.backend.id) }}
      ARTIFACT_NAME: ${{ format('{0}-{1}-{2}', matrix.workload.id, matrix.backend.id, matrix.trace_sink) }}
    steps:
      - uses: actions/checkout@v4
      - uses: astral-sh/setup-uv@v7
        with:
          enable-cache: true
          python-version: '3.12'
      - name: Sync dependencies
        run: uv sync --frozen --extra mongo --group core-stable --group dev
      - name: Check disk space
        run: df -h
      - name: Reset benchmark data directories
        run: |
          set -euo pipefail
          cd docker
          rm -rf data
          bash setup.sh
      - name: Launch ${{ matrix.backend.id }} Prometheus stack
        run: |
          set -euo pipefail
          cd docker
          docker compose -f "$COMPOSE_FILE" down -v || true
          docker compose -f "$COMPOSE_FILE" up -d --quiet-pull
      - name: Wait for store readiness
        run: |
          set -euo pipefail
          for attempt in {1..60}; do
            if curl -fsS "$STORE_API_URL/health" >/dev/null 2>&1; then
              sleep 1
              curl -fsS "$STORE_API_URL/rollouts"  # Warm up the scraper
              sleep 15  # Allow some time for the baseline metrics to be established
              exit 0
            fi
            sleep 1
          done
          echo "Store did not become ready in time" >&2
          # show logs for debugging
          cd docker && docker compose -f "$COMPOSE_FILE" logs app
          exit 1
      - name: Configure trace sink (store vs kafka)
        run: |
          set -euo pipefail
          if [ "${{ matrix.trace_sink }}" = "kafka" ]; then
            echo "AGL_OTLP_ENDPOINT=http://localhost:4318/v1/traces" >> "$GITHUB_ENV"
          fi
      - name: Launch Kafka + OTel Collector (OTLP -> Kafka)
        if: ${{ matrix.trace_sink == 'kafka' }}
        run: |
          set -euo pipefail
          cd docker
          # Generate OTel Collector config (OTLP/HTTP receiver -> Kafka exporter)
          cat > otelcol-kafka.yml <<'YAML'
          receivers:
            otlp:
              protocols:
                http:
                  endpoint: 0.0.0.0:4318
          processors:
            batch: {}
          exporters:
            kafka:
              brokers: ["kafka:9092"]
              topic: "agl-otlp-spans"
              encoding: otlp_proto
          service:
            pipelines:
              traces:
                receivers: [otlp]
                processors: [batch]
                exporters: [kafka]
          YAML
          # Launch Kafka + Zookeeper + OTel Collector
          cat > compose.kafka-otel.yml <<'YAML'
          services:
            zookeeper:
              image: confluentinc/cp-zookeeper:7.6.1
              environment:
                ZOOKEEPER_CLIENT_PORT: 2181
                ZOOKEEPER_TICK_TIME: 2000
            kafka:
              image: confluentinc/cp-kafka:7.6.1
              depends_on: [zookeeper]
              environment:
                KAFKA_BROKER_ID: 1
                KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
                KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
                KAFKA_LISTENERS: PLAINTEXT://0.0.0.0:9092
                KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092
                KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT
                # Enlarge max message size to accommodate large spans
                KAFKA_MESSAGE_MAX_BYTES: "10000000"
                KAFKA_REPLICA_FETCH_MAX_BYTES: "10000000"
                KAFKA_SOCKET_REQUEST_MAX_BYTES: "10000000"
            otelcol:
              image: otel/opentelemetry-collector-contrib:latest
              depends_on: [kafka]
              # command: ["--config=/etc/otelcol/config.yml"]
              command:
                - "--config=/etc/otelcol/config.yml"
                - "--set=service.telemetry.logs.level=debug"
              volumes:
                - ./otelcol-kafka.yml:/etc/otelcol/config.yml:ro
              ports:
                - "4318:4318"
          YAML
          docker compose -p agl-kafka -f compose.kafka-otel.yml down -v || true
          docker compose -p agl-kafka -f compose.kafka-otel.yml up -d --quiet-pull
          # Create topic (idempotent)
          docker compose -p agl-kafka -f compose.kafka-otel.yml exec -T kafka \
            kafka-topics --bootstrap-server kafka:9092 \
            --create --if-not-exists \
            --topic agl-otlp-spans --partitions 3 --replication-factor 1
          # Wait for OTLP/HTTP port to be reachable on the host
          for attempt in {1..30}; do
            if (echo > /dev/tcp/127.0.0.1/4318) >/dev/null 2>&1; then
              exit 0
            fi
            sleep 1
          done
          echo "OTel Collector port 4318 not reachable in time" >&2
          docker compose -p agl-kafka -f compose.kafka-otel.yml logs otelcol || true
          exit 1
      - name: Prepare artifact directory
        run: mkdir -p "$ARTIFACT_DIR"
      - name: Record workload start
        run: echo "BENCHMARK_START=$(date -u +%FT%TZ)" >> "$GITHUB_ENV"
      - name: (Scenario) Run ${{ matrix.workload.display }} workload
        if: ${{ matrix.workload.kind == 'scenario' }}
        run: |
          set -euo pipefail
          uv run --locked --no-sync python -m tests.benchmark.benchmark_store \
            --store-url "$STORE_URL" \
            ${{ matrix.workload.args }}
      - name: (Micro) Run ${{ matrix.workload.display }}
        if: ${{ matrix.workload.kind == 'micro' }}
        run: |
          set -euo pipefail
          mkdir -p "$ARTIFACT_DIR"
          uv run --locked --no-sync python -m tests.benchmark.micro_benchmark \
            --store-url "$STORE_URL" \
            --summary-file "$ARTIFACT_DIR/$SUMMARY_FILE" \
            "${{ matrix.workload.cli }}" | tee "$ARTIFACT_DIR/${{ matrix.workload.id }}.txt"
      - name: Record workload end
        if: ${{ always() }}
        run: echo "BENCHMARK_END=$(date -u +%FT%TZ)" >> "$GITHUB_ENV"
      - name: Show micro benchmark summary
        if: ${{ always() && matrix.workload.kind == 'micro' }}
        run: |
          set -euo pipefail
          summary_file="$ARTIFACT_DIR/$SUMMARY_FILE"
          if [ -f "$summary_file" ]; then
            echo "Micro benchmark summary ($WORKLOAD_ID/$BACKEND_ID):"
            cat "$summary_file"
          else
            echo "Summary file not found: $summary_file"
          fi
      - name: Run workload analysis
        if: ${{ always() }}
        run: |
          set -euo pipefail
          mkdir -p "$ARTIFACT_DIR"
          if [ -z "${BENCHMARK_START:-}" ] || [ -z "${BENCHMARK_END:-}" ]; then
            echo "Analysis skipped: benchmark window not recorded." > "$ARTIFACT_DIR/$ANALYSIS_FILE"
            exit 1
          fi
          uv run --locked --no-sync python -m tests.benchmark.analysis \
            --prom-url "$PROM_URL" \
            --store-url "$STORE_API_URL" \
            --start "$BENCHMARK_START" \
            --end "$BENCHMARK_END" \
            | tee "$ARTIFACT_DIR/$ANALYSIS_FILE"
      - name: Collect docker logs
        if: ${{ always() }}
        run: |
          set -euo pipefail
          mkdir -p "$ARTIFACT_DIR"
          cd docker
          readarray -t services < <(docker compose -f "$COMPOSE_FILE" config --services)
          if [ "${#services[@]}" -eq 0 ]; then
            echo "No services defined in compose file."
            exit 0
          fi
          for service in "${services[@]}"; do
            docker compose -f "$COMPOSE_FILE" logs "$service" > "../$ARTIFACT_DIR/docker-${service}-${WORKLOAD_ID}-${BACKEND_ID}.log" || true
          done
      - name: Collect Kafka + OTel Collector logs
        if: ${{ always() && matrix.trace_sink == 'kafka' }}
        run: |
          set -euo pipefail
          mkdir -p "$ARTIFACT_DIR"
          cd docker
          if [ -f compose.kafka-otel.yml ]; then
            for service in zookeeper kafka otelcol; do
              docker compose -p agl-kafka -f compose.kafka-otel.yml logs "$service" \
                > "../$ARTIFACT_DIR/docker-kafka-${service}-${WORKLOAD_ID}-${BACKEND_ID}.log" || true
            done
          fi
      - name: Stop Kafka + OTel Collector
        if: ${{ always() && matrix.trace_sink == 'kafka' }}
        run: |
          set -euo pipefail
          cd docker
          if [ -f compose.kafka-otel.yml ]; then
            docker compose -p agl-kafka -f compose.kafka-otel.yml down -v || true
          fi
      - name: Stop ${{ matrix.backend.id }} Prometheus stack
        if: ${{ always() }}
        run: |
          set -euo pipefail
          cd docker
          docker compose -f "$COMPOSE_FILE" down -v || true
      - name: Archive Prometheus metrics
        if: ${{ always() }}
        run: |
          set -euo pipefail
          mkdir -p "$ARTIFACT_DIR"
          if [ -d docker/data/prometheus ]; then
            tar -C docker/data -czf "$ARTIFACT_DIR/${PROM_ARCHIVE_BASENAME}.tar.gz" prometheus
          fi
      - name: Upload workload artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ env.ARTIFACT_NAME }}
          path: ${{ env.ARTIFACT_DIR }}
          if-no-files-found: error
  # collection-benchmarks:
  #   name: collection (${{ matrix.backend.id }}, ${{ matrix.workload.id }})
  #   runs-on: ${{ matrix.backend.runner }}
  #   timeout-minutes: 15
  #   strategy:
  #     fail-fast: false
  #     matrix:
  #       backend:
  #         - id: memory
  #           needs_mongo: false
  #           runner: ubuntu-latest
  #         - id: mongo
  #           needs_mongo: true
  #           runner: ubuntu-latest
  #       workload:
  #         - id: high-insert
  #           total_tasks: 50000
  #           concurrency: 2048
  #           type: insert
  #         - id: medium-insert
  #           total_tasks: 50000
  #           concurrency: 128
  #           type: insert
  #         - id: low-insert
  #           total_tasks: 50000
  #           concurrency: 4
  #           type: insert
  #         - id: high-dequeue
  #           total_tasks: 50000
  #           concurrency: 2048
  #           type: dequeue
  #         - id: medium-dequeue
  #           total_tasks: 50000
  #           concurrency: 128
  #           type: dequeue
  #         - id: low-dequeue
  #           total_tasks: 50000
  #           concurrency: 4
  #           type: dequeue
  #   env:
  #     ARTIFACT_DIR: ${{ format('artifacts/{0}-{1}', matrix.backend.id, matrix.workload.id) }}
  #     SUMMARY_FILE: ${{ format('artifacts/{0}-{1}/summary-{0}-{1}.jsonl', matrix.backend.id, matrix.workload.id) }}
  #     ARTIFACT_NAME: ${{ format('collections-{0}-{1}', matrix.backend.id, matrix.workload.id) }}
  #     MONGO_URI: mongodb://localhost:27017/?replicaSet=rs0
  #   steps:
  #     - uses: actions/checkout@v4
  #     - uses: astral-sh/setup-uv@v7
  #       with:
  #         enable-cache: true
  #         python-version: '3.12'
  #     - name: Sync dependencies
  #       run: uv sync --frozen --extra mongo --group core-stable --group dev
  #     - name: Launch MongoDB
  #       if: ${{ matrix.backend.needs_mongo }}
  #       run: |
  #         set -euo pipefail
  #         cd docker
  #         docker compose -f compose.mongo.yml down -v || true
  #         docker compose -f compose.mongo.yml up -d --quiet-pull
  #         for attempt in {1..60}; do
  #           if docker compose -f compose.mongo.yml exec -T mongo mongosh --quiet --eval 'db.runCommand({ping:1})' >/dev/null 2>&1; then
  #             exit 0
  #           fi
  #           sleep 2
  #         done
  #         echo "MongoDB did not become ready in time" >&2
  #         docker compose -f compose.mongo.yml logs mongo
  #         exit 1
  #     - name: Run collection benchmark
  #       run: |
  #         set -euo pipefail
  #         mkdir -p "$ARTIFACT_DIR"
  #         echo "Running collection benchmark (backend=${{ matrix.backend.id }}, workload=${{ matrix.workload.id }})"
  #         uv run --locked --no-sync python -m tests.benchmark.collection_benchmark \
  #           "${{ matrix.workload.type }}" \
  #           --backend "${{ matrix.backend.id }}" \
  #           --total-tasks "${{ matrix.workload.total_tasks }}" \
  #           --concurrency "${{ matrix.workload.concurrency }}" \
  #           --task-prefix "${{ matrix.backend.id }}-${{ matrix.workload.id }}" \
  #           --summary-file "$SUMMARY_FILE" \
  #           --mongo-uri "$MONGO_URI" \
  #           --mongo-database agentlightning_collection_bench
  #     - name: Show collection benchmark summary
  #       if: ${{ always() }}
  #       run: |
  #         set -euo pipefail
  #         if [ -f "$SUMMARY_FILE" ]; then
  #           echo "Collection benchmark summary (${{ matrix.backend.id }}):"
  #           cat "$SUMMARY_FILE"
  #         else
  #           echo "Summary file not found: $SUMMARY_FILE"
  #         fi
  #     - name: Stop MongoDB
  #       if: ${{ always() && matrix.backend.needs_mongo }}
  #       run: |
  #         set -euo pipefail
  #         cd docker
  #         docker compose -f compose.mongo.yml down -v || true
  #     - name: Upload collection artifacts
  #       if: ${{ always() }}
  #       uses: actions/upload-artifact@v4
  #       with:
  #         name: ${{ env.ARTIFACT_NAME }}
  #         path: ${{ env.ARTIFACT_DIR }}
  #         if-no-files-found: error