name: Benchmark
permissions:
  contents: read
on:
  workflow_dispatch:
  schedule:
    # Every Monday and Thursday at 3 AM UTC+8 (cron runs in UTC: 19:00 on Sunday and Wednesday)
    - cron: '0 19 * * 0,3'
jobs:
  benchmark:
    name: ${{ matrix.workload.kind }} (${{ matrix.backend.id }}, ${{ matrix.workload.display }}, ${{ matrix.trace_sink }})
    runs-on: ${{ matrix.workload.runner }}
    timeout-minutes: ${{ matrix.workload.timeout }}
    strategy:
      fail-fast: false
      matrix:
        backend:
          - id: memory
            compose_file: compose.prometheus-memory-store.yml
          - id: mongo
            compose_file: compose.prometheus-mongo-store.yml
        # trace_sink: [store, kafka]
        trace_sink: [kafka]
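        # Each workload entry supplies an id/display label, a kind ("scenario"
        # or "micro"), a store worker count, runner labels, a per-job timeout,
        # and either CLI args (scenario) or a `cli` sub-command (micro).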
        workload:
          - id: scenario-minimal-scale
            display: Minimal production scale
            kind: scenario
            store_workers: 4
            runner:
              - self-hosted
              - 1ES.Pool=agl-runner-cpu
            timeout: 45
            args: >-
              --mode batch
              --total-tasks 4096
              --batch-size 256
              --n-runners 32
              --max-rounds 6
              --sleep-seconds 0.5
          - id: scenario-medium-scale
            display: Medium production scale
            kind: scenario
            store_workers: 16
            runner:
              - self-hosted
              - 1ES.Pool=agl-runner-cpu
            timeout: 45
            args: >-
              --mode batch
              --total-tasks 10000
              --batch-size 1000
              --n-runners 100
              --max-rounds 10
              --sleep-seconds 0.1
          - id: scenario-midhigh-scale
            display: Mid-high production scale
            kind: scenario
            store_workers: 24
            runner:
              - self-hosted
              - 1ES.Pool=agl-runner-cpu
            timeout: 60
            args: >-
              --mode batch
              --total-tasks 20000
              --batch-size 2048
              --n-runners 300
              --max-rounds 6
              --sleep-seconds 0.1
          # - id: scenario-large-batch
          #   display: Large batch waves
          #   kind: scenario
          #   store_workers: 96
          #   runner:
          #     - self-hosted
          #     - 1ES.Pool=agl-runner-cpu-high
          #   timeout: 120
          #   args: >-
          #     --mode batch
          #     --total-tasks 50000
          #     --batch-size 8192
          #     --n-runners 1000
          #     --max-rounds 3
          #     --sleep-seconds 0.1
          - id: scenario-long-queues
            display: Long rollout queues
            kind: scenario
            store_workers: 48
            runner:
              - self-hosted
              - 1ES.Pool=agl-runner-cpu
            timeout: 120
            args: >-
              --mode batch_partial
              --total-tasks 50000
              --batch-size 1024
              --n-runners 256
              --remaining-tasks 4096
              --max-rounds 4
              --sleep-seconds 0.1
          # - id: scenario-high-concurrency
          #   display: High-throughput concurrent requests
          #   kind: scenario
          #   store_workers: 96
          #   runner:
          #     - self-hosted
          #     - 1ES.Pool=agl-runner-cpu-high
          #   timeout: 120
          #   args: >-
          #     --mode single
          #     --total-tasks 50000
          #     --concurrency 2048
          #     --n-runners 256
          #     --max-rounds 2
          #     --sleep-seconds 0.1
          # - id: scenario-heavy-traces
          #   display: Heavy rollouts with deep traces
          #   kind: scenario
          #   store_workers: 96
          #   runner:
          #     - self-hosted
          #     - 1ES.Pool=agl-runner-cpu-high
          #   timeout: 60
          #   args: >-
          #     --mode batch_partial
          #     --total-tasks 10000
          #     --batch-size 1024
          #     --remaining-tasks 256
          #     --n-runners 512
          #     --max-rounds 20
          #     --sleep-seconds 1.0
          # - id: micro-worker
          #   display: Update worker
          #   kind: micro
          #   store_workers: 8
          #   runner: ubuntu-latest
          #   timeout: 30
          #   cli: worker
          # - id: micro-dequeue-empty
          #   display: Dequeue empty
          #   kind: micro
          #   store_workers: 8
          #   runner: ubuntu-latest
          #   timeout: 30
          #   cli: dequeue-empty
          # - id: micro-rollout
          #   display: Rollout + span
          #   kind: micro
          #   store_workers: 8
          #   runner: ubuntu-latest
          #   timeout: 30
          #   cli: rollout
          # - id: micro-dequeue-update-attempt
          #   display: Dequeue + update attempt
          #   kind: micro
          #   store_workers: 8
          #   runner: ubuntu-latest
          #   timeout: 30
          #   cli: dequeue-update-attempt
          # - id: micro-dequeue-only
          #   display: Dequeue only
          #   kind: micro
          #   store_workers: 8
          #   runner: ubuntu-latest
          #   timeout: 30
          #   cli: dequeue-only
          # - id: micro-metrics
          #   display: Multi-metric fan-out
          #   kind: micro
          #   store_workers: 8
          #   runner: ubuntu-latest
          #   timeout: 15
          #   cli: metrics
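          # The micro workloads above are currently disabled; when re-enabled,
          # each entry's `cli` field is passed as the positional sub-command to
          # tests.benchmark.micro_benchmark in the "(Micro) Run ..." step below.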
    env:
      PYTHONUNBUFFERED: "1"
      STORE_URL: http://localhost:4747
      STORE_API_URL: http://localhost:4747/v1/agl
      PROM_URL: http://localhost:9090
      GITHUB_ACTIONS_TIMEOUT_MINUTES: ${{ matrix.workload.timeout }}
      WORKLOAD_KIND: ${{ matrix.workload.kind }}
      WORKLOAD_ID: ${{ matrix.workload.id }}
      BACKEND_ID: ${{ matrix.backend.id }}
      TRACE_SINK_ID: ${{ matrix.trace_sink }}
      ARTIFACT_DIR: ${{ format('artifacts/{0}-{1}-{2}', matrix.workload.id, matrix.backend.id, matrix.trace_sink) }}
      COMPOSE_FILE: ${{ matrix.backend.compose_file }}
      AGL_STORE_N_WORKERS: ${{ matrix.workload.store_workers }}
      ANALYSIS_FILE: ${{ format('analysis-{0}-{1}.log', matrix.workload.id, matrix.trace_sink) }}
      SUMMARY_FILE: ${{ format('summary-{0}-{1}.log', matrix.workload.id, matrix.trace_sink) }}
      PROM_ARCHIVE_BASENAME: ${{ format('prometheus-{0}-{1}', matrix.workload.id, matrix.backend.id) }}
      ARTIFACT_NAME: ${{ format('{0}-{1}-{2}', matrix.workload.id, matrix.backend.id, matrix.trace_sink) }}
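    # Artifact directories and file names embed the matrix coordinates
    # (workload, backend, trace sink) so parallel matrix jobs never collide.
    # AGL_STORE_N_WORKERS is presumably read by the store service in the
    # compose stack to size its worker pool per workload.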
    steps:
      - uses: actions/checkout@v4
      - uses: astral-sh/setup-uv@v7
        with:
          enable-cache: true
          python-version: '3.12'
      - name: Sync dependencies
        run: uv sync --frozen --extra mongo --group core-stable --group dev
      - name: Check disk space
        run: df -h
      - name: Reset benchmark data directories
        run: |
          set -euo pipefail
          cd docker
          rm -rf data
          bash setup.sh
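      # docker/setup.sh presumably recreates the data directory layout that the
      # compose stack mounts. The backend-specific compose file selected by the
      # matrix is assumed to publish the store on port 4747 and Prometheus on
      # port 9090, matching STORE_URL and PROM_URL above.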
      - name: Launch ${{ matrix.backend.id }} Prometheus stack
        run: |
          set -euo pipefail
          cd docker
          docker compose -f "$COMPOSE_FILE" down -v || true
          docker compose -f "$COMPOSE_FILE" up -d --quiet-pull
      - name: Wait for store readiness
        run: |
          set -euo pipefail
          for attempt in {1..60}; do
            if curl -fsS "$STORE_API_URL/health" >/dev/null 2>&1; then
              sleep 1
              curl -fsS "$STORE_API_URL/rollouts" # Warm up the scraper
              sleep 15 # Allow some time for the baseline metrics to be established
              exit 0
            fi
            sleep 1
          done
          echo "Store did not become ready in time" >&2
          # show logs for debugging
          cd docker && docker compose -f "$COMPOSE_FILE" logs app
          exit 1
      - name: Configure trace sink (store vs kafka)
        run: |
          set -euo pipefail
          if [ "${{ matrix.trace_sink }}" = "kafka" ]; then
            echo "AGL_OTLP_ENDPOINT=http://localhost:4318/v1/traces" >> "$GITHUB_ENV"
          fi
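      # Note: with the (currently disabled) `store` trace sink, AGL_OTLP_ENDPOINT
      # is left unset, so spans are presumably written directly to the store
      # rather than routed through the collector.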
      - name: Launch Kafka + OTel Collector (OTLP -> Kafka)
        if: ${{ matrix.trace_sink == 'kafka' }}
        run: |
          set -euo pipefail
          cd docker
          # Generate OTel Collector config (OTLP/HTTP receiver -> Kafka exporter)
          cat > otelcol-kafka.yml <<'YAML'
          receivers:
            otlp:
              protocols:
                http:
                  endpoint: 0.0.0.0:4318
          processors:
            batch: {}
          exporters:
            kafka:
              brokers: ["kafka:9092"]
              topic: "agl-otlp-spans"
              encoding: otlp_proto
          service:
            pipelines:
              traces:
                receivers: [otlp]
                processors: [batch]
                exporters: [kafka]
          YAML
          # Launch Kafka + Zookeeper + OTel Collector
          cat > compose.kafka-otel.yml <<'YAML'
          services:
            zookeeper:
              image: confluentinc/cp-zookeeper:7.6.1
              environment:
                ZOOKEEPER_CLIENT_PORT: 2181
                ZOOKEEPER_TICK_TIME: 2000
            kafka:
              image: confluentinc/cp-kafka:7.6.1
              depends_on: [zookeeper]
              environment:
                KAFKA_BROKER_ID: 1
                KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
                KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
                KAFKA_LISTENERS: PLAINTEXT://0.0.0.0:9092
                KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092
                KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT
                # Enlarge max message size to accommodate large spans
                KAFKA_MESSAGE_MAX_BYTES: "10000000"
                KAFKA_REPLICA_FETCH_MAX_BYTES: "10000000"
                KAFKA_SOCKET_REQUEST_MAX_BYTES: "10000000"
            otelcol:
              image: otel/opentelemetry-collector-contrib:latest
              depends_on: [kafka]
              # command: ["--config=/etc/otelcol/config.yml"]
              command:
                - "--config=/etc/otelcol/config.yml"
                - "--set=service.telemetry.logs.level=debug"
              volumes:
                - ./otelcol-kafka.yml:/etc/otelcol/config.yml:ro
              ports:
                - "4318:4318"
          YAML
          docker compose -p agl-kafka -f compose.kafka-otel.yml down -v || true
          docker compose -p agl-kafka -f compose.kafka-otel.yml up -d --quiet-pull
          # Create topic (idempotent)
          docker compose -p agl-kafka -f compose.kafka-otel.yml exec -T kafka \
            kafka-topics --bootstrap-server kafka:9092 \
            --create --if-not-exists \
            --topic agl-otlp-spans --partitions 3 --replication-factor 1
          # Wait for OTLP/HTTP port to be reachable on the host
          for attempt in {1..30}; do
            if (echo > /dev/tcp/127.0.0.1/4318) >/dev/null 2>&1; then
              exit 0
            fi
            sleep 1
          done
          echo "OTel Collector port 4318 not reachable in time" >&2
          docker compose -p agl-kafka -f compose.kafka-otel.yml logs otelcol || true
          exit 1
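      # To spot-check that spans are actually landing on the topic (manual
      # debugging only, not part of the benchmark), something like the
      # following can be run against the same compose project:
      #   docker compose -p agl-kafka -f compose.kafka-otel.yml exec -T kafka \
      #     kafka-console-consumer --bootstrap-server kafka:9092 \
      #     --topic agl-otlp-spans --from-beginning --max-messages 1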
      - name: Prepare artifact directory
        run: mkdir -p "$ARTIFACT_DIR"
      - name: Record workload start
        run: echo "BENCHMARK_START=$(date -u +%FT%TZ)" >> "$GITHUB_ENV"
      - name: (Scenario) Run ${{ matrix.workload.display }} workload
        if: ${{ matrix.workload.kind == 'scenario' }}
        run: |
          set -euo pipefail
          uv run --locked --no-sync python -m tests.benchmark.benchmark_store \
            --store-url "$STORE_URL" \
            ${{ matrix.workload.args }}
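      # The same scenario should be reproducible outside CI (with the compose
      # stack already running) by invoking the module with any arg set from the
      # matrix, e.g.:
      #   uv run python -m tests.benchmark.benchmark_store --store-url http://localhost:4747 \
      #     --mode batch --total-tasks 4096 --batch-size 256 --n-runners 32 \
      #     --max-rounds 6 --sleep-seconds 0.5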
      - name: (Micro) Run ${{ matrix.workload.display }}
        if: ${{ matrix.workload.kind == 'micro' }}
        run: |
          set -euo pipefail
          mkdir -p "$ARTIFACT_DIR"
          uv run --locked --no-sync python -m tests.benchmark.micro_benchmark \
            --store-url "$STORE_URL" \
            --summary-file "$ARTIFACT_DIR/$SUMMARY_FILE" \
            "${{ matrix.workload.cli }}" | tee "$ARTIFACT_DIR/${{ matrix.workload.id }}.txt"
      - name: Record workload end
        if: ${{ always() }}
        run: echo "BENCHMARK_END=$(date -u +%FT%TZ)" >> "$GITHUB_ENV"
      - name: Show micro benchmark summary
        if: ${{ always() && matrix.workload.kind == 'micro' }}
        run: |
          set -euo pipefail
          summary_file="$ARTIFACT_DIR/$SUMMARY_FILE"
          if [ -f "$summary_file" ]; then
            echo "Micro benchmark summary ($WORKLOAD_ID/$BACKEND_ID):"
            cat "$summary_file"
          else
            echo "Summary file not found: $summary_file"
          fi
      - name: Run workload analysis
        if: ${{ always() }}
        run: |
          set -euo pipefail
          mkdir -p "$ARTIFACT_DIR"
          if [ -z "${BENCHMARK_START:-}" ] || [ -z "${BENCHMARK_END:-}" ]; then
            echo "Analysis skipped: benchmark window not recorded." > "$ARTIFACT_DIR/$ANALYSIS_FILE"
            exit 1
          fi
          uv run --locked --no-sync python -m tests.benchmark.analysis \
            --prom-url "$PROM_URL" \
            --store-url "$STORE_API_URL" \
            --start "$BENCHMARK_START" \
            --end "$BENCHMARK_END" \
            | tee "$ARTIFACT_DIR/$ANALYSIS_FILE"
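      # Prometheus can also be queried by hand over the recorded window via its
      # HTTP API (any PromQL expression in place of `up`), e.g.:
      #   curl -G "$PROM_URL/api/v1/query_range" \
      #     --data-urlencode 'query=up' \
      #     --data-urlencode "start=$BENCHMARK_START" \
      #     --data-urlencode "end=$BENCHMARK_END" \
      #     --data-urlencode 'step=15s'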
      - name: Collect docker logs
        if: ${{ always() }}
        run: |
          set -euo pipefail
          mkdir -p "$ARTIFACT_DIR"
          cd docker
          readarray -t services < <(docker compose -f "$COMPOSE_FILE" config --services)
          if [ "${#services[@]}" -eq 0 ]; then
            echo "No services defined in compose file."
            exit 0
          fi
          for service in "${services[@]}"; do
            docker compose -f "$COMPOSE_FILE" logs "$service" > "../$ARTIFACT_DIR/docker-${service}-${WORKLOAD_ID}-${BACKEND_ID}.log" || true
          done
      - name: Collect Kafka + OTel Collector logs
        if: ${{ always() && matrix.trace_sink == 'kafka' }}
        run: |
          set -euo pipefail
          mkdir -p "$ARTIFACT_DIR"
          cd docker
          if [ -f compose.kafka-otel.yml ]; then
            for service in zookeeper kafka otelcol; do
              docker compose -p agl-kafka -f compose.kafka-otel.yml logs "$service" \
                > "../$ARTIFACT_DIR/docker-kafka-${service}-${WORKLOAD_ID}-${BACKEND_ID}.log" || true
            done
          fi
      - name: Stop Kafka + OTel Collector
        if: ${{ always() && matrix.trace_sink == 'kafka' }}
        run: |
          set -euo pipefail
          cd docker
          if [ -f compose.kafka-otel.yml ]; then
            docker compose -p agl-kafka -f compose.kafka-otel.yml down -v || true
          fi
      - name: Stop ${{ matrix.backend.id }} Prometheus stack
        if: ${{ always() }}
        run: |
          set -euo pipefail
          cd docker
          docker compose -f "$COMPOSE_FILE" down -v || true
      - name: Archive Prometheus metrics
        if: ${{ always() }}
        run: |
          set -euo pipefail
          mkdir -p "$ARTIFACT_DIR"
          if [ -d docker/data/prometheus ]; then
            tar -C docker/data -czf "$ARTIFACT_DIR/${PROM_ARCHIVE_BASENAME}.tar.gz" prometheus
          fi
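      # The tarball captures docker/data/prometheus, presumably the on-disk
      # Prometheus TSDB; a past run can then be browsed locally with something
      # like:
      #   tar -xzf prometheus-<workload>-<backend>.tar.gz
      #   docker run --rm -p 9090:9090 -v "$PWD/prometheus:/prometheus" prom/prometheus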
      - name: Upload workload artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ env.ARTIFACT_NAME }}
          path: ${{ env.ARTIFACT_DIR }}
          if-no-files-found: error
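      # Artifacts for a given matrix cell can be fetched locally with the
      # GitHub CLI, e.g.:
      #   gh run download <run-id> -n scenario-minimal-scale-memory-kafka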
  # collection-benchmarks:
  #   name: collection (${{ matrix.backend.id }}, ${{ matrix.workload.id }})
  #   runs-on: ${{ matrix.backend.runner }}
  #   timeout-minutes: 15
  #   strategy:
  #     fail-fast: false
  #     matrix:
  #       backend:
  #         - id: memory
  #           needs_mongo: false
  #           runner: ubuntu-latest
  #         - id: mongo
  #           needs_mongo: true
  #           runner: ubuntu-latest
  #       workload:
  #         - id: high-insert
  #           total_tasks: 50000
  #           concurrency: 2048
  #           type: insert
  #         - id: medium-insert
  #           total_tasks: 50000
  #           concurrency: 128
  #           type: insert
  #         - id: low-insert
  #           total_tasks: 50000
  #           concurrency: 4
  #           type: insert
  #         - id: high-dequeue
  #           total_tasks: 50000
  #           concurrency: 2048
  #           type: dequeue
  #         - id: medium-dequeue
  #           total_tasks: 50000
  #           concurrency: 128
  #           type: dequeue
  #         - id: low-dequeue
  #           total_tasks: 50000
  #           concurrency: 4
  #           type: dequeue
  #   env:
  #     ARTIFACT_DIR: ${{ format('artifacts/{0}-{1}', matrix.backend.id, matrix.workload.id) }}
  #     SUMMARY_FILE: ${{ format('artifacts/{0}-{1}/summary-{0}-{1}.jsonl', matrix.backend.id, matrix.workload.id) }}
  #     ARTIFACT_NAME: ${{ format('collections-{0}-{1}', matrix.backend.id, matrix.workload.id) }}
  #     MONGO_URI: mongodb://localhost:27017/?replicaSet=rs0
  #   steps:
  #     - uses: actions/checkout@v4
  #     - uses: astral-sh/setup-uv@v7
  #       with:
  #         enable-cache: true
  #         python-version: '3.12'
  #     - name: Sync dependencies
  #       run: uv sync --frozen --extra mongo --group core-stable --group dev
  #     - name: Launch MongoDB
  #       if: ${{ matrix.backend.needs_mongo }}
  #       run: |
  #         set -euo pipefail
  #         cd docker
  #         docker compose -f compose.mongo.yml down -v || true
  #         docker compose -f compose.mongo.yml up -d --quiet-pull
  #         for attempt in {1..60}; do
  #           if docker compose -f compose.mongo.yml exec -T mongo mongosh --quiet --eval 'db.runCommand({ping:1})' >/dev/null 2>&1; then
  #             exit 0
  #           fi
  #           sleep 2
  #         done
  #         echo "MongoDB did not become ready in time" >&2
  #         docker compose -f compose.mongo.yml logs mongo
  #         exit 1
  #     - name: Run collection benchmark
  #       run: |
  #         set -euo pipefail
  #         mkdir -p "$ARTIFACT_DIR"
  #         echo "Running collection benchmark (backend=${{ matrix.backend.id }}, workload=${{ matrix.workload.id }})"
  #         uv run --locked --no-sync python -m tests.benchmark.collection_benchmark \
  #           "${{ matrix.workload.type }}" \
  #           --backend "${{ matrix.backend.id }}" \
  #           --total-tasks "${{ matrix.workload.total_tasks }}" \
  #           --concurrency "${{ matrix.workload.concurrency }}" \
  #           --task-prefix "${{ matrix.backend.id }}-${{ matrix.workload.id }}" \
  #           --summary-file "$SUMMARY_FILE" \
  #           --mongo-uri "$MONGO_URI" \
  #           --mongo-database agentlightning_collection_bench
  #     - name: Show collection benchmark summary
  #       if: ${{ always() }}
  #       run: |
  #         set -euo pipefail
  #         if [ -f "$SUMMARY_FILE" ]; then
  #           echo "Collection benchmark summary (${{ matrix.backend.id }}):"
  #           cat "$SUMMARY_FILE"
  #         else
  #           echo "Summary file not found: $SUMMARY_FILE"
  #         fi
  #     - name: Stop MongoDB
  #       if: ${{ always() && matrix.backend.needs_mongo }}
  #       run: |
  #         set -euo pipefail
  #         cd docker
  #         docker compose -f compose.mongo.yml down -v || true
  #     - name: Upload collection artifacts
  #       if: ${{ always() }}
  #       uses: actions/upload-artifact@v4
  #       with:
  #         name: ${{ env.ARTIFACT_NAME }}
  #         path: ${{ env.ARTIFACT_DIR }}
  #         if-no-files-found: error