# Benchmark #94
# NOTE: GitHub's web viewer flagged this file as containing hidden or
# bidirectional Unicode text; review it in an editor that reveals hidden
# Unicode characters before trusting a visual diff.
# Benchmark workflow: runs store benchmark scenarios against multiple storage
# backends (in-memory and MongoDB) behind a Prometheus stack, optionally
# routing OTLP traces through Kafka via an OTel Collector, then collects
# analysis output, docker logs, and archived Prometheus data as artifacts.
name: Benchmark

permissions:
  contents: read

on:
  workflow_dispatch:
  schedule:
    # Every Monday and Thursday at 3 AM UTC+8
    - cron: '0 19 * * 0,3'

jobs:
  benchmark:
    name: ${{ matrix.workload.kind }} (${{ matrix.backend.id }}, ${{ matrix.workload.display }}, ${{ matrix.trace_sink }})
    runs-on: ${{ matrix.workload.runner }}
    timeout-minutes: ${{ matrix.workload.timeout }}
    strategy:
      fail-fast: false
      matrix:
        backend:
          - id: memory
            compose_file: compose.prometheus-memory-store.yml
          - id: mongo
            compose_file: compose.prometheus-mongo-store.yml
        # trace_sink: [store, kafka]
        trace_sink: [kafka]
        workload:
          - id: scenario-minimal-scale
            display: Minimal production scale
            kind: scenario
            store_workers: 4
            runner:
              - self-hosted
              - 1ES.Pool=agl-runner-cpu
            timeout: 45
            args: >-
              --mode batch
              --total-tasks 4096
              --batch-size 256
              --n-runners 32
              --max-rounds 6
              --sleep-seconds 0.5
          - id: scenario-medium-scale
            display: Medium production scale
            kind: scenario
            store_workers: 16
            runner:
              - self-hosted
              - 1ES.Pool=agl-runner-cpu
            timeout: 45
            args: >-
              --mode batch
              --total-tasks 10000
              --batch-size 1000
              --n-runners 100
              --max-rounds 10
              --sleep-seconds 0.1
          - id: scenario-midhigh-scale
            display: Mid-high production scale
            kind: scenario
            store_workers: 24
            runner:
              - self-hosted
              - 1ES.Pool=agl-runner-cpu
            timeout: 60
            args: >-
              --mode batch
              --total-tasks 20000
              --batch-size 2048
              --n-runners 300
              --max-rounds 6
              --sleep-seconds 0.1
          # - id: scenario-large-batch
          #   display: Large batch waves
          #   kind: scenario
          #   store_workers: 96
          #   runner:
          #     - self-hosted
          #     - 1ES.Pool=agl-runner-cpu-high
          #   timeout: 120
          #   args: >-
          #     --mode batch
          #     --total-tasks 50000
          #     --batch-size 8192
          #     --n-runners 1000
          #     --max-rounds 3
          #     --sleep-seconds 0.1
          - id: scenario-long-queues
            display: Long rollout queues
            kind: scenario
            store_workers: 48
            runner:
              - self-hosted
              - 1ES.Pool=agl-runner-cpu
            timeout: 120
            args: >-
              --mode batch_partial
              --total-tasks 50000
              --batch-size 1024
              --n-runners 256
              --remaining-tasks 4096
              --max-rounds 4
              --sleep-seconds 0.1
          # - id: scenario-high-concurrency
          #   display: High-throughput concurrent requests
          #   kind: scenario
          #   store_workers: 96
          #   runner:
          #     - self-hosted
          #     - 1ES.Pool=agl-runner-cpu-high
          #   timeout: 120
          #   args: >-
          #     --mode single
          #     --total-tasks 50000
          #     --concurrency 2048
          #     --n-runners 256
          #     --max-rounds 2
          #     --sleep-seconds 0.1
          # - id: scenario-heavy-traces
          #   display: Heavy rollouts with deep traces
          #   kind: scenario
          #   store_workers: 96
          #   runner:
          #     - self-hosted
          #     - 1ES.Pool=agl-runner-cpu-high
          #   timeout: 60
          #   args: >-
          #     --mode batch_partial
          #     --total-tasks 10000
          #     --batch-size 1024
          #     --remaining-tasks 256
          #     --n-runners 512
          #     --max-rounds 20
          #     --sleep-seconds 1.0
          # - id: micro-worker
          #   display: Update worker
          #   kind: micro
          #   store_workers: 8
          #   runner: ubuntu-latest
          #   timeout: 30
          #   cli: worker
          # - id: micro-dequeue-empty
          #   display: Dequeue empty
          #   kind: micro
          #   store_workers: 8
          #   runner: ubuntu-latest
          #   timeout: 30
          #   cli: dequeue-empty
          # - id: micro-rollout
          #   display: Rollout + span
          #   kind: micro
          #   store_workers: 8
          #   runner: ubuntu-latest
          #   timeout: 30
          #   cli: rollout
          # - id: micro-dequeue-update-attempt
          #   display: Dequeue + update attempt
          #   kind: micro
          #   store_workers: 8
          #   runner: ubuntu-latest
          #   timeout: 30
          #   cli: dequeue-update-attempt
          # - id: micro-dequeue-only
          #   display: Dequeue only
          #   kind: micro
          #   store_workers: 8
          #   runner: ubuntu-latest
          #   timeout: 30
          #   cli: dequeue-only
          # - id: micro-metrics
          #   display: Multi-metric fan-out
          #   kind: micro
          #   store_workers: 8
          #   runner: ubuntu-latest
          #   timeout: 15
          #   cli: metrics
    env:
      PYTHONUNBUFFERED: "1"
      STORE_URL: http://localhost:4747
      STORE_API_URL: http://localhost:4747/v1/agl
      PROM_URL: http://localhost:9090
      GITHUB_ACTIONS_TIMEOUT_MINUTES: ${{ matrix.workload.timeout }}
      WORKLOAD_KIND: ${{ matrix.workload.kind }}
      WORKLOAD_ID: ${{ matrix.workload.id }}
      BACKEND_ID: ${{ matrix.backend.id }}
      TRACE_SINK_ID: ${{ matrix.trace_sink }}
      ARTIFACT_DIR: ${{ format('artifacts/{0}-{1}-{2}', matrix.workload.id, matrix.backend.id, matrix.trace_sink) }}
      COMPOSE_FILE: ${{ matrix.backend.compose_file }}
      AGL_STORE_N_WORKERS: ${{ matrix.workload.store_workers }}
      ANALYSIS_FILE: ${{ format('analysis-{0}-{1}.log', matrix.workload.id, matrix.trace_sink) }}
      SUMMARY_FILE: ${{ format('summary-{0}-{1}.log', matrix.workload.id, matrix.trace_sink) }}
      PROM_ARCHIVE_BASENAME: ${{ format('prometheus-{0}-{1}', matrix.workload.id, matrix.backend.id) }}
      ARTIFACT_NAME: ${{ format('{0}-{1}-{2}', matrix.workload.id, matrix.backend.id, matrix.trace_sink) }}
    steps:
      - uses: actions/checkout@v4
      - uses: astral-sh/setup-uv@v7
        with:
          enable-cache: true
          python-version: '3.12'
      - name: Sync dependencies
        run: uv sync --frozen --extra mongo --group core-stable --group dev
      - name: Check disk space
        run: df -h
      - name: Reset benchmark data directories
        run: |
          set -euo pipefail
          cd docker
          rm -rf data
          bash setup.sh
      - name: Launch ${{ matrix.backend.id }} Prometheus stack
        run: |
          set -euo pipefail
          cd docker
          docker compose -f "$COMPOSE_FILE" down -v || true
          docker compose -f "$COMPOSE_FILE" up -d --quiet-pull
      - name: Wait for store readiness
        run: |
          set -euo pipefail
          for attempt in {1..60}; do
            if curl -fsS "$STORE_API_URL/health" >/dev/null 2>&1; then
              sleep 1
              curl -fsS "$STORE_API_URL/rollouts"  # Warm up the scraper
              sleep 15  # Allow some time for the baseline metrics to be established
              exit 0
            fi
            sleep 1
          done
          echo "Store did not become ready in time" >&2
          # show logs for debugging
          cd docker && docker compose -f "$COMPOSE_FILE" logs app
          exit 1
      - name: Configure trace sink (store vs kafka)
        run: |
          set -euo pipefail
          if [ "${{ matrix.trace_sink }}" = "kafka" ]; then
            echo "AGL_OTLP_ENDPOINT=http://localhost:4318/v1/traces" >> "$GITHUB_ENV"
          fi
      - name: Launch Kafka + OTel Collector (OTLP -> Kafka)
        if: ${{ matrix.trace_sink == 'kafka' }}
        run: |
          set -euo pipefail
          cd docker
          # Generate OTel Collector config (OTLP/HTTP receiver -> Kafka exporter)
          cat > otelcol-kafka.yml <<'YAML'
          receivers:
            otlp:
              protocols:
                http:
                  endpoint: 0.0.0.0:4318
          processors:
            batch: {}
          exporters:
            kafka:
              brokers: ["kafka:9092"]
              topic: "agl-otlp-spans"
              encoding: otlp_proto
          service:
            pipelines:
              traces:
                receivers: [otlp]
                processors: [batch]
                exporters: [kafka]
          YAML
          # Launch Kafka + Zookeeper + OTel Collector
          cat > compose.kafka-otel.yml <<'YAML'
          services:
            zookeeper:
              image: confluentinc/cp-zookeeper:7.6.1
              environment:
                ZOOKEEPER_CLIENT_PORT: 2181
                ZOOKEEPER_TICK_TIME: 2000
            kafka:
              image: confluentinc/cp-kafka:7.6.1
              depends_on: [zookeeper]
              environment:
                KAFKA_BROKER_ID: 1
                KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
                KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
                KAFKA_LISTENERS: PLAINTEXT://0.0.0.0:9092
                KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092
                KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT
                # Enlarge max message size to accommodate large spans
                KAFKA_MESSAGE_MAX_BYTES: "10000000"
                KAFKA_REPLICA_FETCH_MAX_BYTES: "10000000"
                KAFKA_SOCKET_REQUEST_MAX_BYTES: "10000000"
            otelcol:
              image: otel/opentelemetry-collector-contrib:latest
              depends_on: [kafka]
              # command: ["--config=/etc/otelcol/config.yml"]
              command:
                - "--config=/etc/otelcol/config.yml"
                - "--set=service.telemetry.logs.level=debug"
              volumes:
                - ./otelcol-kafka.yml:/etc/otelcol/config.yml:ro
              ports:
                - "4318:4318"
          YAML
          docker compose -p agl-kafka -f compose.kafka-otel.yml down -v || true
          docker compose -p agl-kafka -f compose.kafka-otel.yml up -d --quiet-pull
          # Create topic (idempotent)
          docker compose -p agl-kafka -f compose.kafka-otel.yml exec -T kafka \
            kafka-topics --bootstrap-server kafka:9092 \
            --create --if-not-exists \
            --topic agl-otlp-spans --partitions 3 --replication-factor 1
          # Wait for OTLP/HTTP port to be reachable on the host
          for attempt in {1..30}; do
            if (echo > /dev/tcp/127.0.0.1/4318) >/dev/null 2>&1; then
              exit 0
            fi
            sleep 1
          done
          echo "OTel Collector port 4318 not reachable in time" >&2
          docker compose -p agl-kafka -f compose.kafka-otel.yml logs otelcol || true
          exit 1
      - name: Prepare artifact directory
        run: mkdir -p "$ARTIFACT_DIR"
      - name: Record workload start
        run: echo "BENCHMARK_START=$(date -u +%FT%TZ)" >> "$GITHUB_ENV"
      - name: (Scenario) Run ${{ matrix.workload.display }} workload
        if: ${{ matrix.workload.kind == 'scenario' }}
        run: |
          set -euo pipefail
          uv run --locked --no-sync python -m tests.benchmark.benchmark_store \
            --store-url "$STORE_URL" \
            ${{ matrix.workload.args }}
      - name: (Micro) Run ${{ matrix.workload.display }}
        if: ${{ matrix.workload.kind == 'micro' }}
        run: |
          set -euo pipefail
          mkdir -p "$ARTIFACT_DIR"
          uv run --locked --no-sync python -m tests.benchmark.micro_benchmark \
            --store-url "$STORE_URL" \
            --summary-file "$ARTIFACT_DIR/$SUMMARY_FILE" \
            "${{ matrix.workload.cli }}" | tee "$ARTIFACT_DIR/${{ matrix.workload.id }}.txt"
      - name: Record workload end
        if: ${{ always() }}
        run: echo "BENCHMARK_END=$(date -u +%FT%TZ)" >> "$GITHUB_ENV"
      - name: Show micro benchmark summary
        if: ${{ always() && matrix.workload.kind == 'micro' }}
        run: |
          set -euo pipefail
          summary_file="$ARTIFACT_DIR/$SUMMARY_FILE"
          if [ -f "$summary_file" ]; then
            echo "Micro benchmark summary ($WORKLOAD_ID/$BACKEND_ID):"
            cat "$summary_file"
          else
            echo "Summary file not found: $summary_file"
          fi
      - name: Run workload analysis
        if: ${{ always() }}
        run: |
          set -euo pipefail
          mkdir -p "$ARTIFACT_DIR"
          if [ -z "${BENCHMARK_START:-}" ] || [ -z "${BENCHMARK_END:-}" ]; then
            echo "Analysis skipped: benchmark window not recorded." > "$ARTIFACT_DIR/$ANALYSIS_FILE"
            exit 1
          fi
          uv run --locked --no-sync python -m tests.benchmark.analysis \
            --prom-url "$PROM_URL" \
            --store-url "$STORE_API_URL" \
            --start "$BENCHMARK_START" \
            --end "$BENCHMARK_END" \
            | tee "$ARTIFACT_DIR/$ANALYSIS_FILE"
      - name: Collect docker logs
        if: ${{ always() }}
        run: |
          set -euo pipefail
          mkdir -p "$ARTIFACT_DIR"
          cd docker
          readarray -t services < <(docker compose -f "$COMPOSE_FILE" config --services)
          if [ "${#services[@]}" -eq 0 ]; then
            echo "No services defined in compose file."
            exit 0
          fi
          for service in "${services[@]}"; do
            docker compose -f "$COMPOSE_FILE" logs "$service" > "../$ARTIFACT_DIR/docker-${service}-${WORKLOAD_ID}-${BACKEND_ID}.log" || true
          done
      - name: Collect Kafka + OTel Collector logs
        if: ${{ always() && matrix.trace_sink == 'kafka' }}
        run: |
          set -euo pipefail
          mkdir -p "$ARTIFACT_DIR"
          cd docker
          if [ -f compose.kafka-otel.yml ]; then
            for service in zookeeper kafka otelcol; do
              docker compose -p agl-kafka -f compose.kafka-otel.yml logs "$service" \
                > "../$ARTIFACT_DIR/docker-kafka-${service}-${WORKLOAD_ID}-${BACKEND_ID}.log" || true
            done
          fi
      - name: Stop Kafka + OTel Collector
        if: ${{ always() && matrix.trace_sink == 'kafka' }}
        run: |
          set -euo pipefail
          cd docker
          if [ -f compose.kafka-otel.yml ]; then
            docker compose -p agl-kafka -f compose.kafka-otel.yml down -v || true
          fi
      - name: Stop ${{ matrix.backend.id }} Prometheus stack
        if: ${{ always() }}
        run: |
          set -euo pipefail
          cd docker
          docker compose -f "$COMPOSE_FILE" down -v || true
      - name: Archive Prometheus metrics
        if: ${{ always() }}
        run: |
          set -euo pipefail
          mkdir -p "$ARTIFACT_DIR"
          if [ -d docker/data/prometheus ]; then
            tar -C docker/data -czf "$ARTIFACT_DIR/${PROM_ARCHIVE_BASENAME}.tar.gz" prometheus
          fi
      - name: Upload workload artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ env.ARTIFACT_NAME }}
          path: ${{ env.ARTIFACT_DIR }}
          if-no-files-found: error
  # collection-benchmarks:
  #   name: collection (${{ matrix.backend.id }}, ${{ matrix.workload.id }})
  #   runs-on: ${{ matrix.backend.runner }}
  #   timeout-minutes: 15
  #   strategy:
  #     fail-fast: false
  #     matrix:
  #       backend:
  #         - id: memory
  #           needs_mongo: false
  #           runner: ubuntu-latest
  #         - id: mongo
  #           needs_mongo: true
  #           runner: ubuntu-latest
  #       workload:
  #         - id: high-insert
  #           total_tasks: 50000
  #           concurrency: 2048
  #           type: insert
  #         - id: medium-insert
  #           total_tasks: 50000
  #           concurrency: 128
  #           type: insert
  #         - id: low-insert
  #           total_tasks: 50000
  #           concurrency: 4
  #           type: insert
  #         - id: high-dequeue
  #           total_tasks: 50000
  #           concurrency: 2048
  #           type: dequeue
  #         - id: medium-dequeue
  #           total_tasks: 50000
  #           concurrency: 128
  #           type: dequeue
  #         - id: low-dequeue
  #           total_tasks: 50000
  #           concurrency: 4
  #           type: dequeue
  #   env:
  #     ARTIFACT_DIR: ${{ format('artifacts/{0}-{1}', matrix.backend.id, matrix.workload.id) }}
  #     SUMMARY_FILE: ${{ format('artifacts/{0}-{1}/summary-{0}-{1}.jsonl', matrix.backend.id, matrix.workload.id) }}
  #     ARTIFACT_NAME: ${{ format('collections-{0}-{1}', matrix.backend.id, matrix.workload.id) }}
  #     MONGO_URI: mongodb://localhost:27017/?replicaSet=rs0
  #   steps:
  #     - uses: actions/checkout@v4
  #     - uses: astral-sh/setup-uv@v7
  #       with:
  #         enable-cache: true
  #         python-version: '3.12'
  #     - name: Sync dependencies
  #       run: uv sync --frozen --extra mongo --group core-stable --group dev
  #     - name: Launch MongoDB
  #       if: ${{ matrix.backend.needs_mongo }}
  #       run: |
  #         set -euo pipefail
  #         cd docker
  #         docker compose -f compose.mongo.yml down -v || true
  #         docker compose -f compose.mongo.yml up -d --quiet-pull
  #         for attempt in {1..60}; do
  #           if docker compose -f compose.mongo.yml exec -T mongo mongosh --quiet --eval 'db.runCommand({ping:1})' >/dev/null 2>&1; then
  #             exit 0
  #           fi
  #           sleep 2
  #         done
  #         echo "MongoDB did not become ready in time" >&2
  #         docker compose -f compose.mongo.yml logs mongo
  #         exit 1
  #     - name: Run collection benchmark
  #       run: |
  #         set -euo pipefail
  #         mkdir -p "$ARTIFACT_DIR"
  #         echo "Running collection benchmark (backend=${{ matrix.backend.id }}, workload=${{ matrix.workload.id }})"
  #         uv run --locked --no-sync python -m tests.benchmark.collection_benchmark \
  #           "${{ matrix.workload.type }}" \
  #           --backend "${{ matrix.backend.id }}" \
  #           --total-tasks "${{ matrix.workload.total_tasks }}" \
  #           --concurrency "${{ matrix.workload.concurrency }}" \
  #           --task-prefix "${{ matrix.backend.id }}-${{ matrix.workload.id }}" \
  #           --summary-file "$SUMMARY_FILE" \
  #           --mongo-uri "$MONGO_URI" \
  #           --mongo-database agentlightning_collection_bench
  #     - name: Show collection benchmark summary
  #       if: ${{ always() }}
  #       run: |
  #         set -euo pipefail
  #         if [ -f "$SUMMARY_FILE" ]; then
  #           echo "Collection benchmark summary (${{ matrix.backend.id }}):"
  #           cat "$SUMMARY_FILE"
  #         else
  #           echo "Summary file not found: $SUMMARY_FILE"
  #         fi
  #     - name: Stop MongoDB
  #       if: ${{ always() && matrix.backend.needs_mongo }}
  #       run: |
  #         set -euo pipefail
  #         cd docker
  #         docker compose -f compose.mongo.yml down -v || true
  #     - name: Upload collection artifacts
  #       if: ${{ always() }}
  #       uses: actions/upload-artifact@v4
  #       with:
  #         name: ${{ env.ARTIFACT_NAME }}
  #         path: ${{ env.ARTIFACT_DIR }}
  #         if-no-files-found: error