fix(network): add active peer dialing for mesh recovery after partiti… #42
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Tendermint Chaos Testing

on:
  # Fires once the "Docker" publish workflow has completed for the feature
  # branch (this trigger only takes effect after merge to main).
  workflow_run:
    workflows:
      - Docker
    types:
      - completed
    branches:
      - feature/tendermint

  # TEMPORARY: direct push trigger for the feature branch, needed because
  # workflow_run uses main's copy of the workflow file.
  push:
    branches:
      - feature/tendermint

  schedule:
    # Daily run at 02:00 UTC (tier1).
    - cron: '0 2 * * *'

  # Manual runs: both inputs are optional and fall back to their defaults.
  workflow_dispatch:
    inputs:
      scenario:
        description: 'Scenario to run (tier1, tier2, validator, network, timing, wal, external, liveness, all)'
        required: false
        default: 'tier1'
      timeout_minutes:
        description: 'Timeout in minutes'
        required: false
        default: '60'

# Compose file (resolved relative to the etc/ directory by the steps that
# use it) describing the testnet brought up for the chaos run.
env:
  COMPOSE_FILE: docker-compose.tendermint-3node.yml
jobs:
  # Brings up the docker-compose testnet, runs the shell-based chaos
  # scenarios against it, and publishes logs plus a markdown summary.
  chaos-test:
    runs-on: ubuntu-latest
    # workflow_dispatch inputs arrive as strings; fromJSON turns the value
    # into the number that timeout-minutes expects. Falls back to 60 for
    # every trigger that supplies no input.
    timeout-minutes: ${{ fromJSON(github.event.inputs.timeout_minutes || '60') }}
    # Skip if triggered by workflow_run and the upstream workflow failed.
    if: ${{ github.event_name != 'workflow_run' || github.event.workflow_run.conclusion == 'success' }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # For workflow_run, check out the commit that triggered the Docker
          # workflow; for every other trigger use the ref of this run.
          ref: ${{ github.event.workflow_run.head_sha || github.ref }}

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Log in to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Install dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y jq bc

      # Exactly one of the next two steps runs: pull the published image
      # after a Docker workflow run, otherwise build it locally under the
      # same tag.
      - name: Pull Docker Image (from workflow_run)
        if: github.event_name == 'workflow_run'
        run: |
          docker pull ghcr.io/anduroproject/alys:feature-tendermint

      - name: Build Docker Image (fallback for schedule/dispatch)
        if: github.event_name != 'workflow_run'
        run: |
          docker build -t ghcr.io/anduroproject/alys:feature-tendermint -f etc/Dockerfile .

      - name: Start Tendermint Testnet
        working-directory: etc
        run: |
          docker compose -f ${{ env.COMPOSE_FILE }} up -d
          # Wait for consensus to stabilize
          cd chaos-testing
          ./wait-for-consensus.sh --timeout 180 --min-blocks 10 --verbose

      - name: Verify testnet health
        working-directory: etc
        run: |
          docker compose -f ${{ env.COMPOSE_FILE }} ps
          # Check all 3 validators are running
          RUNNING=$(docker ps --filter "name=alys-node-" --format '{{.Names}}' | wc -l)
          if [ "$RUNNING" -lt 3 ]; then
            echo "ERROR: Only $RUNNING validators running, expected 3"
            exit 1
          fi
          # Check consensus height
          HEIGHT=$(curl -s --max-time 10 -X POST http://localhost:3001 \
            -H "Content-Type: application/json" \
            -d '{"jsonrpc":"2.0","method":"tendermint_consensusState","params":[],"id":1}' \
            | jq -r '.result.height // 0')
          # An unreachable RPC endpoint or a non-decimal height leaves HEIGHT
          # empty/non-numeric, which would make the -lt test below error out
          # and silently skip the low-height branch. Treat that as height 0.
          case "$HEIGHT" in
            ''|*[!0-9]*) HEIGHT=0 ;;
          esac
          echo "Current consensus height: $HEIGHT"
          if [ "$HEIGHT" -lt 5 ]; then
            echo "WARNING: Consensus height is low ($HEIGHT), waiting longer..."
            sleep 30
          fi

      - name: Run Chaos Tests
        id: chaos
        working-directory: etc/chaos-testing
        env:
          # Passed through the environment instead of being expanded inside
          # the script so that the input value cannot inject shell code.
          SCENARIO: ${{ github.event.inputs.scenario || 'tier1' }}
        run: |
          mkdir -p "$GITHUB_WORKSPACE/chaos-results"
          echo "Running scenario: $SCENARIO"
          # Run tests and capture exit code properly: errexit is disabled so a
          # failing run still records its exit code, and PIPESTATUS[0] reads
          # the test script's status rather than tee's.
          set +e
          ./tendermint-chaos.sh \
            --scenario "$SCENARIO" \
            --verbose \
            --output-dir "$GITHUB_WORKSPACE/chaos-results" \
            2>&1 | tee "$GITHUB_WORKSPACE/chaos-results/output.log"
          EXIT_CODE=${PIPESTATUS[0]}
          set -e
          # Consumed by the final "Check for failures" step.
          echo "exit_code=$EXIT_CODE" >> "$GITHUB_OUTPUT"

      - name: Collect container logs
        if: always()
        working-directory: etc
        run: |
          mkdir -p "$GITHUB_WORKSPACE/chaos-results/logs"
          # Best effort: a missing container must not fail the step.
          for container in alys-node-1 alys-node-2 alys-node-3 execution bitcoin-core; do
            docker logs "$container" > "$GITHUB_WORKSPACE/chaos-results/logs/${container}.log" 2>&1 || true
          done

      - name: Generate summary
        if: always()
        env:
          # Same injection-safe hand-off as in the "Run Chaos Tests" step.
          SCENARIO: ${{ github.event.inputs.scenario || 'tier1' }}
        run: |
          cd "$GITHUB_WORKSPACE/chaos-results"
          # Extract results from output log
          echo "## Tendermint Chaos Test Results" > summary.md
          echo "" >> summary.md
          echo "**Scenario:** ${SCENARIO}" >> summary.md
          echo "**Date:** $(date -u +"%Y-%m-%d %H:%M:%S UTC")" >> summary.md
          echo "" >> summary.md
          if [ -f output.log ]; then
            echo "### Test Results" >> summary.md
            echo '```' >> summary.md
            grep -E "^\[(PASS|FAIL)\]" output.log >> summary.md || echo "No results found" >> summary.md
            echo '```' >> summary.md
            echo "" >> summary.md
            # Extract summary
            echo "### Summary" >> summary.md
            echo '```' >> summary.md
            tail -20 output.log | grep -E "(Total|Passed|Failed)" >> summary.md || true
            echo '```' >> summary.md
          fi
          # Output to GitHub Step Summary
          cat summary.md >> "$GITHUB_STEP_SUMMARY"

      - name: Upload test results
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: chaos-test-results-${{ github.run_id }}
          path: chaos-results/
          retention-days: 30

      - name: Stop testnet
        if: always()
        working-directory: etc
        run: |
          docker compose -f ${{ env.COMPOSE_FILE }} down -v

      # The chaos step itself always succeeds (it only records the exit
      # code), so the job is failed here, after logs and artifacts are saved.
      - name: Check for failures
        if: steps.chaos.outputs.exit_code != '0'
        run: |
          echo "Chaos tests failed with exit code ${{ steps.chaos.outputs.exit_code }}"
          exit 1

  # Runs the in-crate Rust chaos and Tendermint test modules via cargo test;
  # no docker testnet is started in this job.
  rust-chaos-tests:
    runs-on: ubuntu-latest
    timeout-minutes: 30
    # Skip if triggered by workflow_run and the upstream workflow failed.
    if: ${{ github.event_name != 'workflow_run' || github.event.workflow_run.conclusion == 'success' }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.workflow_run.head_sha || github.ref }}

      - name: Install Rust toolchain
        uses: dtolnay/rust-toolchain@stable

      - name: Cache cargo registry
        uses: actions/cache@v4
        with:
          path: |
            ~/.cargo/registry
            ~/.cargo/git
            target
          # Cache is keyed on the lockfile contents and the runner OS.
          key: ${{ runner.os }}-cargo-chaos-${{ hashFiles('**/Cargo.lock') }}

      - name: Run Tendermint chaos unit tests
        working-directory: app
        run: |
          cargo test --package app --lib actors_v2::testing::chaos --no-fail-fast -- --nocapture
        env:
          RUST_BACKTRACE: 1
          RUST_LOG: debug

      - name: Run Tendermint state machine tests
        working-directory: app
        run: |
          cargo test --package app --lib actors_v2::chain::tendermint --no-fail-fast -- --nocapture
        env:
          RUST_BACKTRACE: 1