fix exit code and slack notification steps #1
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Nightly Throughput Stress

# Triggers: a nightly schedule, pushes to the nightly_tps branch, and manual
# dispatch with optional overrides for the test parameters.
on:
  schedule:
    # Daily at 11:00 UTC (3 AM PST), offset from the existing nightly run.
    - cron: '00 11 * * *'
  push:
    branches:
      - nightly_tps
  workflow_dispatch:
    inputs:
      # How long the scenario should run.
      duration:
        type: string
        required: false
        default: '6h'
        description: 'Test duration (e.g., 6h, 1h)'
      # Scenario-level timeout; keep it 30m above `duration`.
      timeout:
        type: string
        required: false
        default: '6h30m'
        description: 'Scenario timeout (should always be 30m more than duration)'
      # Timeout applied to the GitHub Actions job itself, in minutes.
      job_timeout_minutes:
        type: number
        required: false
        default: 420
        description: 'GitHub Actions job timeout in minutes'
# Workflow-wide environment shared by every step.
env:
  # Test parameters: manual-dispatch input first, then the repository
  # variable, then the hard-coded fallback.
  TEST_DURATION: ${{ inputs.duration || vars.NIGHTLY_TEST_DURATION || '6h' }}
  TEST_TIMEOUT: ${{ inputs.timeout || vars.NIGHTLY_TEST_TIMEOUT || '6h30m' }}

  # Directory that collects the server and scenario logs (uploaded as an
  # artifact when the run fails).
  WORKER_LOG_DIR: '/tmp/throughput-stress-logs'

  # Which OMES repository and ref to check out.
  OMES_REPO: 'temporalio/omes'
  OMES_REF: 'main'

  # Run identifier passed to the scenario via --run-id.
  RUN_ID: ${{ github.run_id }}-throughput-stress
jobs:
  # Builds the SDK from this checkout, starts a local Temporal dev server and
  # runs the OMES throughput_stress scenario against it. On failure the logs
  # are uploaded and a Slack alert is sent before the job is marked failed.
  throughput-stress:
    runs-on: ubuntu-latest-4-cores
    # Precedence: manual-dispatch input, then repository variable, then 420.
    timeout-minutes: ${{ fromJSON(inputs.job_timeout_minutes || vars.NIGHTLY_JOB_TIMEOUT_MINUTES || 420) }}
    steps:
      - name: Print test configuration
        run: |
          echo "=== Throughput Stress Test Configuration ==="
          echo "Duration: $TEST_DURATION"
          echo "Timeout: $TEST_TIMEOUT"
          echo "Run ID: $RUN_ID"
          echo "=========================================="

      - name: Checkout SDK
        uses: actions/checkout@v4
        with:
          submodules: recursive

      # OMES is checked out into ./omes, next to the SDK sources.
      - name: Checkout OMES
        uses: actions/checkout@v4
        with:
          repository: ${{ env.OMES_REPO }}
          ref: ${{ env.OMES_REF }}
          path: omes

      - name: Setup Go
        uses: actions/setup-go@v5
        with:
          go-version-file: omes/go.mod
          cache-dependency-path: omes/go.sum

      - name: Setup Node
        uses: actions/setup-node@v4
        with:
          node-version: 22

      - name: Get NPM cache directory
        id: npm-cache-dir
        run: echo "dir=$(npm config get cache)" >> "$GITHUB_OUTPUT"

      - name: Restore NPM cache
        uses: actions/cache/restore@v4
        with:
          path: ${{ steps.npm-cache-dir.outputs.dir }}
          key: npm-main-linux-x64-${{ hashFiles('./package-lock.json') }}
          restore-keys: |
            npm-main-linux-x64-

      - name: Install protoc
        uses: arduino/setup-protoc@v3
        with:
          version: '23.x'
          repo-token: ${{ secrets.GITHUB_TOKEN }}

      - name: Upgrade Rust
        uses: dtolnay/rust-toolchain@stable

      - name: Rust cache
        uses: Swatinem/rust-cache@v2
        with:
          workspaces: packages/core-bridge -> target
          prefix-key: corebridge-buildcache
          shared-key: linux-intel
          env-vars: ''

      # `npm ci` is attempted up to three times before the step fails.
      - name: Install SDK dependencies
        run: |
          npm ci --ignore-scripts --verbose || \
          npm ci --ignore-scripts --verbose || \
          npm ci --ignore-scripts --verbose

      - name: Build SDK
        run: npm run build
        env:
          BUILD_CORE_RELEASE: true

      - name: Save NPM cache
        uses: actions/cache/save@v4
        if: always()
        with:
          path: ${{ steps.npm-cache-dir.outputs.dir }}
          key: npm-main-linux-x64-${{ hashFiles('./package-lock.json') }}

      - name: Install Temporal CLI
        uses: temporalio/setup-temporal@v0

      - name: Setup log directory
        run: mkdir -p "$WORKER_LOG_DIR"

      # The server is launched in the background; its output goes to the log
      # directory so it is included in the failure artifact.
      - name: Start Temporal Server
        run: |
          temporal server start-dev \
            --db-filename temporal-throughput-stress.sqlite \
            --sqlite-pragma journal_mode=WAL \
            --sqlite-pragma synchronous=OFF \
            --headless &> "$WORKER_LOG_DIR/temporal-server.log" &

      # continue-on-error lets the upload/notify steps below run; the scenario
      # result is handed to them through SCENARIO_EXIT_CODE.
      - name: Run throughput stress scenario with local SDK
        working-directory: omes
        continue-on-error: true
        run: |
          # Use run-scenario-with-worker to build and run in one step
          # Pass the SDK directory as --version for local testing
          # Note: The hardcoded values below match OMES defaults, except:
          # - visibility-count-timeout: 5m (vs 3m default)
          # to give CI a bit more time for visibility consistency

          # Do not abort on a failing pipeline: the exit code must always be
          # recorded, whatever errexit/pipefail settings the shell starts with.
          set +e
          go run ./cmd run-scenario-with-worker \
            --scenario throughput_stress \
            --language typescript \
            --version "$(pwd)/.." \
            --run-id "$RUN_ID" \
            --duration "$TEST_DURATION" \
            --timeout "$TEST_TIMEOUT" \
            --max-concurrent 10 \
            --option internal-iterations=10 \
            --option continue-as-new-after-iterations=3 \
            --option sleep-time=1s \
            --option visibility-count-timeout=5m \
            --option min-throughput-per-hour=1000 \
            2>&1 | tee "$WORKER_LOG_DIR/scenario.log"
          # PIPESTATUS[0] is the status of `go run`, not of `tee`; it must be
          # read by the command immediately following the pipeline.
          echo "SCENARIO_EXIT_CODE=${PIPESTATUS[0]}" >> "$GITHUB_ENV"

      # always() is required: without a status-check function GitHub implies
      # success(), which would skip this step when an earlier step (install,
      # build, ...) failed. An unset SCENARIO_EXIT_CODE also counts as failure.
      - name: Upload logs on failure
        if: always() && env.SCENARIO_EXIT_CODE != '0'
        uses: actions/upload-artifact@v4
        with:
          name: throughput-stress-logs
          path: ${{ env.WORKER_LOG_DIR }}
          retention-days: 30

      # Same condition as the log upload, so that failures before the scenario
      # step are reported too.
      - name: Notify Slack on failure
        if: always() && env.SCENARIO_EXIT_CODE != '0'
        uses: slackapi/slack-github-action@v2
        with:
          webhook-type: incoming-webhook
          payload: |
            {
              "text": "Nightly TypeScript throughput stress test failed",
              "blocks": [
                {
                  "type": "section",
                  "text": {
                    "type": "mrkdwn",
                    "text": "*Nightly Throughput Stress Failed* :x:\n\n*Duration:* ${{ env.TEST_DURATION }}\n*Run:* <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View Logs>\n*Triggered by:* ${{ github.event_name == 'schedule' && 'Scheduled' || github.actor }}"
                  }
                }
              ]
            }
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_SDK_ALERTS_WEBHOOK }}

      # Turns the recorded scenario result into the job result. A missing
      # SCENARIO_EXIT_CODE (scenario step never recorded one) is a failure.
      - name: Fail if scenario failed
        if: always()
        run: |
          if [ "${SCENARIO_EXIT_CODE:-1}" != "0" ]; then
            echo "❌ Throughput stress test failed with exit code ${SCENARIO_EXIT_CODE:-unknown}"
            echo "Check the artifacts for detailed logs and state"
            exit 1
          else
            echo "✅ Throughput stress test completed successfully"
          fi