# fix, properly capture scenario exit code, correct sdk path #5
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Nightly throughput stress workflow.
#
# Builds the SDK from this checkout, starts a local Temporal dev server, and
# runs the OMES `throughput_stress` scenario against a Python worker built
# from the local SDK. On failure, logs are uploaded as an artifact and a Slack
# notification is sent before the job is marked failed.
name: Nightly Throughput Stress

on:
  schedule:
    # Run at 3 AM PST (11:00 UTC) - offset from existing nightly
    - cron: '00 11 * * *'
  # NOTE(review): this push trigger targets a single feature branch and looks
  # like a development aid - confirm whether it should remain once merged.
  push:
    branches:
      - add-nightly-throughput-stress-workflow
  workflow_dispatch:
    inputs:
      duration:
        description: 'Test duration (e.g., 6h, 1h)'
        required: false
        default: '6h'
        type: string
      timeout:
        description: 'Scenario timeout (should always be 30m more than duration)'
        required: false
        default: '6h30m'
        type: string
      job_timeout_minutes:
        description: 'GitHub Actions job timeout in minutes'
        required: false
        default: 420
        type: number

env:
  # Workflow configuration
  # Precedence: manual dispatch input, then repository variable, then default.
  TEST_DURATION: ${{ inputs.duration || vars.NIGHTLY_TEST_DURATION || '6h' }}
  TEST_TIMEOUT: ${{ inputs.timeout || vars.NIGHTLY_TEST_TIMEOUT || '6h30m' }}
  # Logging and artifacts
  WORKER_LOG_DIR: /tmp/throughput-stress-logs
  # Omes configuration
  OMES_REPO: temporalio/omes
  OMES_REF: main
  RUN_ID: ${{ github.run_id }}-throughput-stress

jobs:
  throughput-stress:
    runs-on: ubuntu-latest-4-cores
    # Same precedence as the env values above; fromJSON turns the selected
    # value into the number that timeout-minutes expects.
    timeout-minutes: ${{ fromJSON(inputs.job_timeout_minutes || vars.NIGHTLY_JOB_TIMEOUT_MINUTES || 420) }}
    steps:
      - name: Print test configuration
        run: |
          echo "=== Throughput Stress Test Configuration ==="
          echo "Duration: $TEST_DURATION"
          echo "Timeout: $TEST_TIMEOUT"
          echo "Run ID: $RUN_ID"
          echo "=========================================="

      - name: Checkout SDK
        uses: actions/checkout@v4
        with:
          submodules: recursive

      # OMES is checked out into ./omes, next to the SDK sources.
      - name: Checkout OMES
        uses: actions/checkout@v4
        with:
          repository: ${{ env.OMES_REPO }}
          ref: ${{ env.OMES_REF }}
          path: omes

      - name: Setup Go
        uses: actions/setup-go@v5
        with:
          go-version-file: omes/go.mod
          cache-dependency-path: omes/go.sum

      - name: Setup Rust
        uses: dtolnay/rust-toolchain@stable

      - name: Setup Rust cache
        uses: Swatinem/rust-cache@v2
        with:
          workspaces: temporalio/bridge -> target

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.13"

      - name: Install protoc
        uses: arduino/setup-protoc@v3
        with:
          version: '23.x'
          repo-token: ${{ secrets.GITHUB_TOKEN }}

      - name: Setup uv
        uses: astral-sh/setup-uv@v5

      - name: Install poethepoet
        run: uv tool install poethepoet

      - name: Install dependencies
        run: uv sync --all-extras

      - name: Build SDK
        run: poe build-develop

      - name: Install Temporal CLI
        uses: temporalio/setup-temporal@v0

      - name: Setup log directory
        run: mkdir -p "$WORKER_LOG_DIR"

      # The server is started in the background; its output goes to the log
      # directory so it is included in the failure artifact.
      - name: Start Temporal Server
        run: |
          temporal server start-dev \
            --db-filename temporal-throughput-stress.sqlite \
            --sqlite-pragma journal_mode=WAL \
            --sqlite-pragma synchronous=OFF \
            --headless &> "$WORKER_LOG_DIR/temporal-server.log" &

      # continue-on-error keeps the job status "success" even when this step
      # exits non-zero, so later steps must test steps.scenario.outcome (or
      # SCENARIO_EXIT_CODE) instead of relying on failure() alone.
      #
      # Do not add `shell: bash` here: an explicit bash shell enables
      # `pipefail`, which would abort the script at the pipeline and skip the
      # PIPESTATUS capture below.
      - name: Run throughput stress scenario with local SDK
        id: scenario
        working-directory: omes
        continue-on-error: true
        run: |
          # Use run-scenario-with-worker to build and run in one step
          # Pass the SDK directory as --version for local testing
          # Note: The hardcoded values below match OMES defaults, except:
          # - visibility-count-timeout: 5m (vs 3m default)
          #   to give CI a bit more time for visibility consistency
          go run ./cmd run-scenario-with-worker \
            --scenario throughput_stress \
            --language python \
            --version "$(pwd)/.." \
            --run-id "$RUN_ID" \
            --duration "$TEST_DURATION" \
            --timeout "$TEST_TIMEOUT" \
            --max-concurrent 10 \
            --option internal-iterations=10 \
            --option continue-as-new-after-iterations=3 \
            --option sleep-time=1s \
            --option visibility-count-timeout=5m \
            --option min-throughput-per-hour=1000 \
            2>&1 | tee "$WORKER_LOG_DIR/scenario.log"
          # PIPESTATUS[0] is the exit code of `go run`, not of `tee`.
          SCENARIO_EXIT_CODE=${PIPESTATUS[0]}
          echo "SCENARIO_EXIT_CODE=$SCENARIO_EXIT_CODE" >> "$GITHUB_ENV"
          exit "$SCENARIO_EXIT_CODE"

      # failure() covers failures in the setup steps; the outcome check covers
      # a failed scenario, which continue-on-error hides from failure().
      - name: Upload logs on failure
        if: failure() || steps.scenario.outcome == 'failure'
        uses: actions/upload-artifact@v4
        with:
          name: throughput-stress-logs
          path: ${{ env.WORKER_LOG_DIR }}
          retention-days: 30

      # slack-github-action v2 takes the webhook URL and its type as inputs.
      - name: Notify Slack on failure
        if: failure() || steps.scenario.outcome == 'failure'
        uses: slackapi/slack-github-action@v2
        with:
          webhook: ${{ secrets.SLACK_SDK_ALERTS_WEBHOOK }}
          webhook-type: incoming-webhook
          payload: |
            {
              "text": "Nightly Python throughput stress test failed",
              "blocks": [
                {
                  "type": "section",
                  "text": {
                    "type": "mrkdwn",
                    "text": "*Nightly Throughput Stress Failed* :x:\n\n*Duration:* ${{ env.TEST_DURATION }}\n*Run:* <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View Logs>\n*Triggered by:* ${{ github.event_name == 'schedule' && 'Scheduled' || github.actor }}"
                  }
                }
              ]
            }

      # Runs last so the artifact upload and Slack notification happen before
      # the job is marked failed. A missing SCENARIO_EXIT_CODE (scenario step
      # never reached or never exported it) is treated as a failure.
      - name: Fail if scenario failed
        if: always()
        run: |
          if [ "${SCENARIO_EXIT_CODE:-1}" != "0" ]; then
            echo "❌ Throughput stress test failed with exit code ${SCENARIO_EXIT_CODE:-unknown}"
            echo "Check the artifacts for detailed logs and state"
            exit 1
          else
            echo "✅ Throughput stress test completed successfully"
          fi