Skip to content

Commit 48f0795

Browse files
committed
add nightly throughput stress
1 parent 4fe685a commit 48f0795

File tree

1 file changed

+181
-0
lines changed

1 file changed

+181
-0
lines changed
Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
name: Nightly Throughput Stress

on:
  schedule:
    # Fires daily at 11:00 UTC (03:00 PST); staggered so it does not
    # coincide with the existing nightly run.
    - cron: "00 11 * * *"

  # Manual runs may override the scenario length and both timeouts.
  workflow_dispatch:
    inputs:
      duration:
        type: string
        description: "Test duration (e.g., 6h, 1h)"
        default: "6h"
        required: false
      timeout:
        type: string
        description: "Scenario timeout (should always be 30m more than duration)"
        default: "6h30m"
        required: false
      job_timeout_minutes:
        type: number
        description: "GitHub Actions job timeout in minutes"
        default: 420
        required: false
24+
25+
env:
  # Scenario length and scenario timeout. Resolution order for each value:
  # manual dispatch input, then repository variable, then the literal default.
  TEST_DURATION: "${{ inputs.duration || vars.NIGHTLY_TEST_DURATION || '6h' }}"
  TEST_TIMEOUT: "${{ inputs.timeout || vars.NIGHTLY_TEST_TIMEOUT || '6h30m' }}"

  # Directory that collects the server and scenario logs for upload.
  WORKER_LOG_DIR: "/tmp/throughput-stress-logs"

  # Which Omes repository/ref to check out, and the run id passed to it.
  OMES_REPO: "temporalio/omes"
  OMES_REF: "main"
  RUN_ID: "${{ github.run_id }}-throughput-stress"
37+
38+
jobs:
  # Builds the SDK from this checkout, starts a local Temporal dev server and
  # runs the Omes throughput_stress scenario against it. On failure the logs
  # are uploaded as an artifact and a Slack notification is sent.
  throughput-stress:
    runs-on: ubuntu-latest-4-cores
    # Manual dispatch input first, then repository variable, then 420 minutes.
    timeout-minutes: ${{ inputs.job_timeout_minutes || vars.NIGHTLY_JOB_TIMEOUT_MINUTES || 420 }}

    steps:
      - name: Print test configuration
        run: |
          echo "=== Throughput Stress Test Configuration ==="
          echo "Duration: $TEST_DURATION"
          echo "Timeout: $TEST_TIMEOUT"
          echo "Run ID: $RUN_ID"
          echo "=========================================="

      - name: Checkout SDK
        uses: actions/checkout@v4
        with:
          submodules: recursive

      # Omes is checked out into ./omes, next to the SDK sources.
      - name: Checkout OMES
        uses: actions/checkout@v4
        with:
          repository: ${{ env.OMES_REPO }}
          ref: ${{ env.OMES_REF }}
          path: omes

      # Go version and module cache key both come from the Omes checkout.
      - name: Setup Go
        uses: actions/setup-go@v5
        with:
          go-version-file: omes/go.mod
          cache-dependency-path: omes/go.sum

      - name: Setup Rust
        uses: dtolnay/rust-toolchain@stable

      - name: Setup Rust cache
        uses: Swatinem/rust-cache@v2
        with:
          workspaces: temporalio/bridge -> target

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.13"

      - name: Install protoc
        uses: arduino/setup-protoc@v3
        with:
          version: '23.x'
          repo-token: ${{ secrets.GITHUB_TOKEN }}

      - name: Setup uv
        uses: astral-sh/setup-uv@v5

      - name: Install poethepoet
        run: uv tool install poethepoet

      - name: Install dependencies
        run: uv sync --all-extras

      - name: Build SDK
        run: poe build-develop

      - name: Install Temporal CLI
        uses: temporalio/setup-temporal@v0

      - name: Setup log directory
        run: mkdir -p "$WORKER_LOG_DIR"

      # The server is backgrounded so later steps can talk to it; its output
      # goes to the log directory that is uploaded on failure.
      - name: Start Temporal Server
        run: |
          temporal server start-dev \
            --db-filename temporal-throughput-stress.sqlite \
            --sqlite-pragma journal_mode=WAL \
            --sqlite-pragma synchronous=OFF \
            --headless &> "$WORKER_LOG_DIR/temporal-server.log" &

      - name: Run throughput stress scenario with local SDK
        working-directory: omes
        # Explicit bash: the script relies on the bash-only PIPESTATUS array.
        shell: bash
        run: |
          set +e  # Don't fail immediately on error

          # Use run-scenario-with-worker to build and run in one step
          # Pass the SDK directory as --version for local testing
          # Note: The hardcoded values below match OMES defaults, except:
          # - visibility-count-timeout: 5m (vs 3m default)
          # to give CI a bit more time for visibility consistency
          # NOTE(review): the "Checkout SDK" step sets no `path:`, so confirm
          # that ../sdk-python really resolves to the SDK checkout on the runner.
          go run ./cmd run-scenario-with-worker \
            --scenario throughput_stress \
            --language python \
            --version "$(pwd)/../sdk-python" \
            --run-id "$RUN_ID" \
            --duration "$TEST_DURATION" \
            --timeout "$TEST_TIMEOUT" \
            --max-concurrent 10 \
            --option internal-iterations=10 \
            --option continue-as-new-after-iterations=3 \
            --option sleep-time=1s \
            --option visibility-count-timeout=5m \
            --option min-throughput-per-hour=1000 \
            2>&1 | tee "$WORKER_LOG_DIR/scenario.log"

          # After a pipeline, $? may reflect tee rather than the scenario;
          # PIPESTATUS[0] is always the exit status of `go run` itself.
          SCENARIO_EXIT_CODE=${PIPESTATUS[0]}
          echo "SCENARIO_EXIT_CODE=$SCENARIO_EXIT_CODE" >> "$GITHUB_ENV"
          exit "$SCENARIO_EXIT_CODE"

      - name: Upload logs on failure
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: throughput-stress-logs
          path: ${{ env.WORKER_LOG_DIR }}
          retention-days: 30

      # v2 of the Slack action takes the webhook URL and its type as inputs.
      - name: Notify Slack on failure
        if: failure()
        uses: slackapi/slack-github-action@v2
        with:
          webhook: ${{ secrets.SLACK_SDK_ALERTS_WEBHOOK }}
          webhook-type: incoming-webhook
          payload: |
            {
              "text": "Nightly Python throughput stress test failed",
              "blocks": [
                {
                  "type": "section",
                  "text": {
                    "type": "mrkdwn",
                    "text": "*Nightly Throughput Stress Failed* :x:\n\n*Duration:* ${{ env.TEST_DURATION }}\n*Run:* <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View Logs>\n*Triggered by:* ${{ github.event_name == 'schedule' && 'Scheduled' || github.actor }}"
                  }
                }
              ]
            }

      # Runs even after earlier failures. SCENARIO_EXIT_CODE is only set by
      # the scenario step, so an unset value is treated as a failure (":-1").
      - name: Fail if scenario failed
        if: always()
        run: |
          if [ "${SCENARIO_EXIT_CODE:-1}" != "0" ]; then
            echo "❌ Throughput stress test failed with exit code ${SCENARIO_EXIT_CODE}"
            echo "Check the artifacts for detailed logs and state"
            exit 1
          else
            echo "✅ Throughput stress test completed successfully"
          fi

0 commit comments

Comments
 (0)