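# NOTE(review): the commented lines below appear to be web-page residue
# (navigation text and a run title) captured together with the workflow;
# they are not part of the workflow definition.
# Skip to content
#
# remove debug logging #8
#
# remove debug logging
#
# remove debug logging #8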

name: Nightly Throughput Stress

# Triggers: a nightly schedule, pushes to the `nightly_tps` branch, and manual
# dispatch with optional overrides for the test parameters.
on:
  schedule:
    # Run at 3 AM PST (11:00 UTC) - offset from existing nightly
    - cron: '00 11 * * *'
  push:
    branches:
      - nightly_tps
  workflow_dispatch:
    inputs:
      # NOTE(review): the description's "6h" example equals the default job
      # timeout below (360 minutes); raise job_timeout_minutes when requesting
      # a duration that long.
      duration:
        description: 'Test duration (e.g., 6h, 1h)'
        required: false
        default: '5h'
        type: string
      # Keep this in step with `duration` (duration + 30m per the description).
      timeout:
        description: 'Scenario timeout (should always be 30m more than duration)'
        required: false
        default: '5h30m'
        type: string
      # Upper bound for the whole job, consumed by `timeout-minutes`.
      job_timeout_minutes:
        description: 'GitHub Actions job timeout in minutes'
        required: false
        default: 360
        type: number
# Workflow-level environment shared by every step.
env:
  # Workflow configuration
  # Each value falls back in order: manual-dispatch input, repository
  # variable, then the literal default.
  TEST_DURATION: ${{ inputs.duration || vars.NIGHTLY_TEST_DURATION || '5h' }}
  TEST_TIMEOUT: ${{ inputs.timeout || vars.NIGHTLY_TEST_TIMEOUT || '5h30m' }}
  # Logging and artifacts
  WORKER_LOG_DIR: /tmp/throughput-stress-logs
  # Omes configuration
  OMES_REPO: temporalio/omes
  OMES_REF: main
  # Unique per workflow run; passed to the scenario as --run-id.
  RUN_ID: ${{ github.run_id }}-throughput-stress
jobs:
  # Builds the SDK from this checkout, starts a local Temporal dev server, and
  # runs the OMES `throughput_stress` scenario against it.
  throughput-stress:
    runs-on: ubuntu-latest-4-cores
    # Same fallback chain as the env values: input, repository variable, 360.
    timeout-minutes: ${{ fromJSON(inputs.job_timeout_minutes || vars.NIGHTLY_JOB_TIMEOUT_MINUTES || 360) }}
    steps:
      - name: Print test configuration
        run: |
          echo "=== Throughput Stress Test Configuration ==="
          echo "Duration: $TEST_DURATION"
          echo "Timeout: $TEST_TIMEOUT"
          echo "Run ID: $RUN_ID"
          echo "=========================================="

      - name: Checkout SDK
        uses: actions/checkout@v4
        with:
          submodules: recursive

      # OMES is checked out into ./omes, next to the SDK sources.
      - name: Checkout OMES
        uses: actions/checkout@v4
        with:
          repository: ${{ env.OMES_REPO }}
          ref: ${{ env.OMES_REF }}
          path: omes
          submodules: recursive

      - name: Setup Go
        uses: actions/setup-go@v5
        with:
          go-version-file: omes/go.mod
          cache-dependency-path: omes/go.sum

      - name: Setup Node
        uses: actions/setup-node@v4
        with:
          node-version: 22

      - name: Get NPM cache directory
        id: npm-cache-dir
        run: echo "dir=$(npm config get cache)" >> "$GITHUB_OUTPUT"

      - name: Restore NPM cache
        uses: actions/cache/restore@v4
        with:
          path: ${{ steps.npm-cache-dir.outputs.dir }}
          key: npm-main-linux-x64-${{ hashFiles('./package-lock.json') }}
          restore-keys: |
            npm-main-linux-x64-

      - name: Install protoc
        uses: arduino/setup-protoc@v3
        with:
          version: '23.x'
          repo-token: ${{ secrets.GITHUB_TOKEN }}

      - name: Setup Rust
        uses: dtolnay/rust-toolchain@stable

      - name: Rust cache
        uses: Swatinem/rust-cache@v2
        with:
          workspaces: packages/core-bridge -> target

      # Up to three attempts in total: each `||` retries only if the previous
      # `npm ci` failed.
      - name: Install SDK dependencies
        run: |
          npm ci --ignore-scripts --verbose || \
          npm ci --ignore-scripts --verbose || \
          npm ci --ignore-scripts --verbose

      - name: Build SDK
        run: npm run build
        env:
          BUILD_CORE_RELEASE: true

      # Runs even after a failed build so the next run can reuse the cache.
      - name: Save NPM cache
        uses: actions/cache/save@v4
        if: always()
        with:
          path: ${{ steps.npm-cache-dir.outputs.dir }}
          key: npm-main-linux-x64-${{ hashFiles('./package-lock.json') }}

      - name: Install Temporal CLI
        uses: temporalio/setup-temporal@v0

      - name: Setup log directory
        run: mkdir -p "$WORKER_LOG_DIR"

      # Started in the background; output goes to a file in the log directory
      # so it is included in the artifact uploaded on failure.
      - name: Start Temporal Server
        run: |
          temporal server start-dev \
            --db-filename temporal-throughput-stress.sqlite \
            --sqlite-pragma journal_mode=WAL \
            --sqlite-pragma synchronous=OFF \
            --headless &> "$WORKER_LOG_DIR/temporal-server.log" &

      - name: Run throughput stress scenario with local SDK
        working-directory: omes
        run: |
          # This makes the pipeline return the exit code of the first failing command
          # Otherwise the output of the `tee` command will be used
          # (which is troublesome when the scenario fails but the `tee` command succeeds)
          set -o pipefail
          # Use run-scenario-with-worker to build and run in one step
          # Pass the SDK directory as --version for local testing
          # Note: The hardcoded values below match OMES defaults, except:
          # - visibility-count-timeout: 5m (vs 3m default)
          #   to give CI a bit more time for visibility consistency
          # Expansions are quoted so that values supplied through manual
          # dispatch cannot be split into extra arguments.
          go run ./cmd run-scenario-with-worker \
            --scenario throughput_stress \
            --language typescript \
            --version "$(pwd)/.." \
            --run-id "$RUN_ID" \
            --duration "$TEST_DURATION" \
            --timeout "$TEST_TIMEOUT" \
            --max-concurrent 10 \
            --option internal-iterations=10 \
            --option continue-as-new-after-iterations=3 \
            --option sleep-time=1s \
            --option visibility-count-timeout=5m \
            --option min-throughput-per-hour=1000 \
            2>&1 | tee "$WORKER_LOG_DIR/scenario.log"

      - name: Upload logs on failure
        if: failure() || cancelled()
        uses: actions/upload-artifact@v4
        with:
          name: throughput-stress-logs
          path: ${{ env.WORKER_LOG_DIR }}
          retention-days: 30

      - name: Notify Slack on failure
        if: failure() || cancelled()
        uses: slackapi/slack-github-action@v2
        with:
          webhook-type: incoming-webhook
          payload: |
            {
              "text": "Nightly TypeScript throughput stress test failed",
              "blocks": [
                {
                  "type": "section",
                  "text": {
                    "type": "mrkdwn",
                    "text": "*Nightly Throughput Stress Failed* :x:\n\n*Duration:* ${{ env.TEST_DURATION }}\n*Run:* <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View Logs>\n*Triggered by:* ${{ github.event_name == 'schedule' && 'Scheduled' || github.actor }}"
                  }
                }
              ]
            }
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_SDK_ALERTS_WEBHOOK }}

      - name: Report test results
        if: always()
        env:
          # `job.status` is a single string (success, failure, or cancelled).
          # The previous `steps.*.outcome` filter yields an array, which never
          # renders as the string "success", so this step failed every run.
          JOB_STATUS: ${{ job.status }}
        run: |
          if [ "$JOB_STATUS" = "success" ]; then
            echo "✅ Throughput stress test completed successfully"
          else
            echo "❌ Throughput stress test failed"
            echo "📦 Check the uploaded artifacts for detailed logs and state"
            exit 1
          fi