44 schedule :
55 # Run at 3 AM PST (11:00 UTC) - offset from existing nightly
66 - cron : ' 00 11 * * *'
7- push :
8- branches :
9- - add-nightly-throughput-stress-workflow
107 workflow_dispatch :
118 inputs :
129 duration :
2421 required : false
2522 default : 360
2623 type : number
24+ is_experiment :
25+ description : ' Mark this run as an experiment (excluded from nightly dashboards)'
26+ required : false
27+ default : false
28+ type : boolean
2729
2830permissions :
2931 contents : read
32+ id-token : write
3033
3134env :
3235 # Workflow configuration
3336 TEST_DURATION : ${{ inputs.duration || vars.NIGHTLY_TEST_DURATION || '5h' }}
3437 TEST_TIMEOUT : ${{ inputs.timeout || vars.NIGHTLY_TEST_TIMEOUT || '5h30m' }}
3538
39+	 # IAM role ARN assumed (via configure-aws-credentials) to upload metrics to S3
40+ AWS_S3_METRICS_UPLOAD_ROLE_ARN : ${{ vars.AWS_S3_METRICS_UPLOAD_ROLE_ARN }}
41+
3642 # Logging and artifacts
3743 WORKER_LOG_DIR : /tmp/throughput-stress-logs
3844
4147 OMES_REF : main
4248 RUN_ID : ${{ github.run_id }}-throughput-stress
4349
50+ # Prometheus version
51+ PROM_VERSION : " 3.8.0"
52+
53+	 # SDK language under test — used as the omes --language flag and as the S3 partition key
54+ SDK_LANG : " python"
55+
4456jobs :
4557 throughput-stress :
4658 runs-on : ubuntu-latest-4-cores
@@ -107,6 +119,13 @@ jobs:
107119 - name : Install Temporal CLI
108120 uses : temporalio/setup-temporal@v0
109121
122+ - name : Install Prometheus
123+ run : |
124+ wget -q https://github.com/prometheus/prometheus/releases/download/v${PROM_VERSION}/prometheus-${PROM_VERSION}.linux-amd64.tar.gz
125+ tar xzf prometheus-${PROM_VERSION}.linux-amd64.tar.gz
126+ sudo mv prometheus-${PROM_VERSION}.linux-amd64/prometheus /usr/local/bin/
127+ prometheus --version
128+
110129 - name : Setup log directory
111130 run : mkdir -p $WORKER_LOG_DIR
112131
@@ -133,19 +152,44 @@ jobs:
133152 # to give CI a bit more time for visibility consistency
134153 go run ./cmd run-scenario-with-worker \
135154 --scenario throughput_stress \
136- --language python \
155+ --language $SDK_LANG \
137156 --version $(pwd)/.. \
138157 --run-id $RUN_ID \
139158 --duration $TEST_DURATION \
140159 --timeout $TEST_TIMEOUT \
141160 --max-concurrent 10 \
161+ --prom-listen-address 127.0.0.1:9091 \
162+ --worker-prom-listen-address 127.0.0.1:9092 \
163+ --prom-instance-addr 127.0.0.1:9090 \
164+ --prom-instance-config \
165+ --prom-export-worker-metrics $RUN_ID.parquet \
142166 --option internal-iterations=10 \
143167 --option continue-as-new-after-iterations=3 \
144168 --option sleep-time=1s \
145169 --option visibility-count-timeout=5m \
146170 --option min-throughput-per-hour=1000 \
147171 2>&1 | tee $WORKER_LOG_DIR/scenario.log
148172
173+ - name : Configure AWS credentials
174+ if : always()
175+ uses : aws-actions/configure-aws-credentials@v4
176+ with :
177+ role-to-assume : ${{ env.AWS_S3_METRICS_UPLOAD_ROLE_ARN }}
178+ aws-region : us-west-2
179+
180+ - name : Upload metrics to S3
181+ if : always()
182+ run : |
183+ DATE=$(date +%Y-%m-%d)
184+ IS_EXPERIMENT="false"
185+	 # Mark the run as an experiment when not on the main branch, or when the is_experiment input is true. NOTE(review): GH_REF and IS_EXPERIMENT_INPUT are not defined in this step's env in this hunk — confirm they are supplied (e.g. env: GH_REF: ${{ github.ref }}, IS_EXPERIMENT_INPUT: ${{ inputs.is_experiment }}), otherwise both expand empty and the check misfires
186+ if [[ "$GH_REF" != "refs/heads/main" || "$IS_EXPERIMENT_INPUT" == "true" ]]; then
187+ IS_EXPERIMENT="true"
188+ fi
189+ echo "Uploading metrics: is_experiment=$IS_EXPERIMENT, language=$SDK_LANG, date=$DATE"
190+ aws s3 cp omes/$RUN_ID.parquet \
191+ "s3://cloud-data-ingest-prod/github/sdk_load_test/is_experiment=$IS_EXPERIMENT/language=$SDK_LANG/date=$DATE/$RUN_ID.parquet"
192+
149193 - name : Upload logs on failure
150194 if : failure() || cancelled()
151195 uses : actions/upload-artifact@v4
0 commit comments