Skip to content

Commit 6a3385b

Browse files
authored
upload nightly tps metrics to s3 (#1242)
* upload nightly tps metrics to s3
* use env vars instead of var interpolation, add is_experiment input
1 parent 89fb17f commit 6a3385b

File tree

1 file changed

+48
-4
lines changed

1 file changed

+48
-4
lines changed

.github/workflows/nightly-throughput-stress.yml

Lines changed: 48 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -4,9 +4,6 @@ on:
44
schedule:
55
# Run at 3 AM PST (11:00 UTC) - offset from existing nightly
66
- cron: '00 11 * * *'
7-
push:
8-
branches:
9-
- add-nightly-throughput-stress-workflow
107
workflow_dispatch:
118
inputs:
129
duration:
@@ -24,15 +21,24 @@ on:
2421
required: false
2522
default: 360
2623
type: number
24+
is_experiment:
25+
description: 'Mark this run as an experiment (excluded from nightly dashboards)'
26+
required: false
27+
default: false
28+
type: boolean
2729

2830
permissions:
2931
contents: read
32+
id-token: write
3033

3134
env:
3235
# Workflow configuration
3336
TEST_DURATION: ${{ inputs.duration || vars.NIGHTLY_TEST_DURATION || '5h' }}
3437
TEST_TIMEOUT: ${{ inputs.timeout || vars.NIGHTLY_TEST_TIMEOUT || '5h30m' }}
3538

39+
# ARN of the AWS IAM role assumed to upload metrics to S3
40+
AWS_S3_METRICS_UPLOAD_ROLE_ARN: ${{ vars.AWS_S3_METRICS_UPLOAD_ROLE_ARN }}
41+
3642
# Logging and artifacts
3743
WORKER_LOG_DIR: /tmp/throughput-stress-logs
3844

@@ -41,6 +47,12 @@ env:
4147
OMES_REF: main
4248
RUN_ID: ${{ github.run_id }}-throughput-stress
4349

50+
# Prometheus version
51+
PROM_VERSION: "3.8.0"
52+
53+
# Language
54+
SDK_LANG: "python"
55+
4456
jobs:
4557
throughput-stress:
4658
runs-on: ubuntu-latest-4-cores
@@ -107,6 +119,13 @@ jobs:
107119
- name: Install Temporal CLI
108120
uses: temporalio/setup-temporal@v0
109121

122+
- name: Install Prometheus
123+
run: |
124+
wget -q https://github.com/prometheus/prometheus/releases/download/v${PROM_VERSION}/prometheus-${PROM_VERSION}.linux-amd64.tar.gz
125+
tar xzf prometheus-${PROM_VERSION}.linux-amd64.tar.gz
126+
sudo mv prometheus-${PROM_VERSION}.linux-amd64/prometheus /usr/local/bin/
127+
prometheus --version
128+
110129
- name: Setup log directory
111130
run: mkdir -p $WORKER_LOG_DIR
112131

@@ -133,19 +152,44 @@ jobs:
133152
# to give CI a bit more time for visibility consistency
134153
go run ./cmd run-scenario-with-worker \
135154
--scenario throughput_stress \
136-
--language python \
155+
--language $SDK_LANG \
137156
--version $(pwd)/.. \
138157
--run-id $RUN_ID \
139158
--duration $TEST_DURATION \
140159
--timeout $TEST_TIMEOUT \
141160
--max-concurrent 10 \
161+
--prom-listen-address 127.0.0.1:9091 \
162+
--worker-prom-listen-address 127.0.0.1:9092 \
163+
--prom-instance-addr 127.0.0.1:9090 \
164+
--prom-instance-config \
165+
--prom-export-worker-metrics $RUN_ID.parquet \
142166
--option internal-iterations=10 \
143167
--option continue-as-new-after-iterations=3 \
144168
--option sleep-time=1s \
145169
--option visibility-count-timeout=5m \
146170
--option min-throughput-per-hour=1000 \
147171
2>&1 | tee $WORKER_LOG_DIR/scenario.log
148172
173+
- name: Configure AWS credentials
174+
if: always()
175+
uses: aws-actions/configure-aws-credentials@v4
176+
with:
177+
role-to-assume: ${{ env.AWS_S3_METRICS_UPLOAD_ROLE_ARN }}
178+
aws-region: us-west-2
179+
180+
- name: Upload metrics to S3
181+
if: always()
182+
run: |
183+
DATE=$(date +%Y-%m-%d)
184+
IS_EXPERIMENT="false"
185+
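# Mark the run as an experiment when it is not on the main branch or the is_experiment input is true. NOTE(review): no env block visible in this diff sets GH_REF or IS_EXPERIMENT_INPUT - confirm they are exported to this step, otherwise this check is always true.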
186+
if [[ "$GH_REF" != "refs/heads/main" || "$IS_EXPERIMENT_INPUT" == "true" ]]; then
187+
IS_EXPERIMENT="true"
188+
fi
189+
echo "Uploading metrics: is_experiment=$IS_EXPERIMENT, language=$SDK_LANG, date=$DATE"
190+
aws s3 cp omes/$RUN_ID.parquet \
191+
"s3://cloud-data-ingest-prod/github/sdk_load_test/is_experiment=$IS_EXPERIMENT/language=$SDK_LANG/date=$DATE/$RUN_ID.parquet"
192+
149193
- name: Upload logs on failure
150194
if: failure() || cancelled()
151195
uses: actions/upload-artifact@v4

0 commit comments

Comments
 (0)