Skip to content

Commit 2b66760

Browse files
authored
Fix mobile job rate limit failures (#5770)
It looks like AWS imposes a rate limit somewhere on the number of requests we can submit to them, so jobs are failing flakily from time to time, e.g. https://github.com/pytorch/executorch/actions/runs/11352715938/attempts/1. Also, the iOS job seems to suffer more, so maybe AWS has different rate limits for different devices? https://github.com/pytorch/executorch/actions/runs/11357190872/job/31590285863 Let's just slow down a bit here, and also support retries.
1 parent a43a148 commit 2b66760

File tree

1 file changed

+62
-32
lines changed

1 file changed

+62
-32
lines changed

.github/workflows/mobile_job.yml

Lines changed: 62 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -270,11 +270,25 @@ jobs:
270270
working-directory: test-infra
271271
github-token: ${{ secrets.GITHUB_TOKEN }}
272272

273+
- name: Slow down the incoming requests to mitigate AWS rate limit
274+
id: randomize-retry
275+
shell: bash
276+
continue-on-error: true
277+
env:
278+
MAX_WAIT_TIME_IN_SECONDS: 120
279+
run: |
280+
set -ex
281+
282+
# NB: AWS imposes a rate limit somewhere on the number of requests
283+
# we can submit to them. Let's just slow down a bit here
284+
WAIT_TIME_IN_SECONDS=$((RANDOM % MAX_WAIT_TIME_IN_SECONDS))
285+
echo "WAIT_TIME_IN_SECONDS=${WAIT_TIME_IN_SECONDS}" >> "${GITHUB_ENV}"
286+
287+
sleep "${WAIT_TIME_IN_SECONDS}"
288+
273289
- name: Run iOS tests on devices
274290
id: ios-test
275291
if: ${{ inputs.device-type == 'ios' }}
276-
shell: bash
277-
working-directory: test-infra/tools/device-farm-runner
278292
env:
279293
PROJECT_ARN: ${{ inputs.project-arn }}
280294
DEVICE_POOL_ARN: ${{ inputs.device-pool-arn }}
@@ -288,20 +302,29 @@ jobs:
288302
RUN_ID: ${{ github.run_id }}
289303
RUN_ATTEMPT: ${{ github.run_attempt }}
290304
JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
291-
run: |
292-
set -ex
293-
294-
${CONDA_RUN} python run_on_aws_devicefarm.py \
295-
--project-arn "${PROJECT_ARN}" \
296-
--device-pool-arn "${DEVICE_POOL_ARN}" \
297-
--app "${IPA_ARCHIVE}" \
298-
--ios-xctestrun "${XCTESTRUN_ZIP}" \
299-
--extra-data "${EXTRA_DATA}" \
300-
--test-spec "${TEST_SPEC}" \
301-
--name-prefix "${JOB_NAME}-${DEVICE_TYPE}" \
302-
--workflow-id "${RUN_ID}" \
303-
--workflow-attempt "${RUN_ATTEMPT}" \
304-
--output "ios-artifacts-${JOB_ID}.json"
305+
WORKING_DIRECTORY: test-infra/tools/device-farm-runner
306+
uses: nick-fields/[email protected]
307+
with:
308+
shell: bash
309+
timeout_minutes: ${{ inputs.timeout }}
310+
max_attempts: 3
311+
retry_wait_seconds: ${{ env.WAIT_TIME_IN_SECONDS || 120 }}
312+
command: |
313+
set -ex
314+
315+
pushd "${WORKING_DIRECTORY}"
316+
${CONDA_RUN} python run_on_aws_devicefarm.py \
317+
--project-arn "${PROJECT_ARN}" \
318+
--device-pool-arn "${DEVICE_POOL_ARN}" \
319+
--app "${IPA_ARCHIVE}" \
320+
--ios-xctestrun "${XCTESTRUN_ZIP}" \
321+
--extra-data "${EXTRA_DATA}" \
322+
--test-spec "${TEST_SPEC}" \
323+
--name-prefix "${JOB_NAME}-${DEVICE_TYPE}" \
324+
--workflow-id "${RUN_ID}" \
325+
--workflow-attempt "${RUN_ATTEMPT}" \
326+
--output "ios-artifacts-${JOB_ID}.json"
327+
popd
305328
306329
- name: Upload iOS artifacts to S3
307330
uses: seemethere/upload-artifact-s3@v5
@@ -317,8 +340,6 @@ jobs:
317340
- name: Run Android tests on devices
318341
id: android-test
319342
if: ${{ inputs.device-type == 'android' }}
320-
shell: bash
321-
working-directory: test-infra/tools/device-farm-runner
322343
env:
323344
PROJECT_ARN: ${{ inputs.project-arn }}
324345
DEVICE_POOL_ARN: ${{ inputs.device-pool-arn }}
@@ -332,20 +353,29 @@ jobs:
332353
RUN_ID: ${{ github.run_id }}
333354
RUN_ATTEMPT: ${{ github.run_attempt }}
334355
JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
335-
run: |
336-
set -ex
337-
338-
${CONDA_RUN} python run_on_aws_devicefarm.py \
339-
--project-arn "${PROJECT_ARN}" \
340-
--device-pool-arn "${DEVICE_POOL_ARN}" \
341-
--app "${APP_ARCHIVE}" \
342-
--android-instrumentation-test "${TEST_ARCHIVE}" \
343-
--extra-data "${EXTRA_DATA}" \
344-
--test-spec "${TEST_SPEC}" \
345-
--name-prefix "${JOB_NAME}-${DEVICE_TYPE}" \
346-
--workflow-id "${RUN_ID}" \
347-
--workflow-attempt "${RUN_ATTEMPT}" \
348-
--output "android-artifacts-${JOB_ID}.json"
356+
WORKING_DIRECTORY: test-infra/tools/device-farm-runner
357+
uses: nick-fields/[email protected]
358+
with:
359+
shell: bash
360+
timeout_minutes: ${{ inputs.timeout }}
361+
max_attempts: 3
362+
retry_wait_seconds: ${{ env.WAIT_TIME_IN_SECONDS || 120 }}
363+
command: |
364+
set -ex
365+
366+
pushd "${WORKING_DIRECTORY}"
367+
${CONDA_RUN} python run_on_aws_devicefarm.py \
368+
--project-arn "${PROJECT_ARN}" \
369+
--device-pool-arn "${DEVICE_POOL_ARN}" \
370+
--app "${APP_ARCHIVE}" \
371+
--android-instrumentation-test "${TEST_ARCHIVE}" \
372+
--extra-data "${EXTRA_DATA}" \
373+
--test-spec "${TEST_SPEC}" \
374+
--name-prefix "${JOB_NAME}-${DEVICE_TYPE}" \
375+
--workflow-id "${RUN_ID}" \
376+
--workflow-attempt "${RUN_ATTEMPT}" \
377+
--output "android-artifacts-${JOB_ID}.json"
378+
popd
349379
350380
- name: Upload Android artifacts to S3
351381
uses: seemethere/upload-artifact-s3@v5

0 commit comments

Comments
 (0)