Skip to content

Commit d2e4f1f

Browse files
authored
chore(smoke-tests): Retry failed jobs 3 times before failing Evergreen task COMPASS-9837 (#7309)
* Retry failed jobs
* Increase waitfor timeout
1 parent c01fb29 commit d2e4f1f

File tree

2 files changed

+40
-16
lines changed

2 files changed

+40
-16
lines changed

.github/workflows/test-installers.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ jobs:
182182
# Similar to total task timeout, setting these higher than the default
183183
# value to account for very slow windows machines
184184
COMPASS_E2E_MOCHA_TIMEOUT: 720000 # 12min
185-
COMPASS_E2E_WEBDRIVER_WAITFOR_TIMEOUT: 360000 # 6min
185+
COMPASS_E2E_WEBDRIVER_WAITFOR_TIMEOUT: 600000 # 10min
186186
steps:
187187
- name: Checkout
188188
uses: actions/checkout@v2

packages/compass-smoke-tests/src/dispatch.ts

Lines changed: 39 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,13 @@ async function getWorkflowRun(
3333
async function getWorkflowRunRetrying(
3434
octokit: ReturnType<typeof github.getOctokit>,
3535
expectedRunName: string,
36+
expectedRunAttempt: number,
3637
pollDelayMs = 1000
3738
) {
3839
for (let attempt = 0; attempt < MAX_GET_LATEST_ATTEMPTS; attempt++) {
3940
const run = await getWorkflowRun(octokit, expectedRunName);
4041
debug(`Attempt %d finding run named "%s"`, attempt, expectedRunName);
41-
if (run) {
42+
if (run && run.run_attempt === expectedRunAttempt) {
4243
return run;
4344
}
4445
await new Promise((resolve) => setTimeout(resolve, pollDelayMs));
@@ -129,6 +130,11 @@ type DispatchOptions = {
129130
* Delay in milliseconds to wait between requests when polling while watching the run.
130131
*/
131132
watchPollDelayMs?: number | undefined;
133+
134+
/**
135+
* How many times should a failed job be retried.
136+
*/
137+
retries?: number | undefined;
132138
};
133139

134140
export async function dispatchAndWait({
@@ -140,6 +146,7 @@ export async function dispatchAndWait({
140146
githubPrNumber,
141147
evergreenTaskUrl,
142148
watchPollDelayMs = 5000,
149+
retries = 3,
143150
}: DispatchOptions) {
144151
const octokit = github.getOctokit(githubToken);
145152
const nonce = createNonce();
@@ -159,20 +166,37 @@ export async function dispatchAndWait({
159166
},
160167
});
161168

162-
// Find the next run we just dispatched
163-
const run = await getWorkflowRunRetrying(
164-
octokit,
165-
`Test Installers ${devVersion || ref} / (nonce = ${nonce})`
166-
);
169+
for (let attempt = 1; attempt <= retries; attempt++) {
170+
// Find the next run we just dispatched
171+
const run = await getWorkflowRunRetrying(
172+
octokit,
173+
// Matching on the run name, as defined by the workflow in `.github/workflows/test-installers.yml`
174+
`Test Installers ${devVersion || ref} / (nonce = ${nonce})`,
175+
attempt
176+
);
177+
178+
console.log(
179+
`Dispatched run #${run.run_number} (attempt ${attempt} / ${retries}) (${run.html_url})`
180+
);
181+
const status = await pollToCompletion({
182+
octokit,
183+
runId: run.id,
184+
watchTimeoutMs: WATCH_POLL_TIMEOUT_MS,
185+
watchPollDelayMs,
186+
});
167187

168-
console.log(`Dispatched run #${run.run_number} (${run.html_url})`);
169-
const status = await pollToCompletion({
170-
octokit,
171-
runId: run.id,
172-
watchTimeoutMs: WATCH_POLL_TIMEOUT_MS,
173-
watchPollDelayMs,
174-
});
188+
console.log(`Run completed (status = ${status}): ${run.html_url}`);
189+
if (status === 'success') {
190+
return;
191+
} else {
192+
console.log('Re-running failed jobs');
193+
await octokit.rest.actions.reRunWorkflowFailedJobs({
194+
owner: GITHUB_OWNER,
195+
repo: GITHUB_REPO,
196+
run_id: run.id,
197+
});
198+
}
199+
}
175200

176-
console.log(`Run completed: ${run.html_url}`);
177-
assert.equal(status, 'success', "Expected a 'success' conclusion");
201+
throw new Error('All attempts to run the workflow failed!');
178202
}

0 commit comments

Comments
 (0)