Skip to content

Commit 601e6f4

Browse files
authored
runners: Move runner removal logic up (#6930)
This is a refactor of scale-down to move the runner removal logic up to the top of the loop. This is done to avoid long wait times between determining if a runner should be removed and actually removing it. In practice we were observing wait times of up to 7 to 10 minutes. This might only actually be testable with production traffic / rate limits. --------- Signed-off-by: Eli Uriegas <[email protected]>
1 parent d9cffc8 commit 601e6f4

File tree

2 files changed

+113
-104
lines changed

2 files changed

+113
-104
lines changed

terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-down.test.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -472,7 +472,7 @@ describe('scale-down', () => {
472472
expect(mockedGetRunnerTypes).toBeCalledTimes(9);
473473
expect(mockedGetRunnerTypes).toBeCalledWith({ owner: theOrg, repo: scaleConfigRepo }, metrics);
474474

475-
expect(mockedRemoveGithubRunnerOrg).toBeCalledTimes(4);
475+
expect(mockedRemoveGithubRunnerOrg).toBeCalledTimes(5);
476476
{
477477
const { awsR, ghR } = getRunnerPair('keep-min-runners-oldest-02');
478478
expect(mockedRemoveGithubRunnerOrg).toBeCalledWith(ghR.id, awsR.org as string, metrics);
@@ -814,7 +814,7 @@ describe('scale-down', () => {
814814
expect(mockedGetRunnerTypes).toBeCalledTimes(9);
815815
expect(mockedGetRunnerTypes).toBeCalledWith(repo, metrics);
816816

817-
expect(mockedRemoveGithubRunnerRepo).toBeCalledTimes(4);
817+
expect(mockedRemoveGithubRunnerRepo).toBeCalledTimes(5);
818818
{
819819
const { ghR } = getRunnerPair('keep-min-runners-oldest-02');
820820
expect(mockedRemoveGithubRunnerRepo).toBeCalledWith(ghR.id, repo, metrics);

terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts

Lines changed: 111 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,7 @@ export async function scaleDown(): Promise<void> {
5959
for (const [runnerType, runners] of shuffleArrayInPlace(Array.from(runnersDict.entries()))) {
6060
if (runners.length < 1 || runners[0].runnerType === undefined || runnerType === undefined) continue;
6161

62-
const ghRunnersRemovableWGHRunner: Array<[RunnerInfo, GhRunner]> = [];
63-
const ghRunnersRemovableNoGHRunner: Array<[RunnerInfo, GhRunner | undefined]> = [];
62+
let removedRunners = 0;
6463

6564
for (const ec2runner of runners) {
6665
// REPO assigned runners
@@ -71,10 +70,12 @@ export async function scaleDown(): Promise<void> {
7170
if (!Config.Instance.enableOrganizationRunners) {
7271
metrics.runnerFound(ec2runner);
7372
if (isRunnerRemovable(ghRunner, ec2runner, metrics)) {
74-
if (ghRunner === undefined) {
75-
ghRunnersRemovableNoGHRunner.push([ec2runner, undefined]);
76-
} else {
77-
ghRunnersRemovableWGHRunner.push([ec2runner, ghRunner]);
73+
// Process removal immediately instead of adding to array
74+
if (await shouldSkipRemoval(ghRunner, ec2runner, removedRunners, runners.length, metrics)) {
75+
continue;
76+
}
77+
if (await removeRunner(ec2runner, ghRunner, metrics)) {
78+
removedRunners += 1;
7879
}
7980
}
8081
}
@@ -86,10 +87,12 @@ export async function scaleDown(): Promise<void> {
8687
if (Config.Instance.enableOrganizationRunners) {
8788
metrics.runnerFound(ec2runner);
8889
if (isRunnerRemovable(ghRunner, ec2runner, metrics)) {
89-
if (ghRunner === undefined) {
90-
ghRunnersRemovableNoGHRunner.push([ec2runner, undefined]);
91-
} else {
92-
ghRunnersRemovableWGHRunner.push([ec2runner, ghRunner]);
90+
// Process removal immediately instead of adding to array
91+
if (await shouldSkipRemoval(ghRunner, ec2runner, removedRunners, runners.length, metrics)) {
92+
continue;
93+
}
94+
if (await removeRunner(ec2runner, ghRunner, metrics)) {
95+
removedRunners += 1;
9396
}
9497
}
9598
}
@@ -99,98 +102,6 @@ export async function scaleDown(): Promise<void> {
99102
metrics.runnerFound(ec2runner);
100103
}
101104
}
102-
103-
const ghRunnersRemovable: Array<[RunnerInfo, GhRunner | undefined]> =
104-
ghRunnersRemovableNoGHRunner.concat(ghRunnersRemovableWGHRunner);
105-
106-
let removedRunners = 0;
107-
for (const [ec2runner, ghRunner] of ghRunnersRemovable) {
108-
// We only limit the number of removed instances here for the reason: while sorting and getting info
109-
// on getRunner[Org|Repo] we send statistics that are relevant for monitoring
110-
if (
111-
ghRunnersRemovable.length - removedRunners <= (await minRunners(ec2runner, metrics)) &&
112-
ghRunner !== undefined &&
113-
ec2runner.applicationDeployDatetime == Config.Instance.datetimeDeploy
114-
) {
115-
continue;
116-
}
117-
118-
let shouldRemoveEC2 = true;
119-
if (ghRunner !== undefined) {
120-
if (Config.Instance.enableOrganizationRunners) {
121-
console.debug(
122-
`GH Runner instance '${ghRunner.id}'[${ec2runner.org}] for EC2 '${ec2runner.instanceId}' ` +
123-
`[${ec2runner.runnerType}] will be removed.`,
124-
);
125-
try {
126-
await removeGithubRunnerOrg(ghRunner.id, ec2runner.org as string, metrics);
127-
metrics.runnerGhTerminateSuccessOrg(ec2runner.org as string, ec2runner);
128-
console.info(
129-
`GH Runner instance '${ghRunner.id}'[${ec2runner.org}] for EC2 '${ec2runner.instanceId}' ` +
130-
`[${ec2runner.runnerType}] successfuly removed.`,
131-
);
132-
} catch (e) {
133-
/* istanbul ignore next */
134-
console.warn(
135-
`GH Runner instance '${ghRunner.id}'[${ec2runner.org}] for EC2 '${ec2runner.instanceId}' ` +
136-
`[${ec2runner.runnerType}] failed to be removed. ${e}`,
137-
);
138-
/* istanbul ignore next */
139-
metrics.runnerGhTerminateFailureOrg(ec2runner.org as string, ec2runner);
140-
/* istanbul ignore next */
141-
shouldRemoveEC2 = false;
142-
}
143-
} else {
144-
const repo = getRepo(ec2runner.repo as string);
145-
console.debug(
146-
`GH Runner instance '${ghRunner.id}'[${ec2runner.repo}] for EC2 '${ec2runner.instanceId}' ` +
147-
`[${ec2runner.runnerType}] will be removed.`,
148-
);
149-
try {
150-
await removeGithubRunnerRepo(ghRunner.id, repo, metrics);
151-
metrics.runnerGhTerminateSuccessRepo(repo, ec2runner);
152-
console.info(
153-
`GH Runner instance '${ghRunner.id}'[${ec2runner.repo}] for EC2 '${ec2runner.instanceId}' ` +
154-
`[${ec2runner.runnerType}] successfuly removed.`,
155-
);
156-
} catch (e) {
157-
/* istanbul ignore next */
158-
console.warn(
159-
`GH Runner instance '${ghRunner.id}'[${ec2runner.repo}] for EC2 '${ec2runner.instanceId}' ` +
160-
`[${ec2runner.runnerType}] failed to be removed. ${e}`,
161-
);
162-
/* istanbul ignore next */
163-
metrics.runnerGhTerminateFailureRepo(repo, ec2runner);
164-
/* istanbul ignore next */
165-
shouldRemoveEC2 = false;
166-
}
167-
}
168-
} else {
169-
if (Config.Instance.enableOrganizationRunners) {
170-
metrics.runnerGhTerminateNotFoundOrg(ec2runner.org as string, ec2runner);
171-
} else {
172-
metrics.runnerGhTerminateFailureRepo(getRepo(ec2runner.repo as string), ec2runner);
173-
}
174-
}
175-
176-
if (shouldRemoveEC2) {
177-
removedRunners += 1;
178-
179-
console.info(`Runner '${ec2runner.instanceId}' [${ec2runner.runnerType}] will be removed.`);
180-
try {
181-
await terminateRunner(ec2runner, metrics);
182-
metrics.runnerTerminateSuccess(ec2runner);
183-
} catch (e) {
184-
/* istanbul ignore next */
185-
metrics.runnerTerminateFailure(ec2runner);
186-
/* istanbul ignore next */
187-
console.error(`Runner '${ec2runner.instanceId}' [${ec2runner.runnerType}] cannot be removed: ${e}`);
188-
}
189-
} else {
190-
/* istanbul ignore next */
191-
metrics.runnerTerminateSkipped(ec2runner);
192-
}
193-
}
194105
}
195106

196107
if (Config.Instance.enableOrganizationRunners) {
@@ -525,3 +436,101 @@ export function sortSSMParametersByUpdateTime(ssmParams: Array<SSM.ParameterMeta
525436
return 0;
526437
});
527438
}
439+
440+
async function shouldSkipRemoval(
441+
ghRunner: GhRunner | undefined,
442+
ec2runner: RunnerInfo,
443+
removedRunners: number,
444+
totalRunners: number,
445+
metrics: ScaleDownMetrics,
446+
): Promise<boolean> {
447+
return (
448+
totalRunners - removedRunners <= (await minRunners(ec2runner, metrics)) &&
449+
ghRunner !== undefined &&
450+
ec2runner.applicationDeployDatetime == Config.Instance.datetimeDeploy
451+
);
452+
}
453+
454+
async function removeRunner(
455+
ec2runner: RunnerInfo,
456+
ghRunner: GhRunner | undefined,
457+
metrics: ScaleDownMetrics,
458+
): Promise<boolean> {
459+
let shouldRemoveEC2 = true;
460+
461+
if (ghRunner !== undefined) {
462+
if (Config.Instance.enableOrganizationRunners) {
463+
console.debug(
464+
`GH Runner instance '${ghRunner.id}'[${ec2runner.org}] for EC2 '${ec2runner.instanceId}' ` +
465+
`[${ec2runner.runnerType}] will be removed.`,
466+
);
467+
try {
468+
await removeGithubRunnerOrg(ghRunner.id, ec2runner.org as string, metrics);
469+
metrics.runnerGhTerminateSuccessOrg(ec2runner.org as string, ec2runner);
470+
console.info(
471+
`GH Runner instance '${ghRunner.id}'[${ec2runner.org}] for EC2 '${ec2runner.instanceId}' ` +
472+
`[${ec2runner.runnerType}] successfuly removed.`,
473+
);
474+
} catch (e) {
475+
/* istanbul ignore next */
476+
console.warn(
477+
`GH Runner instance '${ghRunner.id}'[${ec2runner.org}] for EC2 '${ec2runner.instanceId}' ` +
478+
`[${ec2runner.runnerType}] failed to be removed. ${e}`,
479+
);
480+
/* istanbul ignore next */
481+
metrics.runnerGhTerminateFailureOrg(ec2runner.org as string, ec2runner);
482+
/* istanbul ignore next */
483+
shouldRemoveEC2 = false;
484+
}
485+
} else {
486+
const repo = getRepo(ec2runner.repo as string);
487+
console.debug(
488+
`GH Runner instance '${ghRunner.id}'[${ec2runner.repo}] for EC2 '${ec2runner.instanceId}' ` +
489+
`[${ec2runner.runnerType}] will be removed.`,
490+
);
491+
try {
492+
await removeGithubRunnerRepo(ghRunner.id, repo, metrics);
493+
metrics.runnerGhTerminateSuccessRepo(repo, ec2runner);
494+
console.info(
495+
`GH Runner instance '${ghRunner.id}'[${ec2runner.repo}] for EC2 '${ec2runner.instanceId}' ` +
496+
`[${ec2runner.runnerType}] successfuly removed.`,
497+
);
498+
} catch (e) {
499+
/* istanbul ignore next */
500+
console.warn(
501+
`GH Runner instance '${ghRunner.id}'[${ec2runner.repo}] for EC2 '${ec2runner.instanceId}' ` +
502+
`[${ec2runner.runnerType}] failed to be removed. ${e}`,
503+
);
504+
/* istanbul ignore next */
505+
metrics.runnerGhTerminateFailureRepo(repo, ec2runner);
506+
/* istanbul ignore next */
507+
shouldRemoveEC2 = false;
508+
}
509+
}
510+
} else {
511+
if (Config.Instance.enableOrganizationRunners) {
512+
metrics.runnerGhTerminateNotFoundOrg(ec2runner.org as string, ec2runner);
513+
} else {
514+
metrics.runnerGhTerminateFailureRepo(getRepo(ec2runner.repo as string), ec2runner);
515+
}
516+
}
517+
518+
if (shouldRemoveEC2) {
519+
console.info(`Runner '${ec2runner.instanceId}' [${ec2runner.runnerType}] will be removed.`);
520+
try {
521+
await terminateRunner(ec2runner, metrics);
522+
metrics.runnerTerminateSuccess(ec2runner);
523+
return true;
524+
} catch (e) {
525+
/* istanbul ignore next */
526+
metrics.runnerTerminateFailure(ec2runner);
527+
/* istanbul ignore next */
528+
console.error(`Runner '${ec2runner.instanceId}' [${ec2runner.runnerType}] cannot be removed: ${e}`);
529+
}
530+
} else {
531+
/* istanbul ignore next */
532+
metrics.runnerTerminateSkipped(ec2runner);
533+
}
534+
535+
return false;
536+
}

0 commit comments

Comments
 (0)