Skip to content

Commit 649fb5e

Browse files
committed
runners: Add scaleCycle lambda to reuse runners
This lambda attempts to reuse idle runners that have already finished their jobs. The plan is to run it in AWS on a cron schedule. The functionality within this lambda will eventually replace the tryReuseRunner function in scale-up.ts. Signed-off-by: Eli Uriegas <[email protected]> ghstack-source-id: debc427 ghstack-comment-id: 3046547816 Pull-Request: #6892 Signed-off-by: Eli Uriegas <[email protected]>
1 parent 12bc70b commit 649fb5e

File tree

4 files changed

+142
-1
lines changed

4 files changed

+142
-1
lines changed

terraform-aws-github-runner/modules/runners/lambdas/runners/src/lambda.ts

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,13 @@ import {
77
ScaleUpChronMetrics,
88
sendMetricsAtTimeout,
99
sendMetricsTimeoutVars,
10+
ScaleCycleMetrics,
1011
} from './scale-runners/metrics';
1112
import { getDelayWithJitterRetryCount, stochaticRunOvershoot } from './scale-runners/utils';
1213
import { scaleDown as scaleDownR } from './scale-runners/scale-down';
1314
import { scaleUpChron as scaleUpChronR } from './scale-runners/scale-up-chron';
1415
import { sqsSendMessages, sqsDeleteMessageBatch } from './scale-runners/sqs';
16+
import { scaleCycle as scaleCycleR } from './scale-runners/scale-cycle';
1517

1618
async function sendRetryEvents(evtFailed: Array<[SQSRecord, boolean, number]>, metrics: ScaleUpMetrics) {
1719
console.error(`Detected ${evtFailed.length} errors when processing messages, will retry relevant messages.`);
@@ -202,3 +204,38 @@ export async function scaleUpChron(event: ScheduledEvent, context: Context, call
202204
}
203205
callback(callbackOutput);
204206
}
207+
208+
/**
 * Scheduled Lambda entry point for the scale-cycle pass.
 *
 * Creates a fresh `ScaleCycleMetrics`, arms a timer that runs the function returned by
 * `sendMetricsAtTimeout` after `(lambdaTimeout - 10) * 1000` ms (presumably `lambdaTimeout`
 * is in seconds - confirm against Config), runs `scaleCycleR`, and always attempts to send
 * the collected metrics before invoking `callback`.
 *
 * @param event - The scheduled event that triggered the invocation; not read by this handler.
 * @param context - Lambda context; `callbackWaitsForEmptyEventLoop` is set to `false` on it.
 * @param callback - Invoked exactly once with `null` on success, or with a string describing
 *   every failure that occurred (scale-cycle failure, metrics failure, or both).
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
export async function scaleCycle(event: ScheduledEvent, context: Context, callback: any) {
  // we maintain open connections to redis, so the event pool is only cleaned when the SIGTERM is sent
  context.callbackWaitsForEmptyEventLoop = false;

  const metrics = new ScaleCycleMetrics();
  const sndMetricsTimout: sendMetricsTimeoutVars = {
    metrics: metrics,
  };
  sndMetricsTimout.setTimeout = setTimeout(
    sendMetricsAtTimeout(sndMetricsTimout),
    (Config.Instance.lambdaTimeout - 10) * 1000,
  );

  let callbackOutput: string | null = null;

  try {
    await scaleCycleR(metrics);
  } catch (e) {
    console.error(e);
    callbackOutput = `Failed to scale cycle: ${e}`;
  } finally {
    try {
      // Disarm the timeout path before sending metrics through the normal path.
      clearTimeout(sndMetricsTimout.setTimeout);
      sndMetricsTimout.metrics = undefined;
      sndMetricsTimout.setTimeout = undefined;
      await metrics.sendMetrics();
    } catch (e) {
      // Log the metrics failure and keep any earlier scale-cycle failure in the output
      // instead of overwriting it, so the callback reports everything that went wrong.
      console.error(e);
      const metricsFailure = `Error sending metrics: ${e}`;
      callbackOutput = callbackOutput === null ? metricsFailure : `${callbackOutput}; ${metricsFailure}`;
    }
  }

  callback(callbackOutput);
}

terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/metrics.ts

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1813,3 +1813,30 @@ export function sendMetricsAtTimeout(metricsTimeouts: sendMetricsTimeoutVars) {
18131813
}
18141814
};
18151815
}
1816+
1817+
/**
 * Metrics collector for the scale-cycle lambda.
 *
 * Builds on `ScaleUpMetrics` (constructed with the name `'scaleCycle'`) and adds counters
 * for runners that were found as reuse candidates.
 */
export class ScaleCycleMetrics extends ScaleUpMetrics {
  constructor() {
    super('scaleCycle');
  }

  /**
   * Counts one reuse candidate, dimensioned by runner type only.
   *
   * @param runnerType - Name of the runner type the candidate belongs to.
   */
  scaleCycleRunnerReuseFound(runnerType: string): void {
    this.countEntry('run.scaleCycle.runnerReuse.found', 1, new Map<string, string>([['RunnerType', runnerType]]));
  }

  /**
   * Counts one reuse candidate, dimensioned by organization and runner type.
   *
   * @param org - Organization the candidate runner is associated with.
   * @param runnerType - Name of the runner type the candidate belongs to.
   */
  scaleCycleRunnerReuseFoundOrg(org: string, runnerType: string): void {
    this.countEntry(
      'run.scaleCycle.runnerReuse.found.org',
      1,
      new Map<string, string>([
        ['Org', org],
        ['RunnerType', runnerType],
      ]),
    );
  }

  /**
   * Counts one reuse candidate, dimensioned by repository and runner type.
   *
   * @param repo - Repository key the candidate runner is associated with.
   * @param runnerType - Name of the runner type the candidate belongs to.
   */
  scaleCycleRunnerReuseFoundRepo(repo: string, runnerType: string): void {
    this.countEntry(
      'run.scaleCycle.runnerReuse.found.repo',
      1,
      new Map<string, string>([
        ['Repo', repo],
        ['RunnerType', runnerType],
      ]),
    );
  }
}
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
import { Config } from './config';
2+
import { listRunners, RunnerInputParameters, tryReuseRunner } from './runners';
3+
import { getRepo, getRepoKey } from './utils';
4+
import { ScaleCycleMetrics } from './metrics';
5+
import { getRunnerTypes } from './gh-runners';
6+
import { createRunnerConfigArgument } from './scale-up';
7+
8+
/**
 * Lists runners for every configured runner type and passes each one to `tryReuseRunner`.
 *
 * Runner types are read from the scale-config repository named in `Config.Instance`.
 * Runners are listed with one `listRunners` call per runner type, requiring the tags
 * `GithubRunnerID`, `EphemeralRunnerFinished` and `RunnerType`. Runners missing a runner
 * type, org or repo, or whose runner type is not in the configuration, are skipped with a
 * warning. Runners are processed sequentially.
 *
 * @param metrics - Metrics collector handed to every helper call and used to count the
 *   runners found for reuse.
 */
export async function scaleCycle(metrics: ScaleCycleMetrics) {
  // Load the runner type configuration before anything else
  const configRepo = getRepo(Config.Instance.scaleConfigOrg, Config.Instance.scaleConfigRepo);
  const runnerTypes = await getRunnerTypes(configRepo, metrics);

  // One listRunners call per known runner type so the filtering happens at the EC2 level
  const runnersPerType = await Promise.all(
    Array.from(runnerTypes.keys(), (runnerTypeName) =>
      listRunners(metrics, {
        containsTags: ['GithubRunnerID', 'EphemeralRunnerFinished', 'RunnerType'],
        runnerType: runnerTypeName,
      }),
    ),
  );

  for (const runner of runnersPerType.flat()) {
    const { runnerType: runnerTypeName, org, repo: repoName } = runner;

    // Guard: all three tags are needed to build the reuse request
    if (!runnerTypeName || !org || !repoName) {
      console.warn(`Skipping runner ${runner.instanceId} due to missing required tags`);
      continue;
    }

    // Guard: resolve the tag value to its configured RunnerType object
    const runnerTypeConfig = runnerTypes.get(runnerTypeName);
    if (!runnerTypeConfig) {
      console.warn(`Unknown runner type: ${runnerTypeName}, skipping`);
      continue;
    }

    const repo = getRepo(org, repoName);

    // Parameters handed to tryReuseRunner for this runner
    const reuseParams: RunnerInputParameters = {
      runnerConfig: (awsRegion: string, experimentalRunner: boolean) =>
        createRunnerConfigArgument(
          runnerTypeConfig,
          repo,
          // NOTE: installationId can actually be undefined here but this may incur lower rate limits
          // TODO: figure out if we need to pass an actual installationId here
          undefined,
          metrics,
          awsRegion,
          experimentalRunner,
        ),
      environment: Config.Instance.environment,
      runnerType: runnerTypeConfig,
    };

    // Scope the request to the org or to the repo, depending on configuration
    if (Config.Instance.enableOrganizationRunners) {
      reuseParams.orgName = org;
      metrics.scaleCycleRunnerReuseFoundOrg(org, runnerTypeName);
      console.info(`Reusing runner ${runner.instanceId} for ${org}`);
    } else {
      const repoKey = getRepoKey(repo);
      reuseParams.repoName = repoKey;
      metrics.scaleCycleRunnerReuseFoundRepo(repoKey, runnerTypeName);
      console.info(`Reusing runner ${runner.instanceId} for ${repoKey}`);
    }

    await tryReuseRunner(reuseParams, metrics);
  }
}

terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-up.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ export async function scaleUp(
163163
}
164164
}
165165

166-
async function createRunnerConfigArgument(
166+
export async function createRunnerConfigArgument(
167167
runnerType: RunnerType,
168168
repo: Repo,
169169
installationId: number | undefined,

0 commit comments

Comments
 (0)