Skip to content

Commit 406e275

Browse files
committed
feat: add lambda function to cleanup org runners
Add a lambda function to clean up offline runners in a GitHub organization. Normally runners are cleaned up automatically, but when using ephemeral runners and spot instances, the call to the GitHub API to remove the runner may not happen, and the runner will stay in the list in an offline state. This lambda function is triggered by a CloudWatch event and removes any organization runners that are offline and whose labels match the config.
1 parent 0999ea5 commit 406e275

File tree

9 files changed

+634
-1
lines changed

9 files changed

+634
-1
lines changed

lambdas/functions/control-plane/src/lambda.test.ts

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
import { captureLambdaHandler, logger } from '@aws-github-runner/aws-powertools-util';
22
import { Context, SQSEvent, SQSRecord } from 'aws-lambda';
33

4-
import { addMiddleware, adjustPool, scaleDownHandler, scaleUpHandler, ssmHousekeeper, jobRetryCheck } from './lambda';
4+
import { addMiddleware, adjustPool, scaleDownHandler, scaleUpHandler, ssmHousekeeper, jobRetryCheck, cleanupOrgRunnersHandler } from './lambda';
55
import { adjust } from './pool/pool';
66
import ScaleError from './scale-runners/ScaleError';
77
import { scaleDown } from './scale-runners/scale-down';
88
import { ActionRequestMessage, scaleUp } from './scale-runners/scale-up';
99
import { cleanSSMTokens } from './scale-runners/ssm-housekeeper';
1010
import { checkAndRetryJob } from './scale-runners/job-retry';
1111
import { describe, it, expect, vi, MockedFunction } from 'vitest';
12+
import { cleanupOrgRunners } from './scale-runners/cleanup-org-runners';
1213

1314
const body: ActionRequestMessage = {
1415
eventType: 'workflow_job',
@@ -61,13 +62,32 @@ const context: Context = {
6162
},
6263
};
6364

65+
// Replace every collaborator module with an automatic mock so the handler
// wrappers in './lambda' can be tested in isolation. This file runs under
// vitest (see the `vi` import above), so `vi.mock` is used rather than
// `jest.mock`.
vi.mock('./pool/pool');
vi.mock('./scale-runners/scale-down');
vi.mock('./scale-runners/scale-up');
vi.mock('./scale-runners/ssm-housekeeper');
vi.mock('./scale-runners/job-retry');
vi.mock('./scale-runners/cleanup-org-runners');
vi.mock('@aws-github-runner/aws-powertools-util');
vi.mock('@aws-github-runner/aws-ssm-util');
7191

7292
describe('Test scale up lambda wrapper.', () => {
7393
it('Do not handle multiple record sets.', async () => {
@@ -226,3 +246,26 @@ describe('Test job retry check wrapper', () => {
226246
expect(logSpyWarn).toHaveBeenCalledWith(`Error processing job retry: ${error.message}`, { error });
227247
});
228248
});
249+
250+
// Verifies that the cleanupOrgRunnersHandler wrapper always resolves: both when
// the underlying cleanupOrgRunners call succeeds and when it rejects (in which
// case the error must be logged instead of propagated).
describe('Test cleanupOrgRunnersHandler lambda wrapper', () => {
  it('Cleanup without error should resolve.', async () => {
    // The module is auto-mocked at the top of the file; vi.mocked only adds typing.
    const mock = vi.mocked(cleanupOrgRunners);
    mock.mockResolvedValue(undefined);

    await expect(cleanupOrgRunnersHandler({}, context)).resolves.not.toThrow();
  });

  it('Cleanup with error should resolve and log error.', async () => {
    const logSpyError = vi.spyOn(logger, 'error');

    const mock = vi.mocked(cleanupOrgRunners);
    const error = new Error('Error cleaning up org runners.');
    mock.mockRejectedValue(error);

    await expect(cleanupOrgRunnersHandler({}, context)).resolves.not.toThrow();
    expect(logSpyError).toHaveBeenCalledWith(expect.stringContaining(error.message), expect.anything());
  });
});

lambdas/functions/control-plane/src/lambda.ts

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import { scaleDown } from './scale-runners/scale-down';
99
import { scaleUp } from './scale-runners/scale-up';
1010
import { SSMCleanupOptions, cleanSSMTokens } from './scale-runners/ssm-housekeeper';
1111
import { checkAndRetryJob } from './scale-runners/job-retry';
12+
import { cleanupOrgRunners } from './scale-runners/cleanup-org-runners';
1213

1314
export async function scaleUpHandler(event: SQSEvent, context: Context): Promise<void> {
1415
setContext(context, 'lambda.ts');
@@ -64,6 +65,8 @@ export const addMiddleware = () => {
6465
middy(scaleDownHandler).use(handler);
6566
middy(adjustPool).use(handler);
6667
middy(ssmHousekeeper).use(handler);
68+
middy(jobRetryCheck).use(handler);
69+
middy(cleanupOrgRunnersHandler).use(handler);
6770
};
6871
addMiddleware();
6972

@@ -91,3 +94,14 @@ export async function jobRetryCheck(event: SQSEvent, context: Context): Promise<
9194
}
9295
return Promise.resolve();
9396
}
97+
98+
/**
 * Lambda entry point that runs the organization runner cleanup.
 *
 * The cleanup is best-effort: any failure raised by `cleanupOrgRunners` is
 * logged and swallowed, so the returned promise always resolves.
 *
 * @param event - Trigger payload. It is only handed to `logger.logEventIfEnabled`
 *   and is not otherwise inspected.
 * @param context - Lambda invocation context, forwarded to `setContext`.
 * @returns A promise that resolves once the cleanup attempt has finished.
 */
export async function cleanupOrgRunnersHandler(event: unknown, context: Context): Promise<void> {
  setContext(context, 'lambda.ts');
  logger.logEventIfEnabled(event);

  try {
    await cleanupOrgRunners();
  } catch (e) {
    // A rejection is not guaranteed to be an Error instance; normalise it so the
    // log line never reads "undefined" and the logger always receives an Error.
    const error = e instanceof Error ? e : new Error(String(e));
    logger.error(error.message, { error });
  }
}

0 commit comments

Comments
 (0)