Skip to content

Commit 9393963

Browse files
committed
feat: add lambda function to cleanup org runners
Add a lambda function to cleanup offline runners in a GitHub organization. Normally runners will be cleaned up automatically, but when using ephemeral runners and spot instances, the call to Github API to remove the runner may not happen, and the runner will stay in the list in offline state. This lambda function will be triggered by a CloudWatch event and will remove any organization runners that are offline, and their labels match the config.
1 parent cbebee6 commit 9393963

File tree

9 files changed

+616
-1
lines changed

9 files changed

+616
-1
lines changed

lambdas/functions/control-plane/src/lambda.test.ts

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,14 @@ import { captureLambdaHandler, logger } from '@aws-github-runner/aws-powertools-
22
import { Context, SQSEvent, SQSRecord } from 'aws-lambda';
33
import { mocked } from 'jest-mock';
44

5-
import { addMiddleware, adjustPool, scaleDownHandler, scaleUpHandler, ssmHousekeeper, jobRetryCheck } from './lambda';
5+
import { addMiddleware, adjustPool, scaleDownHandler, scaleUpHandler, ssmHousekeeper, jobRetryCheck, cleanupOrgRunnersHandler } from './lambda';
66
import { adjust } from './pool/pool';
77
import ScaleError from './scale-runners/ScaleError';
88
import { scaleDown } from './scale-runners/scale-down';
99
import { ActionRequestMessage, scaleUp } from './scale-runners/scale-up';
1010
import { cleanSSMTokens } from './scale-runners/ssm-housekeeper';
1111
import { checkAndRetryJob } from './scale-runners/job-retry';
12+
import { cleanupOrgRunners } from './scale-runners/cleanup-org-runners';
1213

1314
const body: ActionRequestMessage = {
1415
eventType: 'workflow_job',
@@ -66,6 +67,7 @@ jest.mock('./scale-runners/scale-down');
6667
jest.mock('./scale-runners/scale-up');
6768
jest.mock('./scale-runners/ssm-housekeeper');
6869
jest.mock('./scale-runners/job-retry');
70+
jest.mock('./scale-runners/cleanup-org-runners');
6971
jest.mock('@aws-github-runner/aws-powertools-util');
7072
jest.mock('@aws-github-runner/aws-ssm-util');
7173

@@ -220,3 +222,26 @@ describe('Test job retry check wrapper', () => {
220222
expect(logSpyWarn).toHaveBeenCalledWith(expect.stringContaining(error.message), expect.anything());
221223
});
222224
});
225+
226+
describe('Test cleanupOrgRunnersHandler lambda wrapper', () => {
227+
it('Cleanup without error should resolve.', async () => {
228+
const mock = mocked(cleanupOrgRunners);
229+
mock.mockImplementation(() => {
230+
return new Promise((resolve) => {
231+
resolve();
232+
});
233+
});
234+
await expect(cleanupOrgRunnersHandler({}, context)).resolves.not.toThrow();
235+
});
236+
237+
it('Cleanup with error should resolve and log error.', async () => {
238+
const logSpyError = jest.spyOn(logger, 'error');
239+
240+
const mock = mocked(cleanupOrgRunners);
241+
const error = new Error('Error cleaning up org runners.');
242+
mock.mockRejectedValue(error);
243+
244+
await expect(cleanupOrgRunnersHandler({}, context)).resolves.not.toThrow();
245+
expect(logSpyError).toHaveBeenCalledWith(expect.stringContaining(error.message), expect.anything());
246+
});
247+
});

lambdas/functions/control-plane/src/lambda.ts

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import { scaleDown } from './scale-runners/scale-down';
99
import { scaleUp } from './scale-runners/scale-up';
1010
import { SSMCleanupOptions, cleanSSMTokens } from './scale-runners/ssm-housekeeper';
1111
import { checkAndRetryJob } from './scale-runners/job-retry';
12+
import { cleanupOrgRunners } from './scale-runners/cleanup-org-runners';
1213

1314
export async function scaleUpHandler(event: SQSEvent, context: Context): Promise<void> {
1415
setContext(context, 'lambda.ts');
@@ -61,6 +62,8 @@ export const addMiddleware = () => {
6162
middy(scaleDownHandler).use(handler);
6263
middy(adjustPool).use(handler);
6364
middy(ssmHousekeeper).use(handler);
65+
middy(jobRetryCheck).use(handler);
66+
middy(cleanupOrgRunnersHandler).use(handler);
6467
};
6568
addMiddleware();
6669

@@ -87,3 +90,14 @@ export async function jobRetryCheck(event: SQSEvent, context: Context): Promise<
8790
});
8891
}
8992
}
93+
94+
export async function cleanupOrgRunnersHandler(event: unknown, context: Context): Promise<void> {
95+
setContext(context, 'lambda.ts');
96+
logger.logEventIfEnabled(event);
97+
98+
try {
99+
await cleanupOrgRunners();
100+
} catch (e) {
101+
logger.error(`${(e as Error).message}`, { error: e as Error });
102+
}
103+
}
Lines changed: 304 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,304 @@
1+
import { Octokit } from '@octokit/rest';
2+
import { cleanupOrgRunners } from './cleanup-org-runners';
3+
import * as auth from '../github/auth';
4+
import * as scaleUp from './scale-up';
5+
6+
// Mock the modules
7+
jest.mock('../github/auth');
8+
jest.mock('./scale-up');
9+
10+
describe('cleanup-org-runners', () => {
11+
// Setup environment variables
12+
const OLD_ENV = process.env;
13+
14+
// Mock functions
15+
const mockCreateGithubAppAuth = auth.createGithubAppAuth as jest.Mock;
16+
const mockCreateGithubInstallationAuth = auth.createGithubInstallationAuth as jest.Mock;
17+
const mockCreateOctokitClient = auth.createOctokitClient as jest.Mock;
18+
const mockGetGitHubEnterpriseApiUrl = scaleUp.getGitHubEnterpriseApiUrl as jest.Mock;
19+
20+
// Mock Octokit client
21+
const mockOctokit = {
22+
actions: {
23+
listSelfHostedRunnersForOrg: jest.fn(),
24+
deleteSelfHostedRunnerFromOrg: jest.fn().mockImplementation(() => Promise.resolve({ status: 204 })),
25+
},
26+
apps: {
27+
getOrgInstallation: jest.fn().mockImplementation(() => Promise.resolve({ data: { id: 12345 } })),
28+
},
29+
paginate: jest.fn().mockImplementation(async () => []),
30+
} as unknown as Octokit & {
31+
paginate: jest.Mock;
32+
actions: {
33+
deleteSelfHostedRunnerFromOrg: jest.Mock;
34+
};
35+
apps: {
36+
getOrgInstallation: jest.Mock;
37+
};
38+
};
39+
40+
beforeEach(() => {
41+
// Reset mocks
42+
jest.resetAllMocks();
43+
44+
// Setup environment
45+
process.env = { ...OLD_ENV };
46+
process.env.RUNNER_OWNER = 'test-org';
47+
process.env.RUNNER_LABELS = 'label1,label2';
48+
49+
// Setup mock returns
50+
mockGetGitHubEnterpriseApiUrl.mockReturnValue({ ghesApiUrl: undefined });
51+
mockCreateGithubAppAuth.mockResolvedValue({ token: 'mock-app-token' });
52+
mockCreateGithubInstallationAuth.mockResolvedValue({ token: 'mock-installation-token' });
53+
54+
// Fix the mock to properly return the same mockOctokit for both calls
55+
mockCreateOctokitClient.mockImplementation(() => Promise.resolve(mockOctokit));
56+
57+
// Default mock for paginate to return empty array
58+
mockOctokit.paginate.mockResolvedValue([]);
59+
mockOctokit.actions.deleteSelfHostedRunnerFromOrg.mockImplementation(() => Promise.resolve({ status: 204 }));
60+
61+
// Ensure the getOrgInstallation mock returns proper data structure
62+
mockOctokit.apps.getOrgInstallation.mockImplementation(() => Promise.resolve({ data: { id: 12345 } }));
63+
});
64+
65+
afterEach(() => {
66+
// Restore environment
67+
process.env = OLD_ENV;
68+
});
69+
70+
describe('Core functionality', () => {
71+
test('should not delete any runners when no runners exist', async () => {
72+
// Setup
73+
mockOctokit.paginate.mockResolvedValueOnce([]);
74+
75+
// Execute
76+
await cleanupOrgRunners();
77+
78+
// Verify
79+
expect(mockOctokit.paginate).toHaveBeenCalledWith(mockOctokit.actions.listSelfHostedRunnersForOrg, {
80+
org: 'test-org',
81+
per_page: 100,
82+
});
83+
expect(mockOctokit.actions.deleteSelfHostedRunnerFromOrg).not.toHaveBeenCalled();
84+
});
85+
86+
test('should delete offline runners with matching labels', async () => {
87+
// Setup
88+
const mockRunners = [
89+
{
90+
id: 1,
91+
name: 'runner-1',
92+
status: 'offline',
93+
labels: [{ name: 'label1' }, { name: 'label2' }],
94+
},
95+
{
96+
id: 2,
97+
name: 'runner-2',
98+
status: 'online',
99+
labels: [{ name: 'label1' }, { name: 'label2' }],
100+
},
101+
{
102+
id: 3,
103+
name: 'runner-3',
104+
status: 'offline',
105+
labels: [{ name: 'label3' }],
106+
},
107+
];
108+
109+
mockOctokit.paginate.mockResolvedValueOnce(mockRunners);
110+
111+
// Execute
112+
await cleanupOrgRunners();
113+
114+
// Verify
115+
expect(mockOctokit.paginate).toHaveBeenCalledWith(mockOctokit.actions.listSelfHostedRunnersForOrg, {
116+
org: 'test-org',
117+
per_page: 100,
118+
});
119+
expect(mockOctokit.actions.deleteSelfHostedRunnerFromOrg).toHaveBeenCalledTimes(1);
120+
expect(mockOctokit.actions.deleteSelfHostedRunnerFromOrg).toHaveBeenCalledWith({
121+
runner_id: 1,
122+
org: 'test-org',
123+
});
124+
});
125+
126+
test('should use GitHub Enterprise API URL when provided', async () => {
127+
// Setup
128+
const ghesApiUrl = 'https://github.enterprise.com/api/v3';
129+
mockGetGitHubEnterpriseApiUrl.mockReturnValue({ ghesApiUrl });
130+
131+
// Mock runners to prevent the map error
132+
mockOctokit.paginate.mockResolvedValue([]);
133+
134+
// Execute
135+
await cleanupOrgRunners();
136+
137+
// Verify
138+
expect(mockCreateGithubAppAuth).toHaveBeenCalledWith(undefined, ghesApiUrl);
139+
expect(mockOctokit.apps.getOrgInstallation).toHaveBeenCalledWith({ org: 'test-org' });
140+
expect(mockCreateGithubInstallationAuth).toHaveBeenCalledWith(12345, ghesApiUrl);
141+
expect(mockCreateOctokitClient).toHaveBeenCalledWith('mock-app-token', ghesApiUrl);
142+
expect(mockCreateOctokitClient).toHaveBeenCalledWith('mock-installation-token', ghesApiUrl);
143+
});
144+
145+
test('should handle pagination for large number of runners', async () => {
146+
// Setup - create a large number of runners to test pagination
147+
const mockRunners = Array(10)
148+
.fill(null)
149+
.map((_, index) => ({
150+
id: index + 1,
151+
name: `runner-${index + 1}`,
152+
status: index % 2 === 0 ? 'offline' : 'online', // Alternate offline/online
153+
labels: [{ name: 'label1' }, { name: 'label2' }],
154+
}));
155+
156+
mockOctokit.paginate.mockResolvedValueOnce(mockRunners);
157+
158+
// Execute
159+
await cleanupOrgRunners();
160+
161+
// Verify - should delete all offline runners with matching labels (5 runners)
162+
expect(mockOctokit.actions.deleteSelfHostedRunnerFromOrg).toHaveBeenCalledTimes(5);
163+
164+
// Check that only offline runners were deleted
165+
for (let i = 0; i < 10; i++) {
166+
if (i % 2 === 0) {
167+
// Offline runners
168+
expect(mockOctokit.actions.deleteSelfHostedRunnerFromOrg).toHaveBeenCalledWith({
169+
runner_id: i + 1,
170+
org: 'test-org',
171+
});
172+
}
173+
}
174+
});
175+
});
176+
177+
describe('Label handling', () => {
178+
test('should handle different label scenarios correctly', async () => {
179+
// Test cases for different label scenarios
180+
const testCases = [
181+
{
182+
name: 'empty labels env var',
183+
runnerLabels: '',
184+
runners: [
185+
{ id: 1, name: 'runner-1', status: 'offline', labels: [{ name: 'label1' }] },
186+
{ id: 2, name: 'runner-2', status: 'offline', labels: [] },
187+
],
188+
expectedDeletedIds: [1, 2], // Should delete all offline runners when no labels specified
189+
},
190+
{
191+
name: 'partial label match',
192+
runnerLabels: 'label1,label2',
193+
runners: [
194+
{ id: 1, name: 'runner-1', status: 'offline', labels: [{ name: 'label1' }] }, // Partial match
195+
{ id: 2, name: 'runner-2', status: 'offline', labels: [{ name: 'label3' }] }, // No match
196+
],
197+
expectedDeletedIds: [1], // Should delete runner with partial match
198+
},
199+
{
200+
name: 'empty runner labels',
201+
runnerLabels: 'label1,label2',
202+
runners: [
203+
{ id: 1, name: 'runner-1', status: 'offline', labels: [] }, // Empty labels
204+
],
205+
expectedDeletedIds: [1], // Based on actual behavior, it deletes runners with empty labels
206+
}
207+
];
208+
209+
for (const testCase of testCases) {
210+
// Setup
211+
jest.clearAllMocks();
212+
process.env.RUNNER_LABELS = testCase.runnerLabels;
213+
mockOctokit.paginate.mockResolvedValueOnce(testCase.runners);
214+
215+
// Execute
216+
await cleanupOrgRunners();
217+
218+
// Verify
219+
expect(mockOctokit.actions.deleteSelfHostedRunnerFromOrg).toHaveBeenCalledTimes(
220+
testCase.expectedDeletedIds.length
221+
);
222+
223+
testCase.expectedDeletedIds.forEach(id => {
224+
expect(mockOctokit.actions.deleteSelfHostedRunnerFromOrg).toHaveBeenCalledWith({
225+
runner_id: id,
226+
org: 'test-org',
227+
});
228+
});
229+
}
230+
});
231+
});
232+
233+
describe('Error handling', () => {
234+
test('should handle various API errors correctly', async () => {
235+
// Test cases for different error scenarios
236+
const testCases = [
237+
{
238+
name: 'runner listing error',
239+
mockSetup: () => {
240+
mockOctokit.paginate.mockRejectedValueOnce(new Error('API error during listing'));
241+
},
242+
expectedError: 'API error during listing',
243+
},
244+
{
245+
name: 'runner deletion error',
246+
mockSetup: () => {
247+
mockOctokit.paginate.mockResolvedValueOnce([
248+
{ id: 1, name: 'runner-1', status: 'offline', labels: [{ name: 'label1' }, { name: 'label2' }] },
249+
]);
250+
mockOctokit.actions.deleteSelfHostedRunnerFromOrg.mockRejectedValueOnce(new Error('Deletion failed'));
251+
},
252+
expectedError: 'Deletion failed',
253+
},
254+
];
255+
256+
for (const testCase of testCases) {
257+
// Setup
258+
jest.clearAllMocks();
259+
testCase.mockSetup();
260+
261+
// Execute and verify
262+
await expect(cleanupOrgRunners()).rejects.toThrow(testCase.expectedError);
263+
}
264+
});
265+
266+
test('should handle authentication and installation errors', async () => {
267+
// Test cases for auth errors
268+
const testCases = [
269+
{
270+
name: 'app auth error',
271+
mockSetup: () => {
272+
mockCreateGithubAppAuth.mockRejectedValueOnce(new Error('Authentication failed'));
273+
},
274+
expectedError: 'Authentication failed',
275+
},
276+
{
277+
name: 'installation lookup error',
278+
mockSetup: () => {
279+
mockOctokit.apps.getOrgInstallation.mockRejectedValueOnce(new Error('Installation not found'));
280+
},
281+
expectedError: 'Installation not found',
282+
},
283+
{
284+
name: 'missing environment variables',
285+
mockSetup: () => {
286+
process.env.RUNNER_OWNER = undefined as unknown as string;
287+
mockOctokit.apps.getOrgInstallation.mockRejectedValueOnce(new Error('Missing org parameter'));
288+
},
289+
expectedError: 'Missing org parameter',
290+
},
291+
];
292+
293+
for (const testCase of testCases) {
294+
// Setup
295+
jest.clearAllMocks();
296+
testCase.mockSetup();
297+
298+
// Execute and verify
299+
await expect(cleanupOrgRunners()).rejects.toThrow(testCase.expectedError);
300+
expect(mockOctokit.paginate).not.toHaveBeenCalled();
301+
}
302+
});
303+
});
304+
});

0 commit comments

Comments
 (0)