Skip to content

Commit 3fa09d2

Browse files
committed
feat(runners): add runnerId to RunnerList and implement untag functionality for incorrectly tagged runners
1 parent 01e100f commit 3fa09d2

File tree

7 files changed

+192
-20
lines changed

7 files changed

+192
-20
lines changed

lambdas/functions/control-plane/src/aws/runners.d.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,14 @@ export interface RunnerList {
1010
repo?: string;
1111
org?: string;
1212
orphan?: boolean;
13+
runnerId?: string;
14+
}
15+
16+
export interface RunnerState {
17+
id: number;
18+
name: string;
19+
busy: boolean;
20+
status: string;
1321
}
1422

1523
export interface RunnerInfo {

lambdas/functions/control-plane/src/aws/runners.test.ts

Lines changed: 63 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import {
44
CreateFleetInstance,
55
CreateFleetResult,
66
CreateTagsCommand,
7+
DeleteTagsCommand,
78
DefaultTargetCapacityType,
89
DescribeInstancesCommand,
910
DescribeInstancesResult,
@@ -17,7 +18,7 @@ import { mockClient } from 'aws-sdk-client-mock';
1718
import 'aws-sdk-client-mock-jest/vitest';
1819

1920
import ScaleError from './../scale-runners/ScaleError';
20-
import { createRunner, listEC2Runners, tag, terminateRunner } from './runners';
21+
import { createRunner, listEC2Runners, tag, untag, terminateRunner } from './runners';
2122
import { RunnerInfo, RunnerInputParameters, RunnerType } from './runners.d';
2223
import { describe, it, expect, beforeEach, vi } from 'vitest';
2324

@@ -53,14 +54,34 @@ const mockRunningInstances: DescribeInstancesResult = {
5354
},
5455
],
5556
};
57+
const mockRunningInstancesJit: DescribeInstancesResult = {
58+
Reservations: [
59+
{
60+
Instances: [
61+
{
62+
LaunchTime: new Date('2020-10-10T14:48:00.000+09:00'),
63+
InstanceId: 'i-1234',
64+
Tags: [
65+
{ Key: 'ghr:Application', Value: 'github-action-runner' },
66+
{ Key: 'ghr:runner_name_prefix', Value: RUNNER_NAME_PREFIX },
67+
{ Key: 'ghr:created_by', Value: 'scale-up-lambda' },
68+
{ Key: 'ghr:Type', Value: 'Org' },
69+
{ Key: 'ghr:Owner', Value: 'CoderToCat' },
70+
{ Key: 'ghr:githubrunnerid', Value: '9876543210' },
71+
],
72+
},
73+
],
74+
},
75+
],
76+
};
5677

5778
describe('list instances', () => {
5879
beforeEach(() => {
5980
vi.resetModules();
6081
vi.clearAllMocks();
6182
});
6283

63-
it('returns a list of instances', async () => {
84+
it('returns a list of instances (Non JIT)', async () => {
6485
mockEC2Client.on(DescribeInstancesCommand).resolves(mockRunningInstances);
6586
const resp = await listEC2Runners();
6687
expect(resp.length).toBe(1);
@@ -73,6 +94,20 @@ describe('list instances', () => {
7394
});
7495
});
7596

97+
it('returns a list of instances (JIT)', async () => {
98+
mockEC2Client.on(DescribeInstancesCommand).resolves(mockRunningInstancesJit);
99+
const resp = await listEC2Runners();
100+
expect(resp.length).toBe(1);
101+
expect(resp).toContainEqual({
102+
instanceId: 'i-1234',
103+
launchTime: new Date('2020-10-10T14:48:00.000+09:00'),
104+
type: 'Org',
105+
owner: 'CoderToCat',
106+
orphan: false,
107+
runnerId: '9876543210',
108+
});
109+
});
110+
76111
it('check orphan tag.', async () => {
77112
const instances: DescribeInstancesResult = mockRunningInstances;
78113
instances.Reservations![0].Instances![0].Tags!.push({ Key: 'ghr:orphan', Value: 'true' });
@@ -229,11 +264,35 @@ describe('tag runner', () => {
229264
owner: 'owner-2',
230265
type: 'Repo',
231266
};
232-
await tag(runner.instanceId, [{ Key: 'ghr:orphan', Value: 'truer' }]);
267+
await tag(runner.instanceId, [{ Key: 'ghr:orphan', Value: 'true' }]);
268+
269+
expect(mockEC2Client).toHaveReceivedCommandWith(CreateTagsCommand, {
270+
Resources: [runner.instanceId],
271+
Tags: [{ Key: 'ghr:orphan', Value: 'true' }],
272+
});
273+
});
274+
});
233275

276+
describe('untag runner', () => {
277+
beforeEach(() => {
278+
vi.clearAllMocks();
279+
});
280+
it('removing extra tag', async () => {
281+
mockEC2Client.on(DeleteTagsCommand).resolves({});
282+
const runner: RunnerInfo = {
283+
instanceId: 'instance-2',
284+
owner: 'owner-2',
285+
type: 'Repo',
286+
};
287+
//await tag(runner.instanceId, [{ Key: 'ghr:orphan', Value: '' }]);
234288
expect(mockEC2Client).toHaveReceivedCommandWith(CreateTagsCommand, {
235289
Resources: [runner.instanceId],
236-
Tags: [{ Key: 'ghr:orphan', Value: 'truer' }],
290+
Tags: [{ Key: 'ghr:orphan', Value: 'true' }],
291+
});
292+
await untag(runner.instanceId, [{ Key: 'ghr:orphan', Value: 'true' }]);
293+
expect(mockEC2Client).toHaveReceivedCommandWith(DeleteTagsCommand, {
294+
Resources: [runner.instanceId],
295+
Tags: [{ Key: 'ghr:orphan', Value: 'true' }],
237296
});
238297
});
239298
});

lambdas/functions/control-plane/src/aws/runners.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import {
22
CreateFleetCommand,
33
CreateFleetResult,
44
CreateTagsCommand,
5+
DeleteTagsCommand,
56
DescribeInstancesCommand,
67
DescribeInstancesResult,
78
EC2Client,
@@ -91,6 +92,7 @@ function getRunnerInfo(runningInstances: DescribeInstancesResult) {
9192
repo: i.Tags?.find((e) => e.Key === 'ghr:Repo')?.Value as string,
9293
org: i.Tags?.find((e) => e.Key === 'ghr:Org')?.Value as string,
9394
orphan: i.Tags?.find((e) => e.Key === 'ghr:orphan')?.Value === 'true',
95+
runnerId: i.Tags?.find((e) => e.Key === 'ghr:githubrunnerid')?.Value as string,
9496
});
9597
}
9698
}
@@ -112,6 +114,12 @@ export async function tag(instanceId: string, tags: Tag[]): Promise<void> {
112114
await ec2.send(new CreateTagsCommand({ Resources: [instanceId], Tags: tags }));
113115
}
114116

117+
export async function untag(instanceId: string, tags: Tag[]): Promise<void> {
118+
logger.debug(`Untagging '${instanceId}'`, { tags });
119+
const ec2 = getTracedAWSV3Client(new EC2Client({ region: process.env.AWS_REGION }));
120+
await ec2.send(new DeleteTagsCommand({ Resources: [instanceId], Tags: tags }));
121+
}
122+
115123
function generateFleetOverrides(
116124
subnetIds: string[],
117125
instancesTypes: string[],

lambdas/functions/control-plane/src/scale-runners/scale-down.test.ts

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import nock from 'nock';
44

55
import { RunnerInfo, RunnerList } from '../aws/runners.d';
66
import * as ghAuth from '../github/auth';
7-
import { listEC2Runners, terminateRunner, tag } from './../aws/runners';
7+
import { listEC2Runners, terminateRunner, tag, untag } from './../aws/runners';
88
import { githubCache } from './cache';
99
import { newestFirstStrategy, oldestFirstStrategy, scaleDown } from './scale-down';
1010
import { describe, it, expect, beforeEach, vi } from 'vitest';
@@ -33,6 +33,7 @@ vi.mock('./../aws/runners', async (importOriginal) => {
3333
return {
3434
...actual,
3535
tag: vi.fn(),
36+
untag: vi.fn(),
3637
terminateRunner: vi.fn(),
3738
listEC2Runners: vi.fn(),
3839
};
@@ -62,6 +63,7 @@ const mockedInstallationAuth = vi.mocked(ghAuth.createGithubInstallationAuth);
6263
const mockCreateClient = vi.mocked(ghAuth.createOctokitClient);
6364
const mockListRunners = vi.mocked(listEC2Runners);
6465
const mockTagRunners = vi.mocked(tag);
66+
const mockUntagRunners = vi.mocked(untag);
6567
const mockTerminateRunners = vi.mocked(terminateRunner);
6668

6769
export interface TestData {
@@ -312,7 +314,7 @@ describe('Scale down runners', () => {
312314
checkNonTerminated(runners);
313315
});
314316

315-
it(`Should terminate orphan.`, async () => {
317+
it(`Should terminate orphan (Non JIT)`, async () => {
316318
// setup
317319
const orphanRunner = createRunnerTestData('orphan-1', type, MINIMUM_BOOT_TIME + 1, false, false, false);
318320
const idleRunner = createRunnerTestData('idle-1', type, MINIMUM_BOOT_TIME + 1, true, false, false);
@@ -334,6 +336,7 @@ describe('Scale down runners', () => {
334336
Value: 'true',
335337
},
336338
]);
339+
337340
expect(mockTagRunners).not.toHaveBeenCalledWith(idleRunner.instanceId, expect.anything());
338341

339342
// next cycle, update test data set orphan to true and terminate should be true
@@ -348,6 +351,51 @@ describe('Scale down runners', () => {
348351
checkNonTerminated(runners);
349352
});
350353

354+
it('Should test if orphaned runner untag if online and busy, else terminate (JIT)', async () => {
355+
// arrange
356+
const orphanRunner = createRunnerTestData('orphan-jit', type, MINIMUM_BOOT_TIME + 1, false, true, false, undefined, 1234567890);
357+
const runners = [orphanRunner];
358+
359+
mockGitHubRunners([]);
360+
mockAwsRunners(runners);
361+
362+
if (type === 'Repo') {
363+
mockOctokit.actions.getSelfHostedRunnerForRepo.mockResolvedValueOnce({
364+
data: { runnerId: 1234567890, name: orphanRunner.instanceId, busy: true, status: 'online' },
365+
});
366+
} else {
367+
mockOctokit.actions.getSelfHostedRunnerForOrg.mockResolvedValueOnce({
368+
data: { runnerId: 1234567890, name: orphanRunner.instanceId, busy: true, status: 'online' },
369+
});
370+
}
371+
372+
// act
373+
await scaleDown();
374+
375+
// assert
376+
expect(mockUntagRunners).toHaveBeenCalledWith(orphanRunner.instanceId, [
377+
{ Key: 'ghr:orphan', Value: 'true' },
378+
]);
379+
expect(mockTerminateRunners).not.toHaveBeenCalledWith(orphanRunner.instanceId);
380+
381+
// arrange
382+
if (type === 'Repo') {
383+
mockOctokit.actions.getSelfHostedRunnerForRepo.mockResolvedValueOnce({
384+
data: { runnerId: 1234567890, name: orphanRunner.instanceId, busy: true, status: 'offline' },
385+
});
386+
} else {
387+
mockOctokit.actions.getSelfHostedRunnerForOrg.mockResolvedValueOnce({
388+
data: { runnerId: 1234567890, name: orphanRunner.instanceId, busy: true, status: 'offline' },
389+
});
390+
}
391+
392+
// act
393+
await scaleDown();
394+
395+
// assert
396+
expect(mockTerminateRunners).toHaveBeenCalledWith(orphanRunner.instanceId);
397+
});
398+
351399
it(`Should ignore errors when termination orphan fails.`, async () => {
352400
// setup
353401
const orphanRunner = createRunnerTestData('orphan-1', type, MINIMUM_BOOT_TIME + 1, false, true, true);
@@ -625,6 +673,7 @@ function createRunnerTestData(
625673
orphan: boolean,
626674
shouldBeTerminated: boolean,
627675
owner?: string,
676+
runnerId?: number,
628677
): RunnerTestItem {
629678
return {
630679
instanceId: `i-${name}-${type.toLowerCase()}`,
@@ -638,5 +687,6 @@ function createRunnerTestData(
638687
registered,
639688
orphan,
640689
shouldBeTerminated,
690+
runnerId: runnerId !== undefined ? String(runnerId) : undefined,
641691
};
642692
}

lambdas/functions/control-plane/src/scale-runners/scale-down.ts

Lines changed: 40 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@ import { createChildLogger } from '@aws-github-runner/aws-powertools-util';
33
import moment from 'moment';
44

55
import { createGithubAppAuth, createGithubInstallationAuth, createOctokitClient } from '../github/auth';
6-
import { bootTimeExceeded, listEC2Runners, tag, terminateRunner } from './../aws/runners';
7-
import { RunnerInfo, RunnerList } from './../aws/runners.d';
6+
import { bootTimeExceeded, listEC2Runners, tag, untag, terminateRunner } from './../aws/runners';
7+
import { RunnerInfo, RunnerList, RunnerState } from './../aws/runners.d';
88
import { GhRunners, githubCache } from './cache';
99
import { ScalingDownConfig, getEvictionStrategy, getIdleRunnerCount } from './scale-down-config';
1010
import { metricGitHubAppRateLimit } from '../github/rate-limit';
@@ -46,7 +46,11 @@ async function getOrCreateOctokit(runner: RunnerInfo): Promise<Octokit> {
4646
return octokit;
4747
}
4848

49-
async function getGitHubRunnerBusyState(client: Octokit, ec2runner: RunnerInfo, runnerId: number): Promise<boolean> {
49+
async function getGitHubSelfHostedRunnerState(
50+
client: Octokit,
51+
ec2runner: RunnerInfo,
52+
runnerId: number,
53+
): Promise<RunnerState> {
5054
const state =
5155
ec2runner.type === 'Org'
5256
? await client.actions.getSelfHostedRunnerForOrg({
@@ -59,11 +63,12 @@ async function getGitHubRunnerBusyState(client: Octokit, ec2runner: RunnerInfo,
5963
repo: ec2runner.owner.split('/')[1],
6064
});
6165

62-
logger.info(`Runner '${ec2runner.instanceId}' - GitHub Runner ID '${runnerId}' - Busy: ${state.data.busy}`);
63-
64-
metricGitHubAppRateLimit(state.headers);
66+
return { id: state.data.id, name: state.data.name, busy: state.data.busy, status: state.data.status };
67+
}
6568

66-
return state.data.busy;
69+
async function getGitHubRunnerBusyState(client: Octokit, ec2runner: RunnerInfo, runnerId: number): Promise<boolean> {
70+
const state = await getGitHubSelfHostedRunnerState(client, ec2runner, runnerId);
71+
return state.busy;
6772
}
6873

6974
async function listGitHubRunners(runner: RunnerInfo): Promise<GhRunners> {
@@ -205,13 +210,36 @@ async function terminateOrphan(environment: string): Promise<void> {
205210
const orphanRunners = await listEC2Runners({ environment, orphan: true });
206211

207212
for (const runner of orphanRunners) {
208-
logger.info(`Terminating orphan runner '${runner.instanceId}'`);
209-
await terminateRunner(runner.instanceId).catch((e) => {
210-
logger.error(`Failed to terminate orphan runner '${runner.instanceId}'`, { error: e });
211-
});
213+
// do we have a valid runnerId? then we are in a Jit Runner scenario else, use old method
214+
logger.info(`Runner var us '${JSON.stringify(runner)}' `);
215+
if (runner.runnerId) {
216+
logger.info(`Runner '${runner.instanceId}' is orphan, but has a runnerId.`);
217+
// build a runner instance
218+
const client = await getOrCreateOctokit(runner as RunnerInfo);
219+
const runnerId = parseInt(runner.runnerId);
220+
const ec2Instance = runner as RunnerInfo;
221+
const state = await getGitHubSelfHostedRunnerState(client, ec2Instance, runnerId);
222+
logger.info(`Runner is currently '${runner.instanceId}' state: ${JSON.stringify(state)}`);
223+
if (state.status === 'online' && state.busy) {
224+
logger.info(`Runner '${runner.instanceId}' is orphan, but is online and busy.`);
225+
await untag(runner.instanceId, [{ Key: 'ghr:orphan', Value: 'true' }]);
226+
} else if (state.status === 'offline') {
227+
logger.warn(`Runner '${runner.instanceId}' is orphan.`);
228+
logger.info(`Terminating orphan runner '${runner.instanceId}'`);
229+
await terminateRunner(runner.instanceId).catch((e) => {
230+
logger.error(`Failed to terminate orphan runner '${runner.instanceId}'`, { error: e });
231+
});
232+
}
233+
} else {
234+
logger.warn(`Runner '${runner.instanceId}' is orphan, but no runnerId found.`);
235+
logger.info(`Terminating orphan runner '${runner.instanceId}'`);
236+
await terminateRunner(runner.instanceId).catch((e) => {
237+
logger.error(`Failed to terminate orphan runner '${runner.instanceId}'`, { error: e });
238+
});
239+
}
212240
}
213241
} catch (e) {
214-
logger.warn(`Failure during orphan runner termination.`, { error: e });
242+
logger.warn(`Failure during orphan termination processing.`, { error: e });
215243
}
216244
}
217245

lambdas/functions/control-plane/src/scale-runners/scale-up.test.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ vi.mock('@octokit/rest', () => ({
4141
vi.mock('./../aws/runners', async () => ({
4242
createRunner: vi.fn(),
4343
listEC2Runners: vi.fn(),
44+
tag: vi.fn(),
4445
}));
4546

4647
vi.mock('./../github/auth', async () => ({
@@ -645,7 +646,7 @@ describe('scaleUp with public GH', () => {
645646
});
646647
});
647648

648-
it('JIT config is ingored for non-ephemeral runners.', async () => {
649+
it('JIT config is ignored for non-ephemeral runners.', async () => {
649650
process.env.ENABLE_EPHEMERAL_RUNNERS = 'false';
650651
process.env.ENABLE_JIT_CONFIG = 'true';
651652
process.env.ENABLE_JOB_QUEUED_CHECK = 'false';
@@ -1008,11 +1009,13 @@ function defaultOctokitMockImpl() {
10081009
]);
10091010
mockOctokit.actions.generateRunnerJitconfigForOrg.mockImplementation(() => ({
10101011
data: {
1012+
runner: { id: 9876543210 },
10111013
encoded_jit_config: 'TEST_JIT_CONFIG_ORG',
10121014
},
10131015
}));
10141016
mockOctokit.actions.generateRunnerJitconfigForRepo.mockImplementation(() => ({
10151017
data: {
1018+
runner: { id: 9876543210 },
10161019
encoded_jit_config: 'TEST_JIT_CONFIG_REPO',
10171020
},
10181021
}));

0 commit comments

Comments
 (0)