Skip to content

Commit 456e399

Browse files
authored
runners: Remove SSM deletion from terminateRunner (#6934)
This is super-ceded by our addition of an expiration policy to our SSM parameters. I also think that this was somewhat causing us to be slow. See: * #6885 --------- Signed-off-by: Eli Uriegas <[email protected]>
1 parent a8af85f commit 456e399

File tree

4 files changed

+4
-231
lines changed

4 files changed

+4
-231
lines changed

terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/runners.test.ts

Lines changed: 1 addition & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ import {
22
RunnerInputParameters,
33
createRunner,
44
findAmiID,
5-
getParameterNameForRunner,
65
listRunners,
76
listSSMParameters,
87
resetRunnersCaches,
@@ -345,21 +344,11 @@ describe('terminateRunner', () => {
345344
instanceId: 'i-1234',
346345
environment: 'gi-ci',
347346
};
348-
mockSSMdescribeParametersRet.mockResolvedValueOnce({
349-
Parameters: [getParameterNameForRunner(runner.environment as string, runner.instanceId)].map((s) => {
350-
return { Name: s };
351-
}),
352-
});
353347
await terminateRunner(runner, metrics);
354348

355349
expect(mockEC2.terminateInstances).toBeCalledWith({
356350
InstanceIds: [runner.instanceId],
357351
});
358-
expect(mockSSM.describeParameters).toBeCalledTimes(1);
359-
expect(mockSSM.deleteParameter).toBeCalledTimes(1);
360-
expect(mockSSM.deleteParameter).toBeCalledWith({
361-
Name: getParameterNameForRunner(runner.environment as string, runner.instanceId),
362-
});
363352
});
364353

365354
it('fails to terminate', async () => {
@@ -372,11 +361,9 @@ describe('terminateRunner', () => {
372361
promise: jest.fn().mockRejectedValueOnce(Error(errMsg)),
373362
});
374363
expect(terminateRunner(runner, metrics)).rejects.toThrowError(errMsg);
375-
expect(mockSSM.describeParameters).not.toBeCalled();
376-
expect(mockSSM.deleteParameter).not.toBeCalled();
377364
});
378365

379-
it('fails to list parameters on terminate, then force delete all next parameters', async () => {
366+
it('terminates multiple runners', async () => {
380367
const runner1: RunnerInfo = {
381368
awsRegion: Config.Instance.awsRegion,
382369
instanceId: '1234',
@@ -387,19 +374,10 @@ describe('terminateRunner', () => {
387374
instanceId: '1235',
388375
environment: 'environ',
389376
};
390-
mockSSMdescribeParametersRet.mockRejectedValueOnce('Some Error');
391377
await terminateRunner(runner1, metrics);
392378
await terminateRunner(runner2, metrics);
393379

394380
expect(mockEC2.terminateInstances).toBeCalledTimes(2);
395-
expect(mockSSM.describeParameters).toBeCalledTimes(1);
396-
expect(mockSSM.deleteParameter).toBeCalledTimes(2);
397-
expect(mockSSM.deleteParameter).toBeCalledWith({
398-
Name: getParameterNameForRunner(runner1.environment as string, runner1.instanceId),
399-
});
400-
expect(mockSSM.deleteParameter).toBeCalledWith({
401-
Name: getParameterNameForRunner(runner2.environment as string, runner2.instanceId),
402-
});
403381
});
404382
});
405383

terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/runners.ts

Lines changed: 1 addition & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,6 @@ export interface DescribeInstancesResultRegion {
6161
describeInstanceResult: PromiseResult<EC2.Types.DescribeInstancesResult, AWS.AWSError>;
6262
}
6363

64-
const SHOULD_NOT_TRY_LIST_SSM = 'SHOULD_NOT_TRY_LIST_SSM';
65-
6664
// Keep the cache as long as half of minimum time, this should reduce calls to AWS API
6765
const ssmParametersCache = new LRU({ maxAge: (Config.Instance.minimumRunningTimeInMinutes * 60 * 1000) / 2 });
6866

@@ -174,7 +172,7 @@ export async function listRunners(
174172
.describeInstances({ Filters: ec2Filters })
175173
.promise()
176174
.then((describeInstanceResult): DescribeInstancesResultRegion => {
177-
const listOfRunnersIdType: string[] = (
175+
(
178176
describeInstanceResult?.Reservations?.flatMap((reservation) => {
179177
return (
180178
reservation.Instances?.map((instance) => {
@@ -327,31 +325,6 @@ export async function terminateRunner(runner: RunnerInfo, metrics: Metrics): Pro
327325
);
328326
});
329327
console.info(`Runner terminated: ${runner.instanceId} ${runner.runnerType}`);
330-
331-
const paramName = getParameterNameForRunner(runner.environment || Config.Instance.environment, runner.instanceId);
332-
const cacheName = `${SHOULD_NOT_TRY_LIST_SSM}_${runner.awsRegion}`;
333-
334-
if (ssmParametersCache.has(cacheName)) {
335-
doDeleteSSMParameter(paramName, metrics, runner.awsRegion);
336-
} else {
337-
try {
338-
const params = await listSSMParameters(metrics, runner.awsRegion);
339-
340-
if (params.has(paramName)) {
341-
doDeleteSSMParameter(paramName, metrics, runner.awsRegion);
342-
} else {
343-
/* istanbul ignore next */
344-
console.info(`[${runner.awsRegion}] Parameter "${paramName}" not found in SSM, no need to delete it`);
345-
}
346-
} catch (e) {
347-
ssmParametersCache.set(cacheName, 1, 60 * 1000);
348-
console.error(
349-
`[terminateRunner - listSSMParameters] [${runner.awsRegion}] ` +
350-
`Failed to list parameters or check if available: ${e}`,
351-
);
352-
doDeleteSSMParameter(paramName, metrics, runner.awsRegion);
353-
}
354-
}
355328
} catch (e) {
356329
console.error(`[${runner.awsRegion}] [terminateRunner]: ${e}`);
357330
throw e;

terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-down.test.ts

Lines changed: 1 addition & 132 deletions
Original file line numberDiff line numberDiff line change
@@ -17,16 +17,8 @@ import {
1717
resetGHRunnersCaches,
1818
} from './gh-runners';
1919
import * as MetricsModule from './metrics';
20+
import { listRunners, resetRunnersCaches, terminateRunner, RunnerType } from './runners';
2021
import {
21-
doDeleteSSMParameter,
22-
listRunners,
23-
resetRunnersCaches,
24-
terminateRunner,
25-
RunnerType,
26-
listSSMParameters,
27-
} from './runners';
28-
import {
29-
cleanupOldSSMParameters,
3022
getGHRunnerOrg,
3123
getGHRunnerRepo,
3224
isEphemeralRunner,
@@ -38,7 +30,6 @@ import {
3830
sortSSMParametersByUpdateTime,
3931
} from './scale-down';
4032
import { RequestError } from '@octokit/request-error';
41-
import { SSM } from 'aws-sdk';
4233

4334
jest.mock('./gh-runners', () => ({
4435
/* eslint-disable-next-line @typescript-eslint/no-explicit-any */
@@ -56,9 +47,7 @@ jest.mock('./gh-runners', () => ({
5647
jest.mock('./runners', () => ({
5748
/* eslint-disable-next-line @typescript-eslint/no-explicit-any */
5849
...(jest.requireActual('./runners') as any),
59-
doDeleteSSMParameter: jest.fn(),
6050
listRunners: jest.fn(),
61-
listSSMParameters: jest.fn(),
6251
resetRunnersCaches: jest.fn(),
6352
terminateRunner: jest.fn(),
6453
}));
@@ -79,8 +68,6 @@ const mockRunner = (runnerDef: any) => {
7968
return runnerDef as GhRunner;
8069
};
8170

82-
const MAX_SSM_PARAMETERS = 25;
83-
8471
const metrics = new MetricsModule.ScaleDownMetrics();
8572

8673
const minimumRunningTimeInMinutes = 10;
@@ -95,8 +82,6 @@ const baseConfig = {
9582
shuffledSubnetIdsForAwsRegion: jest.fn().mockImplementation((awsRegion: string) => {
9683
return Array.from(subnetIds.get(awsRegion) ?? []).sort();
9784
}),
98-
sSMParamCleanupAgeDays: 7,
99-
sSMParamMaxCleanupAllowance: MAX_SSM_PARAMETERS, // Easier to test
10085
};
10186

10287
describe('scale-down', () => {
@@ -1506,122 +1491,6 @@ describe('scale-down', () => {
15061491
});
15071492
});
15081493

1509-
describe('cleanupOldSSMParameters', () => {
1510-
it('Stops when LastModifiedDate === undefined', async () => {
1511-
const oldDt = moment()
1512-
.subtract(Config.Instance.sSMParamCleanupAgeDays + 1, 'days')
1513-
.toDate();
1514-
const ssmParameters = new Map<string, SSM.ParameterMetadata>();
1515-
ssmParameters.set('WG113', { Name: 'WG113', LastModifiedDate: undefined });
1516-
ssmParameters.set('WG115', { Name: 'WG115', LastModifiedDate: oldDt });
1517-
ssmParameters.set('WG116', { Name: 'WG116', LastModifiedDate: oldDt });
1518-
1519-
const mockedDoDeleteSSMParameter = mocked(doDeleteSSMParameter);
1520-
const mockedListSSMParameters = mocked(listSSMParameters);
1521-
1522-
mockedListSSMParameters.mockResolvedValueOnce(ssmParameters);
1523-
mockedDoDeleteSSMParameter.mockResolvedValue(true);
1524-
1525-
await cleanupOldSSMParameters(new Set(['us-east-1']), metrics);
1526-
1527-
expect(mockedDoDeleteSSMParameter).toBeCalledTimes(2);
1528-
expect(mockedDoDeleteSSMParameter).toBeCalledWith('WG115', metrics, 'us-east-1');
1529-
expect(mockedDoDeleteSSMParameter).toBeCalledWith('WG116', metrics, 'us-east-1');
1530-
expect(mockedListSSMParameters).toBeCalledTimes(1);
1531-
});
1532-
1533-
it('Stops when LastModifiedDate is < Config.Instance.sSMParamCleanupAgeDays', async () => {
1534-
const oldDt = moment()
1535-
.subtract(Config.Instance.sSMParamCleanupAgeDays + 1, 'days')
1536-
.toDate();
1537-
const olderDt = moment()
1538-
.subtract(Config.Instance.sSMParamCleanupAgeDays - 1, 'days')
1539-
.toDate();
1540-
const ssmParameters = new Map<string, SSM.ParameterMetadata>();
1541-
ssmParameters.set('WG113', { Name: 'WG113', LastModifiedDate: undefined });
1542-
ssmParameters.set('WG115', { Name: 'WG115', LastModifiedDate: oldDt });
1543-
ssmParameters.set('WG116', { Name: 'WG116', LastModifiedDate: olderDt });
1544-
1545-
const mockedDoDeleteSSMParameter = mocked(doDeleteSSMParameter);
1546-
const mockedListSSMParameters = mocked(listSSMParameters);
1547-
1548-
mockedListSSMParameters.mockResolvedValueOnce(ssmParameters);
1549-
mockedDoDeleteSSMParameter.mockResolvedValue(true);
1550-
1551-
await cleanupOldSSMParameters(new Set(['us-east-1']), metrics);
1552-
1553-
expect(mockedDoDeleteSSMParameter).toBeCalledTimes(1);
1554-
expect(mockedDoDeleteSSMParameter).toBeCalledWith('WG115', metrics, 'us-east-1');
1555-
expect(mockedListSSMParameters).toBeCalledTimes(1);
1556-
});
1557-
1558-
it('Stops when deleted >= Config.Instance.sSMParamMaxCleanupAllowance', async () => {
1559-
const oldDt = moment()
1560-
.subtract(Config.Instance.sSMParamCleanupAgeDays + 1, 'days')
1561-
.toDate();
1562-
const ssmParameters = new Map<string, SSM.ParameterMetadata>();
1563-
[...Array(MAX_SSM_PARAMETERS + 5).keys()].forEach((i) => {
1564-
const name = `AGDGADUWG113-${i}`;
1565-
ssmParameters.set(name, { Name: name, LastModifiedDate: oldDt });
1566-
});
1567-
1568-
const mockedDoDeleteSSMParameter = mocked(doDeleteSSMParameter);
1569-
const mockedListSSMParameters = mocked(listSSMParameters);
1570-
1571-
mockedListSSMParameters.mockResolvedValueOnce(ssmParameters);
1572-
mockedDoDeleteSSMParameter.mockResolvedValue(true);
1573-
1574-
await cleanupOldSSMParameters(new Set(['us-east-1']), metrics);
1575-
1576-
expect(mockedDoDeleteSSMParameter).toBeCalledTimes(MAX_SSM_PARAMETERS);
1577-
expect(mockedListSSMParameters).toBeCalledTimes(1);
1578-
});
1579-
1580-
it('Breaks when deleted >= Config.Instance.sSMParamMaxCleanupAllowance', async () => {
1581-
const oldDt = moment()
1582-
.subtract(Config.Instance.sSMParamCleanupAgeDays + 1, 'days')
1583-
.toDate();
1584-
const ssmParameters = new Map<string, SSM.ParameterMetadata>();
1585-
[...Array(MAX_SSM_PARAMETERS + 5).keys()].forEach((i) => {
1586-
const name = `AGDGADUWG113-${i}`;
1587-
ssmParameters.set(name, { Name: name, LastModifiedDate: oldDt });
1588-
});
1589-
1590-
const mockedDoDeleteSSMParameter = mocked(doDeleteSSMParameter);
1591-
const mockedListSSMParameters = mocked(listSSMParameters);
1592-
1593-
mockedListSSMParameters.mockResolvedValueOnce(ssmParameters);
1594-
mockedDoDeleteSSMParameter.mockResolvedValue(true);
1595-
1596-
await cleanupOldSSMParameters(new Set(['us-east-1']), metrics);
1597-
1598-
expect(mockedDoDeleteSSMParameter).toBeCalledTimes(MAX_SSM_PARAMETERS);
1599-
expect(mockedListSSMParameters).toBeCalledTimes(1);
1600-
});
1601-
1602-
it('Dont count failed to delete', async () => {
1603-
const oldDt = moment()
1604-
.subtract(Config.Instance.sSMParamCleanupAgeDays + 1, 'days')
1605-
.toDate();
1606-
const ssmParameters = new Map<string, SSM.ParameterMetadata>();
1607-
[...Array(MAX_SSM_PARAMETERS + 5).keys()].forEach((i) => {
1608-
const name = `AGDGADUWG113-${i}`;
1609-
ssmParameters.set(name, { Name: name, LastModifiedDate: oldDt });
1610-
});
1611-
1612-
const mockedDoDeleteSSMParameter = mocked(doDeleteSSMParameter);
1613-
const mockedListSSMParameters = mocked(listSSMParameters);
1614-
1615-
mockedListSSMParameters.mockResolvedValueOnce(ssmParameters);
1616-
mockedDoDeleteSSMParameter.mockResolvedValue(false);
1617-
1618-
await cleanupOldSSMParameters(new Set(['us-east-1']), metrics);
1619-
1620-
expect(mockedDoDeleteSSMParameter).toBeCalledTimes(MAX_SSM_PARAMETERS + 5);
1621-
expect(mockedListSSMParameters).toBeCalledTimes(1);
1622-
});
1623-
});
1624-
16251494
describe('getGHRunnerOrg', () => {
16261495
const ghRunners = [
16271496
{ name: 'instance-id-01', busy: true },

terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-down.ts

Lines changed: 1 addition & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ import {
1313
resetGHRunnersCaches,
1414
} from './gh-runners';
1515
import { ScaleDownMetrics, sendMetricsAtTimeout, sendMetricsTimeoutVars } from './metrics';
16-
import { doDeleteSSMParameter, listRunners, listSSMParameters, resetRunnersCaches, terminateRunner } from './runners';
16+
import { listRunners, resetRunnersCaches, terminateRunner } from './runners';
1717
import { getRepo, groupBy, Repo, RunnerInfo, isGHRateLimitError, shuffleArrayInPlace } from './utils';
1818
import { SSM } from 'aws-sdk';
1919

@@ -44,10 +44,6 @@ export async function scaleDown(): Promise<void> {
4444
},
4545
);
4646

47-
const runnersRegions = new Set<string>(
48-
Array.from(runnersDict.values()).flatMap((runners) => runners.map((runner) => runner.awsRegion)),
49-
);
50-
5147
if (runnersDict.size === 0) {
5248
console.info(`No active runners found for environment: '${Config.Instance.environment}'`);
5349
return;
@@ -144,9 +140,6 @@ export async function scaleDown(): Promise<void> {
144140
}
145141
}
146142
}
147-
148-
await cleanupOldSSMParameters(runnersRegions, metrics);
149-
150143
console.info('Scale down completed');
151144
} catch (e) {
152145
/* istanbul ignore next */
@@ -161,46 +154,6 @@ export async function scaleDown(): Promise<void> {
161154
}
162155
}
163156

164-
export async function cleanupOldSSMParameters(runnersRegions: Set<string>, metrics: ScaleDownMetrics): Promise<void> {
165-
try {
166-
for (const awsRegion of runnersRegions) {
167-
const ssmParams = sortSSMParametersByUpdateTime(
168-
Array.from((await listSSMParameters(metrics, awsRegion)).values()),
169-
);
170-
171-
let deleted = 0;
172-
for (const ssmParam of ssmParams) {
173-
/* istanbul ignore next */
174-
if (ssmParam.Name === undefined) {
175-
continue;
176-
}
177-
if (ssmParam.LastModifiedDate === undefined) {
178-
break;
179-
}
180-
if (
181-
ssmParam.LastModifiedDate.getTime() >
182-
moment().subtract(Config.Instance.sSMParamCleanupAgeDays, 'days').toDate().getTime()
183-
) {
184-
break;
185-
}
186-
if (await doDeleteSSMParameter(ssmParam.Name, metrics, awsRegion)) {
187-
deleted += 1;
188-
}
189-
if (deleted >= Config.Instance.sSMParamMaxCleanupAllowance) {
190-
break;
191-
}
192-
}
193-
194-
if (deleted > 0) {
195-
console.info(`Deleted ${deleted} old SSM parameters in ${awsRegion}`);
196-
}
197-
}
198-
} catch (e) {
199-
/* istanbul ignore next */
200-
console.error('Failed to cleanup old SSM parameters', e);
201-
}
202-
}
203-
204157
export async function getGHRunnerOrg(ec2runner: RunnerInfo, metrics: ScaleDownMetrics): Promise<GhRunner | undefined> {
205158
const org = ec2runner.org as string;
206159
let ghRunner: GhRunner | undefined = undefined;

0 commit comments

Comments
 (0)