Skip to content

Commit eaaf4f5

Browse files
feat(control-plane): [issue 4746] pool sufficiency metrics
1 parent b09d3e3 commit eaaf4f5

File tree

10 files changed

+122
-1
lines changed

10 files changed

+122
-1
lines changed

examples/default/main.tf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ module "runners" {
128128
# metric = {
129129
# enable_spot_termination_warning = true
130130
# enable_job_retry = false
131+
# enable_pool_sufficiency = true
131132
# enable_github_app_rate_limit = false
132133
# }
133134
# }

examples/multi-runner/main.tf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ module "runners" {
150150
# metric = {
151151
# enable_github_app_rate_limit = true
152152
# enable_job_retry = false
153+
# enable_pool_sufficiency = true
153154
# enable_spot_termination_warning = true
154155
# }
155156
# }

lambdas/functions/control-plane/src/scale-runners/scale-up.test.ts

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import { RunnerInputParameters } from './../aws/runners.d';
1212
import ScaleError from './ScaleError';
1313
import * as scaleUpModule from './scale-up';
1414
import { getParameter } from '@aws-github-runner/aws-ssm-util';
15+
import { createSingleMetric } from '@aws-github-runner/aws-powertools-util';
1516
import { describe, it, expect, beforeEach, vi } from 'vitest';
1617

1718
const mockOctokit = {
@@ -33,6 +34,7 @@ const mockCreateRunner = vi.mocked(createRunner);
3334
const mockListRunners = vi.mocked(listEC2Runners);
3435
const mockSSMClient = mockClient(SSMClient);
3536
const mockSSMgetParameter = vi.mocked(getParameter);
37+
const mockCreateSingleMetric = vi.mocked(createSingleMetric);
3638

3739
vi.mock('@octokit/rest', () => ({
3840
Octokit: vi.fn().mockImplementation(() => mockOctokit),
@@ -61,6 +63,22 @@ vi.mock('@aws-github-runner/aws-ssm-util', async () => {
6163
};
6264
});
6365

66+
// Partial mock of the powertools util package: every real export is kept via
// vi.importActual, and only createSingleMetric is replaced by a vi.fn spy.
// The spy ignores its arguments and returns an object whose sole member is a
// stubbed addMetadata, so tests can assert on the call arguments.
vi.mock('@aws-github-runner/aws-powertools-util', async () => {
67+
const actual = (await vi.importActual(
68+
'@aws-github-runner/aws-powertools-util',
69+
)) as typeof import('@aws-github-runner/aws-powertools-util');
70+
71+
return {
72+
...actual,
73+
// eslint-disable-next-line @typescript-eslint/no-unused-vars
74+
createSingleMetric: vi.fn((name: string, unit: string, value: number, dimensions?: Record<string, string>) => {
75+
return {
76+
addMetadata: vi.fn(),
77+
};
78+
}),
79+
};
80+
});
81+
6482
export type RunnerType = 'ephemeral' | 'non-ephemeral';
6583

6684
// for ephemeral and non-ephemeral runners
@@ -183,6 +201,83 @@ describe('scaleUp with GHES', () => {
183201
expect(listEC2Runners).not.toBeCalled();
184202
});
185203

204+
// Suite for the pool sufficiency metric, driven by ENABLE_METRIC_POOL_SUFFICIENCY.
// Cases covered:
//   - flag 'true' with one listed runner and RUNNERS_MAXIMUM_COUNT '5' -> metric value 0.0
//   - flag 'true' with the job status mocked as 'completed'            -> metric value 1.0
//   - flag 'false', or flag deleted from the environment               -> createSingleMetric not called
// NOTE(review): the "not called" assertions assume an outer hook clears the
// createSingleMetric spy between tests, and that no other metric is emitted
// through createSingleMetric on this path — confirm against the rest of the file.
describe('pool sufficiency metrics', () => {
205+
beforeEach(() => {
206+
process.env.ENABLE_ORGANIZATION_RUNNERS = 'true';
207+
process.env.ENVIRONMENT = 'test-env';
208+
});
209+
210+
it('records pool sufficiency metric as insufficient when scaling up', async () => {
211+
process.env.ENABLE_METRIC_POOL_SUFFICIENCY = 'true';
212+
process.env.RUNNERS_MAXIMUM_COUNT = '5';
213+
214+
mockListRunners.mockImplementation(async () => [
215+
{
216+
instanceId: 'i-1234',
217+
launchTime: new Date(),
218+
type: 'Org',
219+
owner: TEST_DATA.repositoryOwner,
220+
},
221+
]);
222+
223+
await scaleUpModule.scaleUp('aws:sqs', TEST_DATA);
224+
225+
expect(mockCreateSingleMetric).toHaveBeenCalledWith('SufficientPoolHosts', 'Count', 0.0, {
226+
Environment: 'test-env',
227+
});
228+
});
229+
230+
it('records pool sufficiency metric as sufficient when job is not queued', async () => {
231+
process.env.ENABLE_METRIC_POOL_SUFFICIENCY = 'true';
232+
233+
mockOctokit.actions.getJobForWorkflowRun.mockImplementation(() => ({
234+
data: { status: 'completed' },
235+
}));
236+
237+
await scaleUpModule.scaleUp('aws:sqs', TEST_DATA);
238+
239+
expect(mockCreateSingleMetric).toHaveBeenCalledWith('SufficientPoolHosts', 'Count', 1.0, {
240+
Environment: 'test-env',
241+
});
242+
});
243+
244+
it('does not record pool sufficiency metric when disabled', async () => {
245+
process.env.ENABLE_METRIC_POOL_SUFFICIENCY = 'false';
246+
process.env.RUNNERS_MAXIMUM_COUNT = '5';
247+
248+
mockListRunners.mockImplementation(async () => [
249+
{
250+
instanceId: 'i-1234',
251+
launchTime: new Date(),
252+
type: 'Org',
253+
owner: TEST_DATA.repositoryOwner,
254+
},
255+
]);
256+
257+
await scaleUpModule.scaleUp('aws:sqs', TEST_DATA);
258+
259+
expect(mockCreateSingleMetric).not.toHaveBeenCalled();
260+
});
261+
262+
it('does not record pool sufficiency metric when environment variable is undefined', async () => {
263+
delete process.env.ENABLE_METRIC_POOL_SUFFICIENCY;
264+
process.env.RUNNERS_MAXIMUM_COUNT = '5';
265+
266+
mockListRunners.mockImplementation(async () => [
267+
{
268+
instanceId: 'i-1234',
269+
launchTime: new Date(),
270+
type: 'Org',
271+
owner: TEST_DATA.repositoryOwner,
272+
},
273+
]);
274+
275+
await scaleUpModule.scaleUp('aws:sqs', TEST_DATA);
276+
277+
expect(mockCreateSingleMetric).not.toHaveBeenCalled();
278+
});
279+
});
280+
186281
describe('on org level', () => {
187282
beforeEach(() => {
188283
process.env.ENABLE_ORGANIZATION_RUNNERS = 'true';

lambdas/functions/control-plane/src/scale-runners/scale-up.ts

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
import { Octokit } from '@octokit/rest';
2-
import { addPersistentContextToChildLogger, createChildLogger } from '@aws-github-runner/aws-powertools-util';
2+
import {
3+
addPersistentContextToChildLogger,
4+
createChildLogger,
5+
createSingleMetric,
6+
} from '@aws-github-runner/aws-powertools-util';
37
import { getParameter, putParameter } from '@aws-github-runner/aws-ssm-util';
48
import yn from 'yn';
59

@@ -9,6 +13,7 @@ import { RunnerInputParameters } from './../aws/runners.d';
913
import ScaleError from './ScaleError';
1014
import { publishRetryMessage } from './job-retry';
1115
import { metricGitHubAppRateLimit } from '../github/rate-limit';
16+
import { MetricUnit } from '@aws-lambda-powertools/metrics';
1217

1318
const logger = createChildLogger('scale-up');
1419

@@ -307,6 +312,7 @@ export async function scaleUp(eventSource: string, payload: ActionRequestMessage
307312

308313
if (scaleUp) {
309314
logger.info(`Attempting to launch a new runner`);
315+
createPoolSufficiencyMetric(environment, payload, false);
310316

311317
await createRunners(
312318
{
@@ -348,6 +354,7 @@ export async function scaleUp(eventSource: string, payload: ActionRequestMessage
348354
}
349355
} else {
350356
logger.info('No runner will be created, job is not queued.');
357+
createPoolSufficiencyMetric(environment, payload, true);
351358
}
352359
}
353360

@@ -473,3 +480,14 @@ async function createJitConfig(githubRunnerConfig: CreateGitHubRunnerConfig, ins
473480
}
474481
}
475482
}
483+
484+
/**
 * Creates the `SufficientPoolHosts` single metric when pool sufficiency
 * metrics are enabled.
 *
 * Does nothing unless `process.env.ENABLE_METRIC_POOL_SUFFICIENCY` is parsed
 * as true by `yn` (an unset or unparseable value falls back to `false`).
 * When enabled, it calls `createSingleMetric` with unit `MetricUnit.Count` and
 * an `Environment` dimension, then attaches `Environment`, `RepositoryName`
 * and `RepositoryOwner` as metadata on the returned metric.
 *
 * NOTE(review): `Environment` is supplied both as a dimension and as metadata;
 * this looks redundant — confirm whether the metadata entry is needed.
 *
 * @param environment - Value used for the `Environment` dimension and metadata entry.
 * @param payload - Message whose `repositoryName` and `repositoryOwner` are added as metadata.
 * @param wasSufficient - Recorded as metric value `1.0` when true, `0.0` when false.
 */
function createPoolSufficiencyMetric(environment: string, payload: ActionRequestMessage, wasSufficient: boolean) {
485+
if (yn(process.env.ENABLE_METRIC_POOL_SUFFICIENCY, { default: false })) {
486+
const metric = createSingleMetric('SufficientPoolHosts', MetricUnit.Count, wasSufficient ? 1.0 : 0.0, {
487+
Environment: environment,
488+
});
489+
metric.addMetadata('Environment', environment);
490+
metric.addMetadata('RepositoryName', payload.repositoryName);
491+
metric.addMetadata('RepositoryOwner', payload.repositoryOwner);
492+
}
493+
}

modules/multi-runner/variables.tf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -693,6 +693,7 @@ variable "metrics" {
693693
metric = optional(object({
694694
enable_github_app_rate_limit = optional(bool, true)
695695
enable_job_retry = optional(bool, true)
696+
enable_pool_sufficiency = optional(bool, true)
696697
enable_spot_termination_warning = optional(bool, true)
697698
}), {})
698699
})

modules/runners/job-retry/main.tf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ locals {
55
environment_variables = {
66
ENABLE_ORGANIZATION_RUNNERS = var.config.enable_organization_runners
77
ENABLE_METRIC_JOB_RETRY = var.config.metrics.enable && var.config.metrics.metric.enable_job_retry
8+
ENABLE_METRIC_POOL_SUFFICIENCY = var.config.metrics.enable && var.config.metrics.metric.enable_pool_sufficiency
89
ENABLE_METRIC_GITHUB_APP_RATE_LIMIT = var.config.metrics.enable && var.config.metrics.metric.enable_github_app_rate_limit
910
GHES_URL = var.config.ghes_url
1011
USER_AGENT = var.config.user_agent

modules/runners/job-retry/variables.tf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ variable "config" {
5757
metric = optional(object({
5858
enable_github_app_rate_limit = optional(bool, true)
5959
enable_job_retry = optional(bool, true)
60+
enable_pool_sufficiency = optional(bool, true)
6061
}), {})
6162
}), {})
6263
prefix = optional(string, null)

modules/runners/scale-up.tf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ resource "aws_lambda_function" "scale_up" {
3131
ENABLE_JIT_CONFIG = var.enable_jit_config
3232
ENABLE_JOB_QUEUED_CHECK = local.enable_job_queued_check
3333
ENABLE_METRIC_GITHUB_APP_RATE_LIMIT = var.metrics.enable && var.metrics.metric.enable_github_app_rate_limit
34+
ENABLE_METRIC_POOL_SUFFICIENCY = var.metrics.enable && var.metrics.metric.enable_pool_sufficiency
3435
ENABLE_ORGANIZATION_RUNNERS = var.enable_organization_runners
3536
ENVIRONMENT = var.prefix
3637
GHES_URL = var.ghes_url

modules/runners/variables.tf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -718,6 +718,7 @@ variable "metrics" {
718718
metric = optional(object({
719719
enable_github_app_rate_limit = optional(bool, true)
720720
enable_job_retry = optional(bool, true)
721+
enable_pool_sufficiency = optional(bool, true)
721722
enable_spot_termination_warning = optional(bool, true)
722723
}), {})
723724
})

variables.tf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -917,6 +917,7 @@ variable "metrics" {
917917
metric = optional(object({
918918
enable_github_app_rate_limit = optional(bool, true)
919919
enable_job_retry = optional(bool, true)
920+
enable_pool_sufficiency = optional(bool, true)
920921
enable_spot_termination_warning = optional(bool, true)
921922
}), {})
922923
})

0 commit comments

Comments
 (0)