Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions examples/default/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ module "runners" {
# metric = {
# enable_spot_termination_warning = true
# enable_job_retry = false
# enable_pool_sufficiency = true
# enable_github_app_rate_limit = false
# }
# }
Expand Down
1 change: 1 addition & 0 deletions examples/multi-runner/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ module "runners" {
# metric = {
# enable_github_app_rate_limit = true
# enable_job_retry = false
# enable_pool_sufficiency = true
# enable_spot_termination_warning = true
# }
# }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import { RunnerInputParameters } from './../aws/runners.d';
import ScaleError from './ScaleError';
import * as scaleUpModule from './scale-up';
import { getParameter } from '@aws-github-runner/aws-ssm-util';
import { createSingleMetric } from '@aws-github-runner/aws-powertools-util';
import { describe, it, expect, beforeEach, vi } from 'vitest';

const mockOctokit = {
Expand All @@ -33,6 +34,7 @@ const mockCreateRunner = vi.mocked(createRunner);
const mockListRunners = vi.mocked(listEC2Runners);
const mockSSMClient = mockClient(SSMClient);
const mockSSMgetParameter = vi.mocked(getParameter);
const mockCreateSingleMetric = vi.mocked(createSingleMetric);

vi.mock('@octokit/rest', () => ({
Octokit: vi.fn().mockImplementation(() => mockOctokit),
Expand Down Expand Up @@ -61,6 +63,22 @@ vi.mock('@aws-github-runner/aws-ssm-util', async () => {
};
});

// Partially mock the powertools util package: keep every real export, but swap
// `createSingleMetric` for a spy so tests can assert on emitted metrics.
vi.mock('@aws-github-runner/aws-powertools-util', async () => {
  const realModule = (await vi.importActual(
    '@aws-github-runner/aws-powertools-util',
  )) as typeof import('@aws-github-runner/aws-powertools-util');

  // The stub ignores its arguments and hands back an object exposing only a
  // spied `addMetadata`, which is the single member the code under test calls.
  const createSingleMetricStub = vi.fn(
    // eslint-disable-next-line @typescript-eslint/no-unused-vars
    (name: string, unit: string, value: number, dimensions?: Record<string, string>) => ({
      addMetadata: vi.fn(),
    }),
  );

  return {
    ...realModule,
    createSingleMetric: createSingleMetricStub,
  };
});

export type RunnerType = 'ephemeral' | 'non-ephemeral';

// for ephemeral and non-ephemeral runners
Expand Down Expand Up @@ -183,6 +201,83 @@ describe('scaleUp with GHES', () => {
expect(listEC2Runners).not.toBeCalled();
});

describe('pool sufficiency metrics', () => {
beforeEach(() => {
process.env.ENABLE_ORGANIZATION_RUNNERS = 'true';
process.env.ENVIRONMENT = 'test-env';
});

it('records pool sufficiency metric as insufficient when scaling up', async () => {
process.env.ENABLE_METRIC_POOL_SUFFICIENCY = 'true';
process.env.RUNNERS_MAXIMUM_COUNT = '5';

mockListRunners.mockImplementation(async () => [
{
instanceId: 'i-1234',
launchTime: new Date(),
type: 'Org',
owner: TEST_DATA.repositoryOwner,
},
]);

await scaleUpModule.scaleUp('aws:sqs', TEST_DATA);

expect(mockCreateSingleMetric).toHaveBeenCalledWith('SufficientPoolHosts', 'Count', 0.0, {
Environment: 'test-env',
});
});

it('records pool sufficiency metric as sufficient when job is not queued', async () => {
process.env.ENABLE_METRIC_POOL_SUFFICIENCY = 'true';

mockOctokit.actions.getJobForWorkflowRun.mockImplementation(() => ({
data: { status: 'completed' },
}));

await scaleUpModule.scaleUp('aws:sqs', TEST_DATA);

expect(mockCreateSingleMetric).toHaveBeenCalledWith('SufficientPoolHosts', 'Count', 1.0, {
Environment: 'test-env',
});
});

it('does not record pool sufficiency metric when disabled', async () => {
process.env.ENABLE_METRIC_POOL_SUFFICIENCY = 'false';
process.env.RUNNERS_MAXIMUM_COUNT = '5';

mockListRunners.mockImplementation(async () => [
{
instanceId: 'i-1234',
launchTime: new Date(),
type: 'Org',
owner: TEST_DATA.repositoryOwner,
},
]);

await scaleUpModule.scaleUp('aws:sqs', TEST_DATA);

expect(mockCreateSingleMetric).not.toHaveBeenCalled();
});

it('does not record pool sufficiency metric when environment variable is undefined', async () => {
delete process.env.ENABLE_METRIC_POOL_SUFFICIENCY;
process.env.RUNNERS_MAXIMUM_COUNT = '5';

mockListRunners.mockImplementation(async () => [
{
instanceId: 'i-1234',
launchTime: new Date(),
type: 'Org',
owner: TEST_DATA.repositoryOwner,
},
]);

await scaleUpModule.scaleUp('aws:sqs', TEST_DATA);

expect(mockCreateSingleMetric).not.toHaveBeenCalled();
});
});

describe('on org level', () => {
beforeEach(() => {
process.env.ENABLE_ORGANIZATION_RUNNERS = 'true';
Expand Down
20 changes: 19 additions & 1 deletion lambdas/functions/control-plane/src/scale-runners/scale-up.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
import { Octokit } from '@octokit/rest';
import { addPersistentContextToChildLogger, createChildLogger } from '@aws-github-runner/aws-powertools-util';
import {
addPersistentContextToChildLogger,
createChildLogger,
createSingleMetric,
} from '@aws-github-runner/aws-powertools-util';
import { getParameter, putParameter } from '@aws-github-runner/aws-ssm-util';
import yn from 'yn';

Expand All @@ -9,6 +13,7 @@ import { RunnerInputParameters } from './../aws/runners.d';
import ScaleError from './ScaleError';
import { publishRetryMessage } from './job-retry';
import { metricGitHubAppRateLimit } from '../github/rate-limit';
import { MetricUnit } from '@aws-lambda-powertools/metrics';

const logger = createChildLogger('scale-up');

Expand Down Expand Up @@ -307,6 +312,7 @@ export async function scaleUp(eventSource: string, payload: ActionRequestMessage

if (scaleUp) {
logger.info(`Attempting to launch a new runner`);
createPoolSufficiencyMetric(environment, payload, false);

await createRunners(
{
Expand Down Expand Up @@ -348,6 +354,7 @@ export async function scaleUp(eventSource: string, payload: ActionRequestMessage
}
} else {
logger.info('No runner will be created, job is not queued.');
createPoolSufficiencyMetric(environment, payload, true);
}
}

Expand Down Expand Up @@ -473,3 +480,14 @@ async function createJitConfig(githubRunnerConfig: CreateGitHubRunnerConfig, ins
}
}
}

/**
 * Emits the `SufficientPoolHosts` single metric, if that metric is enabled.
 *
 * The metric is only created when the `ENABLE_METRIC_POOL_SUFFICIENCY`
 * environment variable parses as true (via `yn`, defaulting to `false`).
 * Its value is `1` when `wasSufficient` is true and `0` otherwise, and it
 * carries `Environment` as its dimension. Environment, repository name and
 * repository owner are additionally attached as metadata.
 *
 * @param environment - Environment name used for the dimension and metadata.
 * @param payload - Message whose `repositoryName` and `repositoryOwner` are attached as metadata.
 * @param wasSufficient - Whether to report the pool as sufficient (`1`) or not (`0`).
 */
function createPoolSufficiencyMetric(environment: string, payload: ActionRequestMessage, wasSufficient: boolean) {
  const metricEnabled = yn(process.env.ENABLE_METRIC_POOL_SUFFICIENCY, { default: false });
  if (!metricEnabled) {
    return;
  }

  const sufficiencyValue = wasSufficient ? 1.0 : 0.0;
  const dimensions = { Environment: environment };
  const poolMetric = createSingleMetric('SufficientPoolHosts', MetricUnit.Count, sufficiencyValue, dimensions);

  // Metadata is attached separately from the dimension passed above.
  poolMetric.addMetadata('Environment', environment);
  poolMetric.addMetadata('RepositoryName', payload.repositoryName);
  poolMetric.addMetadata('RepositoryOwner', payload.repositoryOwner);
}
Comment on lines +484 to +493
Copy link
Preview

Copilot AI Sep 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The Environment dimension is added twice - once in the metric creation and again as metadata. The dimension in the metric creation (line 487) should be sufficient for grouping metrics, making the duplicate metadata on line 489 redundant.

Copilot uses AI. Check for mistakes.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this review comment is correct. The first time, the environment is added as a dimension; `addMetadata` only adds the value to the log entry, not as a dimension, correct?

1 change: 1 addition & 0 deletions modules/multi-runner/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -697,6 +697,7 @@ variable "metrics" {
metric = optional(object({
enable_github_app_rate_limit = optional(bool, true)
enable_job_retry = optional(bool, true)
enable_pool_sufficiency = optional(bool, true)
enable_spot_termination_warning = optional(bool, true)
}), {})
})
Expand Down
1 change: 1 addition & 0 deletions modules/runners/job-retry/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ locals {
environment_variables = {
ENABLE_ORGANIZATION_RUNNERS = var.config.enable_organization_runners
ENABLE_METRIC_JOB_RETRY = var.config.metrics.enable && var.config.metrics.metric.enable_job_retry
ENABLE_METRIC_POOL_SUFFICIENCY = var.config.metrics.enable && var.config.metrics.metric.enable_pool_sufficiency
ENABLE_METRIC_GITHUB_APP_RATE_LIMIT = var.config.metrics.enable && var.config.metrics.metric.enable_github_app_rate_limit
GHES_URL = var.config.ghes_url
USER_AGENT = var.config.user_agent
Expand Down
1 change: 1 addition & 0 deletions modules/runners/job-retry/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ variable "config" {
metric = optional(object({
enable_github_app_rate_limit = optional(bool, true)
enable_job_retry = optional(bool, true)
enable_pool_sufficiency = optional(bool, true)
}), {})
}), {})
prefix = optional(string, null)
Expand Down
1 change: 1 addition & 0 deletions modules/runners/scale-up.tf
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ resource "aws_lambda_function" "scale_up" {
ENABLE_JIT_CONFIG = var.enable_jit_config
ENABLE_JOB_QUEUED_CHECK = local.enable_job_queued_check
ENABLE_METRIC_GITHUB_APP_RATE_LIMIT = var.metrics.enable && var.metrics.metric.enable_github_app_rate_limit
ENABLE_METRIC_POOL_SUFFICIENCY = var.metrics.enable && var.metrics.metric.enable_pool_sufficiency
ENABLE_ORGANIZATION_RUNNERS = var.enable_organization_runners
ENVIRONMENT = var.prefix
GHES_URL = var.ghes_url
Expand Down
1 change: 1 addition & 0 deletions modules/runners/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -727,6 +727,7 @@ variable "metrics" {
metric = optional(object({
enable_github_app_rate_limit = optional(bool, true)
enable_job_retry = optional(bool, true)
enable_pool_sufficiency = optional(bool, true)
enable_spot_termination_warning = optional(bool, true)
}), {})
})
Expand Down
1 change: 1 addition & 0 deletions variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -926,6 +926,7 @@ variable "metrics" {
metric = optional(object({
enable_github_app_rate_limit = optional(bool, true)
enable_job_retry = optional(bool, true)
enable_pool_sufficiency = optional(bool, true)
enable_spot_termination_warning = optional(bool, true)
}), {})
})
Expand Down
Loading