github-aws-runners · mabbott-aurorasolar · Sep 4, 2025 · Oct 3, 2025 · Copilot · Sep 4, 2025
@@ -128,6 +128,7 @@ module "runners" {
   #   metric = {
   #     enable_spot_termination_warning = true
   #     enable_job_retry                = false
+  #     enable_pool_sufficiency         = true
   #     enable_github_app_rate_limit    = false
   #   }
   # }

@@ -150,6 +150,7 @@ module "runners" {
   #   metric = {
   #     enable_github_app_rate_limit    = true
   #     enable_job_retry                = false
+  #     enable_pool_sufficiency         = true
   #     enable_spot_termination_warning = true
   #   }
   # }

@@ -12,6 +12,7 @@ import { RunnerInputParameters } from './../aws/runners.d';
 import ScaleError from './ScaleError';
 import * as scaleUpModule from './scale-up';
 import { getParameter } from '@aws-github-runner/aws-ssm-util';
+import { createSingleMetric } from '@aws-github-runner/aws-powertools-util';
 import { describe, it, expect, beforeEach, vi } from 'vitest';
 
 const mockOctokit = {
@@ -33,6 +34,7 @@ const mockCreateRunner = vi.mocked(createRunner);
 const mockListRunners = vi.mocked(listEC2Runners);
 const mockSSMClient = mockClient(SSMClient);
 const mockSSMgetParameter = vi.mocked(getParameter);
+const mockCreateSingleMetric = vi.mocked(createSingleMetric);
 
 vi.mock('@octokit/rest', () => ({
   Octokit: vi.fn().mockImplementation(() => mockOctokit),
@@ -61,6 +63,22 @@ vi.mock('@aws-github-runner/aws-ssm-util', async () => {
   };
 });
 
+vi.mock('@aws-github-runner/aws-powertools-util', async () => {
+  // Load the real module so every export other than `createSingleMetric` keeps its actual implementation.
+  const realModule = (await vi.importActual(
+    '@aws-github-runner/aws-powertools-util',
+  )) as typeof import('@aws-github-runner/aws-powertools-util');
+
+  // Replacement for `createSingleMetric`: ignores its arguments and hands back an object
+  // exposing only a mocked `addMetadata`, so tests can assert on the call arguments.
+  const createSingleMetricStub = vi.fn(
+    // eslint-disable-next-line @typescript-eslint/no-unused-vars
+    (name: string, unit: string, value: number, dimensions?: Record<string, string>) => ({ addMetadata: vi.fn() }),
+  );
+
+  return { ...realModule, createSingleMetric: createSingleMetricStub };
+});
+
 export type RunnerType = 'ephemeral' | 'non-ephemeral';
 
 // for ephemeral and non-ephemeral runners
@@ -183,6 +201,83 @@ describe('scaleUp with GHES', () => {
     expect(listEC2Runners).not.toBeCalled();
   });
 
+  describe('pool sufficiency metrics', () => {
+    // Dimensions expected on every recorded pool sufficiency metric in this suite.
+    const expectedDimensions = { Environment: 'test-env' };
+
+    // Makes the mocked listEC2Runners report one org-level runner owned by the test repository owner.
+    const stubSingleOrgRunner = () =>
+      mockListRunners.mockImplementation(async () => [
+        {
+          instanceId: 'i-1234',
+          launchTime: new Date(),
+          type: 'Org',
+          owner: TEST_DATA.repositoryOwner,
+        },
+      ]);
+
+    beforeEach(() => {
+      process.env.ENVIRONMENT = 'test-env';
+      process.env.ENABLE_ORGANIZATION_RUNNERS = 'true';
+    });
+
+    it('records pool sufficiency metric as insufficient when scaling up', async () => {
+      process.env.RUNNERS_MAXIMUM_COUNT = '5';
+      process.env.ENABLE_METRIC_POOL_SUFFICIENCY = 'true';
+      stubSingleOrgRunner();
+
+      await scaleUpModule.scaleUp('aws:sqs', TEST_DATA);
+
+      expect(mockCreateSingleMetric).toHaveBeenCalledWith('SufficientPoolHosts', 'Count', 0, expectedDimensions);
+    });
+
+    it('records pool sufficiency metric as sufficient when job is not queued', async () => {
+      process.env.ENABLE_METRIC_POOL_SUFFICIENCY = 'true';
+      // Report the job as already completed so the "not queued" branch is exercised.
+      mockOctokit.actions.getJobForWorkflowRun.mockImplementation(() => ({
+        data: { status: 'completed' },
+      }));
+
+      await scaleUpModule.scaleUp('aws:sqs', TEST_DATA);
+
+      expect(mockCreateSingleMetric).toHaveBeenCalledWith('SufficientPoolHosts', 'Count', 1, expectedDimensions);
+    });
+
+    it('does not record pool sufficiency metric when disabled', async () => {
+      process.env.RUNNERS_MAXIMUM_COUNT = '5';
+      process.env.ENABLE_METRIC_POOL_SUFFICIENCY = 'false';
+      stubSingleOrgRunner();
+
+      await scaleUpModule.scaleUp('aws:sqs', TEST_DATA);
+
+      expect(mockCreateSingleMetric).not.toHaveBeenCalled();
+    });
+
+    it('does not record pool sufficiency metric when environment variable is undefined', async () => {
+      process.env.RUNNERS_MAXIMUM_COUNT = '5';
+      delete process.env.ENABLE_METRIC_POOL_SUFFICIENCY;
+      stubSingleOrgRunner();
+
+      await scaleUpModule.scaleUp('aws:sqs', TEST_DATA);
+
+      expect(mockCreateSingleMetric).not.toHaveBeenCalled();
+    });
+  });
+
   describe('on org level', () => {
     beforeEach(() => {
       process.env.ENABLE_ORGANIZATION_RUNNERS = 'true';

@@ -1,5 +1,9 @@
 import { Octokit } from '@octokit/rest';
-import { addPersistentContextToChildLogger, createChildLogger } from '@aws-github-runner/aws-powertools-util';
+import {
+  addPersistentContextToChildLogger,
+  createChildLogger,
+  createSingleMetric,
+} from '@aws-github-runner/aws-powertools-util';
 import { getParameter, putParameter } from '@aws-github-runner/aws-ssm-util';
 import yn from 'yn';
 
@@ -9,6 +13,7 @@ import { RunnerInputParameters } from './../aws/runners.d';
 import ScaleError from './ScaleError';
 import { publishRetryMessage } from './job-retry';
 import { metricGitHubAppRateLimit } from '../github/rate-limit';
+import { MetricUnit } from '@aws-lambda-powertools/metrics';
 
 const logger = createChildLogger('scale-up');
 
@@ -307,6 +312,7 @@ export async function scaleUp(eventSource: string, payload: ActionRequestMessage
 
     if (scaleUp) {
       logger.info(`Attempting to launch a new runner`);
+      createPoolSufficiencyMetric(environment, payload, false);
 
       await createRunners(
         {
@@ -348,6 +354,7 @@ export async function scaleUp(eventSource: string, payload: ActionRequestMessage
     }
   } else {
     logger.info('No runner will be created, job is not queued.');
+    createPoolSufficiencyMetric(environment, payload, true);
   }
 }
 
@@ -473,3 +480,14 @@ async function createJitConfig(githubRunnerConfig: CreateGitHubRunnerConfig, ins
     }
   }
 }
+
+/**
+ * Emits the `SufficientPoolHosts` single metric, but only when the
+ * `ENABLE_METRIC_POOL_SUFFICIENCY` environment variable parses as truthy (defaults to off).
+ *
+ * @param environment - Used as the `Environment` dimension and repeated as metadata.
+ * @param payload - Message whose `repositoryName` and `repositoryOwner` are attached as metadata.
+ * @param wasSufficient - `true` records a value of 1, `false` records a value of 0.
+ */
+function createPoolSufficiencyMetric(environment: string, payload: ActionRequestMessage, wasSufficient: boolean) {
+  const metricEnabled = yn(process.env.ENABLE_METRIC_POOL_SUFFICIENCY, { default: false });
+  if (!metricEnabled) {
+    return;
+  }
+
+  const sufficiencyValue = wasSufficient ? 1.0 : 0.0;
+  const poolMetric = createSingleMetric('SufficientPoolHosts', MetricUnit.Count, sufficiencyValue, {
+    Environment: environment,
+  });
+
+  // Metadata is attached in a fixed order: environment, repository name, repository owner.
+  const metadataEntries: Array<[string, string]> = [
+    ['Environment', environment],
+    ['RepositoryName', payload.repositoryName],
+    ['RepositoryOwner', payload.repositoryOwner],
+  ];
+  for (const [key, value] of metadataEntries) {
+    poolMetric.addMetadata(key, value);
+  }
+}
@@ -697,6 +697,7 @@ variable "metrics" {
     metric = optional(object({
       enable_github_app_rate_limit    = optional(bool, true)
       enable_job_retry                = optional(bool, true)
+      enable_pool_sufficiency         = optional(bool, true)
       enable_spot_termination_warning = optional(bool, true)
     }), {})
   })

@@ -5,6 +5,7 @@ locals {
   environment_variables = {
     ENABLE_ORGANIZATION_RUNNERS          = var.config.enable_organization_runners
     ENABLE_METRIC_JOB_RETRY              = var.config.metrics.enable && var.config.metrics.metric.enable_job_retry
+    ENABLE_METRIC_POOL_SUFFICIENCY       = var.config.metrics.enable && var.config.metrics.metric.enable_pool_sufficiency
     ENABLE_METRIC_GITHUB_APP_RATE_LIMIT  = var.config.metrics.enable && var.config.metrics.metric.enable_github_app_rate_limit
     GHES_URL                             = var.config.ghes_url
     USER_AGENT                           = var.config.user_agent

@@ -57,6 +57,7 @@ variable "config" {
       metric = optional(object({
         enable_github_app_rate_limit = optional(bool, true)
         enable_job_retry             = optional(bool, true)
+        enable_pool_sufficiency      = optional(bool, true)
       }), {})
     }), {})
     prefix = optional(string, null)

@@ -31,6 +31,7 @@ resource "aws_lambda_function" "scale_up" {
       ENABLE_JIT_CONFIG                        = var.enable_jit_config
       ENABLE_JOB_QUEUED_CHECK                  = local.enable_job_queued_check
       ENABLE_METRIC_GITHUB_APP_RATE_LIMIT      = var.metrics.enable && var.metrics.metric.enable_github_app_rate_limit
+      ENABLE_METRIC_POOL_SUFFICIENCY           = var.metrics.enable && var.metrics.metric.enable_pool_sufficiency
       ENABLE_ORGANIZATION_RUNNERS              = var.enable_organization_runners
       ENVIRONMENT                              = var.prefix
       GHES_URL                                 = var.ghes_url

@@ -727,6 +727,7 @@ variable "metrics" {
     metric = optional(object({
       enable_github_app_rate_limit    = optional(bool, true)
       enable_job_retry                = optional(bool, true)
+      enable_pool_sufficiency         = optional(bool, true)
       enable_spot_termination_warning = optional(bool, true)
     }), {})
   })

@@ -926,6 +926,7 @@ variable "metrics" {
     metric = optional(object({
       enable_github_app_rate_limit    = optional(bool, true)
       enable_job_retry                = optional(bool, true)
+      enable_pool_sufficiency         = optional(bool, true)
       enable_spot_termination_warning = optional(bool, true)
     }), {})
   })