From 64a19c770e979179b076760f595381d77bcdd00e Mon Sep 17 00:00:00 2001 From: Niek Palm Date: Fri, 20 Dec 2024 10:16:51 +0100 Subject: [PATCH 01/11] fix: Remove fifo queues --- examples/arm64/main.tf | 3 --- examples/multi-runner/main.tf | 1 - lambdas/functions/control-plane/src/local.ts | 2 +- .../functions/webhook/src/sqs/index.test.ts | 25 ++----------------- lambdas/functions/webhook/src/sqs/index.ts | 5 ---- .../webhook/src/webhook/index.test.ts | 1 - main.tf | 5 ++-- modules/multi-runner/queues.tf | 17 ++++++------- modules/multi-runner/variables.tf | 1 - modules/webhook/variables.tf | 5 ++-- variables.tf | 7 +++++- 11 files changed, 20 insertions(+), 52 deletions(-) diff --git a/examples/arm64/main.tf b/examples/arm64/main.tf index e7b0a2f349..56edcecbc9 100644 --- a/examples/arm64/main.tf +++ b/examples/arm64/main.tf @@ -80,9 +80,6 @@ module "runners" { delay_webhook_event = 5 runners_maximum_count = 1 - # set up a fifo queue to remain order - enable_fifo_build_queue = true - # override scaling down scale_down_schedule_expression = "cron(* * * * ? *)" } diff --git a/examples/multi-runner/main.tf b/examples/multi-runner/main.tf index 9be340ab47..74cb5efa21 100644 --- a/examples/multi-runner/main.tf +++ b/examples/multi-runner/main.tf @@ -47,7 +47,6 @@ module "runners" { # labelMatchers = [["self-hosted", "linux", "x64", "amazon"]] # exactMatch = false # } - # fifo = true # delay_webhook_event = 0 # runner_config = { # runner_os = "linux" diff --git a/lambdas/functions/control-plane/src/local.ts b/lambdas/functions/control-plane/src/local.ts index a26ec37a04..2166da58fd 100644 --- a/lambdas/functions/control-plane/src/local.ts +++ b/lambdas/functions/control-plane/src/local.ts @@ -28,7 +28,7 @@ const sqsEvent = { messageAttributes: {}, md5OfBody: '4aef3bd70526e152e86426a0938cbec6', eventSource: 'aws:sqs', - eventSourceARN: 'arn:aws:sqs:us-west-2:916370655143:cicddev-queued-builds.fifo', + eventSourceARN: 'arn:aws:sqs:us-west-2:916370655143:cicddev-queued-builds', awsRegion: 'us-west-2', }, ], diff --git a/lambdas/functions/webhook/src/sqs/index.test.ts b/lambdas/functions/webhook/src/sqs/index.test.ts index be539ef0c3..131de6eb7d 100644 --- a/lambdas/functions/webhook/src/sqs/index.test.ts +++ b/lambdas/functions/webhook/src/sqs/index.test.ts @@ -30,38 +30,17 @@ describe('Test sending message to SQS.', () => { it('no fifo queue', async () => { // Arrange - const no_fifo_message: ActionRequestMessage = { - ...message, - queueFifo: false, - }; const sqsMessage: SendMessageCommandInput = { QueueUrl: queueUrl, - MessageBody: JSON.stringify(no_fifo_message), + MessageBody: JSON.stringify(message), }; // Act - const result = sendActionRequest(no_fifo_message); + const result = sendActionRequest(message); // Assert expect(mockSQS.sendMessage).toHaveBeenCalledWith(sqsMessage); await expect(result).resolves.not.toThrow(); }); - it('use a fifo queue', async () => { - // Arrange - const fifo_message: ActionRequestMessage = { - ...message, - queueFifo: true, - }; - const sqsMessage: SendMessageCommandInput = { - QueueUrl: queueUrl, - MessageBody: JSON.stringify(fifo_message), - }; - // Act - const result = sendActionRequest(fifo_message); - - // Assert - expect(mockSQS.sendMessage).toHaveBeenCalledWith({ ...sqsMessage, MessageGroupId: String(message.id) }); - await expect(result).resolves.not.toThrow(); - }); }); diff --git a/lambdas/functions/webhook/src/sqs/index.ts b/lambdas/functions/webhook/src/sqs/index.ts index 2d9b3ed2dd..a028d7dcc4 100644 --- a/lambdas/functions/webhook/src/sqs/index.ts +++ b/lambdas/functions/webhook/src/sqs/index.ts @@ -11,7 +11,6 @@ export interface ActionRequestMessage { repositoryOwner: string; installationId: number; queueId: string; - queueFifo: boolean; repoOwnerType: string; } @@ -26,7 +25,6 @@ export interface RunnerMatcherConfig { matcherConfig: MatcherConfig; id: string; arn: string; - fifo: boolean; } export interface GithubWorkflowEvent { @@ -42,9 +40,6 @@ export const sendActionRequest = async (message: ActionRequestMessage): Promise< }; logger.debug(`sending message to SQS: ${JSON.stringify(sqsMessage)}`); - if (message.queueFifo) { - sqsMessage.MessageGroupId = String(message.id); - } await sqs.sendMessage(sqsMessage); }; diff --git a/lambdas/functions/webhook/src/webhook/index.test.ts b/lambdas/functions/webhook/src/webhook/index.test.ts index c202361369..95ffca3522 100644 --- a/lambdas/functions/webhook/src/webhook/index.test.ts +++ b/lambdas/functions/webhook/src/webhook/index.test.ts @@ -290,7 +290,6 @@ function mockSSMResponse() { { id: '1', arn: 'arn:aws:sqs:us-east-1:123456789012:queue1', - fifo: false, matcherConfig: { labelMatchers: [['label1', 'label2']], exactMatch: true, diff --git a/main.tf b/main.tf index 8a02e470e3..ce53b3746a 100644 --- a/main.tf +++ b/main.tf @@ -53,7 +53,7 @@ resource "aws_sqs_queue_policy" "build_queue_policy" { } resource "aws_sqs_queue" "queued_builds" { - name = "${var.prefix}-queued-builds${var.enable_fifo_build_queue ? ".fifo" : ""}" + name = "${var.prefix}-queued-builds" delay_seconds = var.delay_webhook_event visibility_timeout_seconds = var.runners_scale_up_lambda_timeout message_retention_seconds = var.job_queue_retention_in_seconds @@ -80,7 +80,7 @@ resource "aws_sqs_queue_policy" "build_queue_dlq_policy" { resource "aws_sqs_queue" "queued_builds_dlq" { count = var.redrive_build_queue.enabled ? 1 : 0 - name = "${var.prefix}-queued-builds_dead_letter${var.enable_fifo_build_queue ? ".fifo" : ""}" + name = "${var.prefix}-queued-builds_dead_letter" sqs_managed_sse_enabled = var.queue_encryption.sqs_managed_sse_enabled kms_master_key_id = var.queue_encryption.kms_master_key_id @@ -114,7 +114,6 @@ module "webhook" { (aws_sqs_queue.queued_builds.id) = { id : aws_sqs_queue.queued_builds.id arn : aws_sqs_queue.queued_builds.arn - fifo : var.enable_fifo_build_queue matcherConfig : { labelMatchers : [local.runner_labels] exactMatch : var.enable_runner_workflow_job_labels_check_all diff --git a/modules/multi-runner/queues.tf b/modules/multi-runner/queues.tf index 9def57a316..58a9d2915e 100644 --- a/modules/multi-runner/queues.tf +++ b/modules/multi-runner/queues.tf @@ -27,14 +27,12 @@ data "aws_iam_policy_document" "deny_unsecure_transport" { } resource "aws_sqs_queue" "queued_builds" { - for_each = var.multi_runner_config - name = "${var.prefix}-${each.key}-queued-builds${each.value.fifo ? ".fifo" : ""}" - delay_seconds = each.value.runner_config.delay_webhook_event - visibility_timeout_seconds = var.runners_scale_up_lambda_timeout - message_retention_seconds = each.value.runner_config.job_queue_retention_in_seconds - fifo_queue = each.value.fifo - receive_wait_time_seconds = 0 - content_based_deduplication = each.value.fifo + for_each = var.multi_runner_config + name = "${var.prefix}-${each.key}-queued-builds" + delay_seconds = each.value.runner_config.delay_webhook_event + visibility_timeout_seconds = var.runners_scale_up_lambda_timeout + message_retention_seconds = each.value.runner_config.job_queue_retention_in_seconds + receive_wait_time_seconds = 0 redrive_policy = each.value.redrive_build_queue.enabled ? jsonencode({ deadLetterTargetArn = aws_sqs_queue.queued_builds_dlq[each.key].arn, maxReceiveCount = each.value.redrive_build_queue.maxReceiveCount @@ -55,12 +53,11 @@ resource "aws_sqs_queue_policy" "build_queue_policy" { resource "aws_sqs_queue" "queued_builds_dlq" { for_each = { for config, values in var.multi_runner_config : config => values if values.redrive_build_queue.enabled } - name = "${var.prefix}-${each.key}-queued-builds_dead_letter${each.value.fifo ? ".fifo" : ""}" + name = "${var.prefix}-${each.key}-queued-builds_dead_letter" sqs_managed_sse_enabled = var.queue_encryption.sqs_managed_sse_enabled kms_master_key_id = var.queue_encryption.kms_master_key_id kms_data_key_reuse_period_seconds = var.queue_encryption.kms_data_key_reuse_period_seconds - fifo_queue = each.value.fifo tags = var.tags } diff --git a/modules/multi-runner/variables.tf b/modules/multi-runner/variables.tf index d1d31b211c..7cc4568b3a 100644 --- a/modules/multi-runner/variables.tf +++ b/modules/multi-runner/variables.tf @@ -126,7 +126,6 @@ variable "multi_runner_config" { exactMatch = optional(bool, false) priority = optional(number, 999) }) - fifo = optional(bool, false) redrive_build_queue = optional(object({ enabled = bool maxReceiveCount = number diff --git a/modules/webhook/variables.tf b/modules/webhook/variables.tf index ba583d9859..60bdbddf32 100644 --- a/modules/webhook/variables.tf +++ b/modules/webhook/variables.tf @@ -25,9 +25,8 @@ variable "tags" { variable "runner_matcher_config" { description = "SQS queue to publish accepted build events based on the runner type. When exact match is disabled the webhook accepts the event if one of the workflow job labels is part of the matcher. The priority defines the order the matchers are applied." type = map(object({ - arn = string - id = string - fifo = bool + arn = string + id = string matcherConfig = object({ labelMatchers = list(list(string)) exactMatch = bool diff --git a/variables.tf b/variables.tf index a7d4267a1d..de1cba7956 100644 --- a/variables.tf +++ b/variables.tf @@ -641,9 +641,14 @@ variable "lambda_principals" { } variable "enable_fifo_build_queue" { - description = "Enable a FIFO queue to keep the order of events received by the webhook. Recommended for repo level runners." + description = "(FEATURE REMOVED) Enable a FIFO queue to keep the order of events received by the webhook. Recommended for repo level runners." type = bool default = false + + validation { + condition = var.enable_fifo_build_queue == false + error_message = "The feature for FIFO build queue is not supported anymore." + } } variable "redrive_build_queue" { From c7f3712eca125fe951fcfc1bd6e81a31fba6dac8 Mon Sep 17 00:00:00 2001 From: Niek Palm Date: Fri, 16 Aug 2024 12:58:18 +0200 Subject: [PATCH 02/11] fix: Remove fifo queues --- lambdas/functions/control-plane/src/aws/sqs.test.ts | 4 ++-- lambdas/functions/control-plane/src/aws/sqs.ts | 3 +-- lambdas/functions/webhook/src/ConfigLoader.test.ts | 5 ----- lambdas/functions/webhook/src/runners/dispatch.test.ts | 2 -- lambdas/functions/webhook/src/runners/dispatch.ts | 1 - 5 files changed, 3 insertions(+), 12 deletions(-) diff --git a/lambdas/functions/control-plane/src/aws/sqs.test.ts b/lambdas/functions/control-plane/src/aws/sqs.test.ts index 693d6c1321..d75d4e0cf5 100644 --- a/lambdas/functions/control-plane/src/aws/sqs.test.ts +++ b/lambdas/functions/control-plane/src/aws/sqs.test.ts @@ -34,11 +34,11 @@ describe('Publish message to SQS', () => { }); // act - await publishMessage('test', 'https://sqs.eu-west-1.amazonaws.com/123456789/queued-builds.fifo'); + await publishMessage('test', 'https://sqs.eu-west-1.amazonaws.com/123456789/queued-builds'); // assert expect(mockSQSClient).toHaveReceivedCommandWith(SendMessageCommand, { - QueueUrl: 'https://sqs.eu-west-1.amazonaws.com/123456789/queued-builds.fifo', + QueueUrl: 'https://sqs.eu-west-1.amazonaws.com/123456789/queued-builds', MessageBody: 'test', MessageGroupId: '1', // Fifo queue }); diff --git a/lambdas/functions/control-plane/src/aws/sqs.ts b/lambdas/functions/control-plane/src/aws/sqs.ts index 3ff51d7ca1..10a770dc00 100644 --- a/lambdas/functions/control-plane/src/aws/sqs.ts +++ b/lambdas/functions/control-plane/src/aws/sqs.ts @@ -18,8 +18,7 @@ export async function publishMessage(message: string, queueUrl: string, delayInS const messageCommand = new SendMessageCommand({ QueueUrl: queueUrl, MessageBody: message, - DelaySeconds: delayInSeconds, - MessageGroupId: queueUrl.endsWith('.fifo') ? '1' : undefined, + DelaySeconds: delayInSeconds }); try { diff --git a/lambdas/functions/webhook/src/ConfigLoader.test.ts b/lambdas/functions/webhook/src/ConfigLoader.test.ts index 4063630760..5b4c52983d 100644 --- a/lambdas/functions/webhook/src/ConfigLoader.test.ts +++ b/lambdas/functions/webhook/src/ConfigLoader.test.ts @@ -29,7 +29,6 @@ describe('ConfigLoader Tests', () => { { id: '1', arn: 'arn:aws:sqs:us-east-1:123456789012:queue1', - fifo: false, matcherConfig: { labelMatchers: [['label1', 'label2']], exactMatch: true, @@ -100,7 +99,6 @@ describe('ConfigLoader Tests', () => { { id: '1', arn: 'arn:aws:sqs:us-east-1:123456789012:queue1', - fifo: false, matcherConfig: { labelMatchers: [['label1', 'label2']], exactMatch: true, @@ -131,7 +129,6 @@ describe('ConfigLoader Tests', () => { { id: '1', arn: 'arn:aws:sqs:us-east-1:123456789012:queue1', - fifo: false, matcherConfig: { labelMatchers: [['label1', 'label2']], exactMatch: true, @@ -211,7 +208,6 @@ describe('ConfigLoader Tests', () => { const matcherConfig: RunnerMatcherConfig[] = [ { arn: 'arn:aws:sqs:eu-central-1:123456:npalm-default-queued-builds', - fifo: true, id: 'https://sqs.eu-central-1.amazonaws.com/123456/npalm-default-queued-builds', matcherConfig: { exactMatch: true, @@ -248,7 +244,6 @@ describe('ConfigLoader Tests', () => { const matcherConfig: RunnerMatcherConfig[] = [ { arn: 'arn:aws:sqs:eu-central-1:123456:npalm-default-queued-builds', - fifo: true, id: 'https://sqs.eu-central-1.amazonaws.com/123456/npalm-default-queued-builds', matcherConfig: { exactMatch: true, diff --git a/lambdas/functions/webhook/src/runners/dispatch.test.ts b/lambdas/functions/webhook/src/runners/dispatch.test.ts index 902022c410..3b6107ca79 100644 --- a/lambdas/functions/webhook/src/runners/dispatch.test.ts +++ b/lambdas/functions/webhook/src/runners/dispatch.test.ts @@ -101,7 +101,6 @@ describe('Dispatcher', () => { eventType: 'workflow_job', installationId: 0, queueId: runnerConfig[0].id, - queueFifo: false, repoOwnerType: 'Organization', }); }); @@ -149,7 +148,6 @@ describe('Dispatcher', () => { eventType: 'workflow_job', installationId: 0, queueId: 'match', - queueFifo: false, repoOwnerType: 'Organization', }); }); diff --git a/lambdas/functions/webhook/src/runners/dispatch.ts b/lambdas/functions/webhook/src/runners/dispatch.ts index d628f4c846..a7a3a0307c 100644 --- a/lambdas/functions/webhook/src/runners/dispatch.ts +++ b/lambdas/functions/webhook/src/runners/dispatch.ts @@ -44,7 +44,6 @@ async function handleWorkflowJob( eventType: githubEvent, installationId: body.installation?.id ?? 0, queueId: queue.id, - queueFifo: queue.fifo, repoOwnerType: body.repository.owner.type, }); logger.info(`Successfully dispatched job for ${body.repository.full_name} to the queue ${queue.id}`); From b85609a86698c06de421b0e90d52d4ae98daf3ba Mon Sep 17 00:00:00 2001 From: Niek Palm Date: Fri, 20 Dec 2024 10:20:12 +0100 Subject: [PATCH 03/11] fix: Remove fifo queues --- lambdas/functions/webhook/src/sqs/index.test.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/lambdas/functions/webhook/src/sqs/index.test.ts b/lambdas/functions/webhook/src/sqs/index.test.ts index 131de6eb7d..49b92e8559 100644 --- a/lambdas/functions/webhook/src/sqs/index.test.ts +++ b/lambdas/functions/webhook/src/sqs/index.test.ts @@ -42,5 +42,4 @@ describe('Test sending message to SQS.', () => { expect(mockSQS.sendMessage).toHaveBeenCalledWith(sqsMessage); await expect(result).resolves.not.toThrow(); }); - }); From bfc4e79e360dea456703862ebe74ebd0b0f7d345 Mon Sep 17 00:00:00 2001 From: Niek Palm Date: Fri, 20 Dec 2024 10:20:52 +0100 Subject: [PATCH 04/11] docs: auto update terraform docs --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 39bc16336a..5e35df4609 100644 --- a/README.md +++ b/README.md @@ -139,7 +139,7 @@ Talk to the forestkeepers in the `runners-channel` on Slack. | [enable\_cloudwatch\_agent](#input\_enable\_cloudwatch\_agent) | Enables the cloudwatch agent on the ec2 runner instances. The runner uses a default config that can be overridden via `cloudwatch_config`. | `bool` | `true` | no | | [enable\_ephemeral\_runners](#input\_enable\_ephemeral\_runners) | Enable ephemeral runners, runners will only be used once. | `bool` | `false` | no | | [enable\_event\_rule\_binaries\_syncer](#input\_enable\_event\_rule\_binaries\_syncer) | DEPRECATED: Replaced by `state_event_rule_binaries_syncer`. | `bool` | `null` | no | -| [enable\_fifo\_build\_queue](#input\_enable\_fifo\_build\_queue) | Enable a FIFO queue to keep the order of events received by the webhook. Recommended for repo level runners. | `bool` | `false` | no | +| [enable\_fifo\_build\_queue](#input\_enable\_fifo\_build\_queue) | (FEATURE REMOVED) Enable a FIFO queue to keep the order of events received by the webhook. Recommended for repo level runners. | `bool` | `false` | no | | [enable\_jit\_config](#input\_enable\_jit\_config) | Overwrite the default behavior for JIT configuration. By default JIT configuration is enabled for ephemeral runners and disabled for non-ephemeral runners. In case of GHES check first if the JIT config API is avaialbe. In case you upgradeing from 3.x to 4.x you can set `enable_jit_config` to `false` to avoid a breaking change when having your own AMI. | `bool` | `null` | no | | [enable\_job\_queued\_check](#input\_enable\_job\_queued\_check) | Only scale if the job event received by the scale up lambda is in the queued state. By default enabled for non ephemeral runners and disabled for ephemeral. Set this variable to overwrite the default behavior. | `bool` | `null` | no | | [enable\_managed\_runner\_security\_group](#input\_enable\_managed\_runner\_security\_group) | Enables creation of the default managed security group. Unmanaged security groups can be specified via `runner_additional_security_group_ids`. | `bool` | `true` | no | From 1371153891a8d8f8edc874c8ee845939eef20e03 Mon Sep 17 00:00:00 2001 From: Niek Palm Date: Fri, 16 Aug 2024 13:08:52 +0200 Subject: [PATCH 05/11] fix: Remove fifo queues --- examples/default/main.tf | 3 --- main.tf | 13 +++++-------- variables.tf | 11 ----------- 3 files changed, 5 insertions(+), 22 deletions(-) diff --git a/examples/default/main.tf b/examples/default/main.tf index 189116b8c9..f45196d7b6 100644 --- a/examples/default/main.tf +++ b/examples/default/main.tf @@ -84,9 +84,6 @@ module "runners" { delay_webhook_event = 5 runners_maximum_count = 2 - # set up a fifo queue to remain order - enable_fifo_build_queue = true - # override scaling down scale_down_schedule_expression = "cron(* * * * ? *)" diff --git a/main.tf b/main.tf index ce53b3746a..259d603b68 100644 --- a/main.tf +++ b/main.tf @@ -53,13 +53,11 @@ resource "aws_sqs_queue_policy" "build_queue_policy" { } resource "aws_sqs_queue" "queued_builds" { - name = "${var.prefix}-queued-builds" - delay_seconds = var.delay_webhook_event - visibility_timeout_seconds = var.runners_scale_up_lambda_timeout - message_retention_seconds = var.job_queue_retention_in_seconds - fifo_queue = var.enable_fifo_build_queue - receive_wait_time_seconds = 0 - content_based_deduplication = var.enable_fifo_build_queue + name = "${var.prefix}-queued-builds" + delay_seconds = var.delay_webhook_event + visibility_timeout_seconds = var.runners_scale_up_lambda_timeout + message_retention_seconds = var.job_queue_retention_in_seconds + receive_wait_time_seconds = 0 redrive_policy = var.redrive_build_queue.enabled ? jsonencode({ deadLetterTargetArn = aws_sqs_queue.queued_builds_dlq[0].arn, maxReceiveCount = var.redrive_build_queue.maxReceiveCount @@ -85,7 +83,6 @@ resource "aws_sqs_queue" "queued_builds_dlq" { sqs_managed_sse_enabled = var.queue_encryption.sqs_managed_sse_enabled kms_master_key_id = var.queue_encryption.kms_master_key_id kms_data_key_reuse_period_seconds = var.queue_encryption.kms_data_key_reuse_period_seconds - fifo_queue = var.enable_fifo_build_queue tags = var.tags } diff --git a/variables.tf b/variables.tf index de1cba7956..36b67a20a0 100644 --- a/variables.tf +++ b/variables.tf @@ -640,17 +640,6 @@ variable "lambda_principals" { default = [] } -variable "enable_fifo_build_queue" { - description = "(FEATURE REMOVED) Enable a FIFO queue to keep the order of events received by the webhook. Recommended for repo level runners." - type = bool - default = false - - validation { - condition = var.enable_fifo_build_queue == false - error_message = "The feature for FIFO build queue is not supported anymore." - } -} - variable "redrive_build_queue" { description = "Set options to attach (optional) a dead letter queue to the build queue, the queue between the webhook and the scale up lambda. You have the following options. 1. Disable by setting `enabled` to false. 2. Enable by setting `enabled` to `true`, `maxReceiveCount` to a number of max retries." type = object({ From ebfa2595a7d5f22857ef8e4afa0652045bab0952 Mon Sep 17 00:00:00 2001 From: forest-pr|bot Date: Fri, 16 Aug 2024 11:09:29 +0000 Subject: [PATCH 06/11] docs: auto update terraform docs --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 5e35df4609..b4db9efb22 100644 --- a/README.md +++ b/README.md @@ -139,7 +139,6 @@ Talk to the forestkeepers in the `runners-channel` on Slack. | [enable\_cloudwatch\_agent](#input\_enable\_cloudwatch\_agent) | Enables the cloudwatch agent on the ec2 runner instances. The runner uses a default config that can be overridden via `cloudwatch_config`. | `bool` | `true` | no | | [enable\_ephemeral\_runners](#input\_enable\_ephemeral\_runners) | Enable ephemeral runners, runners will only be used once. | `bool` | `false` | no | | [enable\_event\_rule\_binaries\_syncer](#input\_enable\_event\_rule\_binaries\_syncer) | DEPRECATED: Replaced by `state_event_rule_binaries_syncer`. | `bool` | `null` | no | -| [enable\_fifo\_build\_queue](#input\_enable\_fifo\_build\_queue) | (FEATURE REMOVED) Enable a FIFO queue to keep the order of events received by the webhook. Recommended for repo level runners. | `bool` | `false` | no | | [enable\_jit\_config](#input\_enable\_jit\_config) | Overwrite the default behavior for JIT configuration. By default JIT configuration is enabled for ephemeral runners and disabled for non-ephemeral runners. In case of GHES check first if the JIT config API is avaialbe. In case you upgradeing from 3.x to 4.x you can set `enable_jit_config` to `false` to avoid a breaking change when having your own AMI. | `bool` | `null` | no | | [enable\_job\_queued\_check](#input\_enable\_job\_queued\_check) | Only scale if the job event received by the scale up lambda is in the queued state. By default enabled for non ephemeral runners and disabled for ephemeral. Set this variable to overwrite the default behavior. | `bool` | `null` | no | | [enable\_managed\_runner\_security\_group](#input\_enable\_managed\_runner\_security\_group) | Enables creation of the default managed security group. Unmanaged security groups can be specified via `runner_additional_security_group_ids`. | `bool` | `true` | no | From ca4f1a13778792a82381dd823280d5012ef244b6 Mon Sep 17 00:00:00 2001 From: Niek Palm Date: Fri, 20 Dec 2024 10:26:59 +0100 Subject: [PATCH 07/11] formatting and fixing tests --- .../functions/control-plane/src/aws/sqs.test.ts | 17 ----------------- lambdas/functions/control-plane/src/aws/sqs.ts | 2 +- 2 files changed, 1 insertion(+), 18 deletions(-) diff --git a/lambdas/functions/control-plane/src/aws/sqs.test.ts b/lambdas/functions/control-plane/src/aws/sqs.test.ts index d75d4e0cf5..7a5a7ca6d9 100644 --- a/lambdas/functions/control-plane/src/aws/sqs.test.ts +++ b/lambdas/functions/control-plane/src/aws/sqs.test.ts @@ -27,23 +27,6 @@ describe('Publish message to SQS', () => { }); }); - it('should publish message to SQS Fifo queue', async () => { - // setup - mockSQSClient.on(SendMessageCommand).resolves({ - MessageId: '123', - }); - - // act - await publishMessage('test', 'https://sqs.eu-west-1.amazonaws.com/123456789/queued-builds'); - - // assert - expect(mockSQSClient).toHaveReceivedCommandWith(SendMessageCommand, { - QueueUrl: 'https://sqs.eu-west-1.amazonaws.com/123456789/queued-builds', - MessageBody: 'test', - MessageGroupId: '1', // Fifo queue - }); - }); - it('should log error if queue URL not found', async () => { // setup const logErrorSpy = jest.spyOn(logger, 'error'); diff --git a/lambdas/functions/control-plane/src/aws/sqs.ts b/lambdas/functions/control-plane/src/aws/sqs.ts index 10a770dc00..7ddfd94217 100644 --- a/lambdas/functions/control-plane/src/aws/sqs.ts +++ b/lambdas/functions/control-plane/src/aws/sqs.ts @@ -18,7 +18,7 @@ export async function publishMessage(message: string, queueUrl: string, delayInS const messageCommand = new SendMessageCommand({ QueueUrl: queueUrl, MessageBody: message, - DelaySeconds: delayInSeconds + DelaySeconds: delayInSeconds, }); try { From 157889f9df041dc0cd6dd79e5418dcb37ada6b1c Mon Sep 17 00:00:00 2001 From: Niek Palm Date: Fri, 20 Dec 2024 10:29:37 +0100 Subject: [PATCH 08/11] clean docs from fifo --- modules/multi-runner/notes.md | 1 + modules/multi-runner/variables.tf | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 modules/multi-runner/notes.md diff --git a/modules/multi-runner/notes.md b/modules/multi-runner/notes.md new file mode 100644 index 0000000000..3d34f71282 --- /dev/null +++ b/modules/multi-runner/notes.md @@ -0,0 +1 @@ +enable_workflow_job_events_queue diff --git a/modules/multi-runner/variables.tf b/modules/multi-runner/variables.tf index 7cc4568b3a..8185fe95ae 100644 --- a/modules/multi-runner/variables.tf +++ b/modules/multi-runner/variables.tf @@ -194,7 +194,6 @@ variable "multi_runner_config" { exactMatch: "If set to true all labels in the workflow job must match the GitHub labels (os, architecture and `self-hosted`). When false if __any__ workflow label matches it will trigger the webhook." priority: "If set it defines the priority of the matcher, the matcher with the lowest priority will be evaluated first. Default is 999, allowed values 0-999." } - fifo: "Enable a FIFO queue to remain the order of events received by the webhook. Suggest to set to true for repo level runners." redrive_build_queue: "Set options to attach (optional) a dead letter queue to the build queue, the queue between the webhook and the scale up lambda. You have the following options. 1. Disable by setting `enabled` to false. 2. Enable by setting `enabled` to `true`, `maxReceiveCount` to a number of max retries." } EOT From adf77a3b542294de99ac5a2837d2231185873446 Mon Sep 17 00:00:00 2001 From: philips-labs-pr|bot Date: Fri, 20 Dec 2024 09:30:16 +0000 Subject: [PATCH 09/11] docs: auto update terraform docs --- modules/multi-runner/README.md | 2 +- modules/webhook/README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/multi-runner/README.md b/modules/multi-runner/README.md index 625e865117..a1be05889d 100644 --- a/modules/multi-runner/README.md +++ b/modules/multi-runner/README.md @@ -150,7 +150,7 @@ module "multi-runner" { | [matcher\_config\_parameter\_store\_tier](#input\_matcher\_config\_parameter\_store\_tier) | The tier of the parameter store for the matcher configuration. Valid values are `Standard`, and `Advanced`. | `string` | `"Standard"` | no | | [metrics](#input\_metrics) | Configuration for metrics created by the module, by default metrics are disabled to avoid additional costs. When metrics are enable all metrics are created unless explicit configured otherwise. |
object({
enable = optional(bool, false)
namespace = optional(string, "GitHub Runners")
metric = optional(object({
enable_github_app_rate_limit = optional(bool, true)
enable_job_retry = optional(bool, true)
enable_spot_termination_warning = optional(bool, true)
}), {})
})
| `{}` | no | | [metrics\_namespace](#input\_metrics\_namespace) | The namespace for the metrics created by the module. Merics will only be created if explicit enabled. | `string` | `null` | no | -| [multi\_runner\_config](#input\_multi\_runner\_config) | multi\_runner\_config = {
runner\_config: {
runner\_os: "The EC2 Operating System type to use for action runner instances (linux,windows)."
runner\_architecture: "The platform architecture of the runner instance\_type."
runner\_metadata\_options: "(Optional) Metadata options for the ec2 runner instances."
ami\_filter: "(Optional) List of maps used to create the AMI filter for the action runner AMI. By default amazon linux 2 is used."
ami\_owners: "(Optional) The list of owners used to select the AMI of action runner instances."
create\_service\_linked\_role\_spot: (Optional) create the serviced linked role for spot instances that is required by the scale-up lambda.
credit\_specification: "(Optional) The credit specification of the runner instance\_type. Can be unset, `standard` or `unlimited`.
delay\_webhook\_event: "The number of seconds the event accepted by the webhook is invisible on the queue before the scale up lambda will receive the event."
disable\_runner\_autoupdate: "Disable the auto update of the github runner agent. Be aware there is a grace period of 30 days, see also the [GitHub article](https://github.blog/changelog/2022-02-01-github-actions-self-hosted-runners-can-now-disable-automatic-updates/)"
ebs\_optimized: "The EC2 EBS optimized configuration."
enable\_ephemeral\_runners: "Enable ephemeral runners, runners will only be used once."
enable\_job\_queued\_check: "Enables JIT configuration for creating runners instead of registration token based registraton. JIT configuration will only be applied for ephemeral runners. By default JIT confiugration is enabled for ephemeral runners an can be disabled via this override. When running on GHES without support for JIT configuration this variable should be set to true for ephemeral runners."
enable\_on\_demand\_failover\_for\_errors: "Enable on-demand failover. For example to fall back to on demand when no spot capacity is available the variable can be set to `InsufficientInstanceCapacity`. When not defined the default behavior is to retry later."
enable\_organization\_runners: "Register runners to organization, instead of repo level"
enable\_runner\_binaries\_syncer: "Option to disable the lambda to sync GitHub runner distribution, useful when using a pre-build AMI."
enable\_ssm\_on\_runners: "Enable to allow access the runner instances for debugging purposes via SSM. Note that this adds additional permissions to the runner instances."
enable\_userdata: "Should the userdata script be enabled for the runner. Set this to false if you are using your own prebuilt AMI."
instance\_allocation\_strategy: "The allocation strategy for spot instances. AWS recommends to use `capacity-optimized` however the AWS default is `lowest-price`."
instance\_max\_spot\_price: "Max price price for spot intances per hour. This variable will be passed to the create fleet as max spot price for the fleet."
instance\_target\_capacity\_type: "Default lifecycle used for runner instances, can be either `spot` or `on-demand`."
instance\_types: "List of instance types for the action runner. Defaults are based on runner\_os (al2023 for linux and Windows Server Core for win)."
job\_queue\_retention\_in\_seconds: "The number of seconds the job is held in the queue before it is purged"
minimum\_running\_time\_in\_minutes: "The time an ec2 action runner should be running at minimum before terminated if not busy."
pool\_runner\_owner: "The pool will deploy runners to the GitHub org ID, set this value to the org to which you want the runners deployed. Repo level is not supported."
runner\_additional\_security\_group\_ids: "List of additional security groups IDs to apply to the runner. If added outside the multi\_runner\_config block, the additional security group(s) will be applied to all runner configs. If added inside the multi\_runner\_config, the additional security group(s) will be applied to the individual runner."
runner\_as\_root: "Run the action runner under the root user. Variable `runner_run_as` will be ignored."
runner\_boot\_time\_in\_minutes: "The minimum time for an EC2 runner to boot and register as a runner."
runner\_disable\_default\_labels: "Disable default labels for the runners (os, architecture and `self-hosted`). If enabled, the runner will only have the extra labels provided in `runner_extra_labels`. In case you on own start script is used, this configuration parameter needs to be parsed via SSM."
runner\_extra\_labels: "Extra (custom) labels for the runners (GitHub). Separate each label by a comma. Labels checks on the webhook can be enforced by setting `multi_runner_config.matcherConfig.exactMatch`. GitHub read-only labels should not be provided."
runner\_group\_name: "Name of the runner group."
runner\_name\_prefix: "Prefix for the GitHub runner name."
runner\_run\_as: "Run the GitHub actions agent as user."
runners\_maximum\_count: "The maximum number of runners that will be created. Setting the variable to `-1` desiables the maximum check."
scale\_down\_schedule\_expression: "Scheduler expression to check every x for scale down."
scale\_up\_reserved\_concurrent\_executions: "Amount of reserved concurrent executions for the scale-up lambda function. A value of 0 disables lambda from being triggered and -1 removes any concurrency limitations."
userdata\_template: "Alternative user-data template, replacing the default template. By providing your own user\_data you have to take care of installing all required software, including the action runner. Variables userdata\_pre/post\_install are ignored."
enable\_jit\_config "Overwrite the default behavior for JIT configuration. By default JIT configuration is enabled for ephemeral runners and disabled for non-ephemeral runners. In case of GHES check first if the JIT config API is avaialbe. In case you upgradeing from 3.x to 4.x you can set `enable_jit_config` to `false` to avoid a breaking change when having your own AMI."
enable\_runner\_detailed\_monitoring: "Should detailed monitoring be enabled for the runner. Set this to true if you want to use detailed monitoring. See https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-cloudwatch-new.html for details."
enable\_cloudwatch\_agent: "Enabling the cloudwatch agent on the ec2 runner instances, the runner contains default config. Configuration can be overridden via `cloudwatch_config`."
cloudwatch\_config: "(optional) Replaces the module default cloudwatch log config. See https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Agent-Configuration-File-Details.html for details."
userdata\_pre\_install: "Script to be ran before the GitHub Actions runner is installed on the EC2 instances"
userdata\_post\_install: "Script to be ran after the GitHub Actions runner is installed on the EC2 instances"
runner\_ec2\_tags: "Map of tags that will be added to the launch template instance tag specifications."
runner\_iam\_role\_managed\_policy\_arns: "Attach AWS or customer-managed IAM policies (by ARN) to the runner IAM role"
vpc\_id: "The VPC for security groups of the action runners. If not set uses the value of `var.vpc_id`."
subnet\_ids: "List of subnets in which the action runners will be launched, the subnets needs to be subnets in the `vpc_id`. If not set, uses the value of `var.subnet_ids`."
idle\_config: "List of time period that can be defined as cron expression to keep a minimum amount of runners active instead of scaling down to 0. By defining this list you can ensure that in time periods that match the cron expression within 5 seconds a runner is kept idle."
runner\_log\_files: "(optional) Replaces the module default cloudwatch log config. See https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Agent-Configuration-File-Details.html for details."
block\_device\_mappings: "The EC2 instance block device configuration. Takes the following keys: `device_name`, `delete_on_termination`, `volume_type`, `volume_size`, `encrypted`, `iops`, `throughput`, `kms_key_id`, `snapshot_id`."
job\_retry: "Experimental! Can be removed / changed without trigger a major release. Configure job retries. The configuration enables job retries (for ephemeral runners). After creating the insances a message will be published to a job retry queue. The job retry check lambda is checking after a delay if the job is queued. If not the message will be published again on the scale-up (build queue). Using this feature can impact the reate limit of the GitHub app."
pool\_config: "The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for week days to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1. Use `schedule_expression_timezone` to override the schedule time zone (defaults to UTC)."
}
matcherConfig: {
labelMatchers: "The list of list of labels supported by the runner configuration. `[[self-hosted, linux, x64, example]]`"
exactMatch: "If set to true all labels in the workflow job must match the GitHub labels (os, architecture and `self-hosted`). When false if __any__ workflow label matches it will trigger the webhook."
priority: "If set it defines the priority of the matcher, the matcher with the lowest priority will be evaluated first. Default is 999, allowed values 0-999."
}
fifo: "Enable a FIFO queue to remain the order of events received by the webhook. Suggest to set to true for repo level runners."
redrive\_build\_queue: "Set options to attach (optional) a dead letter queue to the build queue, the queue between the webhook and the scale up lambda. You have the following options. 1. Disable by setting `enabled` to false. 2. Enable by setting `enabled` to `true`, `maxReceiveCount` to a number of max retries."
} |
map(object({
runner_config = object({
runner_os = string
runner_architecture = string
runner_metadata_options = optional(map(any), {
instance_metadata_tags = "enabled"
http_endpoint = "enabled"
http_tokens = "required"
http_put_response_hop_limit = 1
})
ami_filter = optional(map(list(string)), { state = ["available"] })
ami_owners = optional(list(string), ["amazon"])
ami_id_ssm_parameter_name = optional(string, null)
ami_kms_key_arn = optional(string, "")
create_service_linked_role_spot = optional(bool, false)
credit_specification = optional(string, null)
delay_webhook_event = optional(number, 30)
disable_runner_autoupdate = optional(bool, false)
ebs_optimized = optional(bool, false)
enable_ephemeral_runners = optional(bool, false)
enable_job_queued_check = optional(bool, null)
enable_on_demand_failover_for_errors = optional(list(string), [])
enable_organization_runners = optional(bool, false)
enable_runner_binaries_syncer = optional(bool, true)
enable_ssm_on_runners = optional(bool, false)
enable_userdata = optional(bool, true)
instance_allocation_strategy = optional(string, "lowest-price")
instance_max_spot_price = optional(string, null)
instance_target_capacity_type = optional(string, "spot")
instance_types = list(string)
job_queue_retention_in_seconds = optional(number, 86400)
minimum_running_time_in_minutes = optional(number, null)
pool_runner_owner = optional(string, null)
runner_as_root = optional(bool, false)
runner_boot_time_in_minutes = optional(number, 5)
runner_disable_default_labels = optional(bool, false)
runner_extra_labels = optional(list(string), [])
runner_group_name = optional(string, "Default")
runner_name_prefix = optional(string, "")
runner_run_as = optional(string, "ec2-user")
runners_maximum_count = number
runner_additional_security_group_ids = optional(list(string), [])
scale_down_schedule_expression = optional(string, "cron(*/5 * * * ? *)")
scale_up_reserved_concurrent_executions = optional(number, 1)
userdata_template = optional(string, null)
userdata_content = optional(string, null)
enable_jit_config = optional(bool, null)
enable_runner_detailed_monitoring = optional(bool, false)
enable_cloudwatch_agent = optional(bool, true)
cloudwatch_config = optional(string, null)
userdata_pre_install = optional(string, "")
userdata_post_install = optional(string, "")
runner_ec2_tags = optional(map(string), {})
runner_iam_role_managed_policy_arns = optional(list(string), [])
vpc_id = optional(string, null)
subnet_ids = optional(list(string), null)
idle_config = optional(list(object({
cron = string
timeZone = string
idleCount = number
evictionStrategy = optional(string, "oldest_first")
})), [])
runner_log_files = optional(list(object({
log_group_name = string
prefix_log_group = bool
file_path = string
log_stream_name = string
})), null)
block_device_mappings = optional(list(object({
delete_on_termination = optional(bool, true)
device_name = optional(string, "/dev/xvda")
encrypted = optional(bool, true)
iops = optional(number)
kms_key_id = optional(string)
snapshot_id = optional(string)
throughput = optional(number)
volume_size = number
volume_type = optional(string, "gp3")
})), [{
volume_size = 30
}])
pool_config = optional(list(object({
schedule_expression = string
schedule_expression_timezone = optional(string)
size = number
})), [])
job_retry = optional(object({
enable = optional(bool, false)
delay_in_seconds = optional(number, 300)
delay_backoff = optional(number, 2)
lambda_memory_size = optional(number, 256)
lambda_timeout = optional(number, 30)
max_attempts = optional(number, 1)
}), {})
})
matcherConfig = object({
labelMatchers = list(list(string))
exactMatch = optional(bool, false)
priority = optional(number, 999)
})
fifo = optional(bool, false)
redrive_build_queue = optional(object({
enabled = bool
maxReceiveCount = number
}), {
enabled = false
maxReceiveCount = null
})
}))
| n/a | yes | +| [multi\_runner\_config](#input\_multi\_runner\_config) | multi\_runner\_config = {
runner\_config: {
runner\_os: "The EC2 Operating System type to use for action runner instances (linux,windows)."
runner\_architecture: "The platform architecture of the runner instance\_type."
runner\_metadata\_options: "(Optional) Metadata options for the ec2 runner instances."
ami\_filter: "(Optional) List of maps used to create the AMI filter for the action runner AMI. By default amazon linux 2 is used."
ami\_owners: "(Optional) The list of owners used to select the AMI of action runner instances."
create\_service\_linked\_role\_spot: (Optional) create the serviced linked role for spot instances that is required by the scale-up lambda.
credit\_specification: "(Optional) The credit specification of the runner instance\_type. Can be unset, `standard` or `unlimited`.
delay\_webhook\_event: "The number of seconds the event accepted by the webhook is invisible on the queue before the scale up lambda will receive the event."
disable\_runner\_autoupdate: "Disable the auto update of the github runner agent. Be aware there is a grace period of 30 days, see also the [GitHub article](https://github.blog/changelog/2022-02-01-github-actions-self-hosted-runners-can-now-disable-automatic-updates/)"
ebs\_optimized: "The EC2 EBS optimized configuration."
enable\_ephemeral\_runners: "Enable ephemeral runners, runners will only be used once."
enable\_job\_queued\_check: "Enables JIT configuration for creating runners instead of registration token based registraton. JIT configuration will only be applied for ephemeral runners. By default JIT confiugration is enabled for ephemeral runners an can be disabled via this override. When running on GHES without support for JIT configuration this variable should be set to true for ephemeral runners."
enable\_on\_demand\_failover\_for\_errors: "Enable on-demand failover. For example to fall back to on demand when no spot capacity is available the variable can be set to `InsufficientInstanceCapacity`. When not defined the default behavior is to retry later."
enable\_organization\_runners: "Register runners to organization, instead of repo level"
enable\_runner\_binaries\_syncer: "Option to disable the lambda to sync GitHub runner distribution, useful when using a pre-build AMI."
enable\_ssm\_on\_runners: "Enable to allow access the runner instances for debugging purposes via SSM. Note that this adds additional permissions to the runner instances."
enable\_userdata: "Should the userdata script be enabled for the runner. Set this to false if you are using your own prebuilt AMI."
instance\_allocation\_strategy: "The allocation strategy for spot instances. AWS recommends to use `capacity-optimized` however the AWS default is `lowest-price`."
instance\_max\_spot\_price: "Max price price for spot intances per hour. This variable will be passed to the create fleet as max spot price for the fleet."
instance\_target\_capacity\_type: "Default lifecycle used for runner instances, can be either `spot` or `on-demand`."
instance\_types: "List of instance types for the action runner. Defaults are based on runner\_os (al2023 for linux and Windows Server Core for win)."
job\_queue\_retention\_in\_seconds: "The number of seconds the job is held in the queue before it is purged"
minimum\_running\_time\_in\_minutes: "The time an ec2 action runner should be running at minimum before terminated if not busy."
pool\_runner\_owner: "The pool will deploy runners to the GitHub org ID, set this value to the org to which you want the runners deployed. Repo level is not supported."
runner\_additional\_security\_group\_ids: "List of additional security groups IDs to apply to the runner. If added outside the multi\_runner\_config block, the additional security group(s) will be applied to all runner configs. If added inside the multi\_runner\_config, the additional security group(s) will be applied to the individual runner."
runner\_as\_root: "Run the action runner under the root user. Variable `runner_run_as` will be ignored."
runner\_boot\_time\_in\_minutes: "The minimum time for an EC2 runner to boot and register as a runner."
runner\_disable\_default\_labels: "Disable default labels for the runners (os, architecture and `self-hosted`). If enabled, the runner will only have the extra labels provided in `runner_extra_labels`. In case you on own start script is used, this configuration parameter needs to be parsed via SSM."
runner\_extra\_labels: "Extra (custom) labels for the runners (GitHub). Separate each label by a comma. Labels checks on the webhook can be enforced by setting `multi_runner_config.matcherConfig.exactMatch`. GitHub read-only labels should not be provided."
runner\_group\_name: "Name of the runner group."
runner\_name\_prefix: "Prefix for the GitHub runner name."
runner\_run\_as: "Run the GitHub actions agent as user."
runners\_maximum\_count: "The maximum number of runners that will be created. Setting the variable to `-1` desiables the maximum check."
scale\_down\_schedule\_expression: "Scheduler expression to check every x for scale down."
scale\_up\_reserved\_concurrent\_executions: "Amount of reserved concurrent executions for the scale-up lambda function. A value of 0 disables lambda from being triggered and -1 removes any concurrency limitations."
userdata\_template: "Alternative user-data template, replacing the default template. By providing your own user\_data you have to take care of installing all required software, including the action runner. Variables userdata\_pre/post\_install are ignored."
enable\_jit\_config "Overwrite the default behavior for JIT configuration. By default JIT configuration is enabled for ephemeral runners and disabled for non-ephemeral runners. In case of GHES check first if the JIT config API is avaialbe. In case you upgradeing from 3.x to 4.x you can set `enable_jit_config` to `false` to avoid a breaking change when having your own AMI."
enable\_runner\_detailed\_monitoring: "Should detailed monitoring be enabled for the runner. Set this to true if you want to use detailed monitoring. See https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-cloudwatch-new.html for details."
enable\_cloudwatch\_agent: "Enabling the cloudwatch agent on the ec2 runner instances, the runner contains default config. Configuration can be overridden via `cloudwatch_config`."
cloudwatch\_config: "(optional) Replaces the module default cloudwatch log config. See https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Agent-Configuration-File-Details.html for details."
userdata\_pre\_install: "Script to be ran before the GitHub Actions runner is installed on the EC2 instances"
userdata\_post\_install: "Script to be ran after the GitHub Actions runner is installed on the EC2 instances"
runner\_ec2\_tags: "Map of tags that will be added to the launch template instance tag specifications."
runner\_iam\_role\_managed\_policy\_arns: "Attach AWS or customer-managed IAM policies (by ARN) to the runner IAM role"
vpc\_id: "The VPC for security groups of the action runners. If not set uses the value of `var.vpc_id`."
subnet\_ids: "List of subnets in which the action runners will be launched, the subnets needs to be subnets in the `vpc_id`. If not set, uses the value of `var.subnet_ids`."
idle\_config: "List of time period that can be defined as cron expression to keep a minimum amount of runners active instead of scaling down to 0. By defining this list you can ensure that in time periods that match the cron expression within 5 seconds a runner is kept idle."
runner\_log\_files: "(optional) Replaces the module default cloudwatch log config. See https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Agent-Configuration-File-Details.html for details."
block\_device\_mappings: "The EC2 instance block device configuration. Takes the following keys: `device_name`, `delete_on_termination`, `volume_type`, `volume_size`, `encrypted`, `iops`, `throughput`, `kms_key_id`, `snapshot_id`."
job\_retry: "Experimental! Can be removed / changed without trigger a major release. Configure job retries. The configuration enables job retries (for ephemeral runners). After creating the insances a message will be published to a job retry queue. The job retry check lambda is checking after a delay if the job is queued. If not the message will be published again on the scale-up (build queue). Using this feature can impact the reate limit of the GitHub app."
pool\_config: "The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for week days to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1. Use `schedule_expression_timezone` to override the schedule time zone (defaults to UTC)."
}
matcherConfig: {
labelMatchers: "The list of list of labels supported by the runner configuration. `[[self-hosted, linux, x64, example]]`"
exactMatch: "If set to true all labels in the workflow job must match the GitHub labels (os, architecture and `self-hosted`). When false if __any__ workflow label matches it will trigger the webhook."
priority: "If set it defines the priority of the matcher, the matcher with the lowest priority will be evaluated first. Default is 999, allowed values 0-999."
}
redrive\_build\_queue: "Set options to attach (optional) a dead letter queue to the build queue, the queue between the webhook and the scale up lambda. You have the following options. 1. Disable by setting `enabled` to false. 2. Enable by setting `enabled` to `true`, `maxReceiveCount` to a number of max retries."
} |
map(object({
runner_config = object({
runner_os = string
runner_architecture = string
runner_metadata_options = optional(map(any), {
instance_metadata_tags = "enabled"
http_endpoint = "enabled"
http_tokens = "required"
http_put_response_hop_limit = 1
})
ami_filter = optional(map(list(string)), { state = ["available"] })
ami_owners = optional(list(string), ["amazon"])
ami_id_ssm_parameter_name = optional(string, null)
ami_kms_key_arn = optional(string, "")
create_service_linked_role_spot = optional(bool, false)
credit_specification = optional(string, null)
delay_webhook_event = optional(number, 30)
disable_runner_autoupdate = optional(bool, false)
ebs_optimized = optional(bool, false)
enable_ephemeral_runners = optional(bool, false)
enable_job_queued_check = optional(bool, null)
enable_on_demand_failover_for_errors = optional(list(string), [])
enable_organization_runners = optional(bool, false)
enable_runner_binaries_syncer = optional(bool, true)
enable_ssm_on_runners = optional(bool, false)
enable_userdata = optional(bool, true)
instance_allocation_strategy = optional(string, "lowest-price")
instance_max_spot_price = optional(string, null)
instance_target_capacity_type = optional(string, "spot")
instance_types = list(string)
job_queue_retention_in_seconds = optional(number, 86400)
minimum_running_time_in_minutes = optional(number, null)
pool_runner_owner = optional(string, null)
runner_as_root = optional(bool, false)
runner_boot_time_in_minutes = optional(number, 5)
runner_disable_default_labels = optional(bool, false)
runner_extra_labels = optional(list(string), [])
runner_group_name = optional(string, "Default")
runner_name_prefix = optional(string, "")
runner_run_as = optional(string, "ec2-user")
runners_maximum_count = number
runner_additional_security_group_ids = optional(list(string), [])
scale_down_schedule_expression = optional(string, "cron(*/5 * * * ? *)")
scale_up_reserved_concurrent_executions = optional(number, 1)
userdata_template = optional(string, null)
userdata_content = optional(string, null)
enable_jit_config = optional(bool, null)
enable_runner_detailed_monitoring = optional(bool, false)
enable_cloudwatch_agent = optional(bool, true)
cloudwatch_config = optional(string, null)
userdata_pre_install = optional(string, "")
userdata_post_install = optional(string, "")
runner_ec2_tags = optional(map(string), {})
runner_iam_role_managed_policy_arns = optional(list(string), [])
vpc_id = optional(string, null)
subnet_ids = optional(list(string), null)
idle_config = optional(list(object({
cron = string
timeZone = string
idleCount = number
evictionStrategy = optional(string, "oldest_first")
})), [])
runner_log_files = optional(list(object({
log_group_name = string
prefix_log_group = bool
file_path = string
log_stream_name = string
})), null)
block_device_mappings = optional(list(object({
delete_on_termination = optional(bool, true)
device_name = optional(string, "/dev/xvda")
encrypted = optional(bool, true)
iops = optional(number)
kms_key_id = optional(string)
snapshot_id = optional(string)
throughput = optional(number)
volume_size = number
volume_type = optional(string, "gp3")
})), [{
volume_size = 30
}])
pool_config = optional(list(object({
schedule_expression = string
schedule_expression_timezone = optional(string)
size = number
})), [])
job_retry = optional(object({
enable = optional(bool, false)
delay_in_seconds = optional(number, 300)
delay_backoff = optional(number, 2)
lambda_memory_size = optional(number, 256)
lambda_timeout = optional(number, 30)
max_attempts = optional(number, 1)
}), {})
})
matcherConfig = object({
labelMatchers = list(list(string))
exactMatch = optional(bool, false)
priority = optional(number, 999)
})
redrive_build_queue = optional(object({
enabled = bool
maxReceiveCount = number
}), {
enabled = false
maxReceiveCount = null
})
}))
| n/a | yes | | [pool\_lambda\_reserved\_concurrent\_executions](#input\_pool\_lambda\_reserved\_concurrent\_executions) | Amount of reserved concurrent executions for the scale-up lambda function. A value of 0 disables lambda from being triggered and -1 removes any concurrency limitations. | `number` | `1` | no | | [pool\_lambda\_timeout](#input\_pool\_lambda\_timeout) | Time out for the pool lambda in seconds. | `number` | `60` | no | | [prefix](#input\_prefix) | The prefix used for naming resources | `string` | `"github-actions"` | no | diff --git a/modules/webhook/README.md b/modules/webhook/README.md index c066cc51c9..72b2ee4687 100644 --- a/modules/webhook/README.md +++ b/modules/webhook/README.md @@ -87,7 +87,7 @@ yarn run dist | [repository\_white\_list](#input\_repository\_white\_list) | List of github repository full names (owner/repo\_name) that will be allowed to use the github app. Leave empty for no filtering. | `list(string)` | `[]` | no | | [role\_path](#input\_role\_path) | The path that will be added to the role; if not set, the environment name will be used. | `string` | `null` | no | | [role\_permissions\_boundary](#input\_role\_permissions\_boundary) | Permissions boundary that will be added to the created role for the lambda. | `string` | `null` | no | -| [runner\_matcher\_config](#input\_runner\_matcher\_config) | SQS queue to publish accepted build events based on the runner type. When exact match is disabled the webhook accepts the event if one of the workflow job labels is part of the matcher. The priority defines the order the matchers are applied. |
map(object({
arn = string
id = string
fifo = bool
matcherConfig = object({
labelMatchers = list(list(string))
exactMatch = bool
priority = optional(number, 999)
})
}))
| n/a | yes | +| [runner\_matcher\_config](#input\_runner\_matcher\_config) | SQS queue to publish accepted build events based on the runner type. When exact match is disabled the webhook accepts the event if one of the workflow job labels is part of the matcher. The priority defines the order the matchers are applied. |
map(object({
arn = string
id = string
matcherConfig = object({
labelMatchers = list(list(string))
exactMatch = bool
priority = optional(number, 999)
})
}))
| n/a | yes | | [ssm\_paths](#input\_ssm\_paths) | The root path used in SSM to store configuration and secrets. |
object({
root = string
webhook = string
})
| n/a | yes | | [tags](#input\_tags) | Map of tags that will be added to created resources. By default resources will be tagged with name and environment. | `map(string)` | `{}` | no | | [tracing\_config](#input\_tracing\_config) | Configuration for lambda tracing. |
object({
mode = optional(string, null)
capture_http_requests = optional(bool, false)
capture_error = optional(bool, false)
})
| `{}` | no | From 365661e191634e2c1e9f92ec14a5a3d6feae12d7 Mon Sep 17 00:00:00 2001 From: Niek Palm Date: Fri, 20 Dec 2024 10:33:15 +0100 Subject: [PATCH 10/11] cleanup --- docs/configuration.md | 1 - lambdas/functions/webhook/src/sqs/index.test.ts | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index 94c071eafb..bbd3d836d0 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -120,7 +120,6 @@ You can configure runners to be ephemeral, in which case runners will be used on - The scale down lambda is still active, and should only remove orphan instances. But there is no strict check in place. So ensure you configure the `minimum_running_time_in_minutes` to a value that is high enough to get your runner booted and connected to avoid it being terminated before executing a job. - The messages sent from the webhook lambda to the scale-up lambda are by default delayed by SQS, to give available runners a chance to start the job before the decision is made to scale more runners. For ephemeral runners there is no need to wait. Set `delay_webhook_event` to `0`. - All events in the queue will lead to a new runner created by the lambda. By setting `enable_job_queued_check` to `true` you can enforce a rule of only creating a runner if the event has a correlated queued job. Setting this can avoid creating useless runners. For example, a job getting cancelled before a runner was created or if the job was already picked up by another runner. We suggest using this in combination with a pool. -- To ensure runners are created in the same order GitHub sends the events, by default we use a FIFO queue. This is mainly relevant for repo level runners. For ephemeral runners you can set `enable_fifo_build_queue` to `false`. - Errors related to scaling should be retried via SQS. You can configure `job_queue_retention_in_seconds` and `redrive_build_queue` to tune the behavior. We have no mechanism to avoid events never being processed, which means potentially no runner gets created and the job in GitHub times out in 6 hours. The example for [ephemeral runners](examples/ephemeral.md) is based on the [default example](examples/default.md). Have look at the diff to see the major configuration differences. diff --git a/lambdas/functions/webhook/src/sqs/index.test.ts b/lambdas/functions/webhook/src/sqs/index.test.ts index 49b92e8559..5b7c445407 100644 --- a/lambdas/functions/webhook/src/sqs/index.test.ts +++ b/lambdas/functions/webhook/src/sqs/index.test.ts @@ -1,5 +1,5 @@ import { SendMessageCommandInput } from '@aws-sdk/client-sqs'; -import { ActionRequestMessage, sendActionRequest } from '.'; +import { sendActionRequest } from '.'; const mockSQS = { sendMessage: jest.fn(() => { From c50690790ac3644801745dbff3090b03cceda558 Mon Sep 17 00:00:00 2001 From: philips-labs-pr|bot Date: Fri, 20 Dec 2024 13:13:03 +0000 Subject: [PATCH 11/11] docs: auto update terraform docs --- README.md | 1 - modules/multi-runner/README.md | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 6bff44f97e..4c793b6686 100644 --- a/README.md +++ b/README.md @@ -224,7 +224,6 @@ Talk to the forestkeepers in the `runners-channel` on Slack. | [runners\_maximum\_count](#input\_runners\_maximum\_count) | The maximum number of runners that will be created. | `number` | `3` | no | | [runners\_scale\_down\_lambda\_memory\_size](#input\_runners\_scale\_down\_lambda\_memory\_size) | Memory size limit in MB for scale-down lambda. | `number` | `512` | no | | [runners\_scale\_down\_lambda\_timeout](#input\_runners\_scale\_down\_lambda\_timeout) | Time out for the scale down lambda in seconds. | `number` | `60` | no | -| [runners\_scale\_up\_Lambda\_memory\_size](#input\_runners\_scale\_up\_Lambda\_memory\_size) | Memory size limit in MB for scale-up lambda. | `number` | `null` | no | | [runners\_scale\_up\_lambda\_memory\_size](#input\_runners\_scale\_up\_lambda\_memory\_size) | Memory size limit in MB for scale-up lambda. | `number` | `512` | no | | [runners\_scale\_up\_lambda\_timeout](#input\_runners\_scale\_up\_lambda\_timeout) | Time out for the scale up lambda in seconds. | `number` | `30` | no | | [runners\_ssm\_housekeeper](#input\_runners\_ssm\_housekeeper) | Configuration for the SSM housekeeper lambda. This lambda deletes token / JIT config from SSM.

`schedule_expression`: is used to configure the schedule for the lambda.
`enabled`: enable or disable the lambda trigger via the EventBridge.
`lambda_memory_size`: lambda memery size limit.
`lambda_timeout`: timeout for the lambda in seconds.
`config`: configuration for the lambda function. Token path will be read by default from the module. |
object({
schedule_expression = optional(string, "rate(1 day)")
enabled = optional(bool, true)
lambda_memory_size = optional(number, 512)
lambda_timeout = optional(number, 60)
config = object({
tokenPath = optional(string)
minimumDaysOld = optional(number, 1)
dryRun = optional(bool, false)
})
})
|
{
"config": {}
}
| no | diff --git a/modules/multi-runner/README.md b/modules/multi-runner/README.md index 6fbbc868f7..0e18d69a2e 100644 --- a/modules/multi-runner/README.md +++ b/modules/multi-runner/README.md @@ -150,7 +150,7 @@ module "multi-runner" { | [matcher\_config\_parameter\_store\_tier](#input\_matcher\_config\_parameter\_store\_tier) | The tier of the parameter store for the matcher configuration. Valid values are `Standard`, and `Advanced`. | `string` | `"Standard"` | no | | [metrics](#input\_metrics) | Configuration for metrics created by the module, by default metrics are disabled to avoid additional costs. When metrics are enable all metrics are created unless explicit configured otherwise. |
object({
enable = optional(bool, false)
namespace = optional(string, "GitHub Runners")
metric = optional(object({
enable_github_app_rate_limit = optional(bool, true)
enable_job_retry = optional(bool, true)
enable_spot_termination_warning = optional(bool, true)
}), {})
})
| `{}` | no | | [metrics\_namespace](#input\_metrics\_namespace) | The namespace for the metrics created by the module. Merics will only be created if explicit enabled. | `string` | `null` | no | -| [multi\_runner\_config](#input\_multi\_runner\_config) | multi\_runner\_config = {
runner\_config: {
runner\_os: "The EC2 Operating System type to use for action runner instances (linux,windows)."
runner\_architecture: "The platform architecture of the runner instance\_type."
runner\_metadata\_options: "(Optional) Metadata options for the ec2 runner instances."
ami\_filter: "(Optional) List of maps used to create the AMI filter for the action runner AMI. By default amazon linux 2 is used."
ami\_owners: "(Optional) The list of owners used to select the AMI of action runner instances."
create\_service\_linked\_role\_spot: (Optional) create the serviced linked role for spot instances that is required by the scale-up lambda.
credit\_specification: "(Optional) The credit specification of the runner instance\_type. Can be unset, `standard` or `unlimited`.
delay\_webhook\_event: "The number of seconds the event accepted by the webhook is invisible on the queue before the scale up lambda will receive the event."
disable\_runner\_autoupdate: "Disable the auto update of the github runner agent. Be aware there is a grace period of 30 days, see also the [GitHub article](https://github.blog/changelog/2022-02-01-github-actions-self-hosted-runners-can-now-disable-automatic-updates/)"
ebs\_optimized: "The EC2 EBS optimized configuration."
enable\_ephemeral\_runners: "Enable ephemeral runners, runners will only be used once."
enable\_job\_queued\_check: "Enables JIT configuration for creating runners instead of registration token based registraton. JIT configuration will only be applied for ephemeral runners. By default JIT confiugration is enabled for ephemeral runners an can be disabled via this override. When running on GHES without support for JIT configuration this variable should be set to true for ephemeral runners."
enable\_on\_demand\_failover\_for\_errors: "Enable on-demand failover. For example to fall back to on demand when no spot capacity is available the variable can be set to `InsufficientInstanceCapacity`. When not defined the default behavior is to retry later."
enable\_organization\_runners: "Register runners to organization, instead of repo level"
enable\_runner\_binaries\_syncer: "Option to disable the lambda to sync GitHub runner distribution, useful when using a pre-build AMI."
enable\_ssm\_on\_runners: "Enable to allow access the runner instances for debugging purposes via SSM. Note that this adds additional permissions to the runner instances."
enable\_userdata: "Should the userdata script be enabled for the runner. Set this to false if you are using your own prebuilt AMI."
instance\_allocation\_strategy: "The allocation strategy for spot instances. AWS recommends to use `capacity-optimized` however the AWS default is `lowest-price`."
instance\_max\_spot\_price: "Max price price for spot intances per hour. This variable will be passed to the create fleet as max spot price for the fleet."
instance\_target\_capacity\_type: "Default lifecycle used for runner instances, can be either `spot` or `on-demand`."
instance\_types: "List of instance types for the action runner. Defaults are based on runner\_os (al2023 for linux and Windows Server Core for win)."
job\_queue\_retention\_in\_seconds: "The number of seconds the job is held in the queue before it is purged"
minimum\_running\_time\_in\_minutes: "The time an ec2 action runner should be running at minimum before terminated if not busy."
pool\_runner\_owner: "The pool will deploy runners to the GitHub org ID, set this value to the org to which you want the runners deployed. Repo level is not supported."
runner\_additional\_security\_group\_ids: "List of additional security groups IDs to apply to the runner. If added outside the multi\_runner\_config block, the additional security group(s) will be applied to all runner configs. If added inside the multi\_runner\_config, the additional security group(s) will be applied to the individual runner."
runner\_as\_root: "Run the action runner under the root user. Variable `runner_run_as` will be ignored."
runner\_boot\_time\_in\_minutes: "The minimum time for an EC2 runner to boot and register as a runner."
runner\_disable\_default\_labels: "Disable default labels for the runners (os, architecture and `self-hosted`). If enabled, the runner will only have the extra labels provided in `runner_extra_labels`. In case you on own start script is used, this configuration parameter needs to be parsed via SSM."
runner\_extra\_labels: "Extra (custom) labels for the runners (GitHub). Separate each label by a comma. Labels checks on the webhook can be enforced by setting `multi_runner_config.matcherConfig.exactMatch`. GitHub read-only labels should not be provided."
runner\_group\_name: "Name of the runner group."
runner\_name\_prefix: "Prefix for the GitHub runner name."
runner\_run\_as: "Run the GitHub actions agent as user."
runners\_maximum\_count: "The maximum number of runners that will be created. Setting the variable to `-1` desiables the maximum check."
scale\_down\_schedule\_expression: "Scheduler expression to check every x for scale down."
scale\_up\_reserved\_concurrent\_executions: "Amount of reserved concurrent executions for the scale-up lambda function. A value of 0 disables lambda from being triggered and -1 removes any concurrency limitations."
userdata\_template: "Alternative user-data template, replacing the default template. By providing your own user\_data you have to take care of installing all required software, including the action runner. Variables userdata\_pre/post\_install are ignored."
enable\_jit\_config "Overwrite the default behavior for JIT configuration. By default JIT configuration is enabled for ephemeral runners and disabled for non-ephemeral runners. In case of GHES check first if the JIT config API is avaialbe. In case you upgradeing from 3.x to 4.x you can set `enable_jit_config` to `false` to avoid a breaking change when having your own AMI."
enable\_runner\_detailed\_monitoring: "Should detailed monitoring be enabled for the runner. Set this to true if you want to use detailed monitoring. See https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-cloudwatch-new.html for details."
enable\_cloudwatch\_agent: "Enabling the cloudwatch agent on the ec2 runner instances, the runner contains default config. Configuration can be overridden via `cloudwatch_config`."
cloudwatch\_config: "(optional) Replaces the module default cloudwatch log config. See https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Agent-Configuration-File-Details.html for details."
userdata\_pre\_install: "Script to be ran before the GitHub Actions runner is installed on the EC2 instances"
userdata\_post\_install: "Script to be ran after the GitHub Actions runner is installed on the EC2 instances"
runner\_hook\_job\_started: "Script to be ran in the runner environment at the beginning of every job"
runner\_hook\_job\_completed: "Script to be ran in the runner environment at the end of every job"
runner\_ec2\_tags: "Map of tags that will be added to the launch template instance tag specifications."
runner\_iam\_role\_managed\_policy\_arns: "Attach AWS or customer-managed IAM policies (by ARN) to the runner IAM role"
vpc\_id: "The VPC for security groups of the action runners. If not set uses the value of `var.vpc_id`."
subnet\_ids: "List of subnets in which the action runners will be launched, the subnets needs to be subnets in the `vpc_id`. If not set, uses the value of `var.subnet_ids`."
idle\_config: "List of time period that can be defined as cron expression to keep a minimum amount of runners active instead of scaling down to 0. By defining this list you can ensure that in time periods that match the cron expression within 5 seconds a runner is kept idle."
runner\_log\_files: "(optional) Replaces the module default cloudwatch log config. See https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Agent-Configuration-File-Details.html for details."
block\_device\_mappings: "The EC2 instance block device configuration. Takes the following keys: `device_name`, `delete_on_termination`, `volume_type`, `volume_size`, `encrypted`, `iops`, `throughput`, `kms_key_id`, `snapshot_id`."
job\_retry: "Experimental! Can be removed / changed without trigger a major release. Configure job retries. The configuration enables job retries (for ephemeral runners). After creating the insances a message will be published to a job retry queue. The job retry check lambda is checking after a delay if the job is queued. If not the message will be published again on the scale-up (build queue). Using this feature can impact the reate limit of the GitHub app."
pool\_config: "The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for week days to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1. Use `schedule_expression_timezone` to override the schedule time zone (defaults to UTC)."
}
matcherConfig: {
labelMatchers: "The list of list of labels supported by the runner configuration. `[[self-hosted, linux, x64, example]]`"
exactMatch: "If set to true all labels in the workflow job must match the GitHub labels (os, architecture and `self-hosted`). When false if __any__ workflow label matches it will trigger the webhook."
priority: "If set it defines the priority of the matcher, the matcher with the lowest priority will be evaluated first. Default is 999, allowed values 0-999."
}
fifo: "Enable a FIFO queue to remain the order of events received by the webhook. Suggest to set to true for repo level runners."
redrive\_build\_queue: "Set options to attach (optional) a dead letter queue to the build queue, the queue between the webhook and the scale up lambda. You have the following options. 1. Disable by setting `enabled` to false. 2. Enable by setting `enabled` to `true`, `maxReceiveCount` to a number of max retries."
} |
map(object({
runner_config = object({
runner_os = string
runner_architecture = string
runner_metadata_options = optional(map(any), {
instance_metadata_tags = "enabled"
http_endpoint = "enabled"
http_tokens = "required"
http_put_response_hop_limit = 1
})
ami_filter = optional(map(list(string)), { state = ["available"] })
ami_owners = optional(list(string), ["amazon"])
ami_id_ssm_parameter_name = optional(string, null)
ami_kms_key_arn = optional(string, "")
create_service_linked_role_spot = optional(bool, false)
credit_specification = optional(string, null)
delay_webhook_event = optional(number, 30)
disable_runner_autoupdate = optional(bool, false)
ebs_optimized = optional(bool, false)
enable_ephemeral_runners = optional(bool, false)
enable_job_queued_check = optional(bool, null)
enable_on_demand_failover_for_errors = optional(list(string), [])
enable_organization_runners = optional(bool, false)
enable_runner_binaries_syncer = optional(bool, true)
enable_ssm_on_runners = optional(bool, false)
enable_userdata = optional(bool, true)
instance_allocation_strategy = optional(string, "lowest-price")
instance_max_spot_price = optional(string, null)
instance_target_capacity_type = optional(string, "spot")
instance_types = list(string)
job_queue_retention_in_seconds = optional(number, 86400)
minimum_running_time_in_minutes = optional(number, null)
pool_runner_owner = optional(string, null)
runner_as_root = optional(bool, false)
runner_boot_time_in_minutes = optional(number, 5)
runner_disable_default_labels = optional(bool, false)
runner_extra_labels = optional(list(string), [])
runner_group_name = optional(string, "Default")
runner_name_prefix = optional(string, "")
runner_run_as = optional(string, "ec2-user")
runners_maximum_count = number
runner_additional_security_group_ids = optional(list(string), [])
scale_down_schedule_expression = optional(string, "cron(*/5 * * * ? *)")
scale_up_reserved_concurrent_executions = optional(number, 1)
userdata_template = optional(string, null)
userdata_content = optional(string, null)
enable_jit_config = optional(bool, null)
enable_runner_detailed_monitoring = optional(bool, false)
enable_cloudwatch_agent = optional(bool, true)
cloudwatch_config = optional(string, null)
userdata_pre_install = optional(string, "")
userdata_post_install = optional(string, "")
runner_hook_job_started = optional(string, "")
runner_hook_job_completed = optional(string, "")
runner_ec2_tags = optional(map(string), {})
runner_iam_role_managed_policy_arns = optional(list(string), [])
vpc_id = optional(string, null)
subnet_ids = optional(list(string), null)
idle_config = optional(list(object({
cron = string
timeZone = string
idleCount = number
evictionStrategy = optional(string, "oldest_first")
})), [])
runner_log_files = optional(list(object({
log_group_name = string
prefix_log_group = bool
file_path = string
log_stream_name = string
})), null)
block_device_mappings = optional(list(object({
delete_on_termination = optional(bool, true)
device_name = optional(string, "/dev/xvda")
encrypted = optional(bool, true)
iops = optional(number)
kms_key_id = optional(string)
snapshot_id = optional(string)
throughput = optional(number)
volume_size = number
volume_type = optional(string, "gp3")
})), [{
volume_size = 30
}])
pool_config = optional(list(object({
schedule_expression = string
schedule_expression_timezone = optional(string)
size = number
})), [])
job_retry = optional(object({
enable = optional(bool, false)
delay_in_seconds = optional(number, 300)
delay_backoff = optional(number, 2)
lambda_memory_size = optional(number, 256)
lambda_timeout = optional(number, 30)
max_attempts = optional(number, 1)
}), {})
})
matcherConfig = object({
labelMatchers = list(list(string))
exactMatch = optional(bool, false)
priority = optional(number, 999)
})
fifo = optional(bool, false)
redrive_build_queue = optional(object({
enabled = bool
maxReceiveCount = number
}), {
enabled = false
maxReceiveCount = null
})
}))
| n/a | yes | +| [multi\_runner\_config](#input\_multi\_runner\_config) | multi\_runner\_config = {
runner\_config: {
runner\_os: "The EC2 Operating System type to use for action runner instances (linux,windows)."
runner\_architecture: "The platform architecture of the runner instance\_type."
runner\_metadata\_options: "(Optional) Metadata options for the ec2 runner instances."
ami\_filter: "(Optional) List of maps used to create the AMI filter for the action runner AMI. By default amazon linux 2 is used."
ami\_owners: "(Optional) The list of owners used to select the AMI of action runner instances."
create\_service\_linked\_role\_spot: (Optional) create the serviced linked role for spot instances that is required by the scale-up lambda.
credit\_specification: "(Optional) The credit specification of the runner instance\_type. Can be unset, `standard` or `unlimited`.
delay\_webhook\_event: "The number of seconds the event accepted by the webhook is invisible on the queue before the scale up lambda will receive the event."
disable\_runner\_autoupdate: "Disable the auto update of the github runner agent. Be aware there is a grace period of 30 days, see also the [GitHub article](https://github.blog/changelog/2022-02-01-github-actions-self-hosted-runners-can-now-disable-automatic-updates/)"
ebs\_optimized: "The EC2 EBS optimized configuration."
enable\_ephemeral\_runners: "Enable ephemeral runners, runners will only be used once."
enable\_job\_queued\_check: "Enables JIT configuration for creating runners instead of registration token based registraton. JIT configuration will only be applied for ephemeral runners. By default JIT confiugration is enabled for ephemeral runners an can be disabled via this override. When running on GHES without support for JIT configuration this variable should be set to true for ephemeral runners."
enable\_on\_demand\_failover\_for\_errors: "Enable on-demand failover. For example to fall back to on demand when no spot capacity is available the variable can be set to `InsufficientInstanceCapacity`. When not defined the default behavior is to retry later."
enable\_organization\_runners: "Register runners to organization, instead of repo level"
enable\_runner\_binaries\_syncer: "Option to disable the lambda to sync GitHub runner distribution, useful when using a pre-build AMI."
enable\_ssm\_on\_runners: "Enable to allow access the runner instances for debugging purposes via SSM. Note that this adds additional permissions to the runner instances."
enable\_userdata: "Should the userdata script be enabled for the runner. Set this to false if you are using your own prebuilt AMI."
instance\_allocation\_strategy: "The allocation strategy for spot instances. AWS recommends to use `capacity-optimized` however the AWS default is `lowest-price`."
instance\_max\_spot\_price: "Max price price for spot intances per hour. This variable will be passed to the create fleet as max spot price for the fleet."
instance\_target\_capacity\_type: "Default lifecycle used for runner instances, can be either `spot` or `on-demand`."
instance\_types: "List of instance types for the action runner. Defaults are based on runner\_os (al2023 for linux and Windows Server Core for win)."
job\_queue\_retention\_in\_seconds: "The number of seconds the job is held in the queue before it is purged"
minimum\_running\_time\_in\_minutes: "The time an ec2 action runner should be running at minimum before terminated if not busy."
pool\_runner\_owner: "The pool will deploy runners to the GitHub org ID, set this value to the org to which you want the runners deployed. Repo level is not supported."
runner\_additional\_security\_group\_ids: "List of additional security groups IDs to apply to the runner. If added outside the multi\_runner\_config block, the additional security group(s) will be applied to all runner configs. If added inside the multi\_runner\_config, the additional security group(s) will be applied to the individual runner."
runner\_as\_root: "Run the action runner under the root user. Variable `runner_run_as` will be ignored."
runner\_boot\_time\_in\_minutes: "The minimum time for an EC2 runner to boot and register as a runner."
runner\_disable\_default\_labels: "Disable default labels for the runners (os, architecture and `self-hosted`). If enabled, the runner will only have the extra labels provided in `runner_extra_labels`. In case you on own start script is used, this configuration parameter needs to be parsed via SSM."
runner\_extra\_labels: "Extra (custom) labels for the runners (GitHub). Separate each label by a comma. Labels checks on the webhook can be enforced by setting `multi_runner_config.matcherConfig.exactMatch`. GitHub read-only labels should not be provided."
runner\_group\_name: "Name of the runner group."
runner\_name\_prefix: "Prefix for the GitHub runner name."
runner\_run\_as: "Run the GitHub actions agent as user."
runners\_maximum\_count: "The maximum number of runners that will be created. Setting the variable to `-1` desiables the maximum check."
scale\_down\_schedule\_expression: "Scheduler expression to check every x for scale down."
scale\_up\_reserved\_concurrent\_executions: "Amount of reserved concurrent executions for the scale-up lambda function. A value of 0 disables lambda from being triggered and -1 removes any concurrency limitations."
userdata\_template: "Alternative user-data template, replacing the default template. By providing your own user\_data you have to take care of installing all required software, including the action runner. Variables userdata\_pre/post\_install are ignored."
enable\_jit\_config "Overwrite the default behavior for JIT configuration. By default JIT configuration is enabled for ephemeral runners and disabled for non-ephemeral runners. In case of GHES check first if the JIT config API is avaialbe. In case you upgradeing from 3.x to 4.x you can set `enable_jit_config` to `false` to avoid a breaking change when having your own AMI."
enable\_runner\_detailed\_monitoring: "Should detailed monitoring be enabled for the runner. Set this to true if you want to use detailed monitoring. See https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-cloudwatch-new.html for details."
enable\_cloudwatch\_agent: "Enabling the cloudwatch agent on the ec2 runner instances, the runner contains default config. Configuration can be overridden via `cloudwatch_config`."
cloudwatch\_config: "(optional) Replaces the module default cloudwatch log config. See https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Agent-Configuration-File-Details.html for details."
userdata\_pre\_install: "Script to be ran before the GitHub Actions runner is installed on the EC2 instances"
userdata\_post\_install: "Script to be ran after the GitHub Actions runner is installed on the EC2 instances"
runner\_hook\_job\_started: "Script to be ran in the runner environment at the beginning of every job"
runner\_hook\_job\_completed: "Script to be ran in the runner environment at the end of every job"
runner\_ec2\_tags: "Map of tags that will be added to the launch template instance tag specifications."
runner\_iam\_role\_managed\_policy\_arns: "Attach AWS or customer-managed IAM policies (by ARN) to the runner IAM role"
vpc\_id: "The VPC for security groups of the action runners. If not set uses the value of `var.vpc_id`."
subnet\_ids: "List of subnets in which the action runners will be launched, the subnets needs to be subnets in the `vpc_id`. If not set, uses the value of `var.subnet_ids`."
idle\_config: "List of time period that can be defined as cron expression to keep a minimum amount of runners active instead of scaling down to 0. By defining this list you can ensure that in time periods that match the cron expression within 5 seconds a runner is kept idle."
runner\_log\_files: "(optional) Replaces the module default cloudwatch log config. See https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Agent-Configuration-File-Details.html for details."
block\_device\_mappings: "The EC2 instance block device configuration. Takes the following keys: `device_name`, `delete_on_termination`, `volume_type`, `volume_size`, `encrypted`, `iops`, `throughput`, `kms_key_id`, `snapshot_id`."
job\_retry: "Experimental! Can be removed / changed without trigger a major release. Configure job retries. The configuration enables job retries (for ephemeral runners). After creating the insances a message will be published to a job retry queue. The job retry check lambda is checking after a delay if the job is queued. If not the message will be published again on the scale-up (build queue). Using this feature can impact the reate limit of the GitHub app."
pool\_config: "The configuration for updating the pool. The `pool_size` to adjust to by the events triggered by the `schedule_expression`. For example you can configure a cron expression for week days to adjust the pool to 10 and another expression for the weekend to adjust the pool to 1. Use `schedule_expression_timezone` to override the schedule time zone (defaults to UTC)."
}
matcherConfig: {
labelMatchers: "The list of list of labels supported by the runner configuration. `[[self-hosted, linux, x64, example]]`"
exactMatch: "If set to true all labels in the workflow job must match the GitHub labels (os, architecture and `self-hosted`). When false if __any__ workflow label matches it will trigger the webhook."
priority: "If set it defines the priority of the matcher, the matcher with the lowest priority will be evaluated first. Default is 999, allowed values 0-999."
}
redrive\_build\_queue: "Set options to attach (optional) a dead letter queue to the build queue, the queue between the webhook and the scale up lambda. You have the following options. 1. Disable by setting `enabled` to false. 2. Enable by setting `enabled` to `true`, `maxReceiveCount` to a number of max retries."
} |
map(object({
runner_config = object({
runner_os = string
runner_architecture = string
runner_metadata_options = optional(map(any), {
instance_metadata_tags = "enabled"
http_endpoint = "enabled"
http_tokens = "required"
http_put_response_hop_limit = 1
})
ami_filter = optional(map(list(string)), { state = ["available"] })
ami_owners = optional(list(string), ["amazon"])
ami_id_ssm_parameter_name = optional(string, null)
ami_kms_key_arn = optional(string, "")
create_service_linked_role_spot = optional(bool, false)
credit_specification = optional(string, null)
delay_webhook_event = optional(number, 30)
disable_runner_autoupdate = optional(bool, false)
ebs_optimized = optional(bool, false)
enable_ephemeral_runners = optional(bool, false)
enable_job_queued_check = optional(bool, null)
enable_on_demand_failover_for_errors = optional(list(string), [])
enable_organization_runners = optional(bool, false)
enable_runner_binaries_syncer = optional(bool, true)
enable_ssm_on_runners = optional(bool, false)
enable_userdata = optional(bool, true)
instance_allocation_strategy = optional(string, "lowest-price")
instance_max_spot_price = optional(string, null)
instance_target_capacity_type = optional(string, "spot")
instance_types = list(string)
job_queue_retention_in_seconds = optional(number, 86400)
minimum_running_time_in_minutes = optional(number, null)
pool_runner_owner = optional(string, null)
runner_as_root = optional(bool, false)
runner_boot_time_in_minutes = optional(number, 5)
runner_disable_default_labels = optional(bool, false)
runner_extra_labels = optional(list(string), [])
runner_group_name = optional(string, "Default")
runner_name_prefix = optional(string, "")
runner_run_as = optional(string, "ec2-user")
runners_maximum_count = number
runner_additional_security_group_ids = optional(list(string), [])
scale_down_schedule_expression = optional(string, "cron(*/5 * * * ? *)")
scale_up_reserved_concurrent_executions = optional(number, 1)
userdata_template = optional(string, null)
userdata_content = optional(string, null)
enable_jit_config = optional(bool, null)
enable_runner_detailed_monitoring = optional(bool, false)
enable_cloudwatch_agent = optional(bool, true)
cloudwatch_config = optional(string, null)
userdata_pre_install = optional(string, "")
userdata_post_install = optional(string, "")
runner_hook_job_started = optional(string, "")
runner_hook_job_completed = optional(string, "")
runner_ec2_tags = optional(map(string), {})
runner_iam_role_managed_policy_arns = optional(list(string), [])
vpc_id = optional(string, null)
subnet_ids = optional(list(string), null)
idle_config = optional(list(object({
cron = string
timeZone = string
idleCount = number
evictionStrategy = optional(string, "oldest_first")
})), [])
runner_log_files = optional(list(object({
log_group_name = string
prefix_log_group = bool
file_path = string
log_stream_name = string
})), null)
block_device_mappings = optional(list(object({
delete_on_termination = optional(bool, true)
device_name = optional(string, "/dev/xvda")
encrypted = optional(bool, true)
iops = optional(number)
kms_key_id = optional(string)
snapshot_id = optional(string)
throughput = optional(number)
volume_size = number
volume_type = optional(string, "gp3")
})), [{
volume_size = 30
}])
pool_config = optional(list(object({
schedule_expression = string
schedule_expression_timezone = optional(string)
size = number
})), [])
job_retry = optional(object({
enable = optional(bool, false)
delay_in_seconds = optional(number, 300)
delay_backoff = optional(number, 2)
lambda_memory_size = optional(number, 256)
lambda_timeout = optional(number, 30)
max_attempts = optional(number, 1)
}), {})
})
matcherConfig = object({
labelMatchers = list(list(string))
exactMatch = optional(bool, false)
priority = optional(number, 999)
})
redrive_build_queue = optional(object({
enabled = bool
maxReceiveCount = number
}), {
enabled = false
maxReceiveCount = null
})
}))
| n/a | yes | | [pool\_lambda\_reserved\_concurrent\_executions](#input\_pool\_lambda\_reserved\_concurrent\_executions) | Amount of reserved concurrent executions for the scale-up lambda function. A value of 0 disables lambda from being triggered and -1 removes any concurrency limitations. | `number` | `1` | no | | [pool\_lambda\_timeout](#input\_pool\_lambda\_timeout) | Time out for the pool lambda in seconds. | `number` | `60` | no | | [prefix](#input\_prefix) | The prefix used for naming resources | `string` | `"github-actions"` | no |