Merged

26 commits
afc8f09
eli-304 adding splunk firehose module
eddalmond1 Aug 6, 2025
26e6f44
eli-304 adding splunk firehose to api-layer stack
eddalmond1 Aug 6, 2025
c99d0d8
eli-304 adding kms encryption to firehose and ssm, refactoring so spe…
eddalmond1 Aug 7, 2025
8efdf12
eli-304 adding kms key rotation for splunk_hec_kms
eddalmond1 Aug 7, 2025
2f61b9e
eli-304 removing blank lines
eddalmond1 Aug 7, 2025
5a983b9
eli-304 removing redundant comment
eddalmond1 Aug 7, 2025
03be7fd
Merge remote-tracking branch 'origin/main' into feature/eja-eli-304-p…
eddalmond1 Aug 13, 2025
47e17ec
eli-304 adding overwrite to allow initial deploy
eddalmond1 Aug 13, 2025
0879b69
eli-304 added deployment instructions for new ssm parameters
eddalmond1 Aug 13, 2025
bdb1d59
eli-304 added README.md for api-layer stack
eddalmond1 Aug 13, 2025
9572585
eli-304 updated deployment instructions
eddalmond1 Aug 13, 2025
bdbba4c
eli-304 vale corrections
eddalmond1 Aug 13, 2025
23bab3f
eli-304 adding dev and preprod to word allowlist
eddalmond1 Aug 13, 2025
19ff417
Merge branch 'main' into feature/eja-eli-304-push-cloudwatch-alarms-t…
eddalmond1 Aug 13, 2025
d9601da
Merge branch 'main' into feature/eja-eli-304-push-cloudwatch-alarms-t…
eddalmond1 Aug 19, 2025
77d6f99
eli-304 shortening bucket name
eddalmond1 Aug 19, 2025
c16f398
Merge branch 'main' into feature/eja-eli-304-push-cloudwatch-alarms-t…
eddalmond1 Aug 20, 2025
3e887c6
eli-304 vale issues, addressing in markdown
eddalmond1 Aug 20, 2025
e73a679
eli-304 dealing with ssm + secrets
eddalmond1 Aug 20, 2025
1ca5352
eli-304 changing formatting of message for Splunk
eddalmond1 Aug 20, 2025
fcfc4f1
eli-304 amending firehose to use 'raw' endpoint
eddalmond1 Aug 21, 2025
ce694f8
eli-304 removing severity as we can do this logic in Splunk
eddalmond1 Aug 21, 2025
ac6457e
eli-304 removing readme, as worked out a better way to deal with SSM
eddalmond1 Aug 22, 2025
dbbbe3f
eli-304 removing / changing code comments
eddalmond1 Aug 22, 2025
65c13cf
Eli 387 lambda hardening (#306)
Karthikeyannhs Aug 21, 2025
68fa0f1
Lambda versioning for provisioned concurrency (#309)
Karthikeyannhs Aug 22, 2025
3 changes: 3 additions & 0 deletions .github/workflows/base-deploy.yml
@@ -131,6 +131,9 @@ jobs:
TF_VAR_API_CA_CERT: ${{ secrets.API_CA_CERT }}
TF_VAR_API_CLIENT_CERT: ${{ secrets.API_CLIENT_CERT }}
TF_VAR_API_PRIVATE_KEY_CERT: ${{ secrets.API_PRIVATE_KEY_CERT }}
TF_VAR_SPLUNK_HEC_TOKEN: ${{ secrets.SPLUNK_HEC_TOKEN }}
TF_VAR_SPLUNK_HEC_ENDPOINT: ${{ secrets.SPLUNK_HEC_ENDPOINT }}

working-directory: ./infrastructure
shell: bash
run: |
4 changes: 3 additions & 1 deletion .github/workflows/cicd-2-publish.yaml
@@ -93,13 +93,15 @@ jobs:
role-to-assume: arn:aws:iam::${{ secrets.AWS_ACCOUNT_ID }}:role/service-roles/github-actions-api-deployment-role
aws-region: eu-west-2

- name: "Terraform Plan Stacks"
- name: "Terraform Apply"
env:
ENVIRONMENT: dev
WORKSPACE: "default"
TF_VAR_API_CA_CERT: ${{ secrets.API_CA_CERT }}
TF_VAR_API_CLIENT_CERT: ${{ secrets.API_CLIENT_CERT }}
TF_VAR_API_PRIVATE_KEY_CERT: ${{ secrets.API_PRIVATE_KEY_CERT }}
TF_VAR_SPLUNK_HEC_TOKEN: ${{ secrets.SPLUNK_HEC_TOKEN }}
TF_VAR_SPLUNK_HEC_ENDPOINT: ${{ secrets.SPLUNK_HEC_ENDPOINT }}

# just planning for now for safety and until review
run: |
3 changes: 2 additions & 1 deletion .github/workflows/cicd-3-test.yaml
@@ -119,7 +119,8 @@ jobs:
TF_VAR_API_CA_CERT: ${{ secrets.API_CA_CERT }}
TF_VAR_API_CLIENT_CERT: ${{ secrets.API_CLIENT_CERT }}
TF_VAR_API_PRIVATE_KEY_CERT: ${{ secrets.API_PRIVATE_KEY_CERT }}

TF_VAR_SPLUNK_HEC_TOKEN: ${{ secrets.SPLUNK_HEC_TOKEN }}
TF_VAR_SPLUNK_HEC_ENDPOINT: ${{ secrets.SPLUNK_HEC_ENDPOINT }}
run: |
mkdir -p ./build
echo "Running: make terraform env=$ENVIRONMENT workspace=$WORKSPACE stack=networking tf-command=apply"
3 changes: 2 additions & 1 deletion .github/workflows/manual-terraform-apply.yaml
@@ -63,7 +63,8 @@ jobs:
TF_VAR_API_CA_CERT: ${{ secrets.API_CA_CERT }}
TF_VAR_API_CLIENT_CERT: ${{ secrets.API_CLIENT_CERT }}
TF_VAR_API_PRIVATE_KEY_CERT: ${{ secrets.API_PRIVATE_KEY_CERT }}

TF_VAR_SPLUNK_HEC_TOKEN: ${{ secrets.SPLUNK_HEC_TOKEN }}
TF_VAR_SPLUNK_HEC_ENDPOINT: ${{ secrets.SPLUNK_HEC_ENDPOINT }}
run: |
mkdir -p ./build
echo "Running: make terraform env=$ENVIRONMENT workspace=$WORKSPACE stack=networking tf-command=plan args=\"-auto-approve\""
2 changes: 1 addition & 1 deletion infrastructure/modules/lambda/kms.tf
@@ -7,7 +7,7 @@ resource "aws_kms_key" "lambda_cmk" {
}

resource "aws_kms_alias" "lambda_cmk" {
name = "alias/${terraform.workspace == "default" ? "" : "${terraform.workspace}-"}${var.lambda_func_name}-cmk"
name = "alias/${terraform.workspace == "default" ? "" : "${terraform.workspace}-"}${var.lambda_func_name}-key"
target_key_id = aws_kms_key.lambda_cmk.key_id
}

27 changes: 25 additions & 2 deletions infrastructure/modules/lambda/lambda.tf
@@ -1,5 +1,5 @@
resource "aws_lambda_function" "eligibility_signposting_lambda" {
#checkov:skip=CKV_AWS_116: No deadletter queue is configured for this Lambda function, yet
#checkov:skip=CKV_AWS_116: No deadletter queue is configured for this Lambda function, as the requests are synchronous
#checkov:skip=CKV_AWS_115: Concurrent execution limit will be set at APIM level, not at Lambda level
#checkov:skip=CKV_AWS_272: Skipping code signing but flagged to create ticket to investigate on ELI-238
# If the file is not in the current working directory you will need to include a
@@ -11,7 +11,7 @@ resource "aws_lambda_function" "eligibility_signposting_lambda" {

source_code_hash = filebase64sha256(var.file_name)

runtime = "python3.13"
runtime = var.runtime
timeout = 30
memory_size = 2048

@@ -28,12 +28,35 @@ resource "aws_lambda_function" "eligibility_signposting_lambda" {

kms_key_arn = aws_kms_key.lambda_cmk.arn

publish = true

vpc_config {
subnet_ids = var.vpc_intra_subnets
security_group_ids = var.security_group_ids
}

layers = compact([
var.environment == "prod" ? "arn:aws:lambda:${var.region}:580247275435:layer:LambdaInsightsExtension:${var.lambda_insights_extension_version}" : null
])

tracing_config {
mode = "Active"
}
}

# lambda alias required for provisioned concurrency
resource "aws_lambda_alias" "campaign_alias" {
count = var.environment == "prod" ? 1 : 0
name = "live"
function_name = aws_lambda_function.eligibility_signposting_lambda.function_name
function_version = aws_lambda_function.eligibility_signposting_lambda.version
}

# provisioned concurrency - number of pre-warmed lambda containers
resource "aws_lambda_provisioned_concurrency_config" "campaign_pc" {
count = var.environment == "prod" ? 1 : 0
function_name = var.lambda_func_name
qualifier = aws_lambda_alias.campaign_alias[0].name
provisioned_concurrent_executions = var.provisioned_concurrency_count
}

23 changes: 22 additions & 1 deletion infrastructure/modules/lambda/variables.tf
@@ -1,5 +1,10 @@
variable "eligibility_lambda_role_arn" {
description = "lambda read role arn for dynamodb"
description = "lambda role arn"
type = string
}

variable "eligibility_lambda_role_name" {
description = "lambda role name"
type = string
}

@@ -8,6 +13,12 @@ variable "lambda_func_name" {
type = string
}

variable "runtime" {
description = "runtime of the Lambda function"
type = string
}


variable "vpc_intra_subnets" {
description = "vpc private subnets for lambda"
type = list(string)
@@ -52,3 +63,13 @@ variable "enable_xray_patching"{
description = "flag to enable xray tracing, which puts an entry for dynamodb, s3 and firehose in trace map"
type = string
}

variable "provisioned_concurrency_count" {
description = "Number of prewarmed Lambda instances"
type = number
}

variable "lambda_insights_extension_version" {
description = "version number of LambdaInsightsExtension"
type = number
}
1 change: 1 addition & 0 deletions infrastructure/modules/splunk_forwarder/data.tf
@@ -0,0 +1 @@
data "aws_caller_identity" "current" {}
45 changes: 45 additions & 0 deletions infrastructure/modules/splunk_forwarder/firehose.tf
@@ -0,0 +1,45 @@
# KMS Key for Firehose encryption
resource "aws_kms_key" "firehose_splunk_cmk" {
description = "KMS key for encrypting Kinesis Firehose delivery stream data"
deletion_window_in_days = 7
enable_key_rotation = true
tags = {
Name = "firehose-splunk-cmk"
Purpose = "Firehose encryption"
ManagedBy = "terraform"
}
}

# KMS Key Alias for easier identification
resource "aws_kms_alias" "firehose_splunk_cmk_alias" {
name = "alias/firehose-splunk-cmk"
target_key_id = aws_kms_key.firehose_splunk_cmk.key_id
}

resource "aws_kinesis_firehose_delivery_stream" "splunk_delivery_stream" {
Author comment:
This part of the code is basically setting up a new Firehose Stream with the Splunk endpoint. The module doesn't deal with getting logs/alarms into Firehose, that's dealt with in the main stack (via eventbridge.tf)

name = "splunk-alarm-events"
destination = "splunk"
server_side_encryption {
enabled = true
key_type = "CUSTOMER_MANAGED_CMK"
key_arn = aws_kms_key.firehose_splunk_cmk.arn
}
# VPC configuration is only supported for HTTP endpoint destinations in Kinesis Firehose
# For Splunk destinations, the service runs in AWS-managed VPC but you can control network access
# via the subnets where EventBridge (the source) runs and IAM policies

splunk_configuration {
hec_endpoint = var.splunk_hec_endpoint
hec_token = var.splunk_hec_token
hec_endpoint_type = "Raw"
s3_backup_mode = "FailedEventsOnly"

s3_configuration {
Author comment:
Note this bit - if we fail to deliver a record to Splunk, then we put it in a bucket for further investigation. We could add an alarm for this, so it's called out both on our console and in ITOC splunk, but will leave that to a future ticket.

role_arn = var.splunk_firehose_s3_role_arn
bucket_arn = var.splunk_firehose_s3_backup_arn
buffering_size = 10
buffering_interval = 400
compression_format = "GZIP"
}
}
}
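
The inline note above suggests alarming on failed Splunk deliveries as a follow-up. A minimal sketch of what that could look like, using the AWS/Firehose delivery metric for Splunk destinations (not part of this change; the alarm name, threshold, and periods are assumptions, and notification actions would still need wiring up):

# Sketch only, not in this PR: fires when the Splunk delivery success ratio
# drops below 100% for two consecutive 5-minute periods.
resource "aws_cloudwatch_metric_alarm" "splunk_delivery_failures" {
  alarm_name          = "splunk-firehose-delivery-failures"
  namespace           = "AWS/Firehose"
  metric_name         = "DeliveryToSplunk.Success"
  statistic           = "Average"
  comparison_operator = "LessThanThreshold"
  threshold           = 1
  period              = 300
  evaluation_periods  = 2
  treat_missing_data  = "notBreaching"

  dimensions = {
    DeliveryStreamName = aws_kinesis_firehose_delivery_stream.splunk_delivery_stream.name
  }
}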
56 changes: 56 additions & 0 deletions infrastructure/modules/splunk_forwarder/iam.tf
@@ -0,0 +1,56 @@
# EventBridge IAM roles now defined in api-layer stack for specific integration

resource "aws_kms_key_policy" "firehose_splunk_cmk_policy" {
key_id = aws_kms_key.firehose_splunk_cmk.id
policy = jsonencode({
Version = "2012-10-17",
Statement = [
{
Sid = "AllowRootAccountFullAccess"
Effect = "Allow"
Principal = { AWS = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:root" }
Action = "kms:*"
Resource = "*"
},
{
Sid = "AllowFirehoseServiceUseOfKey"
Effect = "Allow"
Principal = { Service = "firehose.amazonaws.com" }
Action = [
"kms:Encrypt",
"kms:Decrypt",
"kms:ReEncrypt*",
"kms:GenerateDataKey*",
"kms:DescribeKey"
]
Resource = "*"
},
{
Sid = "AllowEventBridgeUseOfKey"
Effect = "Allow"
Principal = { Service = "events.amazonaws.com" }
Action = [
"kms:Encrypt",
"kms:Decrypt",
"kms:ReEncrypt*",
"kms:GenerateDataKey*",
"kms:DescribeKey"
]
Resource = "*"
},
{
Sid = "AllowCloudWatchUseOfKey"
Effect = "Allow"
Principal = { Service = "cloudwatch.amazonaws.com" }
Action = [
"kms:Encrypt",
"kms:Decrypt",
"kms:ReEncrypt*",
"kms:GenerateDataKey*",
"kms:DescribeKey"
]
Resource = "*"
}
]
})
}
11 changes: 11 additions & 0 deletions infrastructure/modules/splunk_forwarder/outputs.tf
@@ -0,0 +1,11 @@
# Output the Firehose delivery stream ARN for use by EventBridge
output "firehose_delivery_stream_arn" {
description = "ARN of the Kinesis Firehose delivery stream for Splunk"
value = aws_kinesis_firehose_delivery_stream.splunk_delivery_stream.arn
}

# Output the KMS key ARN for reference
output "firehose_kms_key_arn" {
description = "ARN of the KMS key used for Firehose encryption"
value = aws_kms_key.firehose_splunk_cmk.arn
}
19 changes: 19 additions & 0 deletions infrastructure/modules/splunk_forwarder/variables.tf
@@ -0,0 +1,19 @@
variable "splunk_hec_endpoint" {
description = "Splunk HEC endpoint URL"
type = string
}

variable "splunk_hec_token" {
description = "Splunk HEC token"
type = string
}

variable "splunk_firehose_s3_backup_arn" {
description = "s3 bucket ARN for Firehose backups"
type = string
}

variable "splunk_firehose_s3_role_arn" {
description = "IAM role ARN for Firehose to access S3"
type = string
}
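
The api-layer stack consumes this module; the module block itself sits outside the visible diff, so the following is only an illustrative sketch. The two Splunk variables match the TF_VAR_* values set in the workflows above, while the S3 backup bucket and IAM role references are assumed names:

# Illustrative only - the real module block is not shown in this diff, and the
# backup bucket / role resources referenced here are assumed names.
module "splunk_forwarder" {
  source = "../../modules/splunk_forwarder"

  splunk_hec_endpoint           = var.SPLUNK_HEC_ENDPOINT
  splunk_hec_token              = var.SPLUNK_HEC_TOKEN
  splunk_firehose_s3_backup_arn = aws_s3_bucket.splunk_firehose_backup.arn
  splunk_firehose_s3_role_arn   = aws_iam_role.splunk_firehose_s3.arn
}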
89 changes: 89 additions & 0 deletions infrastructure/stacks/api-layer/eventbridge.tf
@@ -0,0 +1,89 @@
# IAM role for EventBridge to write to Firehose
resource "aws_iam_role" "eventbridge_firehose_role" {
name = "${var.environment}-eventbridge-to-firehose-role"

assume_role_policy = jsonencode({
Version = "2012-10-17"
Statement = [{
Effect = "Allow"
Principal = {
Service = "events.amazonaws.com"
}
Action = "sts:AssumeRole"
}]
})

tags = {
Environment = var.environment
Purpose = "splunk-forwarding"
ManagedBy = "terraform"
}
}

# IAM policy for EventBridge to access Firehose
resource "aws_iam_role_policy" "eventbridge_to_firehose_policy" {
name = "${var.environment}-eventbridge-to-firehose-policy"
role = aws_iam_role.eventbridge_firehose_role.id

policy = jsonencode({
Version = "2012-10-17"
Statement = [{
Effect = "Allow"
Action = [
"firehose:PutRecord",
"firehose:PutRecordBatch"
]
Resource = module.splunk_forwarder.firehose_delivery_stream_arn
}]
})
}

# EventBridge rule to capture CloudWatch alarm state changes
resource "aws_cloudwatch_event_rule" "alarm_state_change" {
name = "cloudwatch-alarm-state-change-to-splunk"
description = "Forward CloudWatch alarm state changes to Splunk via Firehose"

event_pattern = jsonencode({
source = ["aws.cloudwatch"]
detail-type = ["CloudWatch Alarm State Change"]
})

tags = {
Environment = var.environment
Purpose = "splunk-forwarding"
ManagedBy = "terraform"
}
}

# EventBridge target to send events to Firehose
resource "aws_cloudwatch_event_target" "firehose_target" {
rule = aws_cloudwatch_event_rule.alarm_state_change.name
arn = module.splunk_forwarder.firehose_delivery_stream_arn
role_arn = aws_iam_role.eventbridge_firehose_role.arn

# Transform the CloudWatch alarm event into a format suitable for Splunk
input_transformer {
Author comment:
I've left the transformation pretty minimal, as I think we'd want ITOC to feed back on 'version 1' of these logs to their Splunk.

input_paths = {
account = "$.account"
region = "$.region"
time = "$.time"
alarm_name = "$.detail.alarmName"
new_state = "$.detail.state.value"
old_state = "$.detail.previousState.value"
reason = "$.detail.state.reason"
}

input_template = jsonencode({
time = "<time>"
source = "elid-${var.environment}:cloudwatch:alarm"
sourcetype = "aws:cloudwatch:alarm"
event = {
alarm_name = "<alarm_name>"
new_state = "<new_state>"
old_state = "<old_state>"
reason = "<reason>"
region = "<region>"
}
})
}
}
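
To make the author's point about the minimal transformation concrete, this is roughly the payload the template above would deliver to the raw HEC endpoint for a hypothetical OK-to-ALARM transition in dev (every value below is invented for illustration):

# Illustration only: approximate rendered form of the input_template above for a
# hypothetical alarm transition; all values are made up.
locals {
  example_splunk_event = {
    time       = "2025-08-20T10:15:00Z"
    source     = "elid-dev:cloudwatch:alarm"
    sourcetype = "aws:cloudwatch:alarm"
    event = {
      alarm_name = "lambda-errors-high"
      new_state  = "ALARM"
      old_state  = "OK"
      reason     = "Threshold Crossed: 1 datapoint was greater than the threshold"
      region     = "eu-west-2"
    }
  }
}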