Skip to content

Commit af227a2

Browse files
committed
Implement CloudWatch LogGroup retention policies
1 parent 68b755d commit af227a2

File tree

4 files changed

+99
-11
lines changed

4 files changed

+99
-11
lines changed

packer/linux/conf/bin/bk-install-elastic-stack.sh

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -429,6 +429,32 @@ systemctl daemon-reload
429429
echo Starting buildkite-agent...
430430
systemctl enable --now buildkite-agent
431431

432+
#######################################
# Optionally add a retention policy to every log file collected by the
# CloudWatch agent, then reload the agent so it applies the policy.
#
# This step is best-effort: every failure is reported as a warning and the
# function still returns 0, so a retention problem never aborts the install.
#
# Globals:
#   EC2_LOG_RETENTION_DAYS          (read)    retention period in days; empty
#                                             or unset means "leave defaults"
#   ENABLE_EC2_LOG_RETENTION_POLICY (read)    must be exactly "true" to opt in
#   CONFIG_FILE                     (written) path of the agent config file
# Arguments: none
# Outputs:   progress and warning messages on stdout
# Returns:   0 always
#######################################
configure_cloudwatch_log_retention() {
  local retention_days="${EC2_LOG_RETENTION_DAYS:-}"
  local policy_enabled="${ENABLE_EC2_LOG_RETENTION_POLICY:-false}"
  local agent_ctl="/opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl"
  local tmp_config

  echo "Configuring CloudWatch agent log retention..."

  if [[ -z "$retention_days" ]]; then
    echo "EC2 log retention not set, using CloudWatch agent defaults (never expire)"
    return 0
  fi

  # A retention value alone is not enough: deleting old logs is destructive,
  # so it must be explicitly enabled as well.
  if [[ "$policy_enabled" != "true" ]]; then
    echo "EC2 log retention set to ${retention_days} days but EnableEC2LogRetentionPolicy is false"
    echo "Skipping EC2 log retention configuration to protect existing logs"
    return 0
  fi

  echo "Setting CloudWatch EC2 log retention to ${retention_days} days"
  echo "WARNING: This will delete EC2 logs older than ${retention_days} days from existing log groups"

  # Deliberately not local: the original top-level code set this variable.
  CONFIG_FILE="/opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json"
  if [[ ! -f "$CONFIG_FILE" ]]; then
    echo "Warning: CloudWatch agent config file not found at $CONFIG_FILE"
    return 0
  fi

  # Use an unpredictable temp file next to the config (not a fixed name in
  # /tmp) so the final mv is a same-filesystem rename.
  if ! tmp_config="$(mktemp "${CONFIG_FILE}.XXXXXX")"; then
    echo "Warning: Failed to create a temporary file next to $CONFIG_FILE; CloudWatch agent config left unchanged"
    return 0
  fi

  # Add retention_in_days to all collect_list items using jq. A non-numeric
  # retention value or an unexpected config shape makes jq fail; the existing
  # config is then left untouched.
  if ! jq --arg retention "$retention_days" '
      .logs.logs_collected.files.collect_list |= map(. + {"retention_in_days": ($retention | tonumber)})
    ' "$CONFIG_FILE" >"$tmp_config"; then
    rm -f -- "$tmp_config"
    echo "Warning: Failed to add retention_in_days to $CONFIG_FILE; CloudWatch agent config left unchanged"
    return 0
  fi

  # mktemp creates the file with mode 0600; copy the original mode across
  # before replacing the config so its readability does not change.
  if ! { chmod --reference="$CONFIG_FILE" "$tmp_config" && mv -f -- "$tmp_config" "$CONFIG_FILE"; }; then
    rm -f -- "$tmp_config"
    echo "Warning: Failed to replace $CONFIG_FILE; CloudWatch agent config left unchanged"
    return 0
  fi

  # Reload the CloudWatch agent to pick up the new configuration. Success is
  # only reported when the reload actually succeeded.
  if "$agent_ctl" -a fetch-config -m ec2 -s -c "file:$CONFIG_FILE"; then
    echo "CloudWatch agent configuration updated and restarted"
  else
    echo "Warning: Failed to restart CloudWatch agent"
  fi
  return 0
}

configure_cloudwatch_log_retention
457+
432458
echo Signaling success to CloudFormation...
433459
# This will fail if the stack has already completed, for instance if there is a min size
434460
# of 1 and this is the 2nd instance. This is ok, so we just ignore the error

packer/linux/conf/cloudwatch-agent/config.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,12 @@
5353
"log_group_name": "/buildkite/auth",
5454
"log_stream_name": "{instance_id}",
5555
"timestamp_format": "%Y-%m-%d %H:%M:%S,%f"
56+
},
57+
{
58+
"file_path": "/var/log/lifecycled.log",
59+
"log_group_name": "/buildkite/lifecycled",
60+
"log_stream_name": "{instance_id}",
61+
"timestamp_format": "%Y-%m-%dT%H:%M:%S.%f"
5662
}
5763
]
5864
}

packer/windows/conf/bin/bk-install-elastic-stack.ps1

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,38 @@ If ($lastexitcode -ne 0) { Exit $lastexitcode }
306306

307307
Restart-Service buildkite-agent
308308

309+
# Optionally add a retention policy to every log source collected by the
# CloudWatch agent, then reload the agent so it applies the policy.
# Requires both EC2_LOG_RETENTION_DAYS to be set and
# ENABLE_EC2_LOG_RETENTION_POLICY to be exactly "true", because enabling
# retention deletes older logs. The whole step is best-effort: failures are
# reported as warnings and never abort the install.
Write-Output "Configuring CloudWatch agent log retention..."
if ($Env:EC2_LOG_RETENTION_DAYS -and $Env:ENABLE_EC2_LOG_RETENTION_POLICY -eq "true") {
  Write-Output "Setting CloudWatch EC2 log retention to $($Env:EC2_LOG_RETENTION_DAYS) days"
  Write-Output "WARNING: This will delete EC2 logs older than $($Env:EC2_LOG_RETENTION_DAYS) days from existing log groups"

  $configFile = "C:\ProgramData\Amazon\AmazonCloudWatchAgent\amazon-cloudwatch-agent.json"
  if (Test-Path $configFile) {
    # Add retention_in_days to all collect_list items using jq.
    # - The filter contains no double quotes: Windows PowerShell does not
    #   escape embedded double quotes when passing arguments to native
    #   executables, so the jq object-key shorthand is used instead.
    # - Each collect_list is only updated when it exists, so a config without
    #   a files or windows_events section neither fails nor gains null entries.
    $jqFilter = '
      def with_retention(p):
        if p then p |= map(. + {retention_in_days: ($retention | tonumber)}) else . end;
      with_retention(.logs.logs_collected.files.collect_list) |
      with_retention(.logs.logs_collected.windows_events.collect_list)
    '

    # -a makes jq escape non-ASCII characters, so the result can be written as
    # plain ASCII below without loss. A bare ">" redirect in Windows
    # PowerShell would write UTF-16 instead.
    $updatedConfig = & jq -a --arg retention $Env:EC2_LOG_RETENTION_DAYS $jqFilter $configFile

    if ($LASTEXITCODE -eq 0 -and $updatedConfig) {
      $tempFile = [System.IO.Path]::GetTempFileName()
      $configReplaced = $false
      try {
        $updatedConfig | Out-File -FilePath $tempFile -Encoding ascii -ErrorAction Stop
        # -Force is required: the destination always exists at this point.
        Move-Item -Force $tempFile $configFile -ErrorAction Stop
        $configReplaced = $true
      } catch {
        Write-Output "Warning: Failed to replace ${configFile}: $($_.Exception.Message)"
        Remove-Item -Force $tempFile -ErrorAction SilentlyContinue
      }

      if ($configReplaced) {
        # Restart CloudWatch agent to pick up the new configuration
        try {
          & "C:\Program Files\Amazon\AmazonCloudWatchAgent\amazon-cloudwatch-agent-ctl.ps1" -a fetch-config -m ec2 -s -c "file:$configFile"
          Write-Output "CloudWatch agent configuration updated and restarted"
        } catch {
          Write-Output "Warning: Failed to restart CloudWatch agent: $($_.Exception.Message)"
        }
      }
    } else {
      # jq failed (for example a non-numeric retention value) or produced no
      # output; keep the existing config rather than overwriting it.
      Write-Output "Warning: Failed to add retention_in_days to $configFile; CloudWatch agent config left unchanged"
    }
  } else {
    Write-Output "Warning: CloudWatch agent config file not found at $configFile"
  }
} elseif ($Env:EC2_LOG_RETENTION_DAYS) {
  Write-Output "EC2 log retention set to $($Env:EC2_LOG_RETENTION_DAYS) days but EnableEC2LogRetentionPolicy is false"
  Write-Output "Skipping EC2 log retention configuration to protect existing logs"
} else {
  Write-Output "EC2 log retention not set, using CloudWatch agent defaults (never expire)"
}
340+
309341
# re-enable debug tracing
310342
Set-PSDebug -Trace 2
311343

templates/aws-stack.yml

Lines changed: 35 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ Metadata:
5959
- BuildkiteWindowsAdministrator
6060
- BuildkiteAgentScalerServerlessARN
6161
- BuildkiteAgentScalerVersion
62+
- EnableEC2LogRetentionPolicy
63+
- EC2LogRetentionDays
6264
- LogRetentionDays
6365
- BuildkiteAgentEnableGracefulShutdown
6466

@@ -234,10 +236,27 @@ Parameters:
234236
Type: Number
235237
Default: 3600
236238

239+
# Opt-in switch for the EC2 log retention policy. It defaults to "false"
# because enabling retention deletes existing logs older than the period.
EnableEC2LogRetentionPolicy:
  Type: String
  Default: "false"
  AllowedValues: ["true", "false"]
  Description: >
    Enable CloudWatch log retention policy for EC2 instance logs managed by the CloudWatch agent.
    When enabled, EC2 logs older than EC2LogRetentionDays will be automatically deleted to reduce storage costs.
    This only affects logs from Buildkite agents, system logs, and other EC2-generated logs - not Lambda or other AWS service logs.
    WARNING: For existing stacks, this will delete historical EC2 logs older than the retention period. This action cannot be undone.

# Retention period used when EnableEC2LogRetentionPolicy is "true".
# AllowedValues lists every retention period accepted by CloudWatch Logs,
# including the multi-year values 1096, 2192, 2557, 2922 and 3288.
EC2LogRetentionDays:
  Type: Number
  Description: The number of days to retain CloudWatch Logs for EC2 instances managed by the CloudWatch agent (Buildkite agents, system logs, etc).
  Default: 7
  AllowedValues: [1, 3, 5, 7, 14, 30, 60, 90, 120, 150, 180, 365, 400, 545, 731, 1096, 1827, 2192, 2557, 2922, 3288, 3653]
254+
237255
LogRetentionDays:
238256
Type: Number
239-
Description: The number of days to retain the Cloudwatch Logs of the lambda.
240-
Default: "1"
257+
Description: The number of days to retain CloudWatch Logs for Lambda functions in the stack.
258+
Default: 1
259+
AllowedValues: [1, 3, 5, 7, 14, 30, 60, 90, 120, 150, 180, 365, 400, 545, 731, 1827, 3653]
241260

242261
BuildkiteAgentEnableGracefulShutdown:
243262
Description: >
@@ -895,13 +914,13 @@ Conditions:
895914

896915
UseInstanceType7:
897916
!Not [ !Equals [ !Select [ "6", !Split [ ",", !Join [ ",", [ !Ref InstanceTypes, "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""] ] ] ], ""] ]
898-
917+
899918
UseInstanceType8:
900919
!Not [ !Equals [ !Select [ "7", !Split [ ",", !Join [ ",", [ !Ref InstanceTypes, "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""] ] ] ], ""] ]
901-
920+
902921
UseInstanceType9:
903922
!Not [ !Equals [ !Select [ "8", !Split [ ",", !Join [ ",", [ !Ref InstanceTypes, "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""] ] ] ], ""] ]
904-
923+
905924
UseInstanceType10:
906925
!Not [ !Equals [ !Select [ "9", !Split [ ",", !Join [ ",", [ !Ref InstanceTypes, "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""] ] ] ], ""] ]
907926

@@ -910,13 +929,13 @@ Conditions:
910929

911930
UseInstanceType12:
912931
!Not [ !Equals [ !Select [ "11", !Split [ ",", !Join [ ",", [ !Ref InstanceTypes, "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""] ] ] ], ""] ]
913-
932+
914933
UseInstanceType13:
915934
!Not [ !Equals [ !Select [ "12", !Split [ ",", !Join [ ",", [ !Ref InstanceTypes, "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""] ] ] ], ""] ]
916935

917936
UseInstanceType14:
918937
!Not [ !Equals [ !Select [ "13", !Split [ ",", !Join [ ",", [ !Ref InstanceTypes, "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""] ] ] ], ""] ]
919-
938+
920939
UseInstanceType15:
921940
!Not [ !Equals [ !Select [ "14", !Split [ ",", !Join [ ",", [ !Ref InstanceTypes, "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""] ] ] ], ""] ]
922941

@@ -925,13 +944,13 @@ Conditions:
925944

926945
UseInstanceType17:
927946
!Not [ !Equals [ !Select [ "16", !Split [ ",", !Join [ ",", [ !Ref InstanceTypes, "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""] ] ] ], ""] ]
928-
947+
929948
UseInstanceType18:
930949
!Not [ !Equals [ !Select [ "17", !Split [ ",", !Join [ ",", [ !Ref InstanceTypes, "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""] ] ] ], ""] ]
931-
950+
932951
UseInstanceType19:
933952
!Not [ !Equals [ !Select [ "18", !Split [ ",", !Join [ ",", [ !Ref InstanceTypes, "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""] ] ] ], ""] ]
934-
953+
935954
UseInstanceType20:
936955
!Not [ !Equals [ !Select [ "19", !Split [ ",", !Join [ ",", [ !Ref InstanceTypes, "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""] ] ] ], ""] ]
937956

@@ -943,7 +962,7 @@ Conditions:
943962

944963
UseInstanceType23:
945964
!Not [ !Equals [ !Select [ "22", !Split [ ",", !Join [ ",", [ !Ref InstanceTypes, "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""] ] ] ], ""] ]
946-
965+
947966
UseInstanceType24:
948967
!Not [ !Equals [ !Select [ "23", !Split [ ",", !Join [ ",", [ !Ref InstanceTypes, "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""] ] ] ], ""] ]
949968

@@ -1336,6 +1355,7 @@ Resources:
13361355
- logs:PutLogEvents
13371356
- logs:DescribeLogGroups
13381357
- logs:DescribeLogStreams
1358+
- logs:PutRetentionPolicy
13391359
Resource: "*"
13401360
- Sid: Ssm
13411361
Effect: Allow
@@ -1597,6 +1617,8 @@ Resources:
15971617
$Env:ECR_PLUGIN_ENABLED="${EnableECRPlugin}"
15981618
$Env:DOCKER_LOGIN_PLUGIN_ENABLED="${EnableDockerLoginPlugin}"
15991619
$Env:AWS_REGION="${AWS::Region}"
1620+
$Env:ENABLE_EC2_LOG_RETENTION_POLICY="${EnableEC2LogRetentionPolicy}"
1621+
$Env:EC2_LOG_RETENTION_DAYS="${EC2LogRetentionDays}"
16001622
powershell -file C:\buildkite-agent\bin\bk-install-elastic-stack.ps1 >> C:\buildkite-agent\elastic-stack.log
16011623
</powershell>
16021624
- LocalSecretsBucket: !If
@@ -1686,6 +1708,8 @@ Resources:
16861708
RESOURCE_LIMITS_CPU_WEIGHT="${ResourceLimitsCPUWeight}" \
16871709
RESOURCE_LIMITS_CPU_QUOTA="${ResourceLimitsCPUQuota}" \
16881710
RESOURCE_LIMITS_IO_WEIGHT="${ResourceLimitsIOWeight}" \
1711+
ENABLE_EC2_LOG_RETENTION_POLICY="${EnableEC2LogRetentionPolicy}" \
1712+
EC2_LOG_RETENTION_DAYS="${EC2LogRetentionDays}" \
16891713
/usr/local/bin/bk-install-elastic-stack.sh
16901714
--==BOUNDARY==--
16911715
- LocalSecretsBucket: !If

0 commit comments

Comments
 (0)