Skip to content

Commit 06a2cb1

Browse files
authored
Merge pull request #1555 from buildkite/SUP-4545-log-group-retention-policies
Implement CloudWatch LogGroup retention policies
2 parents aa25418 + c201afc commit 06a2cb1

File tree

4 files changed

+90
-2
lines changed

4 files changed

+90
-2
lines changed

packer/linux/conf/bin/bk-install-elastic-stack.sh

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -432,6 +432,32 @@ systemctl daemon-reload
432432
echo Starting buildkite-agent...
433433
systemctl enable --now buildkite-agent
434434

435+
# Optionally apply a CloudWatch Logs retention period to every log file the
# CloudWatch agent collects on this instance.
#
# Environment:
#   EC2_LOG_RETENTION_DAYS          - retention period in days; empty/unset
#                                     leaves the agent configuration untouched
#   ENABLE_EC2_LOG_RETENTION_POLICY - explicit opt-in; any value other than
#                                     "true" skips the change
# Globals written:
#   CONFIG_FILE - path of the CloudWatch agent configuration file
#
# Best-effort: every problem is reported as a warning and the function still
# returns 0, so a retention failure does not fail the rest of the install.
configure_ec2_log_retention() {
  local tmp_config

  echo Configuring CloudWatch agent log retention...

  if [[ -z "${EC2_LOG_RETENTION_DAYS:-}" ]]; then
    echo "EC2 log retention not set, using CloudWatch agent defaults (never expire)"
    return 0
  fi

  if [[ "${ENABLE_EC2_LOG_RETENTION_POLICY:-false}" != "true" ]]; then
    echo "EC2 log retention set to ${EC2_LOG_RETENTION_DAYS} days but EnableEC2LogRetentionPolicy is false"
    echo "Skipping EC2 log retention configuration to protect existing logs"
    return 0
  fi

  # jq's `tonumber` aborts on non-numeric input; reject it up front with a
  # readable message instead of an opaque jq error.
  if [[ ! "${EC2_LOG_RETENTION_DAYS}" =~ ^-?[0-9]+$ ]]; then
    echo "Warning: EC2 log retention '${EC2_LOG_RETENTION_DAYS}' is not a whole number of days, skipping"
    return 0
  fi

  echo "Setting CloudWatch EC2 log retention to ${EC2_LOG_RETENTION_DAYS} days"
  echo "WARNING: This will delete EC2 logs older than ${EC2_LOG_RETENTION_DAYS} days from existing log groups"

  # Update the CloudWatch agent config with the retention value
  CONFIG_FILE="/opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json"
  if [[ ! -f "$CONFIG_FILE" ]]; then
    echo "Warning: CloudWatch agent config file not found at $CONFIG_FILE"
    return 0
  fi

  # Unpredictable temp file name instead of a fixed path in /tmp.
  if ! tmp_config=$(mktemp); then
    echo "Warning: Failed to create a temporary file, CloudWatch agent config left unchanged"
    return 0
  fi

  # Add retention_in_days to all collect_list items using jq. The result is
  # only installed when jq succeeded and actually produced output.
  if ! jq --arg retention "$EC2_LOG_RETENTION_DAYS" '
    .logs.logs_collected.files.collect_list |= map(. + {"retention_in_days": ($retention | tonumber)})
  ' "$CONFIG_FILE" >"$tmp_config" || [[ ! -s "$tmp_config" ]]; then
    rm -f -- "$tmp_config"
    echo "Warning: Failed to add retention_in_days to $CONFIG_FILE, CloudWatch agent config left unchanged"
    return 0
  fi

  # cp (not mv) so the config keeps its existing owner and mode; mktemp creates
  # the temporary file readable by its owner only.
  if ! cp -- "$tmp_config" "$CONFIG_FILE"; then
    rm -f -- "$tmp_config"
    echo "Warning: Failed to write updated configuration to $CONFIG_FILE"
    return 0
  fi
  rm -f -- "$tmp_config"

  # Restart CloudWatch agent to pick up the new configuration; only claim
  # success when the control script reported success.
  if /opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a fetch-config -m ec2 -s -c "file:$CONFIG_FILE"; then
    echo "CloudWatch agent configuration updated and restarted"
  else
    echo "Warning: Failed to restart CloudWatch agent"
  fi
  return 0
}

configure_ec2_log_retention
460+
435461
echo Signaling success to CloudFormation...
436462
# This will fail if the stack has already completed, for instance if there is a min size
437463
# of 1 and this is the 2nd instance. This is ok, so we just ignore the error

packer/linux/conf/cloudwatch-agent/config.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,12 @@
5353
"log_group_name": "/buildkite/auth",
5454
"log_stream_name": "{instance_id}",
5555
"timestamp_format": "%Y-%m-%d %H:%M:%S,%f"
56+
},
57+
{
58+
"file_path": "/var/log/lifecycled.log",
59+
"log_group_name": "/buildkite/lifecycled",
60+
"log_stream_name": "{instance_id}",
61+
"timestamp_format": "%Y-%m-%dT%H:%M:%S.%f"
5662
}
5763
]
5864
}

packer/windows/conf/bin/bk-install-elastic-stack.ps1

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,38 @@ If ($lastexitcode -ne 0) { Exit $lastexitcode }
309309

310310
Restart-Service buildkite-agent
311311

312+
# Optionally apply a CloudWatch Logs retention period to every log source the
# CloudWatch agent collects on this instance.
#   $Env:EC2_LOG_RETENTION_DAYS          - retention period in days; empty/unset leaves the config untouched
#   $Env:ENABLE_EC2_LOG_RETENTION_POLICY - explicit opt-in; anything other than "true" skips the change
# Best-effort: problems are reported as warnings and the install continues.
Write-Output "Configuring CloudWatch agent log retention..."
if ($Env:EC2_LOG_RETENTION_DAYS -and $Env:ENABLE_EC2_LOG_RETENTION_POLICY -eq "true") {
  Write-Output "Setting CloudWatch EC2 log retention to $($Env:EC2_LOG_RETENTION_DAYS) days"
  Write-Output "WARNING: This will delete EC2 logs older than $($Env:EC2_LOG_RETENTION_DAYS) days from existing log groups"

  $configFile = "C:\ProgramData\Amazon\AmazonCloudWatchAgent\amazon-cloudwatch-agent.json"
  if (Test-Path $configFile) {
    # Add retention_in_days to all collect_list items using jq.
    # The object key is written without double quotes (valid jq) so the filter
    # does not depend on how embedded quotes are passed to a native executable.
    $jqFilter = '.logs.logs_collected.files.collect_list |= map(. + {retention_in_days: ($retention | tonumber)}) | ' +
                '.logs.logs_collected.windows_events.collect_list |= map(. + {retention_in_days: ($retention | tonumber)})'

    # Capture the output instead of redirecting with ">": in Windows PowerShell
    # that operator writes UTF-16, and it would create the file even when jq fails.
    $updatedConfig = & jq --arg retention $Env:EC2_LOG_RETENTION_DAYS $jqFilter $configFile

    if (($LASTEXITCODE -ne 0) -or (-not $updatedConfig)) {
      Write-Output "Warning: Failed to add retention_in_days to $configFile, leaving it unchanged"
    } else {
      $configWritten = $false
      try {
        # Overwrite the existing config in place as UTF-8 without a BOM.
        # (Move-Item without -Force cannot replace a file that already exists.)
        $utf8NoBom = New-Object System.Text.UTF8Encoding($false)
        [System.IO.File]::WriteAllLines($configFile, [string[]]$updatedConfig, $utf8NoBom)
        $configWritten = $true
      } catch {
        Write-Output "Warning: Failed to write updated configuration to ${configFile}: $($_.Exception.Message)"
      }

      if ($configWritten) {
        # Restart CloudWatch agent to pick up the new configuration
        try {
          & "C:\Program Files\Amazon\AmazonCloudWatchAgent\amazon-cloudwatch-agent-ctl.ps1" -a fetch-config -m ec2 -s -c "file:$configFile"
          Write-Output "CloudWatch agent configuration updated and restarted"
        } catch {
          Write-Output "Warning: Failed to restart CloudWatch agent: $($_.Exception.Message)"
        }
      }
    }
  } else {
    Write-Output "Warning: CloudWatch agent config file not found at $configFile"
  }
} elseif ($Env:EC2_LOG_RETENTION_DAYS) {
  Write-Output "EC2 log retention set to $($Env:EC2_LOG_RETENTION_DAYS) days but EnableEC2LogRetentionPolicy is false"
  Write-Output "Skipping EC2 log retention configuration to protect existing logs"
} else {
  Write-Output "EC2 log retention not set, using CloudWatch agent defaults (never expire)"
}
343+
312344
# re-enable debug tracing
313345
Set-PSDebug -Trace 2
314346

templates/aws-stack.yml

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ Metadata:
5959
- BuildkiteWindowsAdministrator
6060
- BuildkiteAgentScalerServerlessARN
6161
- BuildkiteAgentScalerVersion
62+
- EnableEC2LogRetentionPolicy
63+
- EC2LogRetentionDays
6264
- LogRetentionDays
6365
- BuildkiteAgentEnableGracefulShutdown
6466

@@ -236,10 +238,27 @@ Parameters:
236238
Type: Number
237239
Default: 3600
238240

241+
EnableEC2LogRetentionPolicy:
242+
Type: String
243+
Default: "false"
244+
AllowedValues: ["true", "false"]
245+
Description: >
246+
Enable CloudWatch log retention policy for EC2 instance logs managed by the CloudWatch agent.
247+
When enabled, EC2 logs older than EC2LogRetentionDays will be automatically deleted to reduce storage costs.
248+
This only affects logs from Buildkite agents, system logs, and other EC2-generated logs - not Lambda or other AWS service logs.
249+
WARNING: For existing stacks, this will delete historical EC2 logs older than the retention period. This action cannot be undone.
250+
251+
EC2LogRetentionDays:
252+
Type: Number
253+
Description: The number of days to retain CloudWatch Logs for EC2 instances managed by the CloudWatch agent (Buildkite agents, system logs, etc).
254+
Default: 7
255+
AllowedValues: [1, 3, 5, 7, 14, 30, 60, 90, 120, 150, 180, 365, 400, 545, 731, 1827, 3653]
256+
239257
LogRetentionDays:
240258
Type: Number
241-
Description: The number of days to retain the Cloudwatch Logs of the lambda.
242-
Default: "1"
259+
Description: The number of days to retain CloudWatch Logs for Lambda functions in the stack.
260+
Default: 1
261+
AllowedValues: [1, 3, 5, 7, 14, 30, 60, 90, 120, 150, 180, 365, 400, 545, 731, 1827, 3653]
243262

244263
BuildkiteAgentEnableGracefulShutdown:
245264
Description: >
@@ -1358,6 +1377,7 @@ Resources:
13581377
- logs:PutLogEvents
13591378
- logs:DescribeLogGroups
13601379
- logs:DescribeLogStreams
1380+
- logs:PutRetentionPolicy
13611381
Resource: "*"
13621382
- Sid: Ssm
13631383
Effect: Allow
@@ -1622,6 +1642,8 @@ Resources:
16221642
$Env:ECR_PLUGIN_ENABLED="${EnableECRPlugin}"
16231643
$Env:DOCKER_LOGIN_PLUGIN_ENABLED="${EnableDockerLoginPlugin}"
16241644
$Env:AWS_REGION="${AWS::Region}"
1645+
$Env:ENABLE_EC2_LOG_RETENTION_POLICY="${EnableEC2LogRetentionPolicy}"
1646+
$Env:EC2_LOG_RETENTION_DAYS="${EC2LogRetentionDays}"
16251647
powershell -file C:\buildkite-agent\bin\bk-install-elastic-stack.ps1 >> C:\buildkite-agent\elastic-stack.log
16261648
</powershell>
16271649
- LocalSecretsBucket: !If
@@ -1721,6 +1743,8 @@ Resources:
17211743
RESOURCE_LIMITS_CPU_WEIGHT="${ResourceLimitsCPUWeight}" \
17221744
RESOURCE_LIMITS_CPU_QUOTA="${ResourceLimitsCPUQuota}" \
17231745
RESOURCE_LIMITS_IO_WEIGHT="${ResourceLimitsIOWeight}" \
1746+
ENABLE_EC2_LOG_RETENTION_POLICY="${EnableEC2LogRetentionPolicy}" \
1747+
EC2_LOG_RETENTION_DAYS="${EC2LogRetentionDays}" \
17241748
/usr/local/bin/bk-install-elastic-stack.sh
17251749
--==BOUNDARY==--
17261750
- LocalSecretsBucket: !If

0 commit comments

Comments
 (0)