Skip to content

Commit d560291

Browse files
authored
feat: pgbouncer health check (#183)
* fix: improve dpkg lock check in pgbouncer setup, add unattended-upgrades
* chore: add some health checks to the deployment action
* feat: add health check custom resource for pgbouncer
1 parent f9e098b commit d560291

File tree

9 files changed

+2969
-286
lines changed

9 files changed

+2969
-286
lines changed

.github/workflows/deploy.yaml

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,110 @@ jobs:
6565
uv run npx cdk deploy --ci --all --require-approval never
6666
cd -
6767
68+
- name: Get stack outputs and run operational checks
  id: operational_checks
  env:
    PROJECT_ID: ${{ steps.short-sha.outputs.sha }}
  run: |
    # Collect the CloudFormation outputs of every deployed stack into
    # stack_outputs.json, then probe the health endpoint of each API whose
    # URL appears in those outputs. Any non-200 response fails the step.
    cd integration_tests/cdk

    echo "=== Retrieving Stack Outputs ==="

    # Get list of deployed stacks
    DEPLOYED_STACKS=$(uv run npx cdk list)
    echo "Deployed stacks: $DEPLOYED_STACKS"

    # Create outputs file
    echo "{}" > stack_outputs.json

    # Get outputs from each stack
    for STACK_NAME in $DEPLOYED_STACKS; do
      echo "Processing stack: $STACK_NAME"

      # Try to get outputs using AWS CLI; a stack that cannot be described
      # is treated as having no outputs instead of aborting the step.
      STACK_OUTPUTS=$(aws cloudformation describe-stacks \
        --stack-name "$STACK_NAME" \
        --query 'Stacks[0].Outputs' \
        --output json 2>/dev/null || echo "[]")

      if [ "$STACK_OUTPUTS" != "[]" ] && [ "$STACK_OUTPUTS" != "null" ]; then
        echo "Found outputs for $STACK_NAME:"
        echo "$STACK_OUTPUTS" | jq .

        # Convert to key-value format and merge with existing outputs
        TEMP_OUTPUTS=$(echo "$STACK_OUTPUTS" | jq -r 'if . then [.[] | select(.OutputKey and .OutputValue) | {(.OutputKey): .OutputValue}] | add // {} else {} end')
        echo "$TEMP_OUTPUTS" > temp_stack_outputs.json

        # Merge with existing outputs (later stacks win on key collisions)
        jq -s '.[0] * .[1]' stack_outputs.json temp_stack_outputs.json > merged.json
        mv merged.json stack_outputs.json
      else
        echo "No outputs found for $STACK_NAME"
      fi
    done

    echo "=== Final Combined Outputs ==="
    jq . stack_outputs.json

    echo "=== Running Operational Checks ==="

    echo "Available CloudFormation output keys:"
    jq -r 'keys[]' stack_outputs.json

    # Print the value of the first output whose key matches the given
    # case-insensitive regex; prints nothing when no key matches.
    get_output_url() {
      jq -r --arg pattern "$1" '
        to_entries[] |
        select(.key | test($pattern; "i")) |
        .value' stack_outputs.json | head -1
    }

    STAC_API_URL=$(get_output_url "pgstacapistacapioutput")
    TITILER_PGSTAC_API_URL=$(get_output_url "titilerpgstacapioutput")
    TIPG_API_URL=$(get_output_url "tipgapioutput")

    echo "Extracted URLs:"
    echo "STAC_API_URL: $STAC_API_URL"
    echo "TITILER_PGSTAC_API_URL: $TITILER_PGSTAC_API_URL"
    echo "TIPG_API_URL: $TIPG_API_URL"

    # Probe one API health endpoint.
    #   $1: label used in log messages
    #   $2: base URL taken from the stack outputs (may be empty)
    #   $3: health path relative to the base URL
    # The base URL is tested BEFORE the path is appended; testing the
    # concatenated string would never be empty, so a missing output would
    # go unnoticed and curl would be pointed at a bogus URL.
    check_api() {
      local name="$1" base_url="$2" health_path="$3"

      if [ -z "$base_url" ] || [ "$base_url" = "null" ]; then
        echo "⚠️ $name URL not found in stack outputs"
        return 0
      fi

      # Strip an optional trailing slash so exactly one separates base and path.
      local url="${base_url%/}/${health_path}"
      echo "Checking $name at: $url"

      # On a transport failure curl already prints 000 through -w, so assign
      # the fallback instead of echoing a second status into the capture.
      local status
      status=$(curl -s -o /dev/null -w "%{http_code}" --max-time 30 "$url") || status="000"

      if [ "$status" = "200" ]; then
        echo "✅ $name returned 200"
      else
        echo "❌ $name returned $status"
        exit 1
      fi
    }

    # Check each API
    echo "=== Sleeping for 5 minutes to ensure everything is running ==="
    sleep 300

    check_api "STAC_API_URL" "$STAC_API_URL" "_mgmt/health"
    check_api "TITILER_PGSTAC_API_URL" "$TITILER_PGSTAC_API_URL" "healthz"
    check_api "TIPG_API_URL" "$TIPG_API_URL" "healthz"

    echo "=== Operational Checks Complete ==="
    cd -
171+
68172
- name: Tear down any infrastructure
69173
if: always()
70174
env:

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,5 @@ tests/*.egg*
1212
tests/*venv*
1313
tests/__pycache__
1414
integration_tests/cdk/cdk.out
15+
integration_tests/cdk/stack_outputs.json
16+
integration_tests/cdk/temp_stack_outputs.json

lib/database/PgBouncer.ts

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import {
44
aws_lambda as lambda,
55
aws_secretsmanager as secretsmanager,
66
CustomResource,
7+
Duration,
78
Stack,
89
} from "aws-cdk-lib";
910
import { Construct } from "constructs";
@@ -66,6 +67,7 @@ export class PgBouncer extends Construct {
6667
public readonly pgbouncerSecret: secretsmanager.Secret;
6768
public readonly securityGroup: ec2.SecurityGroup;
6869
public readonly secretUpdateComplete: CustomResource;
70+
public readonly healthCheck: CustomResource;
6971

7072
// The max_connections parameter in PgBouncer determines the maximum number of
7173
// connections to open on the actual database instance. We want that number to
@@ -220,6 +222,47 @@ export class PgBouncer extends Construct {
220222
},
221223
}
222224
);
225+
226+
// Add health check custom resource
227+
const healthCheckFunction = new lambda.Function(
228+
this,
229+
"HealthCheckFunction",
230+
{
231+
runtime: lambda.Runtime.NODEJS_20_X,
232+
handler: "index.handler",
233+
timeout: Duration.minutes(10),
234+
code: lambda.Code.fromAsset(
235+
path.join(__dirname, "lambda/pgbouncer-health-check")
236+
),
237+
description: "PgBouncer health check function",
238+
}
239+
);
240+
241+
// Grant SSM permissions for health check
242+
healthCheckFunction.addToRolePolicy(
243+
new iam.PolicyStatement({
244+
actions: [
245+
"ssm:SendCommand",
246+
"ssm:GetCommandInvocation",
247+
"ssm:DescribeInstanceInformation",
248+
"ssm:ListCommandInvocations",
249+
],
250+
resources: ["*"],
251+
})
252+
);
253+
254+
this.healthCheck = new CustomResource(this, "PgBouncerHealthCheck", {
255+
serviceToken: healthCheckFunction.functionArn,
256+
properties: {
257+
InstanceId: this.instance.instanceId,
258+
// Add timestamp to force re-execution on stack updates
259+
Timestamp: new Date().toISOString(),
260+
},
261+
});
262+
263+
// Ensure health check runs after instance is created but before secret update
264+
this.healthCheck.node.addDependency(this.instance);
265+
this.secretUpdateComplete.node.addDependency(this.healthCheck);
223266
}
224267

225268
private loadUserDataScript(

lib/database/index.ts

Lines changed: 43 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,47 @@ function hasVpc(
2727
}
2828

2929
/**
30-
* An RDS instance with pgSTAC installed. This is a wrapper around the
31-
* `rds.DatabaseInstance` higher-level construct making use
32-
* of the BootstrapPgStac construct.
30+
* An RDS instance with pgSTAC installed and PgBouncer connection pooling.
31+
*
32+
* This construct creates an optimized pgSTAC database setup that includes:
33+
* - RDS PostgreSQL instance with pgSTAC extension
34+
* - PgBouncer connection pooler (enabled by default)
35+
* - Automated health monitoring system
36+
* - Optimized database parameters for the selected instance type
37+
*
38+
* ## Connection Pooling with PgBouncer
39+
*
40+
* By default, this construct deploys PgBouncer as a connection pooler running on
41+
* a dedicated EC2 instance. PgBouncer provides several benefits:
42+
*
43+
* - **Connection Management**: Pools and reuses database connections to reduce overhead
44+
* - **Performance**: Optimizes connection handling for high-traffic applications
45+
* - **Scalability**: Allows more concurrent connections than the RDS instance alone
46+
* - **Health Monitoring**: Includes comprehensive health checks to ensure availability
47+
*
48+
* ### PgBouncer Configuration
49+
* - Pool mode: Transaction-level pooling (default)
50+
* - Maximum client connections: 1000
51+
* - Default pool size: 20 connections per database/user combination
52+
* - Instance type: t3.micro EC2 instance
53+
*
54+
* ### Health Check System
55+
* The construct includes an automated health check system that validates:
56+
* - PgBouncer service is running and listening on port 5432
57+
* - Connection tests to ensure accessibility
58+
* - Cloud-init setup completion before validation
59+
* - Detailed diagnostics for troubleshooting
60+
*
61+
* ### Connection Details
62+
* When PgBouncer is enabled, applications connect through the PgBouncer instance
63+
* rather than directly to RDS. The `pgstacSecret` contains connection information
64+
* pointing to PgBouncer, and the `connectionTarget` property refers to the
65+
* PgBouncer EC2 instance.
66+
*
67+
* To disable PgBouncer and connect directly to RDS, set `addPgbouncer: false`.
68+
*
69+
* This is a wrapper around the `rds.DatabaseInstance` higher-level construct
70+
* making use of the BootstrapPgStac construct.
3371
*/
3472
export class PgStacDatabase extends Construct {
3573
db: rds.DatabaseInstance;
@@ -40,6 +78,7 @@ export class PgStacDatabase extends Construct {
4078
public readonly connectionTarget: rds.IDatabaseInstance | ec2.Instance;
4179
public readonly securityGroup?: ec2.SecurityGroup;
4280
public readonly secretBootstrapper?: CustomResource;
81+
public readonly pgbouncerHealthCheck?: CustomResource;
4382

4483
constructor(scope: Construct, id: string, props: PgStacDatabaseProps) {
4584
super(scope, id);
@@ -186,6 +225,7 @@ export class PgStacDatabase extends Construct {
186225
this.connectionTarget = this._pgBouncerServer.instance;
187226
this.securityGroup = this._pgBouncerServer.securityGroup;
188227
this.secretBootstrapper = this._pgBouncerServer.secretUpdateComplete;
228+
this.pgbouncerHealthCheck = this._pgBouncerServer.healthCheck;
189229
} else {
190230
this.connectionTarget = this.db;
191231
}

0 commit comments

Comments
 (0)