Skip to content

Commit f69aa96

Browse files
committed
feat(RHIDP-11180): Ensure the test instance of RHDH can be deployed with up to 10 replicas
Signed-off-by: Pavel Macík <pavel.macik@gmail.com> Helped-by: Cursor
1 parent 7689ba4 commit f69aa96

File tree

5 files changed

+268
-25
lines changed

5 files changed

+268
-25
lines changed

ci-scripts/rhdh-setup/deploy.sh

Lines changed: 81 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,8 @@ export KEYCLOAK_LOG_LEVEL="${KEYCLOAK_LOG_LEVEL:-WARN}"
7676
export PSQL_LOG="${PSQL_LOG:-true}"
7777
export RHDH_METRIC="${RHDH_METRIC:-true}"
7878
export PSQL_EXPORT="${PSQL_EXPORT:-false}"
79+
export ENABLE_PGBOUNCER="${ENABLE_PGBOUNCER:-true}"
80+
export PGBOUNCER_REPLICAS="${PGBOUNCER_REPLICAS:-2}"
7981
export LOG_MIN_DURATION_STATEMENT="${LOG_MIN_DURATION_STATEMENT:-65}"
8082
export LOG_MIN_DURATION_SAMPLE="${LOG_MIN_DURATION_SAMPLE:-50}"
8183
export LOG_STATEMENT_SAMPLE_RATE="${LOG_STATEMENT_SAMPLE_RATE:-0.7}"
@@ -85,26 +87,36 @@ export INSTALL_METHOD=helm
8587
TMP_DIR=$(python3 -c 'import os, sys; print(os.path.realpath(sys.argv[1]))' "${TMP_DIR:-.tmp}")
8688
mkdir -p "${TMP_DIR}"
8789

88-
wait_to_start_in_namespace() {
90+
#######################################
# Block until a resource can be fetched with "$cli ... get", polling every
# 10 seconds.
# Globals:   cli (read)            - command used to query the cluster
#            RHDH_NAMESPACE (read) - default namespace
# Arguments: $1 - namespace (default: $RHDH_NAMESPACE)
#            $2 - resource kind (default: deployment)
#            $3 - resource name (default: the literal string "name")
#            $4 - seconds to keep polling before giving up (default: 300)
#            $5 - description used in log messages (default: <kind>/<name>)
# Outputs:   whatever "$cli ... get -o name" prints once the resource is found
# Returns:   0 when the resource exists; on timeout it logs an error and
#            EXITS the shell with status 1 (it does not return).
#######################################
wait_to_exist() {
  local namespace=${1:-${RHDH_NAMESPACE}}
  local resource=${2:-deployment}
  local name=${3:-name}
  local initial_timeout=${4:-300}
  local rn=$resource/$name
  local description=${5:-$rn}
  local interval=10s
  local timeout_timestamp
  # Same clock source as the check inside the loop, no extra interpreter.
  timeout_timestamp=$(($(date "+%s") + initial_timeout))

  # $cli is left unquoted on purpose so a multi-word command splits into
  # separate arguments; namespace and resource are passed as single arguments
  # instead of being re-parsed by a nested shell.
  while ! $cli -n "$namespace" get "$rn" -o name; do
    if [ "$(date "+%s")" -gt "$timeout_timestamp" ]; then
      log_error "Timeout waiting for $description to exist"
      exit 1
    fi
    log_info "Waiting $interval for $description to exist..."
    sleep "$interval"
  done
}
110+
111+
#######################################
# Wait for a resource to exist and then for its rollout to complete.
# Globals:   cli (read)            - command used to query the cluster
#            RHDH_NAMESPACE (read) - default namespace
# Arguments: $1 - namespace (default: $RHDH_NAMESPACE)
#            $2 - resource kind (default: deployment)
#            $3 - resource name (default: the literal string "name")
#            $4 - seconds to wait for the resource to exist (default: 300)
#            $5 - seconds passed to "rollout status --timeout" (default: 300)
#            $6 - description used in log messages (default: <kind>/<name>)
# Returns:   exit status of "$cli ... rollout status"
#######################################
wait_to_start_in_namespace() {
  local namespace=${1:-${RHDH_NAMESPACE}}
  local resource=${2:-deployment}
  local name=${3:-name}
  local initial_timeout=${4:-300}
  local wait_timeout=${5:-300}
  local rn=$resource/$name
  local description=${6:-$rn}

  wait_to_exist "$namespace" "$resource" "$name" "$initial_timeout" "$description"
  # Last command of the function, so its status is the function's status.
  # $cli is left unquoted on purpose so a multi-word command splits into argv.
  $cli -n "$namespace" rollout status "$rn" --timeout="${wait_timeout}s"
}
@@ -169,6 +181,21 @@ wait_to_start() {
169181
return $?
170182
}
171183

184+
#######################################
# Restart the RHDH deployment by scaling it to 0 and then back up one replica
# at a time, waiting for each rollout before adding the next replica.
# Globals:   INSTALL_METHOD (read)          - "helm" or "olm"
#            RHDH_HELM_RELEASE_NAME (read)  - used for the helm deployment name
#            clin (read)                    - command used to scale deployments
# Arguments: $1 - target number of replicas (default: 1)
# Returns:   0 on success; 1 for an unsupported INSTALL_METHOD; otherwise the
#            status of the first scale or wait step that failed.
#######################################
restart_rhdh_deployment() {
  local replica_count=${1:-1}
  local rhdh_deployment replicas

  case "${INSTALL_METHOD:-}" in
    helm) rhdh_deployment="${RHDH_HELM_RELEASE_NAME}-developer-hub" ;;
    olm) rhdh_deployment="backstage-developer-hub" ;;
    *)
      # Without this guard the scale commands would run with an empty name.
      log_error "Unsupported INSTALL_METHOD '${INSTALL_METHOD:-}', cannot restart RHDH deployment"
      return 1
      ;;
  esac

  # $clin is left unquoted on purpose so a multi-word command splits into argv.
  $clin scale deployment "$rhdh_deployment" --replicas=0 || return
  for ((replicas = 1; replicas <= replica_count; replicas++)); do
    log_info "Scaling developer-hub deployment to $replicas/$replica_count replicas"
    $clin scale deployment "$rhdh_deployment" --replicas="$replicas" || return
    # Stop here if this replica never becomes ready: adding more replicas on
    # top of a failed one would defeat the one-by-one startup.
    wait_to_start deployment "$rhdh_deployment" 300 300 || return
  done
}
198+
172199
label() {
173200
namespace=$1
174201
resource=$2
@@ -227,6 +254,9 @@ install() {
227254
install_workflows
228255
fi
229256
psql_debug
257+
258+
log_info "Scaling RHDH deployment to $RHDH_DEPLOYMENT_REPLICAS replicas"
259+
restart_rhdh_deployment "$RHDH_DEPLOYMENT_REPLICAS"
230260
}
231261

232262
keycloak_install() {
@@ -466,12 +496,20 @@ install_rhdh_with_helm() {
466496
log_info "Applying pod affinity for multiple replicas to schedule on same node"
467497
yq -i '.upstream.backstage |= . + load("template/backstage/helm/pod-affinity-patch.yaml")' "$TMP_DIR/chart-values.temp.yaml"
468498
fi
499+
# Connection sizing parameters
500+
CLIENT_CONNECTIONS_PER_RHDH_INSTANCE=50 # Expected DB connections per RHDH replica
501+
DB_CONNECTIONS_HEADROOM_PER_RHDH_INSTANCE=5 # Extra headroom per RHDH replica
502+
DB_CONNECTIONS_ADMIN_HEADROOM=20 # Reserved for admin/monitoring connections
503+
504+
export RHDH_DB_MAX_CONNECTIONS
505+
RHDH_DB_MAX_CONNECTIONS=$(bc <<<"$RHDH_DEPLOYMENT_REPLICAS * ($CLIENT_CONNECTIONS_PER_RHDH_INSTANCE + $DB_CONNECTIONS_HEADROOM_PER_RHDH_INSTANCE)")
469506
envsubst \
470507
'${OPENSHIFT_APP_DOMAIN} \
471508
${RHDH_HELM_RELEASE_NAME} \
472509
${RHDH_HELM_CHART} \
473510
${RHDH_DEPLOYMENT_REPLICAS} \
474511
${RHDH_DB_REPLICAS} \
512+
${RHDH_DB_MAX_CONNECTIONS} \
475513
${RHDH_DB_STORAGE} \
476514
${RHDH_IMAGE_REGISTRY} \
477515
${RHDH_IMAGE_REPO} \
@@ -496,10 +534,43 @@ install_rhdh_with_helm() {
496534
yq -i '.upstream.backstage.readinessProbe |= {"httpGet":{"path":"/healthcheck","port":7007,"scheme":"HTTP"},"initialDelaySeconds":30,"timeoutSeconds":2,"periodSeconds":300,"successThreshold":1,"failureThreshold":3}' "$TMP_DIR/chart-values.yaml"
497535
yq -i '.upstream.backstage.livenessProbe |= {"httpGet":{"path":"/healthcheck","port":7007,"scheme":"HTTP"},"initialDelaySeconds":30,"timeoutSeconds":2,"periodSeconds":300,"successThreshold":1,"failureThreshold":3}' "$TMP_DIR/chart-values.yaml"
498536
fi
537+
538+
# Configure database host (PgBouncer or direct PostgreSQL)
539+
if ${ENABLE_PGBOUNCER}; then
540+
log_info "PgBouncer enabled - configuring Backstage to connect via PgBouncer"
541+
yq -i '.upstream.backstage.appConfig.database.connection.host = "'"${RHDH_HELM_RELEASE_NAME}"'-pgbouncer"' "$TMP_DIR/chart-values.yaml"
542+
fi
543+
499544
#shellcheck disable=SC2086
500-
helm upgrade "${RHDH_HELM_RELEASE_NAME}" -i ${RHDH_HELM_REPO} ${version_arg} -n "${RHDH_NAMESPACE}" --values "$TMP_DIR/chart-values.yaml"
501-
wait_to_start statefulset "${RHDH_HELM_RELEASE_NAME}-postgresql-read" 300 300
502-
wait_to_start deployment "${RHDH_HELM_RELEASE_NAME}-developer-hub" 300 300
545+
helm upgrade "${RHDH_HELM_RELEASE_NAME}" -i "${RHDH_HELM_REPO}" ${version_arg} -n "${RHDH_NAMESPACE}" --values "$TMP_DIR/chart-values.yaml"
546+
547+
# Patch deployment strategy to start replicas one by one
548+
log_info "Patching RHDH deployment strategy for sequential replica startup"
549+
wait_to_exist "${RHDH_NAMESPACE}" deployment "${RHDH_HELM_RELEASE_NAME}-developer-hub" 300 "RHDH deployment"
550+
$clin patch deployment "${RHDH_HELM_RELEASE_NAME}-developer-hub" --type='merge' -p '{"spec":{"strategy":{"type":"RollingUpdate","rollingUpdate":{"maxUnavailable":0,"maxSurge":1}}}}'
551+
552+
wait_to_start statefulset "${RHDH_HELM_RELEASE_NAME}-postgresql-primary" 300 300
553+
554+
# Deploy PgBouncer if enabled
555+
if ${ENABLE_PGBOUNCER}; then
556+
log_info "Deploying PgBouncer connection pooler"
557+
POSTGRESQL_ADMIN_PASSWORD=$($clin get secret "${RHDH_HELM_RELEASE_NAME}-postgresql" -o jsonpath='{.data.postgres-password}' | base64 -d)
558+
export POSTGRESQL_ADMIN_PASSWORD PGBOUNCER_MAX_CLIENT_CONNECTIONS PGBOUNCER_DEFAULT_POOL_SIZE PGBOUNCER_MAX_DB_CONNECTIONS PGBOUNCER_MAX_USER_CONNECTIONS
559+
560+
# Each PgBouncer instance is sized to handle ALL client connections (for HA/failover)
561+
PGBOUNCER_MAX_CLIENT_CONNECTIONS=$(bc <<<"scale=0; $RHDH_DEPLOYMENT_REPLICAS * $CLIENT_CONNECTIONS_PER_RHDH_INSTANCE" | sed 's,\..*,,')
562+
PGBOUNCER_DEFAULT_POOL_SIZE=$(bc <<<"scale=0; $PGBOUNCER_MAX_CLIENT_CONNECTIONS / 5" | sed 's,\..*,,')
563+
# Backend connections per instance - divided by PGBOUNCER_REPLICAS to ensure total doesn't exceed PostgreSQL max_connections
564+
PGBOUNCER_MAX_DB_CONNECTIONS=$(bc <<<"scale=0; ($RHDH_DB_MAX_CONNECTIONS - $DB_CONNECTIONS_ADMIN_HEADROOM) / $PGBOUNCER_REPLICAS" | sed 's,\..*,,')
565+
PGBOUNCER_MAX_USER_CONNECTIONS=$(bc <<<"scale=0; $PGBOUNCER_MAX_DB_CONNECTIONS * 1.2" | sed 's,\..*,,')
566+
567+
envsubst '${RHDH_HELM_RELEASE_NAME} ${RHDH_NAMESPACE} ${POSTGRESQL_ADMIN_PASSWORD} ${PGBOUNCER_REPLICAS} ${PGBOUNCER_MAX_CLIENT_CONNECTIONS} ${PGBOUNCER_DEFAULT_POOL_SIZE} ${PGBOUNCER_MAX_DB_CONNECTIONS} ${PGBOUNCER_MAX_USER_CONNECTIONS}' \
568+
<template/backstage/helm/pgbouncer.yaml >"$TMP_DIR/pgbouncer.yaml"
569+
$clin apply -f "$TMP_DIR/pgbouncer.yaml"
570+
wait_to_start deployment "${RHDH_HELM_RELEASE_NAME}-pgbouncer" 300 300
571+
fi
572+
573+
restart_rhdh_deployment 1
503574
return $?
504575
}
505576

@@ -545,11 +616,9 @@ psql_debug() {
545616
if [ "$INSTALL_METHOD" == "helm" ]; then
546617
psql_db_ss="${RHDH_HELM_RELEASE_NAME}-postgresql-primary"
547618
psql_db="${psql_db_ss}-0"
548-
rhdh_deployment="${RHDH_HELM_RELEASE_NAME}-developer-hub"
549619
elif [ "$INSTALL_METHOD" == "olm" ]; then
550620
psql_db_ss=backstage-psql-developer-hub
551621
psql_db="${psql_db_ss}-0"
552-
rhdh_deployment=backstage-developer-hub
553622
fi
554623
if ${PSQL_LOG}; then
555624
log_info "Setting up PostgreSQL logging"
@@ -595,8 +664,7 @@ psql_debug() {
595664

596665
if ${PSQL_LOG} || ${PSQL_EXPORT}; then
597666
log_info "Restarting RHDH..."
598-
$clin rollout restart deployment/"$rhdh_deployment"
599-
wait_to_start deployment "$rhdh_deployment" 300 300
667+
restart_rhdh_deployment 1
600668
fi
601669

602670
if ${PSQL_EXPORT}; then
@@ -629,7 +697,7 @@ setup_monitoring() {
629697
fi
630698

631699
if [ "$(yq '.prometheusK8s.volumeClaimTemplate' "$config")" == "null" ]; then
632-
yq -i '.prometheusK8s = {"volumeClaimTemplate":{"spec":{"storageClassName":"gp3-csi","volumeMode":"Filesystem","resources":{"requests":{"storage":"30Gi"}}}}}' "$config"
700+
yq -i '.prometheusK8s = {"volumeClaimTemplate":{"spec":{"storageClassName":"gp3-csi","volumeMode":"Filesystem","resources":{"requests":{"storage":"60Gi"}}}}}' "$config"
633701
update_config=1
634702
fi
635703

ci-scripts/rhdh-setup/template/backstage/helm/chart-values.image-override.yaml

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -25,16 +25,16 @@ orchestrator:
2525
upstream:
2626
backstage:
2727
appConfig:
28-
database:
29-
connection:
30-
password: "${POSTGRESQL_ADMIN_PASSWORD}"
31-
user: postgres
32-
auth:
33-
externalAccess:
34-
- type: legacy
35-
options:
36-
secret: ${BACKEND_SECRET}
37-
subject: legacy-catalog
28+
database:
29+
connection:
30+
password: "${POSTGRESQL_ADMIN_PASSWORD}"
31+
user: postgres
32+
auth:
33+
externalAccess:
34+
- type: legacy
35+
options:
36+
secret: ${BACKEND_SECRET}
37+
subject: legacy-catalog
3838
containerPorts:
3939
backend: 7007
4040
extraAppConfig:
@@ -101,12 +101,23 @@ upstream:
101101
primary:
102102
containerSecurityContext:
103103
enabled: false
104+
resources:
105+
requests:
106+
cpu: 1
107+
memory: 1Gi
108+
limits:
109+
cpu: 2
110+
memory: 5Gi
111+
extendedConfiguration: |
112+
max_connections = ${RHDH_DB_MAX_CONNECTIONS}
104113
extraEnvVars:
105114
- name: POSTGRESQL_ADMIN_PASSWORD
106115
valueFrom:
107116
secretKeyRef:
108117
key: postgres-password
109118
name: "{{ .Release.Name }}-postgresql"
119+
- name: POSTGRESQL_MAX_CONNECTIONS
120+
value: "${RHDH_DB_MAX_CONNECTIONS}"
110121
persistence:
111122
enabled: true
112123
mountPath: /var/lib/pgsql/data
@@ -120,12 +131,16 @@ upstream:
120131
readReplicas:
121132
containerSecurityContext:
122133
enabled: false
134+
extendedConfiguration: |
135+
max_connections = 500
123136
extraEnvVars:
124137
- name: POSTGRESQL_ADMIN_PASSWORD
125138
valueFrom:
126139
secretKeyRef:
127140
key: replication-password
128141
name: "{{ .Release.Name }}-postgresql"
142+
- name: POSTGRESQL_MAX_CONNECTIONS
143+
value: "500"
129144
podSecurityContext:
130145
enabled: false
131146
replicaCount: ${RHDH_DB_REPLICAS}

ci-scripts/rhdh-setup/template/backstage/helm/chart-values.yaml

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ upstream:
7676
value: "${RHDH_LOG_LEVEL}"
7777
- name: NODE_TLS_REJECT_UNAUTHORIZED
7878
value: "0"
79-
replicas: ${RHDH_DEPLOYMENT_REPLICAS}
79+
replicas: 1
8080
metrics:
8181
serviceMonitor:
8282
enabled: ${RHDH_METRIC}
@@ -96,12 +96,23 @@ upstream:
9696
primary:
9797
containerSecurityContext:
9898
enabled: false
99+
resources:
100+
requests:
101+
cpu: 1
102+
memory: 1Gi
103+
limits:
104+
cpu: 2
105+
memory: 5Gi
106+
extendedConfiguration: |
107+
max_connections = ${RHDH_DB_MAX_CONNECTIONS}
99108
extraEnvVars:
100109
- name: POSTGRESQL_ADMIN_PASSWORD
101110
valueFrom:
102111
secretKeyRef:
103112
key: postgres-password
104113
name: "{{ .Release.Name }}-postgresql"
114+
- name: POSTGRESQL_MAX_CONNECTIONS
115+
value: "${RHDH_DB_MAX_CONNECTIONS}"
105116
persistence:
106117
enabled: true
107118
mountPath: /var/lib/pgsql/data
@@ -115,12 +126,16 @@ upstream:
115126
readReplicas:
116127
containerSecurityContext:
117128
enabled: false
129+
extendedConfiguration: |
130+
max_connections = 650
118131
extraEnvVars:
119132
- name: POSTGRESQL_ADMIN_PASSWORD
120133
valueFrom:
121134
secretKeyRef:
122135
key: replication-password
123136
name: "{{ .Release.Name }}-postgresql"
137+
- name: POSTGRESQL_MAX_CONNECTIONS
138+
value: "650"
124139
podSecurityContext:
125140
enabled: false
126141
replicaCount: ${RHDH_DB_REPLICAS}

0 commit comments

Comments
 (0)