
Commit e667a58

andrewlecuyer authored and jkatz committed
Orchestrate Repo & Primary DB Deployments
This commit updates the cluster initialization logic to ensure the pgBackRest repository is online and ready before the primary database container is initialized. The repository host is therefore always available when it is needed to fully initialize the database cluster, e.g. to create the stanza, the initial backup, replicas, etc. This is especially beneficial in environments where latency delays pulling and initializing the pgBackRest repository image, which previously could cause database initialization (and therefore attempts to create the stanza, etc.) to begin before the repo came online.

To ensure the repo is always initialized before the database, the primary deployment is now initially created with 0 replicas. Once the Operator detects that the repo host is ready (specifically via a new repo init handler in the Pod controller), the primary database deployment is scaled to 1. From there, cluster creation proceeds in the same manner as before.

It should be noted that with this change the repo host also initially starts with 0 replicas, but is then immediately scaled up. This ensures that all Deployments and supporting resources (services, configMaps, PVCs, etc.) have been created and are ready before any Pods are created, preventing potential race conditions, e.g. the repo host being created and becoming ready prior to the creation of the primary DB deployment. With this solution we can be sure the primary deployment always exists by the time the repo host becomes ready and attempts to scale it to continue initialization of the cluster.
1 parent e21bc7d commit e667a58
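The ordering described above can be sketched in a few lines of client-go. This is a minimal illustration, not the Operator's actual code: scaleDeployment, initializeCluster, and the deployment name hippo-backrest-shared-repo are hypothetical stand-ins, and the real scaling goes through clusteroperator.ScaleClusterDeployments (shown in the diffs below).

package sketch

import (
	"context"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
)

// scaleDeployment sets a Deployment's replica count via the Scale subresource;
// a hypothetical stand-in for the Operator's own scaling helper.
func scaleDeployment(clientset kubernetes.Interface, namespace, name string, replicas int32) error {
	scale, err := clientset.AppsV1().Deployments(namespace).GetScale(context.TODO(), name, metav1.GetOptions{})
	if err != nil {
		return err
	}
	scale.Spec.Replicas = replicas
	_, err = clientset.AppsV1().Deployments(namespace).UpdateScale(context.TODO(), name, scale, metav1.UpdateOptions{})
	return err
}

// initializeCluster sketches the ordering this commit introduces. All
// Deployments (primary, replicas, repo) are assumed to already exist with
// spec.replicas = 0, so the primary Deployment is guaranteed to exist before
// the repo Pod can become ready.
func initializeCluster(clientset kubernetes.Interface, namespace string) error {
	// 1. bring the pgBackRest repo host online first
	if err := scaleDeployment(clientset, namespace, "hippo-backrest-shared-repo", 1); err != nil {
		return err
	}
	// 2. the Pod controller's new repo init handler reacts to the repo Pod
	//    becoming ready and scales the primary Deployment to 1, after which
	//    cluster creation proceeds as before (stanza, initial backup, replicas)
	return nil
}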

11 files changed (+152, −83 lines)


apiserver/root.go

Lines changed: 1 addition & 0 deletions
@@ -366,6 +366,7 @@ func UserIsPermittedInNamespace(username, requestedNS string) (iAccess, uAccess
 		requestedNS); err != nil && !errors.Is(err, ns.ErrNamespaceNotWatched) {
 		return
 	}
+	iAccess = true
 
 	//get the pgouser Secret for this username
 	userSecretName := "pgouser-" + username

conf/postgres-operator/pgo-backrest-repo-template.json

Lines changed: 1 addition & 1 deletion
@@ -12,7 +12,7 @@
     }
   },
   "spec": {
-    "replicas": 1,
+    "replicas": {{.Replicas}},
     "selector": {
       "matchLabels": {
         "name": "{{.Name}}",

config/labels.go

Lines changed: 1 addition & 1 deletion
@@ -169,5 +169,5 @@ const LABEL_PGHA_SCOPE = "crunchy-pgha-scope"
 const LABEL_PGHA_CONFIGMAP = "pgha-config"
 const LABEL_PGHA_BACKUP_TYPE = "pgha-backup-type"
 const LABEL_PGHA_ROLE = "role"
-const LABEL_PGHA_ROLE_PRIMARY = "primary"
+const LABEL_PGHA_ROLE_PRIMARY = "master"
 const LABEL_PGHA_ROLE_REPLICA = "replica"

controller/pgcluster/pgclustercontroller.go

Lines changed: 17 additions & 0 deletions
@@ -137,8 +137,25 @@ func (c *Controller) processNextItem() bool {
 
 	log.Debugf("pgcluster added: %s", cluster.ObjectMeta.Name)
 
+	// AddClusterBase creates all deployments for the cluster (in addition to various other supporting
+	// resources such as services, configMaps, secrets, etc.), but leaves them scaled to 0. This
+	// ensures all deployments exist as needed to properly orchestrate initialization of the
+	// cluster, e.g. we need to ensure the primary DB deployment resource has been created before
+	// bringing the repo deployment online, since that in turn will bring the primary DB online.
 	clusteroperator.AddClusterBase(c.PgclusterClientset, c.PgclusterClient, &cluster, cluster.ObjectMeta.Namespace)
 
+	// Now scale the repo deployment only to ensure it is initialized prior to the primary DB.
+	// Once the repo is ready, the primary database deployment will then also be scaled to 1.
+	clusterInfo, err := clusteroperator.ScaleClusterDeployments(c.PgclusterClientset,
+		cluster, 1, false, false, true, false)
+	if err != nil {
+		log.Error(err)
+		c.Queue.AddRateLimited(key)
+	}
+
+	log.Debugf("Scaled pgBackRest repo deployment %s to 1 to proceed with initializing "+
+		"cluster %s", clusterInfo.PrimaryDeployment, cluster.Name)
+
 	return true
 }

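The boolean arguments to ScaleClusterDeployments are not self-describing, and the function body is not part of this diff. Judging from the three call sites in this commit, the flags select (primary, replicas, backrestRepo, pgBouncer) in that order; the meaning of the last flag is an assumption, since no call site here sets it to true. A small illustrative decoder, not Operator code:

// targetsFor decodes the inferred flag order used by ScaleClusterDeployments:
//   (..., 1, false, false, true, false)  // repo only (pgcluster controller)
//   (..., 1, true,  false, false, false) // primary only (repo init handler)
//   (..., 1, false, true,  false, false) // replicas only (standby/startup init)
func targetsFor(primary, replicas, backrestRepo, pgBouncer bool) []string {
	var targets []string
	if primary {
		targets = append(targets, "primary")
	}
	if replicas {
		targets = append(targets, "replicas")
	}
	if backrestRepo {
		targets = append(targets, "backrest-repo")
	}
	if pgBouncer {
		targets = append(targets, "pgbouncer") // assumed; not exercised in this diff
	}
	return targets
}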
controller/pod/inithandler.go

Lines changed: 30 additions & 2 deletions
@@ -39,7 +39,18 @@ import (
 // primary PG pod for a new or restored PG cluster reaches a ready status
 func (c *Controller) handleClusterInit(newPod *apiv1.Pod, cluster *crv1.Pgcluster) error {
 
-	clusterName := cluster.Name
+	clusterName := cluster.GetName()
+
+	// first check to see if the update is for a repo pod. If so, call the repo init handler and
+	// return, since the other handlers are only applicable to PG pods
+	if isBackRestRepoPod(newPod) {
+		log.Debugf("Pod Controller: calling pgBackRest repo init for cluster %s", clusterName)
+		if err := c.handleBackRestRepoInit(cluster); err != nil {
+			log.Error(err)
+			return err
+		}
+		return nil
+	}
 
 	// handle common tasks for initializing a cluster, whether due to bootstrap or reinitialization
 	// following a restore, or if a regular or standby cluster
@@ -69,6 +80,23 @@
 	}
 }
 
+// handleBackRestRepoInit handles cluster initialization tasks that must be executed once
+// as a result of an update to a cluster's pgBackRest repository pod
+func (c *Controller) handleBackRestRepoInit(cluster *crv1.Pgcluster) error {
+
+	clusterInfo, err := clusteroperator.ScaleClusterDeployments(c.PodClientset, *cluster, 1,
+		true, false, false, false)
+	if err != nil {
+		log.Error(err)
+		return err
+	}
+
+	log.Debugf("Pod Controller: scaled primary deployment %s to 1 to proceed with initializing "+
+		"cluster %s", clusterInfo.PrimaryDeployment, cluster.Name)
+
+	return nil
+}
+
 // handleCommonInit is responsible for handling common initialization tasks for a PG cluster
 // regardless of the specific type of cluster (e.g. regular or standby) or the reason the
 // cluster is being initialized (initial bootstrap or restore)
@@ -169,7 +197,7 @@ func (c *Controller) handleStandbyInit(cluster *crv1.Pgcluster) error {
 	//
 
 	// now scale any replica deployments to 1
-	clusteroperator.ScaleClusterDeployments(c.PodClientset, *cluster, 1, false, true, false)
+	clusteroperator.ScaleClusterDeployments(c.PodClientset, *cluster, 1, false, true, false, false)
 
 	// Proceed with stanza creation if this is not a standby cluster, or if it is
 	// a standby cluster that does not have "s3" storage only enabled.

controller/pod/podcontroller.go

Lines changed: 74 additions & 59 deletions
@@ -84,14 +84,33 @@ func (c *Controller) onUpdate(oldObj, newObj interface{}) {
 	// Lookup the pgcluster CR for the PG cluster associated with this Pod. Since a 'pg-cluster'
 	// label was found on the updated Pod, this lookup should always succeed.
 	clusterName := newPodLabels[config.LABEL_PG_CLUSTER]
+	namespace := newPod.ObjectMeta.Namespace
 	cluster := crv1.Pgcluster{}
-	_, err := kubeapi.Getpgcluster(c.PodClient, &cluster, clusterName,
-		newPod.ObjectMeta.Namespace)
+	_, err := kubeapi.Getpgcluster(c.PodClient, &cluster, clusterName, namespace)
 	if err != nil {
 		log.Error(err.Error())
 		return
 	}
 
+	// For the following upgrade and cluster initialization scenarios we only care about updates
+	// where the database container within the pod is becoming ready. We can therefore return
+	// at this point if this condition is false.
+	if cluster.Status.State != crv1.PgclusterStateInitialized &&
+		(isDBContainerBecomingReady(oldPod, newPod) ||
+			isBackRestRepoBecomingReady(oldPod, newPod)) {
+		if err := c.handleClusterInit(newPod, &cluster); err != nil {
+			log.Error(err)
+			return
+		}
+		return
+	}
+
+	// the handlers called below are only applicable to PG pods when the cluster is
+	// in an initialized status
+	if cluster.Status.State != crv1.PgclusterStateInitialized || !isPostgresPod(newPod) {
+		return
+	}
+
 	// Handle the "role" label change from "replica" to "master" following a failover. This
 	// logic is only triggered when the cluster has already been initialized, which implies
 	// a failover or switchover has occurred.
@@ -104,8 +123,7 @@
 		}
 	}
 
-	if cluster.Status.State == crv1.PgclusterStateInitialized &&
-		isPromotedStandby(oldPod, newPod) {
+	if isPromotedStandby(oldPod, newPod) {
 		log.Debugf("Pod Controller: standby pod %s in namespace %s promoted, calling standby pod "+
 			"promotion handler", newPod.Name, newPod.Namespace)
 		if err := c.handleStandbyPromotion(newPod, cluster); err != nil {
@@ -114,32 +132,17 @@
 		}
 	}
 
-	// For the following upgrade and cluster initialization scenarios we only care about updates
-	// where the database container within the pod is becoming ready. We can therefore return
-	// at this point if this condition is false.
-	if !isDBContainerBecomingReady(oldPod, newPod) {
-		return
-	}
-
 	// First handle the pod update as needed if the update was part of an ongoing upgrade
 	if cluster.Labels[config.LABEL_MINOR_UPGRADE] == config.LABEL_UPGRADE_IN_PROGRESS {
-		log.Debugf("Pod Controller: upgrade pod %s now ready, calling pod upgrade "+
-			"handler", newPod.Name, newPod.Namespace)
+		log.Debugf("Pod Controller: upgrade pod %s (namespace %s) now ready, calling pod upgrade "+
+			"handler", newPod.Name, namespace)
 		if err := c.handleUpgradePodUpdate(newPod, &cluster); err != nil {
 			log.Error(err)
 			return
 		}
 	}
 
-	// Handle postgresql pod updates as needed for cluster initialization
-	if cluster.Status.State != crv1.PgclusterStateInitialized && isPostgresPrimaryPod(newPod) {
-		log.Debugf("Pod Controller: pg pod %s now ready in an uninitialized cluster, calling "+
-			"cluster init handler", newPod.Name, newPod.Namespace)
-		if err := c.handleClusterInit(newPod, &cluster); err != nil {
-			log.Error(err)
-			return
-		}
-	}
+	return
 }
 
 // onDelete is called when a pgcluster is deleted
@@ -166,25 +169,43 @@ func (c *Controller) AddPodEventHandler() {
 	log.Debugf("Pod Controller: added event handler to informer")
 }
 
-// isDBContainerBecomingReady checks to see if the Pod update shows that the Pod has
-// transitioned from an 'unready' status to a 'ready' status.
-func isDBContainerBecomingReady(oldPod, newPod *apiv1.Pod) bool {
-	if !isPostgresPod(newPod) {
+// isBackRestRepoBecomingReady checks to see if the Pod update shows that the BackRest
+// repo Pod has transitioned from an 'unready' status to a 'ready' status.
+func isBackRestRepoBecomingReady(oldPod, newPod *apiv1.Pod) bool {
+	if !isBackRestRepoPod(newPod) {
 		return false
 	}
-	var oldDatabaseStatus bool
-	// first see if the old version of the pod was not ready
+	return isContainerBecomingReady("database", oldPod, newPod)
+}
+
+// isBackRestRepoPod determines whether or not a pod is a pgBackRest repository Pod. This is
+// determined by checking to see if the 'pgo-backrest-repo' label is present on the Pod (also,
+// this controller will only process pods with the 'vendor=crunchydata' label, so that label is
+// assumed to be present), specifically because this label will only be included on pgBackRest
+// repository Pods.
+func isBackRestRepoPod(newpod *apiv1.Pod) bool {
+
+	_, backrestRepoLabelExists := newpod.ObjectMeta.Labels[config.LABEL_PGO_BACKREST_REPO]
+
+	return backrestRepoLabelExists
+}
+
+// isContainerBecomingReady determines whether or not the specified container is moving
+// from an unready status to a ready status.
+func isContainerBecomingReady(containerName string, oldPod, newPod *apiv1.Pod) bool {
+	var oldContainerStatus bool
+	// first see if the old version of the container was not ready
 	for _, v := range oldPod.Status.ContainerStatuses {
-		if v.Name == "database" {
-			oldDatabaseStatus = v.Ready
+		if v.Name == containerName {
+			oldContainerStatus = v.Ready
 			break
 		}
 	}
-	// if the old version of the pod was not ready, now check if the
+	// if the old version of the container was not ready, now check if the
 	// new version is ready
-	if !oldDatabaseStatus {
+	if !oldContainerStatus {
 		for _, v := range newPod.Status.ContainerStatuses {
-			if v.Name == "database" {
+			if v.Name == containerName {
 				if v.Ready {
 					return true
 				}
@@ -194,19 +215,26 @@ func isDBContainerBecomingReady(oldPod, newPod *apiv1.Pod) bool {
 	return false
 }
 
-// isPostgresPrimaryPod determines whether or not the specific Pod provided is the primary database
-// Pod within a PG cluster. This is done by checking to see if the "role" label for the Pod is set
-// to either "master", as set by Patroni to identify the current primary, or "promoted", as set by
-// Patroni when promoting a replica to be the new primary.
-func isPostgresPrimaryPod(newPod *apiv1.Pod) bool {
+// isDBContainerBecomingReady checks to see if the Pod update shows that the Pod has
+// transitioned from an 'unready' status to a 'ready' status.
+func isDBContainerBecomingReady(oldPod, newPod *apiv1.Pod) bool {
 	if !isPostgresPod(newPod) {
 		return false
 	}
-	if newPod.ObjectMeta.Labels[config.LABEL_PGHA_ROLE] == "master" ||
-		newPod.ObjectMeta.Labels[config.LABEL_PGHA_ROLE] == "promoted" {
-		return true
-	}
-	return false
+	return isContainerBecomingReady("database", oldPod, newPod)
+}
+
+// isPostgresPod determines whether or not a pod is a PostgreSQL Pod, specifically either the
+// primary or a replica pod within a PG cluster. This is determined by checking to see if the
+// 'pgo-pg-database' label is present on the Pod (also, this controller will only process pods with
+// the 'vendor=crunchydata' label, so that label is assumed to be present), specifically because
+// this label will only be included on primary and replica PostgreSQL database pods (and will be
+// present as soon as the deployment and pod are created).
+func isPostgresPod(newpod *apiv1.Pod) bool {
+
+	_, pgDatabaseLabelExists := newpod.ObjectMeta.Labels[config.LABEL_PG_DATABASE]
+
+	return pgDatabaseLabelExists
 }
 
 // isPromotedPostgresPod determines if the Pod update is the result of the promotion of the pod
@@ -225,11 +253,11 @@ func isPromotedPostgresPod(oldPod, newPod *apiv1.Pod) bool {
 	return false
 }
 
-// isPromotedPostgresPod determines if the Pod update is the result of the promotion of the pod
+// isPromotedStandby determines if the Pod update is the result of the promotion of the standby pod
 // from a replica to the primary within a PG cluster. This is determined by comparing the 'role'
 // label from the old Pod to the 'role' label in the new Pod, specifically to determine if the
-// label has changed from "promoted" to "master" (this is the label change that will be performed
-// by Patroni when promoting a pod).
+// label has changed from "standby_leader" to "master" (this is the label change that will be
+// performed by Patroni when promoting a pod).
 func isPromotedStandby(oldPod, newPod *apiv1.Pod) bool {
 	if !isPostgresPod(newPod) {
 		return false
@@ -243,16 +271,3 @@ func isPromotedStandby(oldPod, newPod *apiv1.Pod) bool {
 	}
 	return false
 }
-
-// isPostgresPod determines whether or not a pod is a PostgreSQL Pod, specifically either the
-// primary or a replica pod within a PG cluster. This is determined by checking to see if the
-// 'pgo-pg-database' label is present on the Pod (also, this controller will only process pods with
-// the 'vendor=crunchydata' label, so that label is assumed to be present), specifically because
-// this label will only be included on primary and replica PostgreSQL database pods (and will be
-// present as soon as the deployment and pod are created).
-func isPostgresPod(newpod *apiv1.Pod) bool {
-
-	_, pgDatabaseLabelExists := newpod.ObjectMeta.Labels[config.LABEL_PG_DATABASE]
-
-	return pgDatabaseLabelExists
-}

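The ready-transition helpers above are easy to exercise in isolation. Below is a minimal, self-contained sketch that feeds hand-built Pods to a copy of isContainerBecomingReady; the logic mirrors the diff, with an early return that is behaviorally equivalent:

package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
)

// isContainerBecomingReady mirrors the helper added in this commit: true only
// when the named container was unready in oldPod and is ready in newPod.
func isContainerBecomingReady(containerName string, oldPod, newPod *corev1.Pod) bool {
	for _, s := range oldPod.Status.ContainerStatuses {
		if s.Name == containerName && s.Ready {
			return false // was already ready, so this is not a transition
		}
	}
	for _, s := range newPod.Status.ContainerStatuses {
		if s.Name == containerName {
			return s.Ready
		}
	}
	return false
}

func main() {
	podWith := func(ready bool) *corev1.Pod {
		return &corev1.Pod{Status: corev1.PodStatus{
			ContainerStatuses: []corev1.ContainerStatus{{Name: "database", Ready: ready}},
		}}
	}
	fmt.Println(isContainerBecomingReady("database", podWith(false), podWith(true))) // true
	fmt.Println(isContainerBecomingReady("database", podWith(true), podWith(true)))  // false
}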
controller/pod/promotionhandler.go

Lines changed: 1 addition & 1 deletion
@@ -85,7 +85,7 @@ func (c *Controller) handleStartupInit(cluster crv1.Pgcluster) error {
 	}
 
 	// now scale any replica deployments to 1
-	clusteroperator.ScaleClusterDeployments(c.PodClientset, cluster, 1, false, true, false)
+	clusteroperator.ScaleClusterDeployments(c.PodClientset, cluster, 1, false, true, false, false)
 
 	return nil
 }

installers/ansible/roles/pgo-operator/files/pgo-configs/pgo-backrest-repo-template.json

Lines changed: 1 addition & 1 deletion
@@ -12,7 +12,7 @@
     }
   },
   "spec": {
-    "replicas": 1,
+    "replicas": {{.Replicas}},
    "selector": {
       "matchLabels": {
         "name": "{{.Name}}",

operator/backrest/repo.go

Lines changed: 4 additions & 1 deletion
@@ -55,6 +55,7 @@ type RepoDeploymentTemplateFields struct {
 	PodAntiAffinity           string
 	PodAntiAffinityLabelName  string
 	PodAntiAffinityLabelValue string
+	Replicas                  int
 }
 
 type RepoServiceTemplateFields struct {
@@ -63,7 +64,8 @@ type RepoServiceTemplateFields struct {
 	Port string
 }
 
-func CreateRepoDeployment(clientset *kubernetes.Clientset, namespace string, cluster *crv1.Pgcluster, createPVC bool) error {
+func CreateRepoDeployment(clientset *kubernetes.Clientset, namespace string, cluster *crv1.Pgcluster, createPVC bool,
+	replicas int) error {
 
 	var b bytes.Buffer
 
@@ -119,6 +121,7 @@
 		Name:            serviceName,
 		ClusterName:     cluster.Name,
 		SecurityContext: util.GetPodSecurityContext(cluster.Spec.BackrestStorage.GetSupplementalGroups()),
+		Replicas:        replicas,
 		PodAntiAffinity: operator.GetPodAntiAffinity(cluster,
 			crv1.PodAntiAffinityDeploymentPgBackRest, cluster.Spec.PodAntiAffinity.PgBackRest),
 		PodAntiAffinityLabelName: config.LABEL_POD_ANTI_AFFINITY,

operator/cluster/clone.go

Lines changed: 2 additions & 1 deletion
@@ -306,7 +306,8 @@ func cloneStep2(clientset *kubernetes.Clientset, client *rest.RESTClient, restCo
 	}
 
 	// create the deployment without creating the PVC given we've already done that
-	if err := backrest.CreateRepoDeployment(clientset, namespace, &targetPgcluster, false); err != nil {
+	if err := backrest.CreateRepoDeployment(clientset, namespace, &targetPgcluster, false,
+		1); err != nil {
 		log.Error(err)
 		// publish a failure event
 		errorMessage := fmt.Sprintf("Could not create new pgbackrest repo: %s", err.Error())
