Commit 4203302

flamingdumpsterjkatz authored and committed
Update pattern of handling minor PostgreSQL version upgrades
This modifies how PostgreSQL minor version upgrades are applied across a cluster to help minimize downtime. The Operator will have each replica in a cluster updated sequentially to the requested minor version before attempting to update the primary. When `autofail` is enabled, the cluster will promote one of the replicas, which at that point will already be upgraded to the updated version of PostgreSQL.
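At a high level, the rollout order this commit implements can be sketched as follows. This is an illustrative sketch only: the helper names (`upgradeDeployment`, `waitForReady`, `promote`) are hypothetical stand-ins, not the operator's actual API.

```go
package main

import "fmt"

// Hypothetical stand-ins for the operator's real deployment operations.
func upgradeDeployment(name string) { fmt.Println("upgrading", name) }
func waitForReady(name string)      { fmt.Println("ready:", name) }
func promote(name string)           { fmt.Println("promoting", name) }

// rollMinorUpgrade sketches the ordering introduced here: every replica is
// upgraded sequentially and must go ready before the next one starts; the
// primary is touched last. With autofail enabled, an already-upgraded
// replica is promoted rather than waiting on the old primary.
func rollMinorUpgrade(replicas []string, primary string, autofail bool) {
	for _, r := range replicas {
		upgradeDeployment(r)
		waitForReady(r)
	}
	if autofail && len(replicas) > 0 {
		promote(replicas[0])
	}
	upgradeDeployment(primary)
}

func main() {
	rollMinorUpgrade([]string{"mycluster-replica-1", "mycluster-replica-2"}, "mycluster", true)
}
```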
1 parent d689823 · commit 4203302

File tree

13 files changed (+530 additions, -104 deletions)


apis/cr/v1/common.go

Lines changed: 4 additions & 0 deletions
```diff
@@ -58,7 +58,11 @@ type PgContainerResources struct {
 	LimitsCPU string `json:"limitscpu"`
 }
 
+// CompletedStatus -
 const CompletedStatus = "completed"
+// InProgressStatus -
+const InProgressStatus = "in progress"
+// SubmittedStatus -
 const SubmittedStatus = "submitted"
 
 // JobCompletedStatus ....
```
apiserver/failoverservice/failoverimpl.go

Lines changed: 3 additions & 3 deletions
```diff
@@ -261,10 +261,10 @@ func preferredNode(nodes []string, targetNode string) bool {
 
 func checkAutofail(cluster *crv1.Pgcluster) error {
 	var err error
-	labels := cluster.ObjectMeta.Labels
-	failLabel := labels[config.LABEL_AUTOFAIL]
-	if failLabel == "true" {
+
+	if util.IsAutofailEnabled(cluster) {
 		return errors.New("autofail flag is set to true, manual failover requires autofail to be set to false, use pgo update to disable autofail.")
 	}
+
 	return err
 }
```
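For context, the removed inline check suggests what `util.IsAutofailEnabled` encapsulates. The following is a hypothetical reconstruction inferred from the deleted lines, not the actual util package source; the import paths are assumptions based on the repository layout.

```go
package util

import (
	crv1 "github.com/crunchydata/postgres-operator/apis/cr/v1"
	"github.com/crunchydata/postgres-operator/config"
)

// IsAutofailEnabled reports whether the pgcluster has autofail turned on.
// Hypothetical reconstruction of the label check this commit centralizes.
func IsAutofailEnabled(cluster *crv1.Pgcluster) bool {
	return cluster.ObjectMeta.Labels[config.LABEL_AUTOFAIL] == "true"
}
```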

apiserver/upgradeservice/upgradeimpl.go

Lines changed: 3 additions & 3 deletions
```diff
@@ -78,7 +78,7 @@ func CreateUpgrade(request *msgs.CreateUpgradeRequest, ns string) msgs.CreateUpg
 	spec.Status = "requested"
 	spec.Parameters = make(map[string]string)
 	spec.Parameters[config.LABEL_PG_CLUSTER] = clusterName
-	spec.Name = clusterName + "-minor-upgrade"
+	spec.Name = clusterName + "-" + config.LABEL_MINOR_UPGRADE
 	spec.Namespace = ns
 	labels := make(map[string]string)
 	labels[config.LABEL_PG_CLUSTER] = clusterName
@@ -104,7 +104,7 @@ func CreateUpgrade(request *msgs.CreateUpgradeRequest, ns string) msgs.CreateUpg
 		}
 	}
 
-	//validate the cluster name
+	//validate the cluster name and ensure autofail is turned off for each cluster.
 	cl := crv1.Pgcluster{}
 	found, err = kubeapi.Getpgcluster(apiserver.RESTClient,
 		&cl, clusterName, ns)
@@ -113,7 +113,7 @@ func CreateUpgrade(request *msgs.CreateUpgradeRequest, ns string) msgs.CreateUpg
 		response.Status.Msg = clusterName + " is not a valid pgcluster"
 		return response
 	}
-
+
 	//figure out what version we are upgrading to
 	imageToUpgradeTo := apiserver.Pgo.Cluster.CCPImageTag
 	if request.CCPImageTag != "" {
```
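With the new constant, the generated pgtask name for a cluster named `mycluster` is `mycluster-minor-upgrade`, the same name the pod controller reconstructs as `upgradeTaskName` below. A trivial check, with the constant's value taken from config/labels.go:

```go
package main

import "fmt"

const LABEL_MINOR_UPGRADE = "minor-upgrade" // value from config/labels.go

func main() {
	clusterName := "mycluster"
	// same construction used in CreateUpgrade and in podcontroller.go
	fmt.Println(clusterName + "-" + LABEL_MINOR_UPGRADE) // mycluster-minor-upgrade
}
```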

config/labels.go

Lines changed: 7 additions & 0 deletions
```diff
@@ -60,6 +60,13 @@ const LABEL_DELETE_BACKUPS = "delete-backups"
 const LABEL_IS_REPLICA = "is-replica"
 const LABEL_IS_BACKUP = "is-backup"
 
+const LABEL_MINOR_UPGRADE = "minor-upgrade"
+const LABEL_UPGRADE_IN_PROGRESS = "upgrade-in-progress"
+const LABEL_UPGRADE_COMPLETED = "upgrade-complete"
+const LABEL_UPGRADE_REPLICA = "upgrade-replicas"
+const LABEL_UPGRADE_PRIMARY = "upgrade-primary"
+const LABEL_UPGRADE_BACKREST = "upgrade-backrest"
+
 const LABEL_BACKREST = "pgo-backrest"
 const LABEL_BACKREST_JOB = "pgo-backrest-job"
 const LABEL_BACKREST_RESTORE = "pgo-backrest-restore"
```
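These constants act as a small state machine stored in the cluster's labels: the `minor-upgrade` label carries `upgrade-in-progress` while the rollout is running and `upgrade-complete` when done. A minimal sketch of how that might be used, assuming the operator's own packages (the helper names here are hypothetical; only the label check itself mirrors the code this commit adds to podcontroller.go):

```go
package util

import (
	crv1 "github.com/crunchydata/postgres-operator/apis/cr/v1"
	"github.com/crunchydata/postgres-operator/config"
)

// markUpgradeInProgress is a hypothetical helper: it flags a cluster as having
// a minor upgrade underway by setting minor-upgrade=upgrade-in-progress.
func markUpgradeInProgress(cluster *crv1.Pgcluster) {
	if cluster.ObjectMeta.Labels == nil {
		cluster.ObjectMeta.Labels = map[string]string{}
	}
	cluster.ObjectMeta.Labels[config.LABEL_MINOR_UPGRADE] = config.LABEL_UPGRADE_IN_PROGRESS
}

// upgradeInProgress mirrors the check this commit adds to podcontroller.go.
func upgradeInProgress(cluster *crv1.Pgcluster) bool {
	return cluster.ObjectMeta.Labels[config.LABEL_MINOR_UPGRADE] == config.LABEL_UPGRADE_IN_PROGRESS
}
```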

controller/podcontroller.go

Lines changed: 81 additions & 5 deletions
```diff
@@ -115,6 +115,16 @@ func (c *PodController) onUpdate(oldObj, newObj interface{}) {
 		return
 	}
 
+	// check here if cluster has an upgrade in progress flag set.
+	clusterInMinorUpgrade := pgcluster.Labels[config.LABEL_MINOR_UPGRADE] == config.LABEL_UPGRADE_IN_PROGRESS
+	// log.Debugf("Cluster: %s Minor Upgrade: %t", clusterName, clusterInMinorUpgrade)
+
+	// have a pod coming back up from upgrade and is ready - time to kick off the next pod.
+	if clusterInMinorUpgrade && isUpgradedPostgresPod(newpod, oldpod) {
+		upgradeTaskName := clusterName + "-" + config.LABEL_MINOR_UPGRADE
+		clusteroperator.ProcessNextUpgradeItem(c.PodClientset, c.PodClient, clusterName, upgradeTaskName, newpod.ObjectMeta.Namespace)
+	}
+
 	//handle the case when a pg database pod is updated
 	if isPostgresPod(newpod) {
 		//only check the status of primary pods
@@ -145,7 +155,8 @@ func (c *PodController) checkReadyStatus(oldpod, newpod *apiv1.Pod, cluster *crv
 		log.Debug("the pod was updated and the service names were changed in this pod update, not going to check the ReadyStatus")
 		return
 	}
-	//handle the case of a database pod going to Ready that has
+
+	//handle the case of a database pod going to Not Ready that has
 	//autofail enabled
 	autofailEnabled := c.checkAutofailLabel(newpod, newpod.ObjectMeta.Namespace)
 	clusterName := newpod.ObjectMeta.Labels[config.LABEL_PG_CLUSTER]
@@ -304,10 +315,6 @@ func isPostgresPod(newpod *apiv1.Pod) bool {
 		log.Debugf("postgres-operator-pod found [%s]", newpod.Name)
 		return false
 	}
-	if newpod.ObjectMeta.Labels[config.LABEL_PGO_BACKREST_REPO] == "true" {
-		log.Debugf("pgo-backrest-repo pod found [%s]", newpod.Name)
-		return false
-	}
 	if newpod.ObjectMeta.Labels[config.LABEL_PGPOOL_POD] == "true" {
 		log.Debugf("pgpool pod found [%s]", newpod.Name)
 		return false
@@ -318,6 +325,75 @@ func isPostgresPod(newpod *apiv1.Pod) bool {
 	}
 	return true
 }
+
+// isUpgradedPostgresPod - determines if the pod is one that could be getting a minor upgrade;
+// differs from the check above in that the backrest repo pod is upgradeable.
+func isUpgradedPostgresPod(newpod *apiv1.Pod, oldPod *apiv1.Pod) bool {
+
+	clusterName := newpod.ObjectMeta.Labels[config.LABEL_PG_CLUSTER]
+	replicaServiceName := clusterName + "-replica"
+
+	var podIsReady bool
+	for _, v := range newpod.Status.ContainerStatuses {
+		if v.Name == "database" {
+			podIsReady = v.Ready
+		}
+	}
+
+	var oldPodStatus bool
+	for _, v := range oldPod.Status.ContainerStatuses {
+		if v.Name == "database" {
+			oldPodStatus = v.Ready
+		}
+	}
+
+	log.Debugf("[isUpgradedPostgresPod] oldstatus: %t newstatus: %t", oldPodStatus, podIsReady)
+
+	// only care about pods that have changed from !ready to ready
+	if podIsReady && !oldPodStatus {
+
+		// eliminate anything we don't care about - it will be most things
+		if newpod.ObjectMeta.Labels[config.LABEL_JOB_NAME] != "" {
+			log.Debugf("job pod found [%s]", newpod.Name)
+			return false
+		}
+
+		if newpod.ObjectMeta.Labels[config.LABEL_NAME] == "postgres-operator" {
+			log.Debugf("postgres-operator-pod found [%s]", newpod.Name)
+			return false
+		}
+		if newpod.ObjectMeta.Labels[config.LABEL_PGPOOL_POD] == "true" {
+			log.Debugf("pgpool pod found [%s]", newpod.Name)
+			return false
+		}
+		if newpod.ObjectMeta.Labels[config.LABEL_PGBOUNCER] == "true" {
+			log.Debugf("pgbouncer pod found [%s]", newpod.Name)
+			return false
+		}
+
+		// look for specific pods that could have just gone through upgrade
+
+		if newpod.ObjectMeta.Labels[config.LABEL_PGO_BACKREST_REPO] == "true" {
+			log.Debugf("Minor Upgrade: upgraded pgo-backrest-repo found %s", newpod.Name)
+			return true
+		}
+
+		// primary identified by service-name being same as cluster name
+		if newpod.ObjectMeta.Labels[config.LABEL_SERVICE_NAME] == clusterName {
+			log.Debugf("Minor Upgrade: upgraded primary found %s", newpod.Name)
+			return true
+		}
+
+		if newpod.ObjectMeta.Labels[config.LABEL_SERVICE_NAME] == replicaServiceName {
+			log.Debugf("Minor Upgrade: upgraded replica found %s", newpod.Name)
+			return true
+		}
+
+		// This indicates there is a pod we didn't account for - shouldn't be the case
+		log.Debugf(" **** Minor Upgrade: unexpected isUpgraded pod found: [%s] ****", newpod.Name)
+	}
+	return false
+}
 
 func publishClusterComplete(clusterName, namespace string, cluster *crv1.Pgcluster) error {
 	//capture the cluster creation event
```
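The not-ready-to-ready transition check lends itself to a table-style unit test. A sketch of one, assuming it lives in the same package as `isUpgradedPostgresPod` and that the label keys carry the values implied by config/labels.go (`pg-cluster`, `service-name`):

```go
package controller

import (
	"testing"

	"github.com/crunchydata/postgres-operator/config"
	apiv1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// podWithReady builds a minimal pod whose "database" container has the given
// ready state - just enough for isUpgradedPostgresPod to inspect.
func podWithReady(labels map[string]string, ready bool) *apiv1.Pod {
	return &apiv1.Pod{
		ObjectMeta: metav1.ObjectMeta{Labels: labels},
		Status: apiv1.PodStatus{
			ContainerStatuses: []apiv1.ContainerStatus{
				{Name: "database", Ready: ready},
			},
		},
	}
}

func TestUpgradedReplicaDetected(t *testing.T) {
	labels := map[string]string{
		config.LABEL_PG_CLUSTER:   "mycluster",
		config.LABEL_SERVICE_NAME: "mycluster-replica", // replica service = cluster name + "-replica"
	}
	oldPod := podWithReady(labels, false) // pod was not ready before the update
	newPod := podWithReady(labels, true)  // pod just transitioned to ready

	if !isUpgradedPostgresPod(newPod, oldPod) {
		t.Fatal("expected a not-ready -> ready replica pod to be detected as upgraded")
	}
}
```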

hugo/content/Upgrade/_index.md

Lines changed: 20 additions & 0 deletions
```diff
@@ -17,3 +17,23 @@ This section of the documentation shows specific steps required to upgrade diffe
 [Upgrade Postgres Operator from 3.5 to 4.1] ( {{< relref "upgrade/upgrade35to4.md" >}})
 
 [Upgrade Postgres Operator from 4.0.1 to 4.1.0] ( {{< relref "upgrade/upgrade40to41.md" >}})
+
+## Upgrading A Postgres Cluster
+
+Using the operator, it is possible to upgrade a postgres cluster in place. When a `pgo upgrade` command is issued and a `--ccp-image-tag` is specified, the operator will upgrade each replica and the primary to the new CCPImageTag version. It is important to note that the PostgreSQL version of the new container should be compatible with the currently running version; there is currently no version check done to ensure compatibility.
+
+The upgrade is accomplished by updating the CCPImageTag version in the deployment, which causes the old pod to be terminated and a new pod created with the updated deployment specification.
+
+When the upgrade starts, each replica is upgraded sequentially, waiting for the previous replica to become ready before updating the next. After the replicas complete, the primary is then upgraded to the new image. The upgrade process respects the _autofail_ and the _AutofailReplaceReplica_ settings as provided in the pgo.yaml or as a command line flag, if applicable.
+
+When the cluster is not in _autofail_ mode, the deployments simply create a new pod when updated, terminating the old one. When autofail is enabled and the primary deployment is updated, the cluster behaves as though the primary had failed and begins the failover process. See _Automatic Failover_ in the _Overview_ section for more details on this and on replica replacement.
+
+At this time, the backrest-repo container is not upgraded during this upgrade as it is part of the postgres operator release and is updated with the operator.
+
+## Minor Upgrade Example
+
+In this example, we are upgrading a cluster from PostgreSQL 11.4 to 11.5 using the `crunchy-postgres:centos7-11.5-2.4.2` container:
+
+`pgo upgrade mycluster --ccp-image-tag=centos7-11.5-2.4.2`
+
+For more information, please see the `pgo upgrade` documentation [here]({{< relref "operatorcli/cli/pgo_upgrade.md" >}}).
```
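To make the mechanism concrete: updating the image tag on a deployment is what triggers the pod replacement, so the operator's per-deployment step is roughly equivalent to `kubectl set image deployment/mycluster database=crunchy-postgres:centos7-11.5-2.4.2`. This command is illustrative only: the deployment name matching the cluster name and the `database` container name are assumptions drawn from the code above, and running it by hand would bypass the operator's replicas-then-primary sequencing.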

operator/cluster/failoverlogic.go

Lines changed: 44 additions & 10 deletions
```diff
@@ -170,14 +170,14 @@ func deletePrimary(clientset *kubernetes.Clientset, namespace, clusterName, pgou
 	//the primary will be the one with a pod that has a label
 	//that looks like service-name=clustername and is not a backrest job
 	selector := config.LABEL_SERVICE_NAME + "=" + clusterName + "," + config.LABEL_BACKREST_RESTORE + "!=true," + config.LABEL_BACKREST_JOB + "!=true"
-	pods, err := kubeapi.GetPods(clientset, selector, namespace)
-	if len(pods.Items) == 0 {
-		log.Errorf("no primary pod found when trying to delete primary %s", selector)
-		return errors.New("could not find primary pod")
-	}
-	if len(pods.Items) > 1 {
-		log.Errorf("more than 1 primary pod found when trying to delete primary %s", selector)
-		return errors.New("more than 1 primary pod found in delete primary logic")
+
+	// wait for single primary pod to exist.
+	pods, success := waitForSinglePrimary(clientset, selector, namespace)
+
+
+	if !success {
+		log.Errorf("Received false while waiting for single primary, count: %d", len(pods.Items))
+		return errors.New("couldn't isolate single primary pod")
 	}
 
 	//update the label to 'fenced' on the pod to fence off traffic from
@@ -192,7 +192,7 @@ func deletePrimary(clientset *kubernetes.Clientset, namespace, clusterName, pgou
 
 	//delete the deployment with pg-cluster=clusterName,primary=true
 	log.Debugf("deleting deployment %s", deploymentToDelete)
-	err = kubeapi.DeleteDeployment(clientset, deploymentToDelete, namespace)
+	err := kubeapi.DeleteDeployment(clientset, deploymentToDelete, namespace)
 
 	err = waitForDelete(deploymentToDelete, pod.Name, clientset, namespace)
 
@@ -232,14 +232,48 @@ func waitForDelete(deploymentToDelete, podName string, clientset *kubernetes.Cli
 			log.Error("error getting pod when evaluating old primary in failover %s %s", deploymentToDelete, podName)
 			return err
 		}
-		log.Debugf("waitinf for %s to delete", pod.Name)
+		log.Debugf("waiting for %s to delete", pod.Name)
 		time.Sleep(time.Second * time.Duration(9))
 	}
 
 	return errors.New(fmt.Sprintf("timeout waiting for %s %s to delete", deploymentToDelete, podName))
 }
 
+// waitForSinglePrimary - during failover, there can exist the possibility that while one pod is in the process of
+// terminating, the Deployment will be spinning up another pod - both will appear to be a primary even though the
+// terminating pod will not be accessible via the service. This method gets the primary and, if both exist, waits to
+// give the terminating pod a chance to complete. If a single primary is never isolated, it returns false with the count
+// of primaries found when it gave up. The number of tries and duration can be increased if needed - max wait time is
+// tries * duration.
+func waitForSinglePrimary(clientset *kubernetes.Clientset, selector, namespace string) (*v1.PodList, bool) {
+
+	var tries = 5
+	var duration = 2 // seconds
+	var pods *v1.PodList
+
+	for i := 0; i < tries; i++ {
+
+		pods, _ = kubeapi.GetPods(clientset, selector, namespace)
+
+		if len(pods.Items) > 1 {
+			log.Errorf("more than 1 primary pod found when looking for primary %s", selector)
+			log.Debug("Waiting in case a pod is terminating...")
+			time.Sleep(time.Second * time.Duration(duration))
+		} else if len(pods.Items) == 0 {
+			log.Errorf("No pods found for primary deployment")
+			return pods, false
+		} else {
+			log.Debug("Found single pod for primary deployment")
+			return pods, true
+		}
+	}
+
+	return pods, false
+}
+
 func publishPromoteEvent(identifier, namespace, username, clusterName, target string) {
 	topics := make([]string, 1)
 	topics[0] = events.EventTopicCluster
```
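`waitForSinglePrimary` is a bounded polling loop: with the values above, the worst-case wait is tries * duration = 5 * 2s = 10 seconds. A generic, runnable illustration of the same pattern (standalone sketch, not operator code):

```go
package main

import (
	"fmt"
	"time"
)

// waitFor polls cond up to `tries` times, sleeping `duration` between
// attempts, so the worst-case wait is tries * duration.
func waitFor(cond func() bool, tries int, duration time.Duration) bool {
	for i := 0; i < tries; i++ {
		if cond() {
			return true
		}
		time.Sleep(duration)
	}
	return false
}

func main() {
	start := time.Now()
	// condition becomes true after ~3s, well inside the 5 * 2s budget
	ok := waitFor(func() bool { return time.Since(start) > 3*time.Second }, 5, 2*time.Second)
	fmt.Println("condition met:", ok)
}
```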
