Skip to content

Commit d70f3eb

Browse files
authored
Merge pull request #93 from cybertec-postgresql/importMajorUpgrade
Import major upgrade
2 parents 2b5d699 + 9c16535 commit d70f3eb

File tree

4 files changed

+191
-12
lines changed

4 files changed

+191
-12
lines changed

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ require (
3333
)
3434

3535
require (
36+
github.com/Masterminds/semver v1.5.0 // indirect
3637
github.com/davecgh/go-spew v1.1.1 // indirect
3738
github.com/emicklei/go-restful/v3 v3.11.0 // indirect
3839
github.com/evanphx/json-patch v4.12.0+incompatible // indirect

go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
22
cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
33
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
4+
github.com/Masterminds/semver v1.5.0 h1:H65muMkzWKEuNDnfl9d70GUjFniHKHRbFPGBuZ3QEww=
5+
github.com/Masterminds/semver v1.5.0/go.mod h1:MB6lktGJrhw8PrUyiEoblNEGEQ+RzHPF078ddwwvV3Y=
46
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
57
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
68
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=

pkg/cluster/majorversionupgrade.go

Lines changed: 167 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,17 @@
11
package cluster
22

33
import (
4+
"context"
5+
"encoding/json"
46
"fmt"
57
"strings"
68

9+
"github.com/Masterminds/semver"
710
"github.com/cybertec-postgresql/cybertec-pg-operator/pkg/spec"
811
"github.com/cybertec-postgresql/cybertec-pg-operator/pkg/util"
912
v1 "k8s.io/api/core/v1"
13+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
14+
"k8s.io/apimachinery/pkg/types"
1015
)
1116

1217
// VersionMap Map of version numbers
@@ -19,6 +24,11 @@ var VersionMap = map[string]int{
1924
"18": 180000,
2025
}
2126

27+
// Annotation keys written to the postgresql resource to record the outcome of
// the most recent major version upgrade attempt. The value stored under either
// key is a timestamp string. The presence of the failure key is also checked
// before an upgrade is attempted, and the upgrade is skipped while it exists.
const (
28+
majorVersionUpgradeSuccessAnnotation = "last-major-upgrade-success"
29+
majorVersionUpgradeFailureAnnotation = "last-major-upgrade-failure"
30+
)
31+
2232
// IsBiggerPostgresVersion Compare two Postgres version numbers
2333
func IsBiggerPostgresVersion(old string, new string) bool {
2434
oldN := VersionMap[old]
@@ -35,7 +45,7 @@ func (c *Cluster) GetDesiredMajorVersionAsInt() int {
3545
func (c *Cluster) GetDesiredMajorVersion() string {
3646

3747
if c.Config.OpConfig.MajorVersionUpgradeMode == "full" {
38-
// e.g. current is 10, minimal is 11 allowing 11 to 15 clusters, everything below is upgraded
48+
// e.g. current is 13, minimal is 13 allowing 13 to 17 clusters, everything below is upgraded
3949
if IsBiggerPostgresVersion(c.Spec.PgVersion, c.Config.OpConfig.MinimalMajorVersion) {
4050
c.logger.Infof("overwriting configured major version %s to %s", c.Spec.PgVersion, c.Config.OpConfig.TargetMajorVersion)
4151
return c.Config.OpConfig.TargetMajorVersion
@@ -55,6 +65,63 @@ func (c *Cluster) isUpgradeAllowedForTeam(owningTeam string) bool {
5565
return util.SliceContains(allowedTeams, owningTeam)
5666
}
5767

68+
func (c *Cluster) annotatePostgresResource(isSuccess bool) error {
69+
annotations := make(map[string]string)
70+
currentTime := metav1.Now().Format("2006-01-02T15:04:05Z")
71+
if isSuccess {
72+
annotations[majorVersionUpgradeSuccessAnnotation] = currentTime
73+
} else {
74+
annotations[majorVersionUpgradeFailureAnnotation] = currentTime
75+
}
76+
patchData, err := metaAnnotationsPatch(annotations)
77+
if err != nil {
78+
c.logger.Errorf("could not form patch for %s postgresql resource: %v", c.Name, err)
79+
return err
80+
}
81+
_, err = c.KubeClient.Postgresqls(c.Namespace).Patch(context.Background(), c.Name, types.MergePatchType, patchData, metav1.PatchOptions{})
82+
if err != nil {
83+
c.logger.Errorf("failed to patch annotations to postgresql resource: %v", err)
84+
return err
85+
}
86+
return nil
87+
}
88+
89+
func (c *Cluster) removeFailuresAnnotation() error {
90+
annotationToRemove := []map[string]string{
91+
{
92+
"op": "remove",
93+
"path": fmt.Sprintf("/metadata/annotations/%s", majorVersionUpgradeFailureAnnotation),
94+
},
95+
}
96+
removePatch, err := json.Marshal(annotationToRemove)
97+
if err != nil {
98+
c.logger.Errorf("could not form removal patch for %s postgresql resource: %v", c.Name, err)
99+
return err
100+
}
101+
_, err = c.KubeClient.Postgresqls(c.Namespace).Patch(context.Background(), c.Name, types.JSONPatchType, removePatch, metav1.PatchOptions{})
102+
if err != nil {
103+
c.logger.Errorf("failed to remove annotations from postgresql resource: %v", err)
104+
return err
105+
}
106+
return nil
107+
}
108+
109+
func (c *Cluster) criticalOperationLabel(pods []v1.Pod, value *string) error {
110+
metadataReq := map[string]map[string]map[string]*string{"metadata": {"labels": {"critical-operation": value}}}
111+
112+
patchReq, err := json.Marshal(metadataReq)
113+
if err != nil {
114+
return fmt.Errorf("could not marshal ObjectMeta: %v", err)
115+
}
116+
for _, pod := range pods {
117+
_, err = c.KubeClient.Pods(c.Namespace).Patch(context.TODO(), pod.Name, types.StrategicMergePatchType, patchReq, metav1.PatchOptions{})
118+
if err != nil {
119+
return err
120+
}
121+
}
122+
return nil
123+
}
124+
58125
/*
59126
Execute upgrade when mode is set to manual or full or when the owning team is allowed for upgrade (and mode is "off").
60127
@@ -70,22 +137,33 @@ func (c *Cluster) majorVersionUpgrade() error {
70137
desiredVersion := c.GetDesiredMajorVersionAsInt()
71138

72139
if c.currentMajorVersion >= desiredVersion {
140+
if _, exists := c.ObjectMeta.Annotations[majorVersionUpgradeFailureAnnotation]; exists { // if failure annotation exists, remove it
141+
c.removeFailuresAnnotation()
142+
c.logger.Infof("removing failure annotation as the cluster is already up to date")
143+
}
73144
c.logger.Infof("cluster version up to date. current: %d, min desired: %d", c.currentMajorVersion, desiredVersion)
74145
return nil
75146
}
76147

77-
pods, err := c.listPodsOfType(TYPE_POSTGRESQL)
148+
pods, err := c.listPods()
78149
if err != nil {
79150
return err
80151
}
81152

82153
allRunning := true
154+
isStandbyCluster := false
83155

84156
var masterPod *v1.Pod
85157

86158
for i, pod := range pods {
87159
ps, _ := c.patroni.GetMemberData(&pod)
88160

161+
if ps.Role == "standby_leader" {
162+
isStandbyCluster = true
163+
c.currentMajorVersion = ps.ServerVersion
164+
break
165+
}
166+
89167
if ps.State != "running" {
90168
allRunning = false
91169
c.logger.Infof("identified non running pod, potentially skipping major version upgrade")
@@ -97,37 +175,114 @@ func (c *Cluster) majorVersionUpgrade() error {
97175
}
98176
}
99177

178+
if masterPod == nil {
179+
c.logger.Infof("no master in the cluster, skipping major version upgrade")
180+
return nil
181+
}
182+
183+
// Recheck version with newest data from Patroni
184+
if c.currentMajorVersion >= desiredVersion {
185+
if _, exists := c.ObjectMeta.Annotations[majorVersionUpgradeFailureAnnotation]; exists { // if failure annotation exists, remove it
186+
c.removeFailuresAnnotation()
187+
c.logger.Infof("removing failure annotation as the cluster is already up to date")
188+
}
189+
c.logger.Infof("recheck cluster version is already up to date. current: %d, min desired: %d", c.currentMajorVersion, desiredVersion)
190+
return nil
191+
} else if isStandbyCluster {
192+
c.logger.Warnf("skipping major version upgrade for %s/%s standby cluster. Re-deploy standby cluster with the required Postgres version specified", c.Namespace, c.Name)
193+
return nil
194+
}
195+
196+
if _, exists := c.ObjectMeta.Annotations[majorVersionUpgradeFailureAnnotation]; exists {
197+
c.logger.Infof("last major upgrade failed, skipping upgrade")
198+
return nil
199+
}
200+
201+
if !isInMaintenanceWindow(c.Spec.MaintenanceWindows) {
202+
c.logger.Infof("skipping major version upgrade, not in maintenance window")
203+
return nil
204+
}
205+
206+
members, err := c.patroni.GetClusterMembers(masterPod)
207+
if err != nil {
208+
c.logger.Error("could not get cluster members data from Patroni API, skipping major version upgrade")
209+
return err
210+
}
211+
patroniData, err := c.patroni.GetMemberData(masterPod)
212+
if err != nil {
213+
c.logger.Error("could not get members data from Patroni API, skipping major version upgrade")
214+
return err
215+
}
216+
patroniVer, err := semver.NewVersion(patroniData.Patroni.Version)
217+
if err != nil {
218+
c.logger.Error("error parsing Patroni version")
219+
patroniVer, _ = semver.NewVersion("3.0.4")
220+
}
221+
verConstraint, _ := semver.NewConstraint(">= 3.0.4")
222+
checkStreaming, _ := verConstraint.Validate(patroniVer)
223+
224+
for _, member := range members {
225+
if PostgresRole(member.Role) == Leader {
226+
continue
227+
}
228+
if checkStreaming && member.State != "streaming" {
229+
c.logger.Infof("skipping major version upgrade, replica %s is not streaming from primary", member.Name)
230+
return nil
231+
}
232+
if member.Lag > 16*1024*1024 {
233+
c.logger.Infof("skipping major version upgrade, replication lag on member %s is too high", member.Name)
234+
return nil
235+
}
236+
}
237+
238+
isUpgradeSuccess := true
100239
numberOfPods := len(pods)
101240
if allRunning && masterPod != nil {
102241
c.logger.Infof("healthy cluster ready to upgrade, current: %d desired: %d", c.currentMajorVersion, desiredVersion)
103242
if c.currentMajorVersion < desiredVersion {
243+
defer func() error {
244+
if err = c.criticalOperationLabel(pods, nil); err != nil {
245+
return fmt.Errorf("failed to remove critical-operation label: %s", err)
246+
}
247+
return nil
248+
}()
249+
val := "true"
250+
if err = c.criticalOperationLabel(pods, &val); err != nil {
251+
return fmt.Errorf("failed to assign critical-operation label: %s", err)
252+
}
253+
104254
podName := &spec.NamespacedName{Namespace: masterPod.Namespace, Name: masterPod.Name}
105255
c.logger.Infof("triggering major version upgrade on pod %s of %d pods", masterPod.Name, numberOfPods)
106-
c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeNormal, "Major Version Upgrade", "Starting major version upgrade on pod %s of %d pods", masterPod.Name, numberOfPods)
256+
c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeNormal, "Major Version Upgrade", "starting major version upgrade on pod %s of %d pods", masterPod.Name, numberOfPods)
107257
upgradeCommand := fmt.Sprintf("set -o pipefail && /usr/bin/python3 /scripts/inplace_upgrade.py %d 2>&1 | tee last_upgrade.log", numberOfPods)
108258

109-
c.logger.Debugf("checking if the spilo image runs with root or non-root (check for user id=0)")
259+
c.logger.Debug("checking if the spilo image runs with root or non-root (check for user id=0)")
110260
resultIdCheck, errIdCheck := c.ExecCommand(podName, "/bin/bash", "-c", "/usr/bin/id -u")
111261
if errIdCheck != nil {
112-
c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeWarning, "Major Version Upgrade", "Checking user id to run upgrade from %d to %d FAILED: %v", c.currentMajorVersion, desiredVersion, errIdCheck)
262+
c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeWarning, "Major Version Upgrade", "checking user id to run upgrade from %d to %d FAILED: %v", c.currentMajorVersion, desiredVersion, errIdCheck)
113263
}
114264

115265
resultIdCheck = strings.TrimSuffix(resultIdCheck, "\n")
116-
var result string
266+
var result, scriptErrMsg string
117267
if resultIdCheck != "0" {
118-
c.logger.Infof("User id was identified as: %s, hence default user is non-root already", resultIdCheck)
268+
c.logger.Infof("user id was identified as: %s, hence default user is non-root already", resultIdCheck)
119269
result, err = c.ExecCommand(podName, "/bin/bash", "-c", upgradeCommand)
270+
scriptErrMsg, _ = c.ExecCommand(podName, "/bin/bash", "-c", "tail -n 1 last_upgrade.log")
120271
} else {
121-
c.logger.Infof("User id was identified as: %s, using su to reach the postgres user", resultIdCheck)
272+
c.logger.Infof("user id was identified as: %s, using su to reach the postgres user", resultIdCheck)
122273
result, err = c.ExecCommand(podName, "/bin/su", "postgres", "-c", upgradeCommand)
274+
scriptErrMsg, _ = c.ExecCommand(podName, "/bin/bash", "-c", "tail -n 1 last_upgrade.log")
123275
}
124276
if err != nil {
125-
c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeWarning, "Major Version Upgrade", "Upgrade from %d to %d FAILED: %v", c.currentMajorVersion, desiredVersion, err)
126-
return err
277+
isUpgradeSuccess = false
278+
c.annotatePostgresResource(isUpgradeSuccess)
279+
c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeWarning, "Major Version Upgrade", "upgrade from %d to %d FAILED: %v", c.currentMajorVersion, desiredVersion, scriptErrMsg)
280+
return fmt.Errorf("%s", scriptErrMsg)
127281
}
128-
c.logger.Infof("upgrade action triggered and command completed: %s", result[:100])
129282

130-
c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeNormal, "Major Version Upgrade", "Upgrade from %d to %d finished", c.currentMajorVersion, desiredVersion)
283+
c.annotatePostgresResource(isUpgradeSuccess)
284+
c.logger.Infof("upgrade action triggered and command completed: %s", result[:100])
285+
c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeNormal, "Major Version Upgrade", "upgrade from %d to %d finished", c.currentMajorVersion, desiredVersion)
131286
}
132287
}
133288

pkg/cluster/util.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -759,3 +759,24 @@ func (c *Cluster) multisiteEnabled() bool {
759759
}
760760
return enable != nil && *enable
761761
}
762+
763+
func isInMaintenanceWindow(specMaintenanceWindows []cpov1.MaintenanceWindow) bool {
764+
if len(specMaintenanceWindows) == 0 {
765+
return true
766+
}
767+
now := time.Now()
768+
currentDay := now.Weekday()
769+
currentTime := now.Format("15:04")
770+
771+
for _, window := range specMaintenanceWindows {
772+
startTime := window.StartTime.Format("15:04")
773+
endTime := window.EndTime.Format("15:04")
774+
775+
if window.Everyday || window.Weekday == currentDay {
776+
if currentTime >= startTime && currentTime <= endTime {
777+
return true
778+
}
779+
}
780+
}
781+
return false
782+
}

0 commit comments

Comments
 (0)