Skip to content

Commit eb5c3a4

Browse files
tjmoore4andrewlecuyer
authored andcommitted
Postgres Operator automated cluster upgrade
Previously, the existing cluster upgrade focused on updating a cluster's underlying container images. However, due to the various changes in the Postgres Operator's operation, including numerous updates to the relevant CRDs, integration of Patroni for HA and other significant changes between versions, updates between Postgres Operator releases required the manual deletion of the existing clusters while being sure to preserve the underlying PVC storage. After installing the new version, the clusters could be recreated manually so long as the name of the new cluster matched the existing PVC's name. This process has been completely overhauled so that now, when the Postgres Operator has been updated to a new version, the existing pgclusters will be left in place. Normal Operator functionality will be restricted until the clusters are upgraded to match the currently installed version, although the pods, services, etc. will still be in place and accessible via other methods (e.g. kubectl, service IP, etc.). To upgrade a particular cluster, use `pgo upgrade mycluster`. This will follow a similar process to the existing documented manual process, where the pods, deployments, replicasets, pgtasks and jobs are deleted, the cluster's replicas are scaled down and replica PVCs deleted, but the primary PVC and backrest-repo PVC are left in place. Existing services for the primary, replica and backrest-shared-repo are also kept. Configmaps and secrets are kept except where deletion is required. For a cluster 'mycluster', the following configmaps will be deleted (if they exist) and recreated: 'mycluster-leader' and 'mycluster-pgha-config'. One exception to this is during the upgrade of a standby cluster, where the 'mycluster-pgha-config' configmap is modified, but not deleted. This is necessary to ensure the standby cluster is not reinitialized during upgrade. 
The secret 'mycluster-backrest-repo-config' will also be deleted and recreated in this example to ensure the encryption key used matches the current version requirements. The pgcluster CRD will then be read, updated automatically and replaced, at which point the normal cluster creation process will take over. The end result of the upgrade should be an identical number of pods, deployments, etc., with a new pgbackrest backup taken, but existing backups still available. Please note, this upgrade currently supports cluster upgrades from Postgres Operator version 4.1.0 and later, and can be run multiple times against the same cluster if needed.
1 parent 3208ec5 commit eb5c3a4

File tree

18 files changed

+993
-661
lines changed

18 files changed

+993
-661
lines changed

apis/crunchydata.com/v1/task.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,10 @@ const PgtaskDeleteData = "delete-data"
2727
const PgtaskFailover = "failover"
2828
const PgtaskAutoFailover = "autofailover"
2929
const PgtaskAddPolicies = "addpolicies"
30-
const PgtaskMinorUpgrade = "minorupgradecluster"
30+
31+
const PgtaskUpgrade = "clusterupgrade"
32+
const PgtaskUpgradeCreated = "cluster upgrade - task created"
33+
const PgtaskUpgradeInProgress = "cluster upgrade - in progress"
3134

3235
const PgtaskWorkflow = "workflow"
3336
const PgtaskWorkflowCloneType = "cloneworkflow"

apiserver/upgradeservice/upgradeimpl.go

Lines changed: 166 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -16,26 +16,42 @@ limitations under the License.
1616
*/
1717

1818
import (
19+
"fmt"
20+
"io/ioutil"
21+
"regexp"
22+
"strconv"
23+
"time"
24+
1925
crv1 "github.com/crunchydata/postgres-operator/apis/crunchydata.com/v1"
2026
"github.com/crunchydata/postgres-operator/apiserver"
2127
msgs "github.com/crunchydata/postgres-operator/apiservermsgs"
2228
"github.com/crunchydata/postgres-operator/config"
2329
"github.com/crunchydata/postgres-operator/kubeapi"
30+
2431
log "github.com/sirupsen/logrus"
2532
meta_v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2633
"k8s.io/apimachinery/pkg/labels"
2734
)
2835

29-
// CreateUpgrade ...
30-
func CreateUpgrade(request *msgs.CreateUpgradeRequest, ns string) msgs.CreateUpgradeResponse {
36+
// Currently supported version information for upgrades
37+
const (
38+
REQUIRED_MAJOR_PGO_VERSION = 4
39+
MAXIMUM_MINOR_PGO_VERSION = 3
40+
MINIMUM_MINOR_PGO_VERSION = 1
41+
)
42+
43+
// CreateUpgrade accepts the CreateUpgradeRequest, performs the necessary validation checks and
44+
// organizes the needed upgrade information before creating the required pgtask
45+
// Command format: pgo upgrade mycluster
46+
func CreateUpgrade(request *msgs.CreateUpgradeRequest, ns, pgouser string) msgs.CreateUpgradeResponse {
3147
response := msgs.CreateUpgradeResponse{}
3248
response.Status = msgs.Status{Code: msgs.Ok, Msg: ""}
33-
response.Results = make([]string, 1)
49+
response.Results = make([]string, 0)
3450

3551
log.Debugf("createUpgrade called %v", request)
3652

3753
if request.Selector != "" {
38-
//use the selector instead of an argument list to filter on
54+
// use the selector instead of an argument list to filter on
3955

4056
myselector, err := labels.Parse(request.Selector)
4157
if err != nil {
@@ -46,7 +62,7 @@ func CreateUpgrade(request *msgs.CreateUpgradeRequest, ns string) msgs.CreateUpg
4662
}
4763
log.Debugf("myselector is %s", myselector.String())
4864

49-
//get the clusters list
65+
// get the clusters list
5066
clusterList := crv1.PgclusterList{}
5167
err = kubeapi.GetpgclustersBySelector(apiserver.RESTClient,
5268
&clusterList, request.Selector, ns)
@@ -56,6 +72,7 @@ func CreateUpgrade(request *msgs.CreateUpgradeRequest, ns string) msgs.CreateUpg
5672
return response
5773
}
5874

75+
// check that the cluster can be found
5976
if len(clusterList.Items) == 0 {
6077
log.Debug("no clusters found")
6178
response.Status.Msg = "no clusters found"
@@ -72,16 +89,37 @@ func CreateUpgrade(request *msgs.CreateUpgradeRequest, ns string) msgs.CreateUpg
7289
for _, clusterName := range request.Args {
7390
log.Debugf("create upgrade called for %s", clusterName)
7491

75-
//build the pgtask for the minor upgrade
92+
// build the pgtask for the upgrade
7693
spec := crv1.PgtaskSpec{}
77-
spec.TaskType = crv1.PgtaskMinorUpgrade
78-
spec.Status = "requested"
94+
spec.TaskType = crv1.PgtaskUpgrade
95+
// set the status as created
96+
spec.Status = crv1.PgtaskUpgradeCreated
7997
spec.Parameters = make(map[string]string)
8098
spec.Parameters[config.LABEL_PG_CLUSTER] = clusterName
81-
spec.Name = clusterName + "-" + config.LABEL_MINOR_UPGRADE
99+
spec.Parameters[crv1.PgtaskWorkflowSubmittedStatus] = time.Now().Format(time.RFC3339)
100+
101+
u, err := ioutil.ReadFile("/proc/sys/kernel/random/uuid")
102+
if err != nil {
103+
log.Error(err)
104+
response.Status.Code = msgs.Error
105+
response.Status.Msg = fmt.Sprintf("Could not generate UUID for upgrade task. Error: %s", err.Error())
106+
return response
107+
}
108+
spec.Parameters[crv1.PgtaskWorkflowID] = string(u[:len(u)-1])
109+
110+
// pass the CCP Image Tag from the apiserver
111+
spec.Parameters[config.LABEL_CCP_IMAGE_KEY] = apiserver.Pgo.Cluster.CCPImageTag
112+
// pass the PGO version for the upgrade
113+
spec.Parameters[config.LABEL_PGO_VERSION] = msgs.PGO_VERSION
114+
// pass the PGO username for use in the updated CR if missing
115+
spec.Parameters[config.LABEL_PGOUSER] = pgouser
116+
117+
spec.Name = clusterName + "-" + config.LABEL_UPGRADE
82118
spec.Namespace = ns
83119
labels := make(map[string]string)
84120
labels[config.LABEL_PG_CLUSTER] = clusterName
121+
labels[config.LABEL_PGOUSER] = pgouser
122+
labels[crv1.PgtaskWorkflowID] = spec.Parameters[crv1.PgtaskWorkflowID]
85123

86124
newInstance := &crv1.Pgtask{
87125
ObjectMeta: meta_v1.ObjectMeta{
@@ -91,20 +129,17 @@ func CreateUpgrade(request *msgs.CreateUpgradeRequest, ns string) msgs.CreateUpg
91129
Spec: spec,
92130
}
93131

94-
// remove any existing pgtask for this minor upgrade
95-
result := crv1.Pgtask{}
96-
found, err := kubeapi.Getpgtask(apiserver.RESTClient,
97-
&result, spec.Name, ns)
98-
if found {
99-
err := kubeapi.Deletepgtask(apiserver.RESTClient, spec.Name, ns)
100-
if err != nil {
101-
response.Status.Code = msgs.Error
102-
response.Status.Msg = err.Error()
103-
return response
104-
}
132+
// remove any existing pgtask for this upgrade
133+
task := crv1.Pgtask{}
134+
found, err := kubeapi.Getpgtask(apiserver.RESTClient, &task, spec.Name, ns)
135+
136+
if found && task.Spec.Status != crv1.CompletedStatus {
137+
response.Status.Code = msgs.Error
138+
response.Status.Msg = fmt.Sprintf("Could not upgrade cluster: there exists an ongoing upgrade task: [%s]. If you believe this is an error, try deleting this pgtask CR.", task.Spec.Name)
139+
return response
105140
}
106141

107-
//validate the cluster name and ensure autofail is turned off for each cluster.
142+
// validate the cluster name and ensure autofail is turned off for each cluster.
108143
cl := crv1.Pgcluster{}
109144
found, err = kubeapi.Getpgcluster(apiserver.RESTClient,
110145
&cl, clusterName, ns)
@@ -114,31 +149,131 @@ func CreateUpgrade(request *msgs.CreateUpgradeRequest, ns string) msgs.CreateUpg
114149
return response
115150
}
116151

117-
//figure out what version we are upgrading to
118-
imageToUpgradeTo := apiserver.Pgo.Cluster.CCPImageTag
119-
if request.CCPImageTag != "" {
120-
imageToUpgradeTo = request.CCPImageTag
152+
if !supportedOperatorVersion(cl.ObjectMeta.Labels[config.LABEL_PGO_VERSION]) {
153+
response.Status.Code = msgs.Error
154+
response.Status.Msg = "Cannot upgrade " + clusterName + " from Postgres Operator version " + cl.ObjectMeta.Labels[config.LABEL_PGO_VERSION]
155+
return response
121156
}
122-
if imageToUpgradeTo == cl.Spec.CCPImageTag {
157+
158+
// for the upgrade procedure, we only upgrade to the current image used by the
159+
// Postgres Operator. As such, we will validate that the Postgres Operator's configured
160+
// image tag (first value) is compatible (i.e. is the same Major PostgreSQL version) as the
161+
// existing cluster's PG value, unless the --ignore-validation flag is set.
162+
if !upgradeTagValid(cl.Spec.CCPImageTag, apiserver.Pgo.Cluster.CCPImageTag) && !request.IgnoreValidation {
163+
log.Debugf("Cannot upgrade from %s to %s. Image must be the same base OS and the upgrade must be within the same major PG version.", cl.Spec.CCPImageTag, apiserver.Pgo.Cluster.CCPImageTag)
123164
response.Status.Code = msgs.Error
124-
response.Status.Msg = "can not upgrade to the same image tag " + imageToUpgradeTo + " " + cl.Spec.CCPImageTag
165+
response.Status.Msg = fmt.Sprintf("cannot upgrade from %s to %s, upgrade task failed.", cl.Spec.CCPImageTag, apiserver.Pgo.Cluster.CCPImageTag)
125166
return response
126167
}
127-
log.Debugf("upgrading to image tag %s", imageToUpgradeTo)
128-
spec.Parameters["CCPImageTag"] = imageToUpgradeTo
168+
169+
// given the above check passed, save the CCP image tag value
170+
spec.Parameters["CCPImageTag"] = cl.Spec.CCPImageTag
129171

130172
// Create an instance of our CRD
131173
err = kubeapi.Createpgtask(apiserver.RESTClient, newInstance, ns)
132174
if err != nil {
133175
response.Status.Code = msgs.Error
134176
response.Status.Msg = err.Error()
177+
response.WorkflowID = spec.Parameters[crv1.PgtaskWorkflowID]
135178
return response
136179
}
137180

138-
msg := "created minor upgrade task for " + clusterName
181+
msg := "created upgrade task for " + clusterName
139182
response.Results = append(response.Results, msg)
140-
183+
response.WorkflowID = spec.Parameters[crv1.PgtaskWorkflowID]
141184
}
142185

143186
return response
144187
}
188+
189+
// supportedOperatorVersion validates the Postgres Operator version
190+
// information for the candidate pgcluster. If this value is in the
191+
// required range, return true so that the upgrade may continue. Otherwise,
192+
// return false.
193+
func supportedOperatorVersion(version string) bool {
194+
// get the Operator version
195+
operatorVersionRegex := regexp.MustCompile(`^(\d)\.(\d)\.(\d)`)
196+
operatorVersion := operatorVersionRegex.FindStringSubmatch(version)
197+
198+
// if this regex passes, the returned array should always contain
199+
// 4 values. At 0, the full match, then 1-3 are the three defined groups
200+
// If this is not true, the upgrade cannot continue (and we won't want to
201+
// reference potentially missing array items).
202+
if len(operatorVersion) != 4 {
203+
return false
204+
}
205+
206+
// if the first group does not equal the current major version
207+
// then the upgrade cannot continue
208+
if major, err := strconv.Atoi(operatorVersion[1]); err != nil {
209+
log.Error(err)
210+
return false
211+
} else if major != REQUIRED_MAJOR_PGO_VERSION {
212+
return false
213+
}
214+
215+
// if the second group does is not in the supported range,
216+
// then the upgrade cannot continue
217+
minor, err := strconv.Atoi(operatorVersion[2])
218+
if err != nil {
219+
log.Errorf("Cannot convert Postgres Operator's minor version to an integer. Error: ", err)
220+
return false
221+
}
222+
if minor < MINIMUM_MINOR_PGO_VERSION || minor > MAXIMUM_MINOR_PGO_VERSION {
223+
return false
224+
}
225+
226+
// If none of the above is true, the upgrade can continue
227+
return true
228+
229+
}
230+
231+
// upgradeTagValid compares and validates the PostgreSQL version values stored
232+
// in the image tag of the existing pgcluster CR against the values set in the
233+
// Postgres Operator's configuration
234+
func upgradeTagValid(upgradeFrom, upgradeTo string) bool {
235+
236+
log.Debugf("Validating upgrade from %s to %s", upgradeFrom, upgradeTo)
237+
238+
versionRegex := regexp.MustCompile(`-(\d+)\.(\d+)(\.\d+)?-`)
239+
240+
// get the PostgreSQL version values
241+
upgradeFromValue := versionRegex.FindStringSubmatch(upgradeFrom)
242+
upgradeToValue := versionRegex.FindStringSubmatch(upgradeTo)
243+
244+
// if this regex passes, the returned array should always contain
245+
// 4 values. At 0, the full match, then 1-3 are the three defined groups
246+
// If this is not true, the upgrade cannot continue (and we won't want to
247+
// reference potentially missing array items).
248+
if len(upgradeFromValue) != 4 || len(upgradeToValue) != 4 {
249+
return false
250+
}
251+
252+
// if the first group does not match (PG version 9, 10, 11, 12 etc), or if a value is
253+
// missing, then the upgrade cannot continue
254+
if upgradeFromValue[1] != upgradeToValue[1] && upgradeToValue[1] != "" {
255+
return false
256+
}
257+
258+
// if the above check passed, and there is no fourth value, then the PG
259+
// version has only two digits (e.g. PG 10, 11 or 12), meaning this is a minor upgrade.
260+
// After validating the second value is at least equal (this is to allow for multiple executions of the
261+
// upgrade in case an error occurs), the upgrade can continue
262+
if upgradeFromValue[3] == "" && upgradeToValue[3] == "" && upgradeFromValue[2] <= upgradeToValue[2] {
263+
return true
264+
}
265+
266+
// finally, if the second group matches and is not empty, then, based on the
267+
// possibilities remaining for Operator container image tags, this is either PG 9.5 or 9.6.
268+
// if the second group value matches, and the third group was already validated as not
269+
// empty, check that the third value is at least equal (this is to allow for multiple executions of the
270+
// upgrade in case an error occurs). If so, the upgrade can continue.
271+
if upgradeFromValue[2] == upgradeToValue[2] && upgradeToValue[2] != "" && upgradeFromValue[3] <= upgradeToValue[3] {
272+
return true
273+
}
274+
275+
// if none of the above conditions are met, a two digit Major version upgrade is likely being
276+
// attempted, or a tag value or general error occurred, so we cannot continue
277+
return false
278+
279+
}

apiserver/upgradeservice/upgradeservice.go

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,25 +17,26 @@ limitations under the License.
1717

1818
import (
1919
"encoding/json"
20+
"net/http"
21+
2022
"github.com/crunchydata/postgres-operator/apiserver"
2123
msgs "github.com/crunchydata/postgres-operator/apiservermsgs"
2224
log "github.com/sirupsen/logrus"
23-
"net/http"
2425
)
2526

2627
// CreateUpgradeHandler ...
2728
// pgo upgrade mycluster
28-
// parameters --upgrade-type
29-
// parameters --ccp-image-tag
3029
func CreateUpgradeHandler(w http.ResponseWriter, r *http.Request) {
3130
// swagger:operation POST /upgrades upgradeservice upgrades
3231
/*```
33-
UPGRADE performs an upgrade on a PostgreSQL cluster. For example:
34-
32+
UPGRADE performs an upgrade on a PostgreSQL cluster from an earlier version
33+
of the Postgres Operator to the current version.
3534
35+
OTHER UPGRADE DESCRIPTION:
36+
This upgrade will scale down any existing replicas while saving the primary
37+
and pgbackrest repo PVCs, then update the existing pgcluster CR and resubmit it for
38+
re-creation.
3639
37-
This upgrade will update the CCPImageTag of the deployment for the following: primary, replicas, and backrest-repo.
38-
The running containers are upgraded one at a time, sequentially, in the following order: replicas, backrest-repo, then primary.
3940
*/
4041
// ---
4142
// produces:
@@ -81,6 +82,6 @@ func CreateUpgradeHandler(w http.ResponseWriter, r *http.Request) {
8182
return
8283
}
8384

84-
resp = CreateUpgrade(&request, ns)
85+
resp = CreateUpgrade(&request, ns, username)
8586
json.NewEncoder(w).Encode(resp)
8687
}

apiservermsgs/common.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ const PGO_VERSION = "4.3.0"
1919

2020
// Ok status
2121
const Ok = "ok"
22+
23+
// Error code string
2224
const Error = "error"
2325

2426
// UpgradeError is the error used for when a command is tried against a cluster that has not

apiservermsgs/upgrademsgs.go

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,16 +18,17 @@ limitations under the License.
1818
// CreateUpgradeRequest ...
1919
// swagger:model
2020
type CreateUpgradeRequest struct {
21-
Args []string
22-
Selector string
23-
Namespace string
24-
CCPImageTag string
25-
ClientVersion string
21+
Args []string
22+
Selector string
23+
Namespace string
24+
ClientVersion string
25+
IgnoreValidation bool
2626
}
2727

2828
// CreateUpgradeResponse ...
2929
// swagger:model
3030
type CreateUpgradeResponse struct {
3131
Results []string
3232
Status
33+
WorkflowID string
3334
}

config/labels.go

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,6 @@ const LABEL_STORAGE_CONFIG = "storage-config"
5555
const LABEL_NODE_LABEL = "node-label"
5656
const LABEL_VERSION = "version"
5757
const LABEL_PGO_VERSION = "pgo-version"
58-
const LABEL_UPGRADE_DATE = "operator-upgrade-date"
5958
const LABEL_DELETE_DATA = "delete-data"
6059
const LABEL_DELETE_DATA_STARTED = "delete-data-started"
6160
const LABEL_DELETE_BACKUPS = "delete-backups"
@@ -64,12 +63,8 @@ const LABEL_IS_BACKUP = "is-backup"
6463
const LABEL_STARTUP = "startup"
6564
const LABEL_SHUTDOWN = "shutdown"
6665

67-
const LABEL_MINOR_UPGRADE = "minor-upgrade"
68-
const LABEL_UPGRADE_IN_PROGRESS = "upgrade-in-progress"
69-
const LABEL_UPGRADE_COMPLETED = "upgrade-complete"
70-
const LABEL_UPGRADE_REPLICA = "upgrade-replicas"
71-
const LABEL_UPGRADE_PRIMARY = "upgrade-primary"
72-
const LABEL_UPGRADE_BACKREST = "upgrade-backrest"
66+
// label for the pgcluster upgrade
67+
const LABEL_UPGRADE = "upgrade"
7368

7469
const LABEL_BACKREST = "pgo-backrest"
7570
const LABEL_BACKREST_JOB = "pgo-backrest-job"

controller/pgtask/pgtaskcontroller.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,8 +101,8 @@ func (c *Controller) processNextItem() bool {
101101

102102
//process the incoming task
103103
switch tmpTask.Spec.TaskType {
104-
case crv1.PgtaskMinorUpgrade:
105-
log.Debug("delete minor upgrade task added")
104+
case crv1.PgtaskUpgrade:
105+
log.Debug("upgrade task added")
106106
clusteroperator.AddUpgrade(c.PgtaskClientset, c.PgtaskClient, &tmpTask, keyNamespace)
107107
case crv1.PgtaskFailover:
108108
log.Debug("failover task added")

0 commit comments

Comments
 (0)