Skip to content

Commit 347d197

Browse files
committed
Fix upgrade, don't set GTID_PURGED if was an upgrade
1 parent 5cbbca2 commit 347d197

File tree

6 files changed

+62
-105
lines changed

6 files changed

+62
-105
lines changed

docs/operator-upgrades.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ field) to make it smaller that will prevent you from hitting this
4141

4242

4343
The operator should do all the work for you but you have to make sure that you have the latest
44-
`0.2.x` version of the operator.
44+
`0.2.x` version of the operator. For a smooth upgrade, it's recommended to have clusters with only
45+
one node, or with the master node as node 0. Doing so spares the operator from having to wait for a failover.
4546

4647
This release drops support for the `emptyDir` volume source.

hack/upgrades-tests/test_upgrade_v0.3.0.sh

Lines changed: 16 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -156,60 +156,18 @@ function wait_cluster_ready {
156156

157157
}
158158

159-
function read_endpoint {
160-
name=${1:-my-cluster}
159+
function run_query {
160+
pod=${1}-mysql-${2}
161+
query=${3}
161162

162-
echo "${name}-mysql.${CL_NAMESPACE}"
163+
kubectl exec $pod -- mysql --defaults-file=/etc/mysql/client.conf -NB -e "$query"
163164
}
164165

165-
function start_check_job {
166-
name=${1:-my-cluster}
167-
root_pass=${2:-not-so-secure}
166+
function run_query_old {
167+
pod=${1}-mysql-${2}
168+
query=${3}
168169

169-
msg "Create check job ($name) ..."
170-
cat <<EOF | kubectl apply -f -
171-
apiVersion: batch/v1
172-
kind: Job
173-
metadata:
174-
name: check-mysql-cluster
175-
spec:
176-
backoffLimit: 0
177-
template:
178-
spec:
179-
restartPolicy: Never
180-
containers:
181-
- name: check
182-
image: percona:5.7
183-
command: ["/bin/bash", "-c"]
184-
args:
185-
- |
186-
set -e
187-
while true; do
188-
mysql --host=$(read_endpoint ${name}) --user=root --password=${root_pass} -e 'SELECT 1'
189-
sleep 5
190-
done
191-
EOF
192-
}
193-
194-
function check_job_ok {
195-
JSONPATH='{range @.status.conditions[*]}{@.type}={@.status};{end}'
196-
197-
msg "Check job status ..."
198-
sleep 10
199-
out=$(kubectl get jobs check-mysql-cluster -o jsonpath="$JSONPATH")
200-
if [[ "$out" == *"Failed=True"* ]]; then
201-
msg "Cluser was down!!" error
202-
return 1
203-
else
204-
msg "Cluster was ok!!" success
205-
fi
206-
}
207-
208-
function apply_new_crds {
209-
file=${1:-02x-crds.yaml}
210-
211-
msg "Apply new crds ($file)..."
212-
kubectl apply -f ${file}
170+
kubectl exec $pod -- mysql --defaults-file=/etc/mysql/client.cnf -NB -e "$query"
213171
}
214172

215173

@@ -259,12 +217,8 @@ function cmd_test {
259217
# wait for cluster to be ready
260218
wait_cluster_ready $CL_NAME
261219

262-
# start a job that checks for mysql to be up
263-
# start_check_job $CL_NAME
264-
265-
# check job
266-
# check_job_ok
267-
# [ $? -ne 0 ] && exit 1
220+
run_query_old $CL_NAME 0 "CREATE DATABASE IF NOT EXISTS test; CREATE TABLE IF NOT EXISTS test.test (name varchar(10) PRIMARY KEY)"
221+
run_query_old $CL_NAME 0 "INSERT INTO test.test VALUES ('test1')"
268222

269223
# trigger failover
270224
kubectl delete pod $CL_NAME-mysql-0
@@ -284,10 +238,12 @@ function cmd_test {
284238

285239
check_cluster_version $CL_NAME 300
286240

287-
# check the job status to determine if the cluster was down
288-
# check_job_ok
289-
# [ $? -ne 0 ] && exit 1
290-
241+
current=$(run_query $CL_NAME 0 "SELECT 'ok' FROM test.test WHERE name='test1'")
242+
if [ "$current" == "ok" ]; then
243+
msg "Ok sql test" success
244+
else
245+
msg "Bad sql test" error
246+
fi
291247
}
292248

293249

pkg/controller/mysqlcluster/internal/upgrades/upgrades.go

Lines changed: 0 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -78,11 +78,6 @@ func (u *upgrader) Run(ctx context.Context) error {
7878
// in maintenance except node 0.
7979
// TODO: or set promotion rules
8080
if int(*sts.Spec.Replicas) > 1 {
81-
82-
if err = u.setNodesInMaintenaceExcept(insts, 0); err != nil {
83-
return err
84-
}
85-
8681
one := int32(1)
8782
sts.Spec.Replicas = &one
8883
if err = u.client.Update(ctx, sts); err != nil {
@@ -221,35 +216,6 @@ func (u *upgrader) getPodOldHostname(node int) string {
221216
u.cluster.Namespace)
222217
}
223218

224-
func (u *upgrader) setNodesInMaintenaceExcept(insts []orc.Instance, node int) error {
225-
maintenances, err := u.orcClient.Maintenance()
226-
if err != nil {
227-
return err
228-
}
229-
230-
for _, inst := range insts {
231-
if inst.Key.Hostname == u.getPodOldHostname(node) {
232-
// execept given node
233-
continue
234-
}
235-
236-
inMaintenance := false
237-
for _, m := range maintenances {
238-
if m.Key.Hostname == inst.Key.Hostname {
239-
inMaintenance = true
240-
}
241-
}
242-
243-
if !inMaintenance {
244-
if err := u.orcClient.BeginMaintenance(inst.Key, "upgrader", "upgrade"); err != nil {
245-
return err
246-
}
247-
}
248-
}
249-
250-
return nil
251-
}
252-
253219
func (u *upgrader) forgetFromOrc() error {
254220
for node := 0; node < int(*u.cluster.Spec.Replicas); node++ {
255221
if err := u.orcClient.Forget(u.getPodOldHostname(node), 3306); err != nil {

pkg/controller/node/node_controller.go

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,10 @@ func (r *ReconcileMysqlNode) Reconcile(request reconcile.Request) (reconcile.Res
171171

172172
// if it's a old version cluster then don't do anything
173173
if shouldUpdateToVersion(cluster, 300) {
174-
return reconcile.Result{}, nil
174+
// if the cluster is upgraded then set an annotation on the cluster that skips the GTID configuration
175+
// TODO: this should be removed in the next versions
176+
cluster.Annotations["mysql.presslabs.org/SkipGTIDPurged"] = "true"
177+
return reconcile.Result{}, r.Update(ctx, cluster.Unwrap())
175178
}
176179

177180
// get cluster credentials from k8s secret, like replication and operator credentials
@@ -209,6 +212,15 @@ func (r *ReconcileMysqlNode) initializeMySQL(ctx context.Context, sql SQLInterfa
209212
return err
210213
}
211214

215+
// check whether MySQL was already configured, to avoid reconfiguring it multiple times
216+
if configured, err := sql.IsConfigured(ctx); err != nil {
217+
return err
218+
} else if configured {
219+
// already configured. For example this can be reached if the pod status update fails
220+
log.V(1).Info("MySQL is already configure - skip")
221+
return nil
222+
}
223+
212224
// disable MySQL SUPER readonly to be able to modify settings in MySQL
213225
enableSuperReadOnly, err := sql.DisableSuperReadOnly(ctx)
214226
if err != nil {
@@ -219,8 +231,12 @@ func (r *ReconcileMysqlNode) initializeMySQL(ctx context.Context, sql SQLInterfa
219231
// is slave node?
220232
if cluster.GetMasterHost() != sql.Host() {
221233
log.Info("configure pod as slave", "pod", sql.Host(), "master", cluster.GetMasterHost())
222-
if err := sql.SetPurgedGTID(ctx); err != nil {
223-
return err
234+
235+
// check if the skip annotation is set on the cluster first
236+
if _, ok := cluster.Annotations["mysql.presslabs.org/SkipGTIDPurged"]; !ok {
237+
if err := sql.SetPurgedGTID(ctx); err != nil {
238+
return err
239+
}
224240
}
225241

226242
if err := sql.ChangeMasterTo(ctx, cluster.GetMasterHost(), c.ReplicationUser, c.ReplicationPassword); err != nil {

pkg/controller/node/sql.go

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ type SQLInterface interface {
4040
DisableSuperReadOnly(ctx context.Context) (func(), error)
4141
ChangeMasterTo(ctx context.Context, host string, user string, pass string) error
4242
MarkConfigurationDone(ctx context.Context) error
43+
IsConfigured(ctx context.Context) (bool, error)
4344
SetPurgedGTID(ctx context.Context) error
4445
Host() string
4546
}
@@ -125,14 +126,13 @@ func (r *nodeSQLRunner) ChangeMasterTo(ctx context.Context, masterHost, user, pa
125126

126127
// MarkConfigurationDone writes a value in a MEMORY table. The readiness probe checks that this value exists in order to succeed.
127128
func (r *nodeSQLRunner) MarkConfigurationDone(ctx context.Context) error {
128-
// nolint: gosec
129-
query := fmt.Sprintf("REPLACE INTO %s.%s VALUES ('%s', '1');",
130-
constants.OperatorDbName, constants.OperatorStatusTableName, "configured")
129+
return r.writeStatusValue(ctx, "configured", "1")
130+
}
131131

132-
if err := r.runQuery(ctx, query); err != nil {
133-
return fmt.Errorf("failed to mark configuration done, err: %s", err)
134-
}
135-
return nil
132+
// IsConfigured returns true if MySQL was configured, i.e. the "configured" key is set to "1" in the status table
133+
func (r *nodeSQLRunner) IsConfigured(ctx context.Context) (bool, error) {
134+
val, err := r.readStatusValue(ctx, "configured")
135+
return val == "1", err
136136
}
137137

138138
func (r *nodeSQLRunner) Host() string {
@@ -239,6 +239,7 @@ func (r *nodeSQLRunner) SetPurgedGTID(ctx context.Context) error {
239239
return nil
240240
}
241241

242+
// readStatusValue reads from the status table the value stored under the given key
242243
func (r *nodeSQLRunner) readStatusValue(ctx context.Context, key string) (string, error) {
243244
// nolint: gosec
244245
qq := fmt.Sprintf("SELECT value FROM %s.%s WHERE name='%s'",
@@ -254,6 +255,19 @@ func (r *nodeSQLRunner) readStatusValue(ctx context.Context, key string) (string
254255
return value, nil
255256
}
256257

258+
// writeStatusValue updates the value at the provided key
259+
func (r *nodeSQLRunner) writeStatusValue(ctx context.Context, key, value string) error {
260+
// nolint: gosec
261+
query := fmt.Sprintf("REPLACE INTO %s.%s VALUES ('%s', '%s');",
262+
constants.OperatorDbName, constants.OperatorStatusTableName, key, value)
263+
264+
if err := r.runQuery(ctx, query); err != nil {
265+
return err
266+
}
267+
268+
return nil
269+
}
270+
257271
// isMySQLError checks if a mysql error is of the given code.
258272
// more information about mysql error codes can be found here:
259273
// https://dev.mysql.com/doc/refman/8.0/en/server-error-reference.html

pkg/controller/node/sql_fake_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,10 @@ func (f *fakeSQLRunner) SetPurgedGTID(ctx context.Context) error {
5252
return nil
5353
}
5454

55+
func (f *fakeSQLRunner) IsConfigured(ctx context.Context) (bool, error) {
56+
return false, nil
57+
}
58+
5559
var _ = Describe("SQL functions", func() {
5660
It("should find not found error", func() {
5761
err := fmt.Errorf("Error 1146: Table 'a.a' doesn't exist")

0 commit comments

Comments
 (0)