Skip to content

Commit 001b7bc

Browse files
authored
K8SPSMDB-1527: Cluster can't be ready before PBM is ready (#2152)
* K8SPSMDB-1527: Cluster can't be ready before PBM is ready
* fix cluster readiness when no storages
* fix pause
* fix replset-overrides
* fix liveness
1 parent 098b0d7 commit 001b7bc

File tree

16 files changed

+734
-271
lines changed

16 files changed

+734
-271
lines changed

e2e-tests/custom-users-roles/conf/some-name-rs0.yml

Lines changed: 0 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -61,39 +61,6 @@ spec:
6161
backup:
6262
enabled: true
6363
image: perconalab/percona-server-mongodb-operator:1.1.0-backup
64-
storages:
65-
aws-s3:
66-
main: true
67-
type: s3
68-
s3:
69-
credentialsSecret: aws-s3-secret
70-
region: us-east-1
71-
bucket: operator-testing
72-
prefix: psmdb
73-
insecureSkipTLSVerify: false
74-
minio:
75-
type: s3
76-
s3:
77-
credentialsSecret: minio-secret
78-
region: us-east-1
79-
bucket: operator-testing
80-
endpointUrl: http://minio-service:9000/
81-
insecureSkipTLSVerify: false
82-
gcp-cs:
83-
type: s3
84-
s3:
85-
credentialsSecret: gcp-cs-secret
86-
region: us-east-1
87-
bucket: operator-testing
88-
prefix: psmdb
89-
endpointUrl: https://storage.googleapis.com
90-
insecureSkipTLSVerify: false
91-
tasks:
92-
- name: weekly
93-
enabled: true
94-
schedule: "0 0 * * 0"
95-
compressionType: gzip
96-
storageName: aws-s3
9764
replsets:
9865
- name: rs0
9966
affinity:

e2e-tests/functions

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2168,7 +2168,7 @@ wait_for_cluster_state() {
21682168
echo -n '.'
21692169
if [[ ${timeout} -gt 1500 ]]; then
21702170
echo
2171-
echo "Waiting timeout has been reached. Exiting..."
2171+
log "Waiting timeout has been reached. Exiting..."
21722172
exit 1
21732173
fi
21742174
done

e2e-tests/liveness/run

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ test_dir=$(realpath $(dirname $0))
77
set_debug
88

99
create_infra $namespace
10+
deploy_minio
1011

1112
desc 'create secrets and start client'
1213
kubectl_bin apply -f $conf_dir/secrets.yml -f $conf_dir/client.yml -f $conf_dir/minio-secret.yml

e2e-tests/replset-overrides/conf/some-name-overridden.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ spec:
2424
size: 3
2525
expose:
2626
enabled: false
27-
exposeType: ClusterIP
27+
type: ClusterIP
2828
replsetOverrides:
2929
some-name-rs0-0:
3030
host: external-rs0-0.NAMESPACE.svc:27017
@@ -58,7 +58,7 @@ spec:
5858
antiAffinityTopologyKey: none
5959
expose:
6060
enabled: false
61-
exposeType: ClusterIP
61+
type: ClusterIP
6262
podDisruptionBudget:
6363
maxUnavailable: 1
6464
resources:
@@ -79,7 +79,7 @@ spec:
7979
affinity:
8080
antiAffinityTopologyKey: none
8181
expose:
82-
exposeType: ClusterIP
82+
type: ClusterIP
8383
podDisruptionBudget:
8484
maxUnavailable: 1
8585
resources:

e2e-tests/replset-overrides/conf/some-name-override-priority.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ spec:
2424
size: 3
2525
expose:
2626
enabled: false
27-
exposeType: ClusterIP
27+
type: ClusterIP
2828
replsetOverrides:
2929
some-name-rs0-0:
3030
priority: 1

e2e-tests/replset-overrides/conf/some-name.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ spec:
2424
size: 3
2525
expose:
2626
enabled: false
27-
exposeType: ClusterIP
27+
type: ClusterIP
2828
resources:
2929
limits:
3030
cpu: 300m
@@ -45,7 +45,7 @@ spec:
4545
antiAffinityTopologyKey: none
4646
expose:
4747
enabled: false
48-
exposeType: ClusterIP
48+
type: ClusterIP
4949
podDisruptionBudget:
5050
maxUnavailable: 1
5151
resources:
@@ -66,7 +66,7 @@ spec:
6666
affinity:
6767
antiAffinityTopologyKey: none
6868
expose:
69-
exposeType: ClusterIP
69+
type: ClusterIP
7070
podDisruptionBudget:
7171
maxUnavailable: 1
7272
resources:

e2e-tests/replset-overrides/run

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ test_override_host_after_deploy() {
6161
echo "patching PSMDB cluster with replsetOverrides: ${cluster}"
6262
sed "s/NAMESPACE/${namespace}/g" ${test_dir}/conf/${cluster}-overridden.yml > ${tmp_dir}/${cluster}-overridden.yml
6363
apply_cluster ${tmp_dir}/${cluster}-overridden.yml
64+
sleep_with_log 10 "wait for replset reconfiguration"
6465
wait_for_cluster_state ${cluster} "ready"
6566

6667
echo "checking if we can read existing data"

pkg/apis/psmdb/v1/psmdb_types.go

Lines changed: 43 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -340,9 +340,13 @@ const (
340340
ConditionUnknown ConditionStatus = "Unknown"
341341
)
342342

343-
// ConditionTypePendingSmartUpdate is a condition type set on PSMDBCluster when a smart update is required
344-
// but has not yet started. For e.g., if a backup/restore is running at the same time as a smart update is triggered.
345-
const ConditionTypePendingSmartUpdate AppState = "pendingSmartUpdate"
343+
const (
344+
// ConditionTypePendingSmartUpdate is a condition type set on PSMDBCluster when a smart update is required
345+
// but has not yet started. For e.g., if a backup/restore is running at the same time as a smart update is triggered.
346+
ConditionTypePendingSmartUpdate AppState = "pendingSmartUpdate"
347+
348+
// ConditionTypePBMReady is a condition type whose string value is "PBMReady".
// NOTE(review): judging by the name and the commit title, it presumably records whether
// PBM (Percona Backup for MongoDB) is ready so that cluster readiness can depend on it;
// where it is set and read is not visible in this hunk — confirm against the callers.
ConditionTypePBMReady AppState = "PBMReady"
349+
)
346350

347351
type ClusterCondition struct {
348352
Status ConditionStatus `json:"status"`
@@ -370,6 +374,33 @@ func (s *PerconaServerMongoDBStatus) IsStatusConditionTrue(conditionType AppStat
370374
return cond.Status == ConditionTrue
371375
}
372376

377+
// AddCondition inserts c into s.Conditions, or updates the existing condition of the same type.
//
// If FindCondition returns nil for c.Type, c is appended; a zero LastTransitionTime is
// replaced with the current time first. Otherwise the existing condition is modified through
// the returned pointer: when Status differs it is overwritten and LastTransitionTime is set to
// c.LastTransitionTime (or the current time if that is zero); Reason and Message are then
// brought in line with c. LastTransitionTime is left untouched when Status is unchanged.
//
// NOTE(review): this relies on FindCondition returning a pointer into s.Conditions so that the
// in-place writes are visible to callers — FindCondition is not shown here, confirm.
func (s *PerconaServerMongoDBStatus) AddCondition(c ClusterCondition) {
378+
existingCondition := s.FindCondition(c.Type)
379+
if existingCondition == nil { // no condition of this type yet: append a new one
380+
if c.LastTransitionTime.IsZero() {
381+
c.LastTransitionTime = metav1.NewTime(time.Now())
382+
}
383+
s.Conditions = append(s.Conditions, c)
384+
return
385+
}
386+
387+
if existingCondition.Status != c.Status { // only a status change counts as a transition
388+
existingCondition.Status = c.Status
389+
if !c.LastTransitionTime.IsZero() {
390+
existingCondition.LastTransitionTime = c.LastTransitionTime
391+
} else {
392+
existingCondition.LastTransitionTime = metav1.NewTime(time.Now())
393+
}
394+
}
395+
396+
if existingCondition.Reason != c.Reason {
397+
existingCondition.Reason = c.Reason
398+
}
399+
if existingCondition.Message != c.Message {
400+
existingCondition.Message = c.Message
401+
}
402+
}
403+
373404
type PMMSpec struct {
374405
Enabled bool `json:"enabled,omitempty"`
375406
ServerHost string `json:"serverHost,omitempty"`
@@ -1498,6 +1529,15 @@ func (cr *PerconaServerMongoDB) MongosNamespacedName() types.NamespacedName {
14981529
return types.NamespacedName{Name: cr.Name + "-" + "mongos", Namespace: cr.Namespace}
14991530
}
15001531

1532+
// GetReplsets returns a newly allocated slice holding every entry of cr.Spec.Replsets and,
// when cr.Spec.Sharding.Enabled is true, cr.Spec.Sharding.ConfigsvrReplSet as the last element.
// The slice itself is fresh (never nil), but its elements are the same *ReplsetSpec pointers
// stored in the spec, so mutating an element mutates the spec.
//
// NOTE(review): ConfigsvrReplSet is appended without a nil check; presumably it is always set
// when sharding is enabled (e.g. by defaulting) — confirm, otherwise callers may see a nil entry.
func (cr *PerconaServerMongoDB) GetReplsets() []*ReplsetSpec {
1533+
replsets := make([]*ReplsetSpec, 0)
1534+
replsets = append(replsets, cr.Spec.Replsets...)
1535+
if cr.Spec.Sharding.Enabled {
1536+
replsets = append(replsets, cr.Spec.Sharding.ConfigsvrReplSet)
1537+
}
1538+
return replsets
1539+
}
1540+
15011541
func (cr *PerconaServerMongoDB) CanBackup(ctx context.Context) error {
15021542
log := logf.FromContext(ctx).V(1).WithValues("cluster", cr.Name, "namespace", cr.Namespace)
15031543
log.Info("checking if backup is allowed")
@@ -1547,33 +1587,6 @@ func (s *PerconaServerMongoDBStatus) RemoveCondition(conditionType AppState) {
15471587
}
15481588
}
15491589

1550-
func (s *PerconaServerMongoDBStatus) AddCondition(c ClusterCondition) {
1551-
existingCondition := s.FindCondition(c.Type)
1552-
if existingCondition == nil {
1553-
if c.LastTransitionTime.IsZero() {
1554-
c.LastTransitionTime = metav1.NewTime(time.Now())
1555-
}
1556-
s.Conditions = append(s.Conditions, c)
1557-
return
1558-
}
1559-
1560-
if existingCondition.Status != c.Status {
1561-
existingCondition.Status = c.Status
1562-
if !c.LastTransitionTime.IsZero() {
1563-
existingCondition.LastTransitionTime = c.LastTransitionTime
1564-
} else {
1565-
existingCondition.LastTransitionTime = metav1.NewTime(time.Now())
1566-
}
1567-
}
1568-
1569-
if existingCondition.Reason != c.Reason {
1570-
existingCondition.Reason = c.Reason
1571-
}
1572-
if existingCondition.Message != c.Message {
1573-
existingCondition.Message = c.Message
1574-
}
1575-
}
1576-
15771590
// GetExternalNodes returns all external nodes for all replsets
15781591
func (cr *PerconaServerMongoDB) GetExternalNodes() []*ExternalNode {
15791592
extNodes := make([]*ExternalNode, 0)

pkg/controller/perconaservermongodb/backup.go

Lines changed: 0 additions & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,8 @@
11
package perconaservermongodb
22

33
import (
4-
"bytes"
54
"container/heap"
65
"context"
7-
"strings"
86
"time"
97

108
"github.com/pkg/errors"
@@ -19,12 +17,9 @@ import (
1917
logf "sigs.k8s.io/controller-runtime/pkg/log"
2018

2119
"github.com/percona/percona-backup-mongodb/pbm/defs"
22-
pbmVersion "github.com/percona/percona-backup-mongodb/pbm/version"
2320

2421
api "github.com/percona/percona-server-mongodb-operator/pkg/apis/psmdb/v1"
25-
"github.com/percona/percona-server-mongodb-operator/pkg/k8s"
2622
"github.com/percona/percona-server-mongodb-operator/pkg/naming"
27-
"github.com/percona/percona-server-mongodb-operator/pkg/psmdb"
2823
"github.com/percona/percona-server-mongodb-operator/pkg/psmdb/backup"
2924
)
3025

@@ -444,117 +439,3 @@ func isPodUpToDate(pod *corev1.Pod, stsRevision, image string) bool {
444439

445440
return true
446441
}
447-
448-
func (r *ReconcilePerconaServerMongoDB) reconcileBackupVersion(ctx context.Context, cr *api.PerconaServerMongoDB) error {
449-
log := logf.FromContext(ctx)
450-
451-
if !cr.Spec.Backup.Enabled {
452-
return nil
453-
}
454-
455-
if cr.Status.State != api.AppStateReady {
456-
return nil
457-
}
458-
459-
if cr.Status.BackupVersion != "" && cr.Status.BackupImage == cr.Spec.Backup.Image {
460-
return nil
461-
}
462-
463-
if len(cr.Spec.Replsets) < 1 {
464-
return errors.New("no replsets found")
465-
}
466-
467-
var rs *api.ReplsetSpec
468-
for _, r := range cr.Spec.Replsets {
469-
rs = r
470-
break
471-
}
472-
473-
stsName := naming.MongodStatefulSetName(cr, rs)
474-
sts := psmdb.NewStatefulSet(stsName, cr.Namespace)
475-
err := r.client.Get(ctx, client.ObjectKeyFromObject(sts), sts)
476-
if err != nil {
477-
return errors.Wrapf(err, "get statefulset/%s", stsName)
478-
}
479-
480-
matchLabels := naming.RSLabels(cr, rs)
481-
label, ok := sts.Labels[naming.LabelKubernetesComponent]
482-
if ok {
483-
matchLabels[naming.LabelKubernetesComponent] = label
484-
}
485-
486-
podList := corev1.PodList{}
487-
if err := r.client.List(ctx,
488-
&podList,
489-
&client.ListOptions{
490-
Namespace: cr.Namespace,
491-
LabelSelector: labels.SelectorFromSet(matchLabels),
492-
},
493-
); err != nil {
494-
return errors.Wrap(err, "get pod list")
495-
}
496-
497-
var pod *corev1.Pod
498-
for _, p := range podList.Items {
499-
if !k8s.IsPodReady(p) {
500-
continue
501-
}
502-
503-
if !isPodUpToDate(&p, sts.Status.UpdateRevision, cr.Spec.Backup.Image) {
504-
continue
505-
}
506-
507-
pod = &p
508-
break
509-
}
510-
if pod == nil {
511-
log.V(1).Error(nil, "no ready pods to get pbm-agent version")
512-
return nil
513-
}
514-
515-
stdout := &bytes.Buffer{}
516-
stderr := &bytes.Buffer{}
517-
cmd := []string{"pbm-agent", "version", "--short"}
518-
519-
err = r.clientcmd.Exec(ctx, pod, naming.ContainerBackupAgent, cmd, nil, stdout, stderr, false)
520-
if err != nil {
521-
return errors.Wrap(err, "get pbm-agent version")
522-
}
523-
524-
// PBM v2.9.0 and above prints version to stderr, below prints it to stdout
525-
stdoutStr := strings.TrimSpace(stdout.String())
526-
stderrStr := strings.TrimSpace(stderr.String())
527-
if stdoutStr != "" && stderrStr != "" {
528-
log.V(1).Info("pbm-agent version found in both stdout and stderr; using stdout",
529-
"stdout", stdoutStr, "stderr", stderrStr)
530-
cr.Status.BackupVersion = stdoutStr
531-
} else if stdoutStr != "" {
532-
cr.Status.BackupVersion = stdoutStr
533-
} else if stderrStr != "" {
534-
cr.Status.BackupVersion = stderrStr
535-
} else {
536-
return errors.New("pbm-agent version not found in stdout or stderr")
537-
}
538-
539-
cr.Status.BackupImage = cr.Spec.Backup.Image
540-
541-
log.Info("pbm-agent version",
542-
"pod", pod.Name,
543-
"image", cr.Status.BackupImage,
544-
"version", cr.Status.BackupVersion)
545-
546-
pbmInfo := pbmVersion.Current()
547-
548-
compare, err := cr.ComparePBMAgentVersion(pbmInfo.Version)
549-
if err != nil {
550-
return errors.Wrap(err, "compare pbm-agent version with go module")
551-
}
552-
553-
if compare != 0 {
554-
log.Info("pbm-agent version is different than the go module, this might create problems",
555-
"pbmAgentVersion", cr.Status.BackupVersion,
556-
"goModuleVersion", pbmInfo.Version)
557-
}
558-
559-
return nil
560-
}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
package perconaservermongodb
2+
3+
import (
4+
"context"
5+
6+
psmdbv1 "github.com/percona/percona-server-mongodb-operator/pkg/apis/psmdb/v1"
7+
)
8+
9+
// updateCondition applies c to cr.Status via AddCondition and then calls r.writeStatus with
// the same cr, returning whatever error writeStatus returns.
// The in-memory status is modified before the write, so cr keeps the new condition even if
// writeStatus fails.
// NOTE(review): writeStatus is not shown here; presumably it persists cr.Status to the API
// server — confirm.
func (r *ReconcilePerconaServerMongoDB) updateCondition(ctx context.Context, cr *psmdbv1.PerconaServerMongoDB, c psmdbv1.ClusterCondition) error {
10+
cr.Status.AddCondition(c)
11+
return r.writeStatus(ctx, cr)
12+
}

0 commit comments

Comments
 (0)