Skip to content

Commit aea537c

Browse files
committed
OCPBUGS-2183: Updating quorum detection logic to absolve TNF of quorum loss reports.
- Added a new utility function to detect when an external etcd cluster has completed its transition. - Updated the quorum check in the membership controller to prevent CEO from being marked as unavailable when it would otherwise report quorum loss but cannot actually lose quorum because the transition to ExternalEtcd has completed. - Added a new ExternalEtcd lifecycle test to track the flow of an ExternalEtcd transition. - Generated basic validation tests to ensure that CEO is only set to unavailable when quorum is lost on a cluster that doesn't automatically recover quorum. - Updated tests to use utils-based initializers by default.
1 parent 0357803 commit aea537c

File tree

12 files changed

+636
-58
lines changed

12 files changed

+636
-58
lines changed

pkg/operator/bootstrapteardown/bootstrap_teardown_controller_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,7 @@ func TestRemoveBootstrap(t *testing.T) {
371371
fakeNamespaceLister := corev1listers.NewNamespaceLister(indexer)
372372
fakeConfigmapLister := corev1listers.NewConfigMapLister(indexer)
373373
fakeInfraLister := configv1listers.NewInfrastructureLister(indexer)
374-
fakeStaticPodClient := v1helpers.NewFakeStaticPodOperatorClient(&operatorv1.StaticPodOperatorSpec{}, &operatorv1.StaticPodOperatorStatus{}, nil, nil)
374+
fakeStaticPodClient := v1helpers.NewFakeStaticPodOperatorClient(&operatorv1.StaticPodOperatorSpec{}, u.StaticPodOperatorStatus(), nil, nil)
375375
fakeEtcdClient, err := etcdcli.NewFakeEtcdClient([]*etcdserverpb.Member{u.FakeEtcdBootstrapMember(1)})
376376
require.NoError(t, err)
377377
fakeKubeClient := fake.NewClientset([]runtime.Object{}...)

pkg/operator/ceohelpers/common_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"k8s.io/apimachinery/pkg/runtime"
1010

1111
operatorv1 "github.com/openshift/api/operator/v1"
12+
u "github.com/openshift/cluster-etcd-operator/pkg/testutils"
1213
"github.com/openshift/library-go/pkg/operator/v1helpers"
1314
)
1415

@@ -57,7 +58,7 @@ func TestReadDesiredControlPlaneReplicaCount(t *testing.T) {
5758
for _, scenario := range scenarios {
5859
t.Run(scenario.name, func(t *testing.T) {
5960
// test data
60-
fakeOperatorClient := v1helpers.NewFakeStaticPodOperatorClient(&scenario.operatorSpec, &operatorv1.StaticPodOperatorStatus{}, nil, nil)
61+
fakeOperatorClient := v1helpers.NewFakeStaticPodOperatorClient(&scenario.operatorSpec, u.StaticPodOperatorStatus(), nil, nil)
6162

6263
// act
6364
actualReplicaCount, err := ReadDesiredControlPlaneReplicasCount(fakeOperatorClient)

pkg/operator/ceohelpers/external_etcd_status.go

Lines changed: 52 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,10 @@ import (
1111
)
1212

1313
type ExternalEtcdClusterStatus struct {
14-
IsExternalEtcdCluster bool
15-
IsEtcdRunningInCluster bool
16-
IsReadyForEtcdTransition bool
14+
IsExternalEtcdCluster bool
15+
IsEtcdRunningInCluster bool
16+
IsReadyForEtcdTransition bool
17+
HasExternalEtcdCompletedTransition bool
1718
}
1819

1920
// IsExternalEtcdCluster determines if the cluster is configured for external etcd
@@ -36,13 +37,18 @@ func IsExternalEtcdCluster(ctx context.Context, infraLister configv1listers.Infr
3637
// IsReadyForEtcdTransition checks if the cluster is ready for etcd transition
3738
// by examining the operator status for the ExternalEtcdReadyForTransition condition.
3839
// This condition is set when the TNF setup is ready to take over the etcd container.
39-
func IsReadyForEtcdTransition(ctx context.Context, operatorClient v1helpers.StaticPodOperatorClient) (bool, error) {
40+
func IsReadyForEtcdTransition(operatorClient v1helpers.StaticPodOperatorClient) (bool, error) {
4041
_, opStatus, _, err := operatorClient.GetStaticPodOperatorState()
4142
if err != nil {
4243
klog.Errorf("failed to get static pod operator state: %v", err)
4344
return false, err
4445
}
4546

47+
if opStatus == nil {
48+
klog.V(2).Info("static pod operator status not yet populated; ready for etcd transition unknown")
49+
return false, nil
50+
}
51+
4652
readyForEtcdTransition := v1helpers.IsOperatorConditionTrue(opStatus.Conditions, etcd.OperatorConditionExternalEtcdReadyForTransition)
4753
if readyForEtcdTransition {
4854
klog.V(4).Infof("ready for etcd transition")
@@ -60,6 +66,11 @@ func IsEtcdRunningInCluster(ctx context.Context, operatorClient v1helpers.Static
6066
return false, err
6167
}
6268

69+
if opStatus == nil {
70+
klog.V(2).Info("static pod operator status not yet populated; bootstrap completion unknown")
71+
return false, nil
72+
}
73+
6374
etcdRunningInCluster := v1helpers.IsOperatorConditionTrue(opStatus.Conditions, etcd.OperatorConditionEtcdRunningInCluster)
6475
if etcdRunningInCluster {
6576
klog.V(4).Infof("bootstrap completed, etcd running in cluster")
@@ -68,16 +79,39 @@ func IsEtcdRunningInCluster(ctx context.Context, operatorClient v1helpers.Static
6879
return etcdRunningInCluster, nil
6980
}
7081

82+
// HasExternalEtcdCompletedTransition checks whether the transition to external etcd has completed
83+
// by examining the operator status for the ExternalEtcdHasCompletedTransition condition.
84+
func HasExternalEtcdCompletedTransition(ctx context.Context, operatorClient v1helpers.StaticPodOperatorClient) (bool, error) {
85+
_, opStatus, _, err := operatorClient.GetStaticPodOperatorState()
86+
if err != nil {
87+
klog.Errorf("failed to get static pod operator state: %v", err)
88+
return false, err
89+
}
90+
91+
if opStatus == nil {
92+
klog.V(2).Info("static pod operator status not yet populated; transition completion unknown")
93+
return false, nil
94+
}
95+
96+
hasExternalEtcdCompletedTransition := v1helpers.IsOperatorConditionTrue(opStatus.Conditions, etcd.OperatorConditionExternalEtcdHasCompletedTransition)
97+
if hasExternalEtcdCompletedTransition {
98+
klog.V(4).Infof("etcd has transitioned to running externally")
99+
}
100+
101+
return hasExternalEtcdCompletedTransition, nil
102+
}
103+
71104
// GetExternalEtcdClusterStatus provides a comprehensive status check for external etcd clusters.
72105
// It returns the external etcd status, bootstrap completion status, readiness for transition, and whether the transition to external etcd has completed.
73106
func GetExternalEtcdClusterStatus(ctx context.Context,
74107
operatorClient v1helpers.StaticPodOperatorClient,
75108
infraLister configv1listers.InfrastructureLister) (externalEtcdStatus ExternalEtcdClusterStatus, err error) {
76109

77110
externalEtcdStatus = ExternalEtcdClusterStatus{
78-
IsExternalEtcdCluster: false,
79-
IsEtcdRunningInCluster: false,
80-
IsReadyForEtcdTransition: false,
111+
IsExternalEtcdCluster: false,
112+
IsEtcdRunningInCluster: false,
113+
IsReadyForEtcdTransition: false,
114+
HasExternalEtcdCompletedTransition: false,
81115
}
82116

83117
// Check if this is an external etcd cluster
@@ -98,18 +132,29 @@ func GetExternalEtcdClusterStatus(ctx context.Context,
98132
return externalEtcdStatus, err
99133
}
100134

135+
if opStatus == nil {
136+
klog.V(2).Info("static pod operator status not yet populated; external etcd cluster status unknown")
137+
return externalEtcdStatus, nil
138+
}
139+
101140
// Check bootstrap completion
102141
externalEtcdStatus.IsEtcdRunningInCluster = v1helpers.IsOperatorConditionTrue(opStatus.Conditions, etcd.OperatorConditionEtcdRunningInCluster)
103142

104143
// Check readiness for transition
105144
externalEtcdStatus.IsReadyForEtcdTransition = v1helpers.IsOperatorConditionTrue(opStatus.Conditions, etcd.OperatorConditionExternalEtcdReadyForTransition)
106145

146+
// Check if etcd has completed transition to running externally
147+
externalEtcdStatus.HasExternalEtcdCompletedTransition = v1helpers.IsOperatorConditionTrue(opStatus.Conditions, etcd.OperatorConditionExternalEtcdHasCompletedTransition)
148+
107149
if externalEtcdStatus.IsEtcdRunningInCluster {
108150
klog.V(4).Infof("bootstrap completed, etcd running in cluster")
109151
}
110152
if externalEtcdStatus.IsReadyForEtcdTransition {
111153
klog.V(4).Infof("ready for etcd transition")
112154
}
155+
if externalEtcdStatus.HasExternalEtcdCompletedTransition {
156+
klog.V(4).Infof("etcd has transitioned to running externally")
157+
}
113158

114159
return externalEtcdStatus, nil
115160
}

0 commit comments

Comments
 (0)