Skip to content

Commit b42b363

Browse files
authored
Remove autofailover from graphd (#558)
1 parent 8f10220 commit b42b363

File tree

4 files changed

+9
-310
lines changed

4 files changed

+9
-310
lines changed

apis/apps/v1alpha1/nebulacluster_types.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ type NebulaClusterSpec struct {
9696
// +optional
9797
EnableBR *bool `json:"enableBR,omitempty"`
9898

99-
// Flag to enable/disable auto fail over in use local PV scenario, default false.
99+
// Flag to enable/disable auto failover for storaged. Used only for local PV, default false.
100100
// +optional
101101
EnableAutoFailover *bool `json:"enableAutoFailover,omitempty"`
102102

pkg/controller/component/graphd_cluster.go

Lines changed: 8 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -19,22 +19,16 @@ package component
1919
import (
2020
"fmt"
2121
"strconv"
22-
"strings"
23-
"time"
2422

2523
"github.com/vesoft-inc/nebula-go/v3/nebula/meta"
2624
corev1 "k8s.io/api/core/v1"
2725
apierrors "k8s.io/apimachinery/pkg/api/errors"
28-
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2926
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
30-
"k8s.io/apimachinery/pkg/types"
3127
"k8s.io/client-go/tools/record"
3228
"k8s.io/klog/v2"
33-
"k8s.io/utils/pointer"
3429

3530
"github.com/vesoft-inc/nebula-operator/apis/apps/v1alpha1"
3631
"github.com/vesoft-inc/nebula-operator/apis/pkg/annotation"
37-
"github.com/vesoft-inc/nebula-operator/apis/pkg/label"
3832
"github.com/vesoft-inc/nebula-operator/pkg/kube"
3933
"github.com/vesoft-inc/nebula-operator/pkg/nebula"
4034
"github.com/vesoft-inc/nebula-operator/pkg/util/discovery"
@@ -44,26 +38,23 @@ import (
4438
)
4539

4640
type graphdCluster struct {
47-
clientSet kube.ClientSet
48-
dm discovery.Interface
49-
updateManager UpdateManager
50-
failoverManager FailoverManager
51-
eventRecorder record.EventRecorder
41+
clientSet kube.ClientSet
42+
dm discovery.Interface
43+
updateManager UpdateManager
44+
eventRecorder record.EventRecorder
5245
}
5346

5447
func NewGraphdCluster(
5548
clientSet kube.ClientSet,
5649
dm discovery.Interface,
5750
um UpdateManager,
58-
fm FailoverManager,
5951
recorder record.EventRecorder,
6052
) ReconcileManager {
6153
return &graphdCluster{
62-
clientSet: clientSet,
63-
dm: dm,
64-
updateManager: um,
65-
failoverManager: fm,
66-
eventRecorder: recorder,
54+
clientSet: clientSet,
55+
dm: dm,
56+
updateManager: um,
57+
eventRecorder: recorder,
6758
}
6859
}
6960

@@ -187,22 +178,6 @@ func (c *graphdCluster) syncGraphdWorkload(nc *v1alpha1.NebulaCluster) error {
187178
}
188179
}
189180

190-
if nc.IsAutoFailoverEnabled() {
191-
r, hosts, err := c.shouldRecover(nc)
192-
if err != nil {
193-
return err
194-
}
195-
if r {
196-
if err := c.failoverManager.Recovery(nc, hosts); err != nil {
197-
return err
198-
}
199-
} else if nc.GraphdComponent().IsAutoFailovering() {
200-
if err := c.failoverManager.Failover(nc); err != nil {
201-
return err
202-
}
203-
}
204-
}
205-
206181
equal := extender.PodTemplateEqual(newWorkload, oldWorkload)
207182
if !equal || nc.Status.Graphd.Phase == v1alpha1.UpdatePhase {
208183
if err := c.updateManager.Update(nc, oldWorkload, newWorkload, gvk); err != nil {
@@ -268,74 +243,6 @@ func (c *graphdCluster) syncNebulaClusterStatus(
268243
nc.Status.Graphd.Phase = v1alpha1.RunningPhase
269244
}
270245

271-
if !nc.IsAutoFailoverEnabled() {
272-
return syncComponentStatus(nc.GraphdComponent(), &nc.Status.Graphd, oldWorkload)
273-
}
274-
275-
options, err := nebula.ClientOptions(nc, nebula.SetIsMeta(true))
276-
if err != nil {
277-
return err
278-
}
279-
hosts := []string{nc.GetMetadThriftConnAddress()}
280-
metaClient, err := nebula.NewMetaClient(hosts, options...)
281-
if err != nil {
282-
return err
283-
}
284-
defer func() {
285-
_ = metaClient.Disconnect()
286-
}()
287-
288-
hostItems, err := metaClient.ListHosts(meta.ListHostType_GRAPH)
289-
if err != nil {
290-
return err
291-
}
292-
thriftPort := nc.GraphdComponent().GetPort(v1alpha1.GraphdPortNameThrift)
293-
klog.Infof("Current graphd state: %v. Current number of replicas: %v", nc.Status.Graphd.Phase, pointer.Int32Deref(newReplicas, 0))
294-
for _, host := range hostItems {
295-
klog.Infof("Currently looking at host: %v with status %v", strings.Split(host.HostAddr.Host, ".")[0], host.Status)
296-
if host.Status == meta.HostStatus_OFFLINE && host.HostAddr.Port == thriftPort {
297-
podName := strings.Split(host.HostAddr.Host, ".")[0]
298-
ordinal := getPodOrdinal(podName)
299-
if int32(ordinal) >= pointer.Int32Deref(nc.Spec.Graphd.Replicas, 0) {
300-
klog.Infof("graphd pod [%s/%s] has already been terminated by the sts. Skipping failover and/or removing from auto failover list", nc.Namespace, podName)
301-
// delete is a no-op if FailureHosts or podName is nil
302-
delete(nc.Status.Graphd.FailureHosts, podName)
303-
continue
304-
}
305-
if nc.Status.Graphd.FailureHosts == nil {
306-
nc.Status.Graphd.FailureHosts = make(map[string]v1alpha1.FailureHost)
307-
}
308-
fh, exists := nc.Status.Graphd.FailureHosts[podName]
309-
if exists {
310-
deadline := fh.CreationTime.Add(nc.Spec.FailoverPeriod.Duration)
311-
if time.Now().After(deadline) {
312-
if fh.ConfirmationTime.IsZero() {
313-
fh.ConfirmationTime = metav1.Time{Time: time.Now()}
314-
cl := label.New().Cluster(nc.GetClusterName()).Graphd()
315-
_, pvcs, err := getPodAndPvcs(c.clientSet, nc, cl, podName)
316-
if err != nil {
317-
return err
318-
}
319-
pvcSet := make(map[types.UID]v1alpha1.EmptyStruct)
320-
for _, pvc := range pvcs {
321-
pvcSet[pvc.UID] = v1alpha1.EmptyStruct{}
322-
}
323-
fh.PVCSet = pvcSet
324-
nc.Status.Graphd.FailureHosts[podName] = fh
325-
klog.Infof("graphd pod [%s/%s] failover period exceeds %s", nc.Namespace, podName, nc.Spec.FailoverPeriod.Duration.String())
326-
}
327-
}
328-
continue
329-
}
330-
failureHost := v1alpha1.FailureHost{
331-
Host: host.HostAddr.Host,
332-
CreationTime: metav1.Time{Time: time.Now()},
333-
}
334-
nc.Status.Graphd.FailureHosts[podName] = failureHost
335-
klog.Infof("offline graph host %s found", host.HostAddr.Host)
336-
}
337-
}
338-
339246
return syncComponentStatus(nc.GraphdComponent(), &nc.Status.Graphd, oldWorkload)
340247
}
341248

pkg/controller/component/graphd_failover.go

Lines changed: 0 additions & 206 deletions
This file was deleted.

pkg/controller/nebulacluster/nebula_cluster_controller.go

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,6 @@ func NewClusterReconciler(mgr ctrl.Manager, enableKruise bool) (*ClusterReconcil
6767
graphdUpdater := component.NewGraphdUpdater(clientSet.Pod())
6868
metadUpdater := component.NewMetadUpdater(clientSet.Pod())
6969
storagedUpdater := component.NewStoragedUpdater(clientSet)
70-
graphdFailover := component.NewGraphdFailover(mgr.GetClient(), clientSet)
7170
metadFailover := component.NewMetadFailover(mgr.GetClient(), clientSet)
7271
storagedFailover := component.NewStoragedFailover(mgr.GetClient(), clientSet)
7372

@@ -105,7 +104,6 @@ func NewClusterReconciler(mgr ctrl.Manager, enableKruise bool) (*ClusterReconcil
105104
clientSet,
106105
dm,
107106
graphdUpdater,
108-
graphdFailover,
109107
recorder),
110108
component.NewMetadCluster(
111109
clientSet,

0 commit comments

Comments
 (0)