Skip to content

Commit e7d67f3

Browse files
author
smiletan
authored
Merge pull request #320 from intelligentfu8/revert-replcias-update
fix cluster updated and add safe shield for scale down fe
2 parents a13e1ab + 74caa10 commit e7d67f3

File tree

5 files changed

+60
-10
lines changed

5 files changed

+60
-10
lines changed

pkg/common/utils/k8s/client.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,15 @@ func ApplyStatefulSet(ctx context.Context, k8sclient client.Client, st *appv1.St
110110
return err
111111
}
112112

113+
func ApplyDorisCluster(ctx context.Context, k8sclient client.Client, dcr *dorisv1.DorisCluster) error {
114+
err := PatchClientObject(ctx, k8sclient, dcr)
115+
if err == nil || apierrors.IsConflict(err) {
116+
return nil
117+
}
118+
119+
return err
120+
}
121+
113122
func GetStatefulSet(ctx context.Context, k8sclient client.Client, namespace, name string) (*appv1.StatefulSet, error) {
114123
var est appv1.StatefulSet
115124
err := k8sclient.Get(ctx, types.NamespacedName{Namespace: namespace, Name: name}, &est)

pkg/common/utils/mysql/doris.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ func BuildSeqNumberToFrontendMap(frontends []*Frontend, ipMap map[string]string,
111111
}
112112

113113
// FindNeedDeletedObservers sorts frontends by index in descending order and returns the top needRemovedAmount of them.
114-
func FindNeedDeletedFrontends(frontendMap map[int]*Frontend, needRemovedAmount int32) []*Frontend {
114+
func FindNeedDeletedObservers(frontendMap map[int]*Frontend, needRemovedAmount int32) []*Frontend {
115115
var topFrontends []*Frontend
116116
if int(needRemovedAmount) <= len(frontendMap) {
117117
keys := make([]int, 0, len(frontendMap))

pkg/controller/doriscluster_controller.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ package controller
3636
import (
3737
"context"
3838
dorisv1 "github.com/apache/doris-operator/api/doris/v1"
39+
"github.com/apache/doris-operator/pkg/common/utils/k8s"
3940
"github.com/apache/doris-operator/pkg/controller/sub_controller"
4041
"github.com/apache/doris-operator/pkg/controller/sub_controller/be"
4142
bk "github.com/apache/doris-operator/pkg/controller/sub_controller/broker"
@@ -150,9 +151,29 @@ func (r *DorisClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request
150151
}
151152
}
152153

154+
//if dcr has updated by doris operator, should update it in apiserver. if not ignore it.
155+
if err = r.revertDorisClusterSomeFields(ctx, &edcr, dcr); err != nil {
156+
klog.Errorf("DorisClusterReconciler updateDorisClusterToOld update dorisCluster namespace=%s, name=%s failed, err=%s", dcr.Namespace, dcr.Name, err.Error())
157+
return requeueIfError(err)
158+
}
159+
153160
return r.updateDorisClusterStatus(ctx, dcr)
154161
}
155162

163+
//if cluster spec be reverted, doris operator should revert to old.
164+
//this action is not good, but this will be a good shield for scale down of fe.
165+
func(r *DorisClusterReconciler) revertDorisClusterSomeFields(ctx context.Context, getDcr, updatedDcr *dorisv1.DorisCluster) error {
166+
if *getDcr.Spec.FeSpec.Replicas != *updatedDcr.Spec.FeSpec.Replicas {
167+
return k8s.ApplyDorisCluster(ctx, r.Client, updatedDcr)
168+
}
169+
170+
return nil
171+
}
172+
173+
func(r *DorisClusterReconciler) updateDorisCluster(ctx context.Context, dcr *dorisv1.DorisCluster) error {
174+
return k8s.ApplyDorisCluster(ctx, r.Client, dcr)
175+
}
176+
156177
func (r *DorisClusterReconciler) clearNoEffectResources(context context.Context, cluster *dorisv1.DorisCluster) {
157178
//calculate the status of doris cluster by subresource's status.
158179
//clear resources when sub resource deleted. example: deployed fe,be,cn, when cn spec is deleted we should delete cn resources.

pkg/controller/sub_controller/disaggregated_cluster/disaggregated_fe/controller.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -374,7 +374,7 @@ func (dfc *DisaggregatedFEController) dropFEBySQLClient(ctx context.Context, k8s
374374
return nil
375375
}
376376
}
377-
observes := mysql.FindNeedDeletedFrontends(frontendMap, needRemovedAmount)
377+
observes := mysql.FindNeedDeletedObservers(frontendMap, needRemovedAmount)
378378
// drop node and return
379379
return masterDBClient.DropObserver(observes)
380380
}

pkg/controller/sub_controller/fe/prepare_modify.go

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -44,13 +44,7 @@ func (fc *Controller) prepareStatefulsetApply(ctx context.Context, cluster *v1.D
4444
cluster.Spec.FeSpec.Replicas = resource.GetInt32Pointer(0)
4545
}
4646

47-
ele := cluster.GetElectionNumber()
48-
49-
if *(cluster.Spec.FeSpec.Replicas) < ele {
50-
fc.K8srecorder.Event(cluster, string(sc.EventWarning), string(sc.FESpecSetError), "The number of fe ElectionNumber is large than Replicas, Replicas has been corrected to the correct minimum value")
51-
klog.Errorf("prepareStatefulsetApply namespace=%s,name=%s ,The number of fe ElectionNumber(%d) is large than Replicas(%d)", cluster.Namespace, cluster.Name, ele, *(cluster.Spec.FeSpec.Replicas))
52-
cluster.Spec.FeSpec.Replicas = &ele
53-
}
47+
fc.safeScaleDown(cluster, &oldSt)
5448

5549
// wroa means: oldReplicas - newReplicas, the opposite of removedAmount, willRemovedOppositeAmount shortly as wroa
5650
wroa := *(cluster.Spec.FeSpec.Replicas) - *(oldSt.Spec.Replicas)
@@ -76,6 +70,32 @@ func (fc *Controller) prepareStatefulsetApply(ctx context.Context, cluster *v1.D
7670
return nil
7771
}
7872

73+
func (fc *Controller) safeScaleDown(cluster *v1.DorisCluster, ost *appv1.StatefulSet) {
74+
ele := cluster.GetElectionNumber()
75+
nr := *cluster.Spec.FeSpec.Replicas
76+
or := *ost.Spec.Replicas
77+
//if not scale down do nothing.
78+
if nr >= or {
79+
return
80+
}
81+
82+
//if scale down observers,(replicas > election number), be allowed.
83+
if nr >= ele {
84+
return
85+
}
86+
87+
if or >= ele {
88+
// if the scale down nodes have observer and follower roles, scale down observers.
89+
*cluster.Spec.FeSpec.Replicas = ele
90+
fc.K8srecorder.Event(cluster,string(sc.EventWarning), sc.FollowerScaleDownFailed,"Replicas is not allowed less than ElectionNumber, because of the bdbje (like raft) consistency protocol, if want do that please set ElectionNumber less than replicas. like that \"spec:{feSpec:{electionNumber}}\"")
91+
} else {
92+
//if the scale down nodes only have followers, not be allowed.
93+
*cluster.Spec.FeSpec.Replicas =or
94+
fc.K8srecorder.Event(cluster,string(sc.EventWarning), sc.FollowerScaleDownFailed,"Replicas less than electionNumber, so not allowed scale down. This is because the bdbje(like raft) consistency protocol, if want do that please set ElectionNumber less than replicas. like that \"spec:{feSpec:{electionNumber}}\"")
95+
}
96+
97+
return
98+
}
7999
// dropObserverBySqlClient handles doris'SQL(drop frontend) through the MySQL client when dealing with scale in observer
80100
// targetDCR is new dcr
81101
func (fc *Controller) dropObserverBySqlClient(ctx context.Context, k8sclient client.Client, targetDCR *v1.DorisCluster) error {
@@ -150,7 +170,7 @@ func (fc *Controller) dropObserverBySqlClient(ctx context.Context, k8sclient cli
150170
return nil
151171
}
152172
}
153-
observes := mysql.FindNeedDeletedFrontends(frontendMap, needRemovedAmount)
173+
observes := mysql.FindNeedDeletedObservers(frontendMap, needRemovedAmount)
154174
// drop node and return
155175
return masterDBClient.DropObserver(observes)
156176

0 commit comments

Comments
 (0)