@@ -187,7 +187,24 @@ func (dcgs *DisaggregatedComputeGroupsController) reconcileStatefulset(ctx conte
187187 }
188188
189189 if err := k8s .ApplyStatefulSet (ctx , dcgs .K8sclient , st , func (st , est * appv1.StatefulSet ) bool {
190- return resource .StatefulsetDeepEqualWithKey (st , est , dv1 .DisaggregatedSpecHashValueAnnotation , false )
190+ // Record the annotation "doris.disaggregated.cluster/generation={generation}" on the statefulset.
191+ // Record the annotation "doris.disaggregated.cluster/update-{uniqueid}=true/false" on the DorisDisaggregatedCluster.
192+ equal := resource .StatefulsetDeepEqualWithKey (st , est , dv1 .DisaggregatedSpecHashValueAnnotation , false )
193+ if ! equal {
194+ if len (st .Annotations ) == 0 {
195+ st .Annotations = map [string ]string {}
196+ }
197+ st_annos := (resource .Annotations )(st .Annotations )
198+ st_annos .Add (dv1 .UpdateStatefulsetGeneration , strconv .FormatInt (cluster .Generation , 10 ))
199+ if len (cluster .Annotations ) == 0 {
200+ cluster .Annotations = map [string ]string {}
201+ }
202+ ddc_annos := (resource .Annotations )(cluster .Annotations )
203+ msUniqueIdKey := strings .ToLower (fmt .Sprintf (dv1 .UpdateStatefulsetName , cluster .GetCGStatefulsetName (cg )))
204+ ddc_annos .Add (msUniqueIdKey , "true" )
205+ }
206+ return equal
207+
191208 }); err != nil {
192209 klog .Errorf ("disaggregatedComputeGroupsController reconcileStatefulset apply statefulset namespace=%s name=%s failed, err=%s" , st .Namespace , st .Name , err .Error ())
193210 return & sc.Event {Type : sc .EventWarning , Reason : sc .CGApplyResourceFailed , Message : err .Error ()}, err
@@ -546,12 +563,34 @@ func (dcgs *DisaggregatedComputeGroupsController) UpdateComponentStatus(obj clie
546563}
547564
548565func (dcgs * DisaggregatedComputeGroupsController ) updateCGStatus (ddc * dv1.DorisDisaggregatedCluster , cgs * dv1.ComputeGroupStatus ) error {
566+ stfName := cgs .StatefulsetName
567+ sts , err := k8s .GetStatefulSet (context .Background (), dcgs .K8sclient , ddc .Namespace , stfName )
568+ if err != nil {
569+ klog .Errorf ("DisaggregatedComputeGroupsController updateCGStatus get statefulset %s failed, err=%s" , stfName , err .Error ())
570+ return err
571+ }
572+
573+ // Check whether the statefulset has been updated: if this reconcile updated the sts, rule out the case where a stale sts is read and its pods have not been updated yet.
574+ updateStatefulsetKey := strings .ToLower (fmt .Sprintf (dv1 .UpdateStatefulsetName , stfName ))
575+ if _ , updated := ddc .Annotations [updateStatefulsetKey ]; updated {
576+ generation := dcgs .DisaggregatedSubDefaultController .ReturnStatefulsetUpdatedGeneration (sts , updateStatefulsetKey )
577+ // The generation is only compared when this reconcile updated the statefulset (the update annotation is present); otherwise the comparison is skipped.
578+ if ddc .Generation != generation {
579+ return errors .New ("waiting statefulset upd ated" )
580+ }
581+ }
582+
549583 selector := dcgs .newCGPodsSelector (ddc .Name , cgs .UniqueId )
550584 var podList corev1.PodList
551585 if err := dcgs .K8sclient .List (context .Background (), & podList , client .InNamespace (ddc .Namespace ), client .MatchingLabels (selector )); err != nil {
552586 return err
553587 }
554588
589+
590+ updateRevision := sts .Status .UpdateRevision
591+ // Check whether all listed pods use the statefulset's new update revision.
592+ allUpdated := dcgs .DisaggregatedSubDefaultController .StatefulsetControlledPodsAllUseNewUpdateRevision (updateRevision , podList .Items )
593+
555594 var availableReplicas int32
556595 var creatingReplicas int32
557596 var failedReplicas int32
@@ -567,7 +606,7 @@ func (dcgs *DisaggregatedComputeGroupsController) updateCGStatus(ddc *dv1.DorisD
567606 }
568607
569608 cgs .AvailableReplicas = availableReplicas
570- if availableReplicas == cgs .Replicas {
609+ if allUpdated && availableReplicas == cgs .Replicas {
571610 cgs .Phase = dv1 .Ready
572611 }
573612 return nil
0 commit comments