@@ -259,79 +259,220 @@ func (dcgs *DisaggregatedComputeGroupsController) validateRegex(cgs []dv1.Comput
259259func (dcgs * DisaggregatedComputeGroupsController ) ClearResources (ctx context.Context , obj client.Object ) (bool , error ) {
260260 ddc := obj .(* dv1.DorisDisaggregatedCluster )
261261
262- if ! dcgs .feAvailable (ddc ) {
263- return false , nil
264- }
265-
266- var clearCGs []dv1.ComputeGroupStatus
267262 var eCGs []dv1.ComputeGroupStatus
268-
269263 for i , cgs := range ddc .Status .ComputeGroupStatuses {
270264 for _ , cg := range ddc .Spec .ComputeGroups {
271265 if cgs .UniqueId == cg .UniqueId {
272266 eCGs = append (eCGs , ddc .Status .ComputeGroupStatuses [i ])
273- goto NoNeedAppend
267+ break
274268 }
275269 }
276-
277- clearCGs = append (clearCGs , ddc .Status .ComputeGroupStatuses [i ])
278- // no need clear should not append.
279- NoNeedAppend:
280270 }
281271
282- sqlClient , err := dcgs .getMasterSqlClient (ctx , dcgs .K8sclient , ddc )
272+ //list the svcs and stss owner reference to dorisDisaggregatedCluster.
273+ cls := dcgs .GetCG2LayerCommonSchedulerLabels (ddc .Name )
274+ svcs , err := k8s .ListServicesInNamespace (ctx , dcgs .K8sclient , ddc .Namespace , cls )
283275 if err != nil {
284- klog .Errorf ("computeGroupSync ClearResources dropCGBySQLClient getMasterSqlClient failed: %s" , err .Error ())
285- dcgs .K8srecorder .Event (ddc , string (sc .EventWarning ), string (sc .CGSqlExecFailed ), "computeGroupSync dropCGBySQLClient failed: " + err .Error ())
276+ klog .Errorf ("DisaggregatedComputeGroupsController ListServicesInNamespace failed, dorisdisaggregatedcluster name=%s" , ddc .Name )
277+ return false , err
278+ }
279+ stss , err := k8s .ListStatefulsetInNamespace (ctx , dcgs .K8sclient , ddc .Namespace , cls )
280+ if err != nil {
281+ klog .Errorf ("DisaggregatedComputeGroupsController ListStatefulsetInNamespace failed, dorisdisaggregatedcluster name=%s" , ddc .Name )
286282 return false , err
287283 }
288- defer sqlClient .Close ()
289284
290- for i := range clearCGs {
291- cgs := clearCGs [i ]
292- cleared := true
293- if err := k8s .DeleteStatefulset (ctx , dcgs .K8sclient , ddc .Namespace , cgs .StatefulsetName ); err != nil {
294- cleared = false
295- klog .Errorf ("disaggregatedComputeGroupsController delete statefulset namespace %s name %s failed, err=%s" , ddc .Namespace , cgs .StatefulsetName , err .Error ())
296- dcgs .K8srecorder .Event (ddc , string (sc .EventWarning ), string (sc .CGStatefulsetDeleteFailed ), err .Error ())
297- }
285+ //clear unused service and statefulset.
286+ delSvcNames := dcgs .findUnusedSvcs (svcs , ddc )
287+ delStsNames , delUniqueIds := dcgs .findUnusedStssAndUniqueIds (stss , ddc )
288+
289+ if err = dcgs .clearCGInDorisMeta (ctx , delUniqueIds , ddc ); err != nil {
290+ return false , err
291+ }
292+ if err = dcgs .clearSvcs (ctx , delSvcNames , ddc ); err != nil {
293+ return false , err
294+ }
295+ if err = dcgs .clearStatefulsets (ctx , delStsNames , ddc ); err != nil {
296+ return false , err
297+ }
298298
299- if err := k8s .DeleteService (ctx , dcgs .K8sclient , ddc .Namespace , cgs .ServiceName ); err != nil {
300- cleared = false
301- klog .Errorf ("disaggregatedComputeGroupsController delete service namespace %s name %s failed, err=%s" , ddc .Namespace , cgs .ServiceName , err .Error ())
302- dcgs .K8srecorder .Event (ddc , string (sc .EventWarning ), string (sc .CGServiceDeleteFailed ), err .Error ())
299+ //clear unused pvc
300+ for i := range eCGs {
301+ err = dcgs .ClearStatefulsetUnusedPVCs (ctx , ddc , eCGs [i ])
302+ if err != nil {
303+ klog .Errorf ("disaggregatedComputeGroupsController ClearStatefulsetUnusedPVCs clear ComputeGroup reduced replicas PVC failed, namespace=%s, ddc name=%s, uniqueId=%s err=%s" , ddc .Namespace , ddc .Name , eCGs [i ].UniqueId , err .Error ())
303304 }
304- if ! cleared {
305- eCGs = append (eCGs , clearCGs [i ])
306- continue
305+ }
306+
307+ for _ , uniqueId := range delUniqueIds {
308+ //new fake computeGroup status for clear all pvcs owner reference to deleted compute group.
309+ fakeCgs := dv1.ComputeGroupStatus {
310+ UniqueId : uniqueId ,
307311 }
308- // drop compute group
309- cgName := strings .ReplaceAll (cgs .UniqueId , "_" , "-" )
310- cgKeepAmount := int32 (0 )
311- err = dcgs .scaledOutBENodesByDrop (sqlClient , cgName , cgKeepAmount )
312+ err = dcgs .ClearStatefulsetUnusedPVCs (ctx , ddc , fakeCgs )
312313 if err != nil {
313- klog .Errorf ("computeGroupSync ClearResources dropCGBySQLClient failed: %s" , err .Error ())
314- dcgs .K8srecorder .Event (ddc , string (sc .EventWarning ), string (sc .CGSqlExecFailed ), "computeGroupSync dropCGBySQLClient failed: " + err .Error ())
314+ klog .Errorf ("disaggregatedComputeGroupsController ClearStatefulsetUnusedPVCs clear deleted compute group failed, namespace=%s, ddc name=%s, uniqueId=%s err=%s" , ddc .Namespace , ddc .Name , uniqueId , err .Error ())
315315 }
316+ }
317+
318+ ddc .Status .ComputeGroupStatuses = eCGs
319+ return true , nil
316320
321+ //TODO: next pr remove the code
322+ //sqlClient, err := dcgs.getMasterSqlClient(ctx, dcgs.K8sclient, ddc)
323+ //if err != nil {
324+ // klog.Errorf("computeGroupSync ClearResources dropCGBySQLClient getMasterSqlClient failed: %s", err.Error())
325+ // dcgs.K8srecorder.Event(ddc, string(sc.EventWarning), string(sc.CGSqlExecFailed), "computeGroupSync dropCGBySQLClient failed: "+err.Error())
326+ // return false, err
327+ //}
328+ //defer sqlClient.Close()
329+ //
330+ //for i := range clearCGs {
331+ // cgs := clearCGs[i]
332+ // cleared := true
333+ // if err := k8s.DeleteStatefulset(ctx, dcgs.K8sclient, ddc.Namespace, cgs.StatefulsetName); err != nil {
334+ // cleared = false
335+ // klog.Errorf("disaggregatedComputeGroupsController delete statefulset namespace %s name %s failed, err=%s", ddc.Namespace, cgs.StatefulsetName, err.Error())
336+ // dcgs.K8srecorder.Event(ddc, string(sc.EventWarning), string(sc.CGStatefulsetDeleteFailed), err.Error())
337+ // }
338+ //
339+ // if err := k8s.DeleteService(ctx, dcgs.K8sclient, ddc.Namespace, cgs.ServiceName); err != nil {
340+ // cleared = false
341+ // klog.Errorf("disaggregatedComputeGroupsController delete service namespace %s name %s failed, err=%s", ddc.Namespace, cgs.ServiceName, err.Error())
342+ // dcgs.K8srecorder.Event(ddc, string(sc.EventWarning), string(sc.CGServiceDeleteFailed), err.Error())
343+ // }
344+ // if !cleared {
345+ // eCGs = append(eCGs, clearCGs[i])
346+ // continue
347+ // }
348+ // // drop compute group
349+ // cgName := strings.ReplaceAll(cgs.UniqueId, "_", "-")
350+ // cgKeepAmount := int32(0)
351+ // err = dcgs.scaledOutBENodesByDrop(sqlClient, cgName, cgKeepAmount)
352+ // if err != nil {
353+ // klog.Errorf("computeGroupSync ClearResources dropCGBySQLClient failed: %s", err.Error())
354+ // dcgs.K8srecorder.Event(ddc, string(sc.EventWarning), string(sc.CGSqlExecFailed), "computeGroupSync dropCGBySQLClient failed: "+err.Error())
355+ // }
356+ //
357+ //}
358+ //
359+ //for i := range eCGs {
360+ // err := dcgs.ClearStatefulsetUnusedPVCs(ctx, ddc, eCGs[i])
361+ // if err != nil {
362+ // klog.Errorf("disaggregatedComputeGroupsController ClearStatefulsetUnusedPVCs clear whole ComputeGroup PVC failed, err=%s", err.Error())
363+ // }
364+ //}
365+ //for i := range clearCGs {
366+ // err := dcgs.ClearStatefulsetUnusedPVCs(ctx, ddc, clearCGs[i])
367+ // if err != nil {
368+ // klog.Errorf("disaggregatedComputeGroupsController ClearStatefulsetUnusedPVCs clear part ComputeGroup PVC failed, err=%s", err.Error())
369+ // }
370+ //}
371+ //
372+ //ddc.Status.ComputeGroupStatuses = eCGs
373+ //
374+ //return true, nil
375+ }
376+
377+ func (dcgs * DisaggregatedComputeGroupsController ) clearStatefulsets (ctx context.Context , stsNames []string , ddc * dv1.DorisDisaggregatedCluster ) error {
378+ for _ , name := range stsNames {
379+ if err := k8s .DeleteStatefulset (ctx , dcgs .K8sclient , ddc .Namespace , name ); err != nil {
380+ klog .Errorf ("DisaggregatedComputeGroupsController clear statefulset failed, namespace=%s, name =%s, err=%s" , ddc .Namespace , name , err .Error ())
381+ return err
382+ }
317383 }
384+ return nil
385+ }
318386
319- for i := range eCGs {
320- err := dcgs .ClearStatefulsetUnusedPVCs (ctx , ddc , eCGs [i ])
321- if err != nil {
322- klog .Errorf ("disaggregatedComputeGroupsController ClearStatefulsetUnusedPVCs clear whole ComputeGroup PVC failed, err=%s" , err .Error ())
387+ func (dcgs * DisaggregatedComputeGroupsController ) clearSvcs (ctx context.Context , svcNames []string , ddc * dv1.DorisDisaggregatedCluster ) error {
388+ for _ , name := range svcNames {
389+ if err := k8s .DeleteService (ctx , dcgs .K8sclient , ddc .Namespace , name ); err != nil {
390+ klog .Errorf ("DisaggregatedComputeGroupsController clear service failed, namespace=%s, name =%s, err=%s" , ddc .Namespace , name , err .Error ())
391+ return err
323392 }
324393 }
325- for i := range clearCGs {
326- err := dcgs .ClearStatefulsetUnusedPVCs (ctx , ddc , clearCGs [i ])
394+ return nil
395+ }
396+
397+ func (dcgs * DisaggregatedComputeGroupsController ) clearCGInDorisMeta (ctx context.Context , cgNames []string , ddc * dv1.DorisDisaggregatedCluster ) error {
398+ if len (cgNames ) == 0 {
399+ return nil
400+ }
401+
402+ sqlClient , err := dcgs .getMasterSqlClient (ctx , dcgs .K8sclient , ddc )
403+ if err != nil {
404+ klog .Errorf ("DisaggregatedComputeGroupsController clearCGInDorisMeta dropCGBySQLClient getMasterSqlClient failed: %s" , err .Error ())
405+ dcgs .K8srecorder .Event (ddc , string (sc .EventWarning ), string (sc .CGSqlExecFailed ), "computeGroupSync dropCGBySQLClient failed: " + err .Error ())
406+ return err
407+ }
408+ defer sqlClient .Close ()
409+
410+ for _ , name := range cgNames {
411+ //clear cg, the keepAmount = 0
412+ //confirm used the right cgName, as the cgName get from the uniqueid that '-' replaced by '_'.
413+ cgName := strings .ReplaceAll (name , "-" , "_" )
414+ err = dcgs .scaledOutBENodesByDrop (sqlClient , cgName , 0 )
327415 if err != nil {
328- klog .Errorf ("disaggregatedComputeGroupsController ClearStatefulsetUnusedPVCs clear part ComputeGroup PVC failed, err=%s" , err .Error ())
416+ klog .Errorf ("DisaggregatedComputeGroupsController clearCGInDorisMeta dropCGBySQLClient failed: %s" , err .Error ())
417+ dcgs .K8srecorder .Event (ddc , string (sc .EventWarning ), string (sc .CGSqlExecFailed ), "computeGroupSync dropCGBySQLClient failed: " + err .Error ())
418+ return err
329419 }
330420 }
331421
332- ddc .Status .ComputeGroupStatuses = eCGs
422+ return nil
423+ }
333424
334- return true , nil
425+ func (dcgs * DisaggregatedComputeGroupsController ) findUnusedSvcs (svcs []corev1.Service , ddc * dv1.DorisDisaggregatedCluster ) []string {
426+ var unusedSvcNames []string
427+ for i , _ := range svcs {
428+ own := ownerReference2ddc (& svcs [i ], ddc )
429+ if ! own {
430+ //not owner reference to ddc, should skip the service.
431+ continue
432+ }
433+
434+ svcUniqueId := getUniqueIdFromClientObject (& svcs [i ])
435+ exist := false
436+ for j := 0 ; j < len (ddc .Spec .ComputeGroups ); j ++ {
437+ if ddc .Spec .ComputeGroups [j ].UniqueId == svcUniqueId {
438+ exist = true
439+ break
440+ }
441+ }
442+
443+ if ! exist {
444+ unusedSvcNames = append (unusedSvcNames , svcs [i ].Name )
445+ }
446+ }
447+
448+ return unusedSvcNames
449+ }
450+
451+ func (dcgs * DisaggregatedComputeGroupsController ) findUnusedStssAndUniqueIds (stss []appv1.StatefulSet , ddc * dv1.DorisDisaggregatedCluster ) ([]string /*sts*/ , []string /*cgNames*/ ) {
452+ var unusedStsNames []string
453+ var unusedUniqueIds []string
454+ for i , _ := range stss {
455+ own := ownerReference2ddc (& stss [i ], ddc )
456+ if ! own {
457+ //not owner reference tto ddc should skip the statefulset.
458+ continue
459+ }
460+
461+ stsUniqueId := getUniqueIdFromClientObject (& stss [i ])
462+ exist := false
463+ for j := 0 ; j < len (ddc .Spec .ComputeGroups ); j ++ {
464+ if ddc .Spec .ComputeGroups [j ].UniqueId == stsUniqueId {
465+ exist = true
466+ break
467+ }
468+ }
469+ if ! exist {
470+ unusedStsNames = append (unusedStsNames , stss [i ].Name )
471+ unusedUniqueIds = append (unusedUniqueIds , stsUniqueId )
472+ }
473+ }
474+
475+ return unusedStsNames , unusedUniqueIds
335476}
336477
337478// ClearStatefulsetUnusedPVCs
@@ -365,8 +506,17 @@ func (dcgs *DisaggregatedComputeGroupsController) ClearStatefulsetUnusedPVCs(ctx
365506 }
366507
367508 if cg != nil {
368- replicas := int (* cg .Replicas )
509+ // Use the statefulset's replicas rather than the compute group's: in the `scaleDown` phase the compute group's replicas is less than the statefulset's.
510+ replicas := 0
369511 stsName := ddc .GetCGStatefulsetName (cg )
512+ sts , err := k8s .GetStatefulSet (ctx , dcgs .K8sclient , ddc .Namespace , stsName )
513+ if err != nil {
514+ klog .Errorf ("DisaggregatedComputeGroupsController ClearStatefulsetUnusedPVCs get statefulset namespace=%s, name=%s, failed, err=%s" , ddc .Namespace , stsName , err .Error ())
515+ //waiting next reconciling.
516+ return nil
517+ }
518+ replicas = int (* sts .Spec .Replicas )
519+
370520 cvs := dcgs .GetConfigValuesFromConfigMaps (ddc .Namespace , resource .BE_RESOLVEKEY , cg .CommonSpec .ConfigMaps )
371521 paths , _ := dcgs .getCacheMaxSizeAndPaths (cvs )
372522
0 commit comments