@@ -49,13 +49,14 @@ import (
4949 "go.goms.io/fleet/pkg/utils/controller"
5050 "go.goms.io/fleet/pkg/utils/informer"
5151 "go.goms.io/fleet/pkg/utils/labels"
52+ "go.goms.io/fleet/pkg/utils/resource"
5253)
5354
5455var (
5556 // maxFailedResourcePlacementLimit indicates the max number of failed resource placements to include in the status.
5657 maxFailedResourcePlacementLimit = 100
5758
58- errResourceSnapshotNotFound = errors . New ("the master resource snapshot is not found" )
59+ errResourceSnapshotNotFound = fmt . Errorf ("the master resource snapshot is not found" )
5960)
6061
6162// Reconciler watches binding objects and generate work objects in the designated cluster namespace
@@ -135,18 +136,17 @@ func (r *Reconciler) Reconcile(ctx context.Context, req controllerruntime.Reques
135136
136137 workUpdated := false
137138 overrideSucceeded := false
138- // Reset the conditions and failed placements.
139- for i := condition .OverriddenCondition ; i < condition .TotalCondition ; i ++ {
140- resourceBinding .RemoveCondition (string (i .ResourceBindingConditionType ()))
141- }
142- resourceBinding .Status .FailedPlacements = nil
143139 // list all the corresponding works
144140 works , syncErr := r .listAllWorksAssociated (ctx , & resourceBinding )
145141 if syncErr == nil {
146142 // generate and apply the workUpdated works if we have all the works
147143 overrideSucceeded , workUpdated , syncErr = r .syncAllWork (ctx , & resourceBinding , works , cluster )
148144 }
149-
145+ // Reset the conditions and failed placements.
146+ for i := condition .OverriddenCondition ; i < condition .TotalCondition ; i ++ {
147+ resourceBinding .RemoveCondition (string (i .ResourceBindingConditionType ()))
148+ }
149+ resourceBinding .Status .FailedPlacements = nil
150150 if overrideSucceeded {
151151 overrideReason := condition .OverriddenSucceededReason
152152 overrideMessage := "Successfully applied the override rules on the resources"
@@ -375,10 +375,28 @@ func (r *Reconciler) listAllWorksAssociated(ctx context.Context, resourceBinding
375375func (r * Reconciler ) syncAllWork (ctx context.Context , resourceBinding * fleetv1beta1.ClusterResourceBinding , existingWorks map [string ]* fleetv1beta1.Work , cluster clusterv1beta1.MemberCluster ) (bool , bool , error ) {
376376 updateAny := atomic .NewBool (false )
377377 resourceBindingRef := klog .KObj (resourceBinding )
378+ // the hash256 function can can handle empty list https://go.dev/play/p/_4HW17fooXM
379+ resourceOverrideSnapshotHash , err := resource .HashOf (resourceBinding .Spec .ResourceOverrideSnapshots )
380+ if err != nil {
381+ return false , false , controller .NewUnexpectedBehaviorError (err )
382+ }
383+ clusterResourceOverrideSnapshotHash , err := resource .HashOf (resourceBinding .Spec .ClusterResourceOverrideSnapshots )
384+ if err != nil {
385+ return false , false , controller .NewUnexpectedBehaviorError (err )
386+ }
387+ // TODO: check all work synced first before fetching the snapshots after we put ParentResourceOverrideSnapshotHashAnnotation and ParentClusterResourceOverrideSnapshotHashAnnotation in all the work objects
378388
379389 // Gather all the resource resourceSnapshots
380390 resourceSnapshots , err := r .fetchAllResourceSnapshots (ctx , resourceBinding )
381391 if err != nil {
392+ if errors .Is (err , errResourceSnapshotNotFound ) {
393+ // the resourceIndex is deleted but the works might still be up to date with the binding.
394+ if areAllWorkSynced (existingWorks , resourceBinding , resourceOverrideSnapshotHash , clusterResourceOverrideSnapshotHash ) {
395+ klog .V (2 ).InfoS ("All the works are synced with the resourceBinding even if the resource snapshot index is removed" , "resourceBinding" , resourceBindingRef )
396+ return true , false , nil
397+ }
398+ return false , false , controller .NewUserError (err )
399+ }
382400 // TODO(RZ): handle errResourceNotFullyCreated error so we don't need to wait for all the snapshots to be created
383401 return false , false , err
384402 }
@@ -422,7 +440,7 @@ func (r *Reconciler) syncAllWork(ctx context.Context, resourceBinding *fleetv1be
422440 if uResource .GetObjectKind ().GroupVersionKind () == utils .ConfigMapGVK &&
423441 len (uResource .GetAnnotations ()[fleetv1beta1 .EnvelopeConfigMapAnnotation ]) != 0 {
424442 // get a work object for the enveloped configMap
425- work , err := r .getConfigMapEnvelopWorkObj (ctx , workNamePrefix , resourceBinding , snapshot , & uResource )
443+ work , err := r .getConfigMapEnvelopWorkObj (ctx , workNamePrefix , resourceBinding , snapshot , & uResource , resourceOverrideSnapshotHash , clusterResourceOverrideSnapshotHash )
426444 if err != nil {
427445 return true , false , err
428446 }
@@ -438,7 +456,7 @@ func (r *Reconciler) syncAllWork(ctx context.Context, resourceBinding *fleetv1be
438456 // generate a work object for the manifests even if there is nothing to place
439457 // to allow CRP to collect the status of the placement
440458 // TODO (RZ): revisit to see if we need this hack
441- work := generateSnapshotWorkObj (workNamePrefix , resourceBinding , snapshot , simpleManifests )
459+ work := generateSnapshotWorkObj (workNamePrefix , resourceBinding , snapshot , simpleManifests , resourceOverrideSnapshotHash , clusterResourceOverrideSnapshotHash )
442460 activeWork [work .Name ] = work
443461 newWork = append (newWork , work )
444462
@@ -485,6 +503,32 @@ func (r *Reconciler) syncAllWork(ctx context.Context, resourceBinding *fleetv1be
485503 return true , updateAny .Load (), nil
486504}
487505
506+ // areAllWorkSynced checks if all the works are synced with the resource binding.
507+ func areAllWorkSynced (existingWorks map [string ]* fleetv1beta1.Work , resourceBinding * fleetv1beta1.ClusterResourceBinding , _ , _ string ) bool {
508+ syncedCondition := resourceBinding .GetCondition (string (fleetv1beta1 .ResourceBindingWorkSynchronized ))
509+ if ! condition .IsConditionStatusTrue (syncedCondition , resourceBinding .Generation ) {
510+ // The binding has to be synced first before we can check the works
511+ return false
512+ }
513+ // TODO: check resourceOverrideSnapshotHash and clusterResourceOverrideSnapshotHash after all the work has the ParentResourceOverrideSnapshotHashAnnotation and ParentClusterResourceOverrideSnapshotHashAnnotation
514+ resourceSnapshotName := resourceBinding .Spec .ResourceSnapshotName
515+ for _ , work := range existingWorks {
516+ recordedName , exist := work .Annotations [fleetv1beta1 .ParentResourceSnapshotNameAnnotation ]
517+ if ! exist {
518+ // TODO: remove this block after all the work has the ParentResourceSnapshotNameAnnotation
519+ // the parent resource snapshot name is not recorded in the work, we need to construct it from the labels
520+ crpName := resourceBinding .Labels [fleetv1beta1 .CRPTrackingLabel ]
521+ index , _ := labels .ExtractResourceSnapshotIndexFromWork (work )
522+ recordedName = fmt .Sprintf (fleetv1beta1 .ResourceSnapshotNameFmt , crpName , index )
523+ }
524+ if recordedName != resourceSnapshotName {
525+ klog .V (2 ).InfoS ("The work is not synced with the resourceBinding" , "work" , klog .KObj (work ), "resourceBinding" , klog .KObj (resourceBinding ), "annotationExist" , exist , "recordedName" , recordedName , "resourceSnapshotName" , resourceSnapshotName )
526+ return false
527+ }
528+ }
529+ return true
530+ }
531+
488532// fetchAllResourceSnapshots gathers all the resource snapshots for the resource binding.
489533func (r * Reconciler ) fetchAllResourceSnapshots (ctx context.Context , resourceBinding * fleetv1beta1.ClusterResourceBinding ) (map [string ]* fleetv1beta1.ClusterResourceSnapshot , error ) {
490534 // fetch the master snapshot first
@@ -504,7 +548,7 @@ func (r *Reconciler) fetchAllResourceSnapshots(ctx context.Context, resourceBind
504548// getConfigMapEnvelopWorkObj first try to locate a work object for the corresponding envelopObj of type configMap.
505549// we create a new one if the work object doesn't exist. We do this to avoid repeatedly delete and create the same work object.
506550func (r * Reconciler ) getConfigMapEnvelopWorkObj (ctx context.Context , workNamePrefix string , resourceBinding * fleetv1beta1.ClusterResourceBinding ,
507- resourceSnapshot * fleetv1beta1.ClusterResourceSnapshot , envelopeObj * unstructured.Unstructured ) (* fleetv1beta1.Work , error ) {
551+ resourceSnapshot * fleetv1beta1.ClusterResourceSnapshot , envelopeObj * unstructured.Unstructured , resourceOverrideSnapshotHash , clusterResourceOverrideSnapshotHash string ) (* fleetv1beta1.Work , error ) {
508552 // we group all the resources in one configMap to one work
509553 manifest , err := extractResFromConfigMap (envelopeObj )
510554 if err != nil {
@@ -514,6 +558,7 @@ func (r *Reconciler) getConfigMapEnvelopWorkObj(ctx context.Context, workNamePre
514558 }
515559 klog .V (2 ).InfoS ("Successfully extract the enveloped resources from the configMap" , "numOfResources" , len (manifest ),
516560 "snapshot" , klog .KObj (resourceSnapshot ), "resourceBinding" , klog .KObj (resourceBinding ), "configMapWrapper" , klog .KObj (envelopeObj ))
561+
517562 // Try to see if we already have a work represent the same enveloped object for this CRP in the same cluster
518563 // The ParentResourceSnapshotIndexLabel can change between snapshots so we have to exclude that label in the match
519564 envelopWorkLabelMatcher := client.MatchingLabels {
@@ -544,6 +589,11 @@ func (r *Reconciler) getConfigMapEnvelopWorkObj(ctx context.Context, workNamePre
544589 fleetv1beta1 .EnvelopeNameLabel : envelopeObj .GetName (),
545590 fleetv1beta1 .EnvelopeNamespaceLabel : envelopeObj .GetNamespace (),
546591 },
592+ Annotations : map [string ]string {
593+ fleetv1beta1 .ParentResourceSnapshotNameAnnotation : resourceBinding .Spec .ResourceSnapshotName ,
594+ fleetv1beta1 .ParentResourceOverrideSnapshotHashAnnotation : resourceOverrideSnapshotHash ,
595+ fleetv1beta1 .ParentClusterResourceOverrideSnapshotHashAnnotation : clusterResourceOverrideSnapshotHash ,
596+ },
547597 OwnerReferences : []metav1.OwnerReference {
548598 {
549599 APIVersion : fleetv1beta1 .GroupVersion .String (),
@@ -567,16 +617,19 @@ func (r *Reconciler) getConfigMapEnvelopWorkObj(ctx context.Context, workNamePre
567617 klog .ErrorS (controller .NewUnexpectedBehaviorError (fmt .Errorf ("find %d work representing configMap" , len (workList .Items ))),
568618 "snapshot" , klog .KObj (resourceSnapshot ), "resourceBinding" , klog .KObj (resourceBinding ), "configMapWrapper" , klog .KObj (envelopeObj ))
569619 }
570- // we just pick the first one if there are more than one.
571620 work := workList .Items [0 ]
572621 work .Labels [fleetv1beta1 .ParentResourceSnapshotIndexLabel ] = resourceSnapshot .Labels [fleetv1beta1 .ResourceIndexLabel ]
622+ work .Annotations [fleetv1beta1 .ParentResourceSnapshotNameAnnotation ] = resourceBinding .Spec .ResourceSnapshotName
623+ work .Annotations [fleetv1beta1 .ParentResourceOverrideSnapshotHashAnnotation ] = resourceOverrideSnapshotHash
624+ work .Annotations [fleetv1beta1 .ParentClusterResourceOverrideSnapshotHashAnnotation ] = clusterResourceOverrideSnapshotHash
573625 work .Spec .Workload .Manifests = manifest
574626 work .Spec .ApplyStrategy = resourceBinding .Spec .ApplyStrategy
575627 return & work , nil
576628}
577629
578630// generateSnapshotWorkObj generates the work object for the corresponding snapshot
579- func generateSnapshotWorkObj (workName string , resourceBinding * fleetv1beta1.ClusterResourceBinding , resourceSnapshot * fleetv1beta1.ClusterResourceSnapshot , manifest []fleetv1beta1.Manifest ) * fleetv1beta1.Work {
631+ func generateSnapshotWorkObj (workName string , resourceBinding * fleetv1beta1.ClusterResourceBinding , resourceSnapshot * fleetv1beta1.ClusterResourceSnapshot ,
632+ manifest []fleetv1beta1.Manifest , resourceOverrideSnapshotHash , clusterResourceOverrideSnapshotHash string ) * fleetv1beta1.Work {
580633 return & fleetv1beta1.Work {
581634 ObjectMeta : metav1.ObjectMeta {
582635 Name : workName ,
@@ -586,6 +639,11 @@ func generateSnapshotWorkObj(workName string, resourceBinding *fleetv1beta1.Clus
586639 fleetv1beta1 .CRPTrackingLabel : resourceBinding .Labels [fleetv1beta1 .CRPTrackingLabel ],
587640 fleetv1beta1 .ParentResourceSnapshotIndexLabel : resourceSnapshot .Labels [fleetv1beta1 .ResourceIndexLabel ],
588641 },
642+ Annotations : map [string ]string {
643+ fleetv1beta1 .ParentResourceSnapshotNameAnnotation : resourceBinding .Spec .ResourceSnapshotName ,
644+ fleetv1beta1 .ParentResourceOverrideSnapshotHashAnnotation : resourceOverrideSnapshotHash ,
645+ fleetv1beta1 .ParentClusterResourceOverrideSnapshotHashAnnotation : clusterResourceOverrideSnapshotHash ,
646+ },
589647 OwnerReferences : []metav1.OwnerReference {
590648 {
591649 APIVersion : fleetv1beta1 .GroupVersion .String (),
@@ -619,6 +677,7 @@ func (r *Reconciler) upsertWork(ctx context.Context, newWork, existingWork *flee
619677 "resourceSnapshot" , resourceSnapshotObj , "work" , workObj )
620678 return true , nil
621679 }
680+ // TODO: remove the compare after we did the check on all work in the sync all
622681 // check if we need to update the existing work object
623682 workResourceIndex , err := labels .ExtractResourceSnapshotIndexFromWork (existingWork )
624683 if err != nil {
@@ -628,12 +687,20 @@ func (r *Reconciler) upsertWork(ctx context.Context, newWork, existingWork *flee
628687 // we already checked the label in fetchAllResourceSnapShots function so no need to check again
629688 resourceIndex , _ := labels .ExtractResourceIndexFromClusterResourceSnapshot (resourceSnapshot )
630689 if workResourceIndex == resourceIndex {
631- // no need to do anything if the work is generated from the same resource snapshot group since the resource snapshot is immutable.
632- klog .V (2 ).InfoS ("Work is already associated with the desired resourceSnapshot" , "resourceIndex" , resourceIndex , "work" , workObj , "resourceSnapshot" , resourceSnapshotObj )
633- return false , nil
690+ // no need to do anything if the work is generated from the same snapshots
691+ if existingWork .Annotations [fleetv1beta1 .ParentResourceOverrideSnapshotHashAnnotation ] == newWork .Annotations [fleetv1beta1 .ParentResourceOverrideSnapshotHashAnnotation ] &&
692+ existingWork .Annotations [fleetv1beta1 .ParentClusterResourceOverrideSnapshotHashAnnotation ] == newWork .Annotations [fleetv1beta1 .ParentClusterResourceOverrideSnapshotHashAnnotation ] {
693+ klog .V (2 ).InfoS ("Work is not associated with the desired override snapshots" , "existingROHash" , existingWork .Annotations [fleetv1beta1 .ParentResourceOverrideSnapshotHashAnnotation ],
694+ "existingCROHash" , existingWork .Annotations [fleetv1beta1 .ParentClusterResourceOverrideSnapshotHashAnnotation ], "work" , workObj )
695+ return false , nil
696+ }
697+ klog .V (2 ).InfoS ("Work is already associated with the desired resourceSnapshot but still not having the right override snapshots" , "resourceIndex" , resourceIndex , "work" , workObj , "resourceSnapshot" , resourceSnapshotObj )
634698 }
635- // need to update the existing work, only two possible changes:
636- existingWork .Labels [fleetv1beta1 .ParentResourceSnapshotIndexLabel ] = resourceSnapshot .Labels [fleetv1beta1 .ResourceIndexLabel ]
699+ // need to copy the new work to the existing work, only 5 possible changes:
700+ existingWork .Labels [fleetv1beta1 .ParentResourceSnapshotIndexLabel ] = newWork .Labels [fleetv1beta1 .ParentResourceSnapshotIndexLabel ]
701+ existingWork .Annotations [fleetv1beta1 .ParentResourceSnapshotNameAnnotation ] = newWork .Annotations [fleetv1beta1 .ParentResourceSnapshotNameAnnotation ]
702+ existingWork .Annotations [fleetv1beta1 .ParentResourceOverrideSnapshotHashAnnotation ] = newWork .Annotations [fleetv1beta1 .ParentResourceOverrideSnapshotHashAnnotation ]
703+ existingWork .Annotations [fleetv1beta1 .ParentClusterResourceOverrideSnapshotHashAnnotation ] = newWork .Annotations [fleetv1beta1 .ParentClusterResourceOverrideSnapshotHashAnnotation ]
637704 existingWork .Spec .Workload .Manifests = newWork .Spec .Workload .Manifests
638705 if err := r .Client .Update (ctx , existingWork ); err != nil {
639706 klog .ErrorS (err , "Failed to update the work associated with the resourceSnapshot" , "resourceSnapshot" , resourceSnapshotObj , "work" , workObj )
0 commit comments