diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml index 207b0b0da..4e40a0f46 100644 --- a/.github/workflows/codespell.yml +++ b/.github/workflows/codespell.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Harden Runner - uses: step-security/harden-runner@ec9f2d5744a09debf3a187a3f4f675c53b671911 # v2.13.0 + uses: step-security/harden-runner@f4a75cfd619ee5ce8d5b864b0d183aff3c69b55a # v2.13.1 with: egress-policy: audit diff --git a/cmd/memberagent/main.go b/cmd/memberagent/main.go index b927fc216..9f5f04aea 100644 --- a/cmd/memberagent/main.go +++ b/cmd/memberagent/main.go @@ -102,6 +102,9 @@ var ( workApplierRequeueRateLimiterExponentialBaseForFastBackoff = flag.Float64("work-applier-requeue-rate-limiter-exponential-base-for-fast-backoff", 1.5, "If set, the work applier will start to back off fast at this factor after it completes the slow backoff stage, until it reaches the fast backoff delay cap. Its value should be larger than the base value for the slow backoff stage.") workApplierRequeueRateLimiterMaxFastBackoffDelaySeconds = flag.Float64("work-applier-requeue-rate-limiter-max-fast-backoff-delay-seconds", 900, "If set, the work applier will not back off longer than this value in seconds when it is in the fast backoff stage.") workApplierRequeueRateLimiterSkipToFastBackoffForAvailableOrDiffReportedWorkObjs = flag.Bool("work-applier-requeue-rate-limiter-skip-to-fast-backoff-for-available-or-diff-reported-work-objs", true, "If set, the rate limiter will skip the slow backoff stage and start fast backoff immediately for work objects that are available or have diff reported.") + // Azure property provider feature gates. + isAzProviderCostPropertiesEnabled = flag.Bool("use-cost-properties-in-azure-provider", true, "If set, the Azure property provider will expose cost properties in the member cluster.") + isAzProviderAvailableResPropertiesEnabled = flag.Bool("use-available-res-properties-in-azure-provider", true, "If set, the Azure property provider will expose available resources properties in the member cluster.") ) func init() { @@ -461,7 +464,7 @@ func Start(ctx context.Context, hubCfg, memberConfig *rest.Config, hubOpts, memb // the specific instance wins the leader election. klog.V(1).InfoS("Property Provider is azure, loading cloud config", "cloudConfigFile", *cloudConfigFile) // TODO (britaniar): load cloud config for Azure property provider. - pp = azure.New(region) + pp = azure.New(region, *isAzProviderCostPropertiesEnabled, *isAzProviderAvailableResPropertiesEnabled) default: // Fall back to not using any property provider if the provided type is none or // not recognizable. diff --git a/pkg/controllers/placement/resource_selector.go b/pkg/controllers/placement/resource_selector.go index d5732ec8d..230f11838 100644 --- a/pkg/controllers/placement/resource_selector.go +++ b/pkg/controllers/placement/resource_selector.go @@ -41,44 +41,52 @@ import ( ) var ( - // ApplyOrder is the order in which resources should be applied. - // Those occurring earlier in the list get applied before those occurring later in the list. - // Source: https://github.com/helm/helm/blob/31e22b9866af91e1a0ea2ad381798f6c5eec7f4f/pkg/release/util/kind_sorter.go#L31. 
- applyOrder = []string{ - "PriorityClass", - "Namespace", - "NetworkPolicy", - "ResourceQuota", - "LimitRange", - "PodDisruptionBudget", - "ServiceAccount", - "Secret", - "ConfigMap", - "StorageClass", - "PersistentVolume", - "PersistentVolumeClaim", - "CustomResourceDefinition", - "ClusterRole", - "ClusterRoleBinding", - "Role", - "RoleBinding", - "Service", - "DaemonSet", - "Pod", - "ReplicationController", - "ReplicaSet", - "Deployment", - "HorizontalPodAutoscaler", - "StatefulSet", - "Job", - "CronJob", - "IngressClass", - "Ingress", - "APIService", - "MutatingWebhookConfiguration", - "ValidatingWebhookConfiguration", - } - applyOrderMap = buildApplyOrderMap() + // resourceSortOrder is the order in which resources are sorted when KubeFleet + // organizes the resources in a resource snapshot. + // + // Note (chenyu1): the sort order here does not affect the order in which resources + // are applied on a selected member cluster (the work applier will handle the resources + // in batch with its own grouping logic). KubeFleet sorts resources here solely + // for consistency (deterministic processing) reasons (i.e., if the set of the + // resources remain the same, no new snapshots are generated). + // + // Important (chenyu1): changing the sort order here may induce side effects in + // existing KubeFleet deployments, as a new snapshot might be prepared and rolled out. + // Do not update the sort order unless absolutely necessary. + resourceSortOrder = map[string]int{ + "PriorityClass": 0, + "Namespace": 1, + "NetworkPolicy": 2, + "ResourceQuota": 3, + "LimitRange": 4, + "PodDisruptionBudget": 5, + "ServiceAccount": 6, + "Secret": 7, + "ConfigMap": 8, + "StorageClass": 9, + "PersistentVolume": 10, + "PersistentVolumeClaim": 11, + "CustomResourceDefinition": 12, + "ClusterRole": 13, + "ClusterRoleBinding": 14, + "Role": 15, + "RoleBinding": 16, + "Service": 17, + "DaemonSet": 18, + "Pod": 19, + "ReplicationController": 20, + "ReplicaSet": 21, + "Deployment": 22, + "HorizontalPodAutoscaler": 23, + "StatefulSet": 24, + "Job": 25, + "CronJob": 26, + "IngressClass": 27, + "Ingress": 28, + "APIService": 29, + "MutatingWebhookConfiguration": 30, + "ValidatingWebhookConfiguration": 31, + } ) // selectResources selects the resources according to the placement resourceSelectors. @@ -185,8 +193,8 @@ func sortResources(resources []*unstructured.Unstructured) { k1 := obj1.GetObjectKind().GroupVersionKind().Kind k2 := obj2.GetObjectKind().GroupVersionKind().Kind - first, aok := applyOrderMap[k1] - second, bok := applyOrderMap[k2] + first, aok := resourceSortOrder[k1] + second, bok := resourceSortOrder[k2] switch { // if both kinds are unknown. case !aok && !bok: @@ -222,14 +230,6 @@ func lessByGVK(obj1, obj2 *unstructured.Unstructured, ignoreKind bool) bool { return comp < 0 } -func buildApplyOrderMap() map[string]int { - ordering := make(map[string]int, len(applyOrder)) - for v, k := range applyOrder { - ordering[k] = v - } - return ordering -} - // fetchResources retrieves the objects based on the selector. 
func (r *Reconciler) fetchResources(selector fleetv1beta1.ResourceSelectorTerm, placementKey types.NamespacedName) ([]runtime.Object, error) { klog.V(2).InfoS("Start to fetch resources by the selector", "selector", selector, "placement", placementKey) diff --git a/pkg/controllers/workgenerator/envelope.go b/pkg/controllers/workgenerator/envelope.go index 0d2b3049c..dc3d258a9 100644 --- a/pkg/controllers/workgenerator/envelope.go +++ b/pkg/controllers/workgenerator/envelope.go @@ -147,6 +147,16 @@ func extractManifestsFromEnvelopeCR(envelopeReader fleetv1beta1.EnvelopeReader) } // Do a stable sort of the extracted manifests to ensure consistent, deterministic ordering. + // + // Note (chenyu1): the sort order here does not affect the order in which resources + // are applied on a selected member cluster (the work applier will handle the resources + // in batch with its own grouping logic). KubeFleet sorts resources here solely + // for consistency (deterministic processing) reasons (i.e., if the set of the + // resources remain the same, work objects will not be updated). + // + // Important (chenyu1): changing the sort order here may induce side effects in + // existing KubeFleet deployments, as it might trigger update ops on work objects. + // Do not update the sort order unless absolutely necessary. sort.Slice(manifests, func(i, j int) bool { obj1 := manifests[i].Raw obj2 := manifests[j].Raw diff --git a/pkg/propertyprovider/azure/controllers/node.go b/pkg/propertyprovider/azure/controllers/node.go index 49f34a4f3..9c8bd0ea5 100644 --- a/pkg/propertyprovider/azure/controllers/node.go +++ b/pkg/propertyprovider/azure/controllers/node.go @@ -89,9 +89,10 @@ func (r *NodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl. return ctrl.Result{}, nil } -func (r *NodeReconciler) SetupWithManager(mgr ctrl.Manager) error { +func (r *NodeReconciler) SetupWithManager(mgr ctrl.Manager, controllerName string) error { // Reconcile any node changes (create, update, delete). return ctrl.NewControllerManagedBy(mgr). + Named(controllerName). For(&corev1.Node{}). Complete(r) } diff --git a/pkg/propertyprovider/azure/controllers/pod.go b/pkg/propertyprovider/azure/controllers/pod.go index a5b02d3e9..fb2c876df 100644 --- a/pkg/propertyprovider/azure/controllers/pod.go +++ b/pkg/propertyprovider/azure/controllers/pod.go @@ -108,9 +108,10 @@ func (p *PodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.R return ctrl.Result{}, nil } -func (p *PodReconciler) SetupWithManager(mgr ctrl.Manager) error { +func (p *PodReconciler) SetupWithManager(mgr ctrl.Manager, controllerName string) error { // Reconcile any pod changes (create, update, delete). return ctrl.NewControllerManagedBy(mgr). + Named(controllerName). For(&corev1.Pod{}). Complete(p) } diff --git a/pkg/propertyprovider/azure/provider.go b/pkg/propertyprovider/azure/provider.go index 9ae1993eb..d13f289e6 100644 --- a/pkg/propertyprovider/azure/provider.go +++ b/pkg/propertyprovider/azure/provider.go @@ -39,6 +39,7 @@ import ( "go.goms.io/fleet/pkg/propertyprovider" "go.goms.io/fleet/pkg/propertyprovider/azure/controllers" "go.goms.io/fleet/pkg/propertyprovider/azure/trackers" + "go.goms.io/fleet/pkg/utils/controller" ) const ( @@ -78,9 +79,17 @@ type PropertyProvider struct { // be either specified by the user or auto-discovered from the AKS cluster. region *string + // The feature flags. 
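+	// These mirror the member agent's --use-cost-properties-in-azure-provider and
+	// --use-available-res-properties-in-azure-provider flags (see cmd/memberagent/main.go),
+	// which are passed through when New() is called.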
+ isCostCollectionEnabled bool + isAvailableResourcesCollectionEnabled bool + // The controller manager in use by the Azure property provider; this field is mostly reserved for // testing purposes. mgr ctrl.Manager + // The names in use by the controllers managed by the property provider; these fields are exposed + // to avoid name conflicts, though at this moment are mostly reserved for testing purposes. + nodeControllerName string + podControllerName string } // Verify that the Azure property provider implements the MetricProvider interface at compile time. @@ -176,10 +185,14 @@ func (p *PropertyProvider) Start(ctx context.Context, config *rest.Config) error return err } - klog.V(2).Info("Setting up the node tracker") - if p.nodeTracker == nil { + switch { + case p.nodeTracker != nil: + // A node tracker has been explicitly set; use it. + klog.V(2).Info("A node tracker has been explicitly set") + case p.isCostCollectionEnabled: + // No node tracker has been set, and cost collection is enabled; set up a node tracker + // using the default pricing client (the AKS Karpenter pricing client). klog.V(2).Info("Building a node tracker using the default AKS Karpenter pricing client") - if p.region == nil || len(*p.region) == 0 { klog.V(2).Info("Auto-discover region as none has been specified") // Note that an API reader is passed here for the purpose of auto-discovering region @@ -198,30 +211,47 @@ func (p *PropertyProvider) Start(ctx context.Context, config *rest.Config) error klog.V(2).Infof("Starting with the region set to %s", *p.region) pp := trackers.NewAKSKarpenterPricingClient(ctx, *p.region) p.nodeTracker = trackers.NewNodeTracker(pp) + default: + // No node tracker has been set, and cost collection is disabled; set up a node tracker + // with no pricing provider. + klog.V(2).Info("Building a node tracker with no pricing provider") + p.nodeTracker = trackers.NewNodeTracker(nil) } - klog.V(2).Info("Setting up the pod tracker") - p.podTracker = trackers.NewPodTracker() - - // Set up the node and pod reconcilers. - klog.V(2).Info("Starting the node reconciler") + // Set up the node reconciler. + klog.V(2).Info("Setting up the node reconciler") nodeReconciler := &controllers.NodeReconciler{ NT: p.nodeTracker, Client: mgr.GetClient(), } - if err := nodeReconciler.SetupWithManager(mgr); err != nil { + if err := nodeReconciler.SetupWithManager(mgr, p.nodeControllerName); err != nil { klog.ErrorS(err, "Failed to start the node reconciler in the Azure property provider") return err } - klog.V(2).Info("Starting the pod reconciler") - podReconciler := &controllers.PodReconciler{ - PT: p.podTracker, - Client: mgr.GetClient(), - } - if err := podReconciler.SetupWithManager(mgr); err != nil { - klog.ErrorS(err, "Failed to start the pod reconciler in the Azure property provider") - return err + switch { + case p.podTracker != nil: + // A pod tracker has been explicitly set; use it. + klog.V(2).Info("A pod tracker has been explicitly set") + case !p.isAvailableResourcesCollectionEnabled: + // Available resource collection is disabled; there is no need to set up a pod tracker, and + // as a result there is no need to watch for pods either. + klog.V(2).Info("Skipping pod tracker setup as available resources collection is disabled") + default: + // No pod tracker has been set, and available resources collection is enabled; set up + // a pod tracker. 
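+		// Note that the pod reconciler below is registered only in this branch; when
+		// available resources collection is disabled, no pod watch is started at all.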
+ klog.V(2).Info("Building a pod tracker") + p.podTracker = trackers.NewPodTracker() + + klog.V(2).Info("Starting the pod reconciler") + podReconciler := &controllers.PodReconciler{ + PT: p.podTracker, + Client: mgr.GetClient(), + } + if err := podReconciler.SetupWithManager(mgr, p.podControllerName); err != nil { + klog.ErrorS(err, "Failed to start the pod reconciler in the Azure property provider") + return err + } } // Start the controller manager. @@ -250,16 +280,48 @@ func (p *PropertyProvider) Start(ctx context.Context, config *rest.Config) error } // Collect collects the properties of an AKS cluster. -func (p *PropertyProvider) Collect(_ context.Context) propertyprovider.PropertyCollectionResponse { +func (p *PropertyProvider) Collect(ctx context.Context) propertyprovider.PropertyCollectionResponse { conds := make([]metav1.Condition, 0, 1) // Collect the non-resource properties. + + // Collect the node count property. properties := make(map[clusterv1beta1.PropertyName]clusterv1beta1.PropertyValue) properties[propertyprovider.NodeCountProperty] = clusterv1beta1.PropertyValue{ Value: fmt.Sprintf("%d", p.nodeTracker.NodeCount()), ObservationTime: metav1.Now(), } + // Collect the cost properties (if enabled). + if p.isCostCollectionEnabled { + costConds := p.collectCosts(ctx, properties) + conds = append(conds, costConds...) + } + + // Collect the resource properties. + + // Collect the total and allocatable resource properties. + resources := clusterv1beta1.ResourceUsage{} + resources.Capacity = p.nodeTracker.TotalCapacity() + resources.Allocatable = p.nodeTracker.TotalAllocatable() + + // Collect the available resource properties (if enabled). + if p.isAvailableResourcesCollectionEnabled { + p.collectAvailableResource(ctx, &resources) + } + + // Return the collection response. + return propertyprovider.PropertyCollectionResponse{ + Properties: properties, + Resources: resources, + Conditions: conds, + } +} + +// collectCosts collects the cost information. +func (p *PropertyProvider) collectCosts(_ context.Context, properties map[clusterv1beta1.PropertyName]clusterv1beta1.PropertyValue) []metav1.Condition { + conds := make([]metav1.Condition, 0, 1) + perCPUCost, perGBMemoryCost, warnings, err := p.nodeTracker.Costs() switch { case err != nil: @@ -307,15 +369,22 @@ func (p *PropertyProvider) Collect(_ context.Context) propertyprovider.PropertyC }) } - // Collect the resource properties. - resources := clusterv1beta1.ResourceUsage{} - resources.Capacity = p.nodeTracker.TotalCapacity() - resources.Allocatable = p.nodeTracker.TotalAllocatable() + return conds +} + +// collectAvailableResource collects the available resource information. +func (p *PropertyProvider) collectAvailableResource(_ context.Context, usage *clusterv1beta1.ResourceUsage) { + if p.podTracker == nil { + // No pod tracker has been set; but the property provider has been configured to collect + // available resource information. Normally this should never occur. 
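+		// The error constructed below is deliberately discarded; it is assumed to serve
+		// only as a marker of the unexpected state for debugging purposes.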
+ _ = controller.NewUnexpectedBehaviorError(fmt.Errorf("no pod tracker has been set, but the property provider has been configured to collect available resource information")) + } requested := p.podTracker.TotalRequested() + allocatable := usage.Allocatable available := make(corev1.ResourceList) - for rn := range resources.Allocatable { - left := resources.Allocatable[rn].DeepCopy() + for rn := range allocatable { + left := allocatable[rn].DeepCopy() // In some unlikely scenarios, it could happen that, due to unavoidable // inconsistencies in the data collection process, the total value of a specific // requested resource exceeds that of the allocatable resource, as observed by @@ -331,14 +400,7 @@ func (p *PropertyProvider) Collect(_ context.Context) propertyprovider.PropertyC } available[rn] = left } - resources.Available = available - - // Return the collection response. - return propertyprovider.PropertyCollectionResponse{ - Properties: properties, - Resources: resources, - Conditions: conds, - } + usage.Available = available } // autoDiscoverRegionAndSetupTrackers auto-discovers the region of the AKS cluster. @@ -394,9 +456,17 @@ func (p *PropertyProvider) autoDiscoverRegionAndSetupTrackers(ctx context.Contex // If the region is unspecified at the time when this function is called, the provider // will attempt to auto-discover the region of its host cluster when the Start method is // called. -func New(region *string) propertyprovider.PropertyProvider { +func New( + region *string, + isCostCollectionEnabled, isAvailableResourcesCollectionEnabled bool, +) propertyprovider.PropertyProvider { return &PropertyProvider{ region: region, + // Use the default names. + nodeControllerName: "azure-property-provider-node-watcher", + podControllerName: "azure-property-provider-pod-watcher", + isCostCollectionEnabled: isCostCollectionEnabled, + isAvailableResourcesCollectionEnabled: isAvailableResourcesCollectionEnabled, } } @@ -405,8 +475,16 @@ func New(region *string) propertyprovider.PropertyProvider { // // This is mostly used for allow plugging in of alternate pricing providers (one that // does not use the Karpenter client), and for testing purposes. 
-func NewWithPricingProvider(pp trackers.PricingProvider) propertyprovider.PropertyProvider { +func NewWithPricingProvider( + pp trackers.PricingProvider, + nodeControllerName, podControllerName string, + isCostCollectionEnabled, isAvailableResourcesCollectionEnabled bool, +) propertyprovider.PropertyProvider { return &PropertyProvider{ - nodeTracker: trackers.NewNodeTracker(pp), + nodeTracker: trackers.NewNodeTracker(pp), + nodeControllerName: nodeControllerName, + podControllerName: podControllerName, + isCostCollectionEnabled: isCostCollectionEnabled, + isAvailableResourcesCollectionEnabled: isAvailableResourcesCollectionEnabled, } } diff --git a/pkg/propertyprovider/azure/provider_integration_test.go b/pkg/propertyprovider/azure/provider_integration_test.go index c876a7cee..7b168df4d 100644 --- a/pkg/propertyprovider/azure/provider_integration_test.go +++ b/pkg/propertyprovider/azure/provider_integration_test.go @@ -132,167 +132,175 @@ var ( } } - shouldReportCorrectPropertiesForNodes = func(nodes []corev1.Node, pods []corev1.Pod) func() { - return func() { - totalCPUCapacity := resource.Quantity{} - allocatableCPUCapacity := resource.Quantity{} - totalMemoryCapacity := resource.Quantity{} - allocatableMemoryCapacity := resource.Quantity{} + shouldReportCorrectProperties = func( + p propertyprovider.PropertyProvider, + nodes []corev1.Node, pods []corev1.Pod, + isCostsEnabled, isAvailableResourcesEnabled bool, + ) { + totalCPUCapacity := resource.Quantity{} + allocatableCPUCapacity := resource.Quantity{} + totalMemoryCapacity := resource.Quantity{} + allocatableMemoryCapacity := resource.Quantity{} + + for idx := range nodes { + node := nodes[idx] + totalCPUCapacity.Add(node.Status.Capacity[corev1.ResourceCPU]) + allocatableCPUCapacity.Add(node.Status.Allocatable[corev1.ResourceCPU]) + totalMemoryCapacity.Add(node.Status.Capacity[corev1.ResourceMemory]) + allocatableMemoryCapacity.Add(node.Status.Allocatable[corev1.ResourceMemory]) + } - for idx := range nodes { - node := nodes[idx] - totalCPUCapacity.Add(node.Status.Capacity[corev1.ResourceCPU]) - allocatableCPUCapacity.Add(node.Status.Allocatable[corev1.ResourceCPU]) - totalMemoryCapacity.Add(node.Status.Capacity[corev1.ResourceMemory]) - allocatableMemoryCapacity.Add(node.Status.Allocatable[corev1.ResourceMemory]) + totalCPUCores := totalCPUCapacity.AsApproximateFloat64() + totalMemoryBytes := totalMemoryCapacity.AsApproximateFloat64() + totalMemoryGBs := totalMemoryBytes / (1024.0 * 1024.0 * 1024.0) + + requestedCPUCapacity := resource.Quantity{} + requestedMemoryCapacity := resource.Quantity{} + for idx := range pods { + pod := pods[idx] + for cidx := range pod.Spec.Containers { + c := pod.Spec.Containers[cidx] + requestedCPUCapacity.Add(c.Resources.Requests[corev1.ResourceCPU]) + requestedMemoryCapacity.Add(c.Resources.Requests[corev1.ResourceMemory]) } + } - totalCPUCores := totalCPUCapacity.AsApproximateFloat64() - totalMemoryBytes := totalMemoryCapacity.AsApproximateFloat64() - totalMemoryGBs := totalMemoryBytes / (1024.0 * 1024.0 * 1024.0) + availableCPUCapacity := allocatableCPUCapacity.DeepCopy() + availableCPUCapacity.Sub(requestedCPUCapacity) + availableMemoryCapacity := allocatableMemoryCapacity.DeepCopy() + availableMemoryCapacity.Sub(requestedMemoryCapacity) + + Eventually(func() error { + // Calculate the costs manually; hardcoded values cannot be used as Azure pricing + // is subject to periodic change. 
+ + // Note that this is done within an eventually block to ensure that the + // calculation is done using the latest pricing data. Inconsistency + // should seldom occur though. + totalCost := 0.0 + missingSKUSet := map[string]bool{} + isPricingDataStale := false + + pricingDataLastUpdated := pp.LastUpdated() + pricingDataBestAfter := time.Now().Add(-trackers.PricingDataShelfLife) + if pricingDataLastUpdated.Before(pricingDataBestAfter) { + isPricingDataStale = true + } - requestedCPUCapacity := resource.Quantity{} - requestedMemoryCapacity := resource.Quantity{} - for idx := range pods { - pod := pods[idx] - for cidx := range pod.Spec.Containers { - c := pod.Spec.Containers[cidx] - requestedCPUCapacity.Add(c.Resources.Requests[corev1.ResourceCPU]) - requestedMemoryCapacity.Add(c.Resources.Requests[corev1.ResourceMemory]) + for idx := range nodes { + node := nodes[idx] + sku := node.Labels[trackers.AKSClusterNodeSKULabelName] + cost, found := pp.OnDemandPrice(sku) + if !found || cost == pricing.MissingPrice { + missingSKUSet[sku] = true + continue } + totalCost += cost + } + missingSKUs := []string{} + for sku := range missingSKUSet { + missingSKUs = append(missingSKUs, sku) + } + slices.Sort(missingSKUs) + + perCPUCoreCost := "0.0" + perGBMemoryCost := "0.0" + costCollectionWarnings := []string{} + var costCollectionErr error + + switch { + case len(nodes) == 0: + case totalCost == 0.0: + costCollectionErr = fmt.Errorf("nodes are present, but no pricing data is available for any node SKUs (%v)", missingSKUs) + case len(missingSKUs) > 0: + costCollectionErr = fmt.Errorf("no pricing data is available for one or more of the node SKUs (%v) in the cluster", missingSKUs) + default: + perCPUCoreCost = fmt.Sprintf(CostPrecisionTemplate, totalCost/totalCPUCores) + perGBMemoryCost = fmt.Sprintf(CostPrecisionTemplate, totalCost/totalMemoryGBs) } - availableCPUCapacity := allocatableCPUCapacity.DeepCopy() - availableCPUCapacity.Sub(requestedCPUCapacity) - availableMemoryCapacity := allocatableMemoryCapacity.DeepCopy() - availableMemoryCapacity.Sub(requestedMemoryCapacity) - - Eventually(func() error { - // Calculate the costs manually; hardcoded values cannot be used as Azure pricing - // is subject to periodic change. - - // Note that this is done within an eventually block to ensure that the - // calculation is done using the latest pricing data. Inconsistency - // should seldom occur though. 
- totalCost := 0.0 - missingSKUSet := map[string]bool{} - isPricingDataStale := false - - pricingDataLastUpdated := pp.LastUpdated() - pricingDataBestAfter := time.Now().Add(-trackers.PricingDataShelfLife) - if pricingDataLastUpdated.Before(pricingDataBestAfter) { - isPricingDataStale = true - } + if isPricingDataStale { + costCollectionWarnings = append(costCollectionWarnings, + fmt.Sprintf("the pricing data is stale (last updated at %v); the system might have issues connecting to the Azure Retail Prices API, or the current region is unsupported", pricingDataLastUpdated), + ) + } - for idx := range nodes { - node := nodes[idx] - sku := node.Labels[trackers.AKSClusterNodeSKULabelName] - cost, found := pp.OnDemandPrice(sku) - if !found || cost == pricing.MissingPrice { - missingSKUSet[sku] = true - continue - } - totalCost += cost - } - missingSKUs := []string{} - for sku := range missingSKUSet { - missingSKUs = append(missingSKUs, sku) - } - slices.Sort(missingSKUs) - - perCPUCoreCost := "0.0" - perGBMemoryCost := "0.0" - costCollectionWarnings := []string{} - var costCollectionErr error - - switch { - case len(nodes) == 0: - case totalCost == 0.0: - costCollectionErr = fmt.Errorf("nodes are present, but no pricing data is available for any node SKUs (%v)", missingSKUs) - case len(missingSKUs) > 0: - costCollectionErr = fmt.Errorf("no pricing data is available for one or more of the node SKUs (%v) in the cluster", missingSKUs) - default: - perCPUCoreCost = fmt.Sprintf(CostPrecisionTemplate, totalCost/totalCPUCores) - perGBMemoryCost = fmt.Sprintf(CostPrecisionTemplate, totalCost/totalMemoryGBs) - } + wantProperties := map[clusterv1beta1.PropertyName]clusterv1beta1.PropertyValue{ + propertyprovider.NodeCountProperty: { + Value: fmt.Sprintf("%d", len(nodes)), + }, + } - if isPricingDataStale { - costCollectionWarnings = append(costCollectionWarnings, - fmt.Sprintf("the pricing data is stale (last updated at %v); the system might have issues connecting to the Azure Retail Prices API, or the current region is unsupported", pricingDataLastUpdated), - ) + if costCollectionErr == nil && isCostsEnabled { + wantProperties[PerCPUCoreCostProperty] = clusterv1beta1.PropertyValue{ + Value: perCPUCoreCost, } + wantProperties[PerGBMemoryCostProperty] = clusterv1beta1.PropertyValue{ + Value: perGBMemoryCost, + } + } - wantProperties := map[clusterv1beta1.PropertyName]clusterv1beta1.PropertyValue{ - propertyprovider.NodeCountProperty: { - Value: fmt.Sprintf("%d", len(nodes)), + var wantConditions []metav1.Condition + switch { + case !isCostsEnabled: + // Cost calculation has been disabled. No need to add a condition. 
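+				// wantConditions stays nil here; cmpopts.EquateEmpty() in the diff comparison
+				// below treats it as equal to the empty condition slice that Collect() returns
+				// when cost collection is off.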
+ case costCollectionErr != nil: + wantConditions = []metav1.Condition{ + { + Type: CostPropertiesCollectionSucceededCondType, + Status: metav1.ConditionFalse, + Reason: CostPropertiesCollectionFailedReason, + Message: fmt.Sprintf(CostPropertiesCollectionFailedMsgTemplate, costCollectionErr), }, } - if costCollectionErr == nil { - wantProperties[PerCPUCoreCostProperty] = clusterv1beta1.PropertyValue{ - Value: perCPUCoreCost, - } - wantProperties[PerGBMemoryCostProperty] = clusterv1beta1.PropertyValue{ - Value: perGBMemoryCost, - } + case len(costCollectionWarnings) > 0: + wantConditions = []metav1.Condition{ + { + Type: CostPropertiesCollectionSucceededCondType, + Status: metav1.ConditionTrue, + Reason: CostPropertiesCollectionDegradedReason, + Message: fmt.Sprintf(CostPropertiesCollectionDegradedMsgTemplate, costCollectionWarnings), + }, } - - var wantConditions []metav1.Condition - switch { - case costCollectionErr != nil: - wantConditions = []metav1.Condition{ - { - Type: CostPropertiesCollectionSucceededCondType, - Status: metav1.ConditionFalse, - Reason: CostPropertiesCollectionFailedReason, - Message: fmt.Sprintf(CostPropertiesCollectionFailedMsgTemplate, costCollectionErr), - }, - } - case len(costCollectionWarnings) > 0: - wantConditions = []metav1.Condition{ - { - Type: CostPropertiesCollectionSucceededCondType, - Status: metav1.ConditionTrue, - Reason: CostPropertiesCollectionDegradedReason, - Message: fmt.Sprintf(CostPropertiesCollectionDegradedMsgTemplate, costCollectionWarnings), - }, - } - default: - wantConditions = []metav1.Condition{ - { - Type: CostPropertiesCollectionSucceededCondType, - Status: metav1.ConditionTrue, - Reason: CostPropertiesCollectionSucceededReason, - Message: CostPropertiesCollectionSucceededMsg, - }, - } + default: + wantConditions = []metav1.Condition{ + { + Type: CostPropertiesCollectionSucceededCondType, + Status: metav1.ConditionTrue, + Reason: CostPropertiesCollectionSucceededReason, + Message: CostPropertiesCollectionSucceededMsg, + }, } + } - expectedRes := propertyprovider.PropertyCollectionResponse{ - Properties: wantProperties, - Resources: clusterv1beta1.ResourceUsage{ - Capacity: corev1.ResourceList{ - corev1.ResourceCPU: totalCPUCapacity, - corev1.ResourceMemory: totalMemoryCapacity, - }, - Allocatable: corev1.ResourceList{ - corev1.ResourceCPU: allocatableCPUCapacity, - corev1.ResourceMemory: allocatableMemoryCapacity, - }, - Available: corev1.ResourceList{ - corev1.ResourceCPU: availableCPUCapacity, - corev1.ResourceMemory: availableMemoryCapacity, - }, + expectedRes := propertyprovider.PropertyCollectionResponse{ + Properties: wantProperties, + Resources: clusterv1beta1.ResourceUsage{ + Capacity: corev1.ResourceList{ + corev1.ResourceCPU: totalCPUCapacity, + corev1.ResourceMemory: totalMemoryCapacity, }, - Conditions: wantConditions, - } + Allocatable: corev1.ResourceList{ + corev1.ResourceCPU: allocatableCPUCapacity, + corev1.ResourceMemory: allocatableMemoryCapacity, + }, + }, + Conditions: wantConditions, + } - res := p.Collect(ctx) - if diff := cmp.Diff(res, expectedRes, ignoreObservationTimeFieldInPropertyValue); diff != "" { - return fmt.Errorf("property collection response (-got, +want):\n%s", diff) + if isAvailableResourcesEnabled { + expectedRes.Resources.Available = corev1.ResourceList{ + corev1.ResourceCPU: availableCPUCapacity, + corev1.ResourceMemory: availableMemoryCapacity, } - return nil - }, eventuallyDuration, eventuallyInterval).Should(BeNil()) - } + } + + res := p.Collect(ctx) + if diff := cmp.Diff(res, expectedRes, 
ignoreObservationTimeFieldInPropertyValue, cmpopts.EquateEmpty()); diff != "" { + return fmt.Errorf("property collection response (-got, +want):\n%s", diff) + } + return nil + }, eventuallyDuration, eventuallyInterval).Should(BeNil()) } ) @@ -725,7 +733,9 @@ var _ = Describe("azure property provider", func() { Context("add a new node", Serial, Ordered, func() { BeforeAll(shouldCreateNodes(nodes[0])) - It("should report correct properties", shouldReportCorrectPropertiesForNodes(nodes[0:1], nil)) + It("should report correct properties", func() { + shouldReportCorrectProperties(p, nodes[0:1], nil, true, true) + }) AfterAll(shouldDeleteNodes(nodes[0])) }) @@ -733,7 +743,9 @@ var _ = Describe("azure property provider", func() { Context("add multiple nodes", Serial, Ordered, func() { BeforeAll(shouldCreateNodes(nodes...)) - It("should report correct properties", shouldReportCorrectPropertiesForNodes(nodes, nil)) + It("should report correct properties", func() { + shouldReportCorrectProperties(p, nodes, nil, true, true) + }) AfterAll(shouldDeleteNodes(nodes...)) }) @@ -741,11 +753,15 @@ var _ = Describe("azure property provider", func() { Context("remove a node", Serial, Ordered, func() { BeforeAll(shouldCreateNodes(nodes...)) - It("should report correct properties", shouldReportCorrectPropertiesForNodes(nodes, nil)) + It("should report correct properties", func() { + shouldReportCorrectProperties(p, nodes, nil, true, true) + }) It("can delete a node", shouldDeleteNodes(nodes[0])) - It("should report correct properties after deletion", shouldReportCorrectPropertiesForNodes(nodes[1:], nil)) + It("should report correct properties after deletion", func() { + shouldReportCorrectProperties(p, nodes[1:], nil, true, true) + }) AfterAll(shouldDeleteNodes(nodes[1:]...)) }) @@ -753,11 +769,15 @@ var _ = Describe("azure property provider", func() { Context("remove multiple nodes", Serial, Ordered, func() { BeforeAll(shouldCreateNodes(nodes...)) - It("should report correct properties", shouldReportCorrectPropertiesForNodes(nodes, nil)) + It("should report correct properties", func() { + shouldReportCorrectProperties(p, nodes, nil, true, true) + }) It("can delete multiple nodes", shouldDeleteNodes(nodes[0], nodes[3])) - It("should report correct properties after deletion", shouldReportCorrectPropertiesForNodes(nodes[1:3], nil)) + It("should report correct properties after deletion", func() { + shouldReportCorrectProperties(p, nodes[1:3], nil, true, true) + }) AfterAll(shouldDeleteNodes(nodes[1], nodes[2])) }) @@ -767,7 +787,9 @@ var _ = Describe("azure property provider", func() { BeforeAll(shouldCreatePods(pods[0])) - It("should report correct properties (pod bound)", shouldReportCorrectPropertiesForNodes(nodes, pods[0:1])) + It("should report correct properties (pod bound)", func() { + shouldReportCorrectProperties(p, nodes, pods[0:1], true, true) + }) AfterAll(shouldDeletePods(pods[0])) @@ -783,11 +805,15 @@ var _ = Describe("azure property provider", func() { Expect(memberClient.Create(ctx, pod)).To(Succeed(), "Failed to create pod") }) - It("should report correct properties (pod not bound)", shouldReportCorrectPropertiesForNodes(nodes, nil)) + It("should report correct properties (pod not bound)", func() { + shouldReportCorrectProperties(p, nodes, nil, true, true) + }) It("can bind the pod", shouldBindPods(pods[0])) - It("should report correct properties (pod bound)", shouldReportCorrectPropertiesForNodes(nodes, pods[0:1])) + It("should report correct properties (pod bound)", func() { + 
shouldReportCorrectProperties(p, nodes, pods[0:1], true, true) + }) AfterAll(shouldDeletePods(pods[0])) @@ -799,7 +825,9 @@ var _ = Describe("azure property provider", func() { BeforeAll(shouldCreatePods(pods...)) - It("should report correct properties (pods bound)", shouldReportCorrectPropertiesForNodes(nodes, pods)) + It("should report correct properties (pods bound)", func() { + shouldReportCorrectProperties(p, nodes, pods, true, true) + }) AfterAll(shouldDeletePods(pods...)) @@ -811,11 +839,15 @@ var _ = Describe("azure property provider", func() { BeforeAll(shouldCreatePods(pods[0])) - It("should report correct properties (pod bound)", shouldReportCorrectPropertiesForNodes(nodes, pods[0:1])) + It("should report correct properties (pod bound)", func() { + shouldReportCorrectProperties(p, nodes, pods[0:1], true, true) + }) It("can delete the pod", shouldDeletePods(pods[0])) - It("should report correct properties (pod deleted)", shouldReportCorrectPropertiesForNodes(nodes, nil)) + It("should report correct properties (pod deleted)", func() { + shouldReportCorrectProperties(p, nodes, nil, true, true) + }) AfterAll(shouldDeleteNodes(nodes...)) }) @@ -825,7 +857,9 @@ var _ = Describe("azure property provider", func() { BeforeAll(shouldCreatePods(pods[0])) - It("should report correct properties (pod bound)", shouldReportCorrectPropertiesForNodes(nodes, pods[0:1])) + It("should report correct properties (pod bound)", func() { + shouldReportCorrectProperties(p, nodes, pods[0:1], true, true) + }) It("can transition the pod to the succeeded state", func() { pod := pods[0].DeepCopy() @@ -833,7 +867,9 @@ var _ = Describe("azure property provider", func() { Expect(memberClient.Status().Update(ctx, pod)).To(Succeed(), "Failed to update pod status") }) - It("should report correct properties (pod succeeded)", shouldReportCorrectPropertiesForNodes(nodes, nil)) + It("should report correct properties (pod succeeded)", func() { + shouldReportCorrectProperties(p, nodes, nil, true, true) + }) AfterAll(shouldDeletePods(pods[0])) @@ -845,7 +881,9 @@ var _ = Describe("azure property provider", func() { BeforeAll(shouldCreatePods(pods[0])) - It("should report correct properties (pod bound)", shouldReportCorrectPropertiesForNodes(nodes, pods[0:1])) + It("should report correct properties (pod bound)", func() { + shouldReportCorrectProperties(p, nodes, pods[0:1], true, true) + }) It("can transition the pod to the failed state", func() { pod := pods[0].DeepCopy() @@ -853,7 +891,9 @@ var _ = Describe("azure property provider", func() { Expect(memberClient.Status().Update(ctx, pod)).To(Succeed(), "Failed to update pod status") }) - It("should report correct properties (pod failed)", shouldReportCorrectPropertiesForNodes(nodes, nil)) + It("should report correct properties (pod failed)", func() { + shouldReportCorrectProperties(p, nodes, nil, true, true) + }) AfterAll(shouldDeletePods(pods[0])) @@ -865,11 +905,15 @@ var _ = Describe("azure property provider", func() { BeforeAll(shouldCreatePods(pods...)) - It("should report correct properties (pods bound)", shouldReportCorrectPropertiesForNodes(nodes, pods)) + It("should report correct properties (pods bound)", func() { + shouldReportCorrectProperties(p, nodes, pods, true, true) + }) It("can delete multiple pods", shouldDeletePods(pods[1], pods[2])) - It("should report correct properties (pods deleted)", shouldReportCorrectPropertiesForNodes(nodes, []corev1.Pod{pods[0], pods[3]})) + It("should report correct properties (pods deleted)", func() { + 
shouldReportCorrectProperties(p, nodes, []corev1.Pod{pods[0], pods[3]}, true, true) + }) AfterAll(shouldDeletePods(pods[0], pods[3])) @@ -879,7 +923,9 @@ var _ = Describe("azure property provider", func() { Context("nodes with some unsupported SKUs", Serial, Ordered, func() { BeforeAll(shouldCreateNodes(nodesWithSomeUnsupportedSKUs...)) - It("should report correct properties", shouldReportCorrectPropertiesForNodes(nodesWithSomeUnsupportedSKUs, nil)) + It("should report correct properties", func() { + shouldReportCorrectProperties(p, nodesWithSomeUnsupportedSKUs, nil, true, true) + }) AfterAll(shouldDeleteNodes(nodesWithSomeUnsupportedSKUs...)) }) @@ -887,7 +933,9 @@ var _ = Describe("azure property provider", func() { Context("nodes with some empty SKUs", Serial, Ordered, func() { BeforeAll(shouldCreateNodes(nodesWithSomeEmptySKUs...)) - It("should report correct properties", shouldReportCorrectPropertiesForNodes(nodesWithSomeEmptySKUs, nil)) + It("should report correct properties", func() { + shouldReportCorrectProperties(p, nodesWithSomeEmptySKUs, nil, true, true) + }) AfterAll(shouldDeleteNodes(nodesWithSomeEmptySKUs...)) }) @@ -895,7 +943,9 @@ var _ = Describe("azure property provider", func() { Context("nodes with all unsupported SKUs", Serial, Ordered, func() { BeforeAll(shouldCreateNodes(nodesWithAllUnsupportedSKUs...)) - It("should report correct properties", shouldReportCorrectPropertiesForNodes(nodesWithAllUnsupportedSKUs, nil)) + It("should report correct properties", func() { + shouldReportCorrectProperties(p, nodesWithAllUnsupportedSKUs, nil, true, true) + }) AfterAll(shouldDeleteNodes(nodesWithAllUnsupportedSKUs...)) }) @@ -903,7 +953,9 @@ var _ = Describe("azure property provider", func() { Context("nodes with all empty SKUs", Serial, Ordered, func() { BeforeAll(shouldCreateNodes(nodesWithAllEmptySKUs...)) - It("should report correct properties", shouldReportCorrectPropertiesForNodes(nodesWithAllEmptySKUs, nil)) + It("should report correct properties", func() { + shouldReportCorrectProperties(p, nodesWithAllEmptySKUs, nil, true, true) + }) AfterAll(shouldDeleteNodes(nodesWithAllEmptySKUs...)) }) @@ -913,7 +965,9 @@ var _ = Describe("azure property provider", func() { Context("nodes with some known missing SKUs", Serial, Ordered, func() { BeforeAll(shouldCreateNodes(nodesWithSomeKnownMissingSKUs...)) - It("should report correct properties", shouldReportCorrectPropertiesForNodes(nodesWithSomeKnownMissingSKUs, nil)) + It("should report correct properties", func() { + shouldReportCorrectProperties(p, nodesWithSomeKnownMissingSKUs, nil, true, true) + }) AfterAll(shouldDeleteNodes(nodesWithSomeKnownMissingSKUs...)) }) @@ -923,8 +977,40 @@ var _ = Describe("azure property provider", func() { Context("nodes with all known missing SKUs", Serial, Ordered, func() { BeforeAll(shouldCreateNodes(nodesWithAllKnownMissingSKUs...)) - It("should report correct properties", shouldReportCorrectPropertiesForNodes(nodesWithAllKnownMissingSKUs, nil)) + It("should report correct properties", func() { + shouldReportCorrectProperties(p, nodesWithAllKnownMissingSKUs, nil, true, true) + }) AfterAll(shouldDeleteNodes(nodesWithAllKnownMissingSKUs...)) }) }) + +var _ = Describe("feature gates", func() { + Context("nodes and pods (cost info disabled)", Serial, Ordered, func() { + BeforeAll(shouldCreateNodes(nodes...)) + + BeforeAll(shouldCreatePods(pods...)) + + It("should report correct properties (pod bound)", func() { + shouldReportCorrectProperties(pWithNoCosts, nodes, pods, false, true) + }) + + 
AfterAll(shouldDeletePods(pods...)) + + AfterAll(shouldDeleteNodes(nodes...)) + }) + + Context("nodes and pods (available resources disabled)", Serial, Ordered, func() { + BeforeAll(shouldCreateNodes(nodes...)) + + BeforeAll(shouldCreatePods(pods...)) + + It("should report correct properties (pod bound)", func() { + shouldReportCorrectProperties(pWithNoAvailableResources, nodes, pods, true, false) + }) + + AfterAll(shouldDeletePods(pods...)) + + AfterAll(shouldDeleteNodes(nodes...)) + }) +}) diff --git a/pkg/propertyprovider/azure/provider_test.go b/pkg/propertyprovider/azure/provider_test.go index 4a7171c37..f5618842e 100644 --- a/pkg/propertyprovider/azure/provider_test.go +++ b/pkg/propertyprovider/azure/provider_test.go @@ -705,8 +705,10 @@ func TestCollect(t *testing.T) { } p := &PropertyProvider{ - nodeTracker: nodeTracker, - podTracker: podTracker, + nodeTracker: nodeTracker, + podTracker: podTracker, + isCostCollectionEnabled: true, + isAvailableResourcesCollectionEnabled: true, } res := p.Collect(ctx) if diff := cmp.Diff(res, tc.wantMetricCollectionResponse, ignoreObservationTimeFieldInPropertyValue); diff != "" { @@ -715,3 +717,187 @@ func TestCollect(t *testing.T) { }) } } + +func TestCollectWithDisabledFeatures(t *testing.T) { + nodes := []corev1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: nodeName1, + Labels: map[string]string{ + trackers.AKSClusterNodeSKULabelName: nodeSKU1, + }, + }, + Spec: corev1.NodeSpec{}, + Status: corev1.NodeStatus{ + Capacity: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("4"), + corev1.ResourceMemory: resource.MustParse("16Gi"), + }, + Allocatable: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("3.2"), + corev1.ResourceMemory: resource.MustParse("15.2Gi"), + }, + }, + }, + } + pods := []corev1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: podName1, + Namespace: namespaceName1, + }, + Spec: corev1.PodSpec{ + NodeName: nodeName1, + Containers: []corev1.Container{ + { + Name: containerName1, + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1"), + corev1.ResourceMemory: resource.MustParse("2Gi"), + }, + }, + }, + { + Name: containerName2, + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1.5"), + corev1.ResourceMemory: resource.MustParse("4Gi"), + }, + }, + }, + }, + }, + }, + } + + testCases := []struct { + name string + nodeTracker *trackers.NodeTracker + podTracker *trackers.PodTracker + isCostCollectionEnabled bool + isAvailableResourcesCollectionEnabled bool + wantPropertyCollectionResponse propertyprovider.PropertyCollectionResponse + }{ + { + name: "cost collection disabled", + nodeTracker: trackers.NewNodeTracker(nil), + podTracker: trackers.NewPodTracker(), + isCostCollectionEnabled: false, + isAvailableResourcesCollectionEnabled: true, + wantPropertyCollectionResponse: propertyprovider.PropertyCollectionResponse{ + Properties: map[clusterv1beta1.PropertyName]clusterv1beta1.PropertyValue{ + propertyprovider.NodeCountProperty: { + Value: "1", + }, + }, + Resources: clusterv1beta1.ResourceUsage{ + Capacity: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("4"), + corev1.ResourceMemory: resource.MustParse("16Gi"), + }, + Allocatable: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("3.2"), + corev1.ResourceMemory: resource.MustParse("15.2Gi"), + }, + Available: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("0.7"), + 
corev1.ResourceMemory: resource.MustParse("9.2Gi"), + }, + }, + Conditions: []metav1.Condition{}, + }, + }, + { + name: "available resources collection disabled", + nodeTracker: trackers.NewNodeTracker(&dummyPricingProvider{}), + isCostCollectionEnabled: true, + isAvailableResourcesCollectionEnabled: false, + wantPropertyCollectionResponse: propertyprovider.PropertyCollectionResponse{ + Properties: map[clusterv1beta1.PropertyName]clusterv1beta1.PropertyValue{ + propertyprovider.NodeCountProperty: { + Value: "1", + }, + PerCPUCoreCostProperty: { + Value: "0.250", + }, + PerGBMemoryCostProperty: { + Value: "0.062", + }, + }, + Resources: clusterv1beta1.ResourceUsage{ + Capacity: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("4"), + corev1.ResourceMemory: resource.MustParse("16Gi"), + }, + Allocatable: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("3.2"), + corev1.ResourceMemory: resource.MustParse("15.2Gi"), + }, + }, + Conditions: []metav1.Condition{ + { + Type: CostPropertiesCollectionSucceededCondType, + Status: metav1.ConditionTrue, + Reason: CostPropertiesCollectionSucceededReason, + Message: CostPropertiesCollectionSucceededMsg, + }, + }, + }, + }, + { + name: "both cost and available resources collection disabled", + nodeTracker: trackers.NewNodeTracker(nil), + isCostCollectionEnabled: false, + isAvailableResourcesCollectionEnabled: false, + wantPropertyCollectionResponse: propertyprovider.PropertyCollectionResponse{ + Properties: map[clusterv1beta1.PropertyName]clusterv1beta1.PropertyValue{ + propertyprovider.NodeCountProperty: { + Value: "1", + }, + }, + Resources: clusterv1beta1.ResourceUsage{ + Capacity: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("4"), + corev1.ResourceMemory: resource.MustParse("16Gi"), + }, + Allocatable: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("3.2"), + corev1.ResourceMemory: resource.MustParse("15.2Gi"), + }, + }, + Conditions: []metav1.Condition{}, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + ctx := context.Background() + + // Build the trackers manually for testing purposes. 
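+			// The pod tracker is left nil for cases where available resources collection is
+			// disabled; Collect() is expected to skip the available-resources path entirely
+			// and never consult it.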
+ nodeTracker := tc.nodeTracker + for idx := range nodes { + nodeTracker.AddOrUpdate(&nodes[idx]) + } + podTracker := tc.podTracker + if podTracker != nil { + for idx := range pods { + podTracker.AddOrUpdate(&pods[idx]) + } + } + + p := &PropertyProvider{ + nodeTracker: nodeTracker, + podTracker: podTracker, + isCostCollectionEnabled: tc.isCostCollectionEnabled, + isAvailableResourcesCollectionEnabled: tc.isAvailableResourcesCollectionEnabled, + } + res := p.Collect(ctx) + if diff := cmp.Diff(res, tc.wantPropertyCollectionResponse, ignoreObservationTimeFieldInPropertyValue); diff != "" { + t.Fatalf("Collect() property collection response diff (-got, +want):\n%s", diff) + } + }) + } +} diff --git a/pkg/propertyprovider/azure/suite_test.go b/pkg/propertyprovider/azure/suite_test.go index 82e47d39c..84c674730 100644 --- a/pkg/propertyprovider/azure/suite_test.go +++ b/pkg/propertyprovider/azure/suite_test.go @@ -56,12 +56,14 @@ const ( ) var ( - memberTestEnv *envtest.Environment - memberClient client.Client - ctx context.Context - cancel context.CancelFunc - p propertyprovider.PropertyProvider - pp trackers.PricingProvider + memberTestEnv *envtest.Environment + memberClient client.Client + ctx context.Context + cancel context.CancelFunc + p propertyprovider.PropertyProvider + pp trackers.PricingProvider + pWithNoCosts propertyprovider.PropertyProvider + pWithNoAvailableResources propertyprovider.PropertyProvider ) // setUpResources help set up resources in the test environment. @@ -109,10 +111,20 @@ var _ = BeforeSuite(func() { // Set up resources. setUpResources() - // Start the Azure property provider. + // Start an Azure property provider instance with all features on. pp = trackers.NewAKSKarpenterPricingClient(ctx, region) - p = NewWithPricingProvider(pp) + p = NewWithPricingProvider(pp, "node watcher", "pod watcher", true, true) Expect(p.Start(ctx, memberCfg)).To(Succeed()) + + // Start different property provider instances with different features disabled, + // to verify the behaviors of feature gates. + // + // All property providers share the same environment and the same pricing provider + // (even though in normal ops they will not). + pWithNoCosts = NewWithPricingProvider(nil, "node watcher with costs disabled", "pod watcher with costs disabled", false, true) + pWithNoAvailableResources = NewWithPricingProvider(pp, "node watcher with no available resources", "pod watcher with no available resources", true, false) + Expect(pWithNoCosts.Start(ctx, memberCfg)).To(Succeed()) + Expect(pWithNoAvailableResources.Start(ctx, memberCfg)).To(Succeed()) }) var _ = AfterSuite(func() { diff --git a/pkg/propertyprovider/azure/trackers/nodes.go b/pkg/propertyprovider/azure/trackers/nodes.go index 68ee184e7..2fa01d1cf 100644 --- a/pkg/propertyprovider/azure/trackers/nodes.go +++ b/pkg/propertyprovider/azure/trackers/nodes.go @@ -29,6 +29,8 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" "k8s.io/klog/v2" + + "go.goms.io/fleet/pkg/utils/controller" ) const ( @@ -81,15 +83,14 @@ type NodeTracker struct { totalCapacity corev1.ResourceList totalAllocatable corev1.ResourceList - // costs tracks the cost-related information about the cluster. - costs *costInfo - // Below are a list of maps that tracks information about individual nodes in the cluster. capacityByNode map[string]corev1.ResourceList allocatableByNode map[string]corev1.ResourceList nodeSetBySKU map[string]NodeSet skuByNode map[string]string + // costs tracks the cost-related information about the cluster. 
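+	// When no pricing provider is configured, calculateCosts only stores a placeholder
+	// error here; Costs() short-circuits before ever reading it.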
+	costs *costInfo
 	// pricingProvider facilitates cost calculation.
 	pricingProvider PricingProvider
@@ -136,6 +137,15 @@ func NewNodeTracker(pp PricingProvider) *NodeTracker {
 //
 // Note that this method assumes that the access lock has been acquired.
 func (nt *NodeTracker) calculateCosts() {
+	// Skip the cost calculation if no pricing provider is available.
+	if nt.pricingProvider == nil {
+		// This error will not be read; it is set here for completeness reasons.
+		nt.costs = &costInfo{
+			err: fmt.Errorf("no pricing provider is set up; cannot calculate costs"),
+		}
+		return
+	}
+
 	totalCapacityCPU := nt.totalCapacity[corev1.ResourceCPU]
 	totalCapacityMemory := nt.totalCapacity[corev1.ResourceMemory]

@@ -561,6 +571,13 @@ func (nt *NodeTracker) Costs() (perCPUCoreCost, perGBMemoryCost float64, warning
 	nt.mu.Lock()
 	defer nt.mu.Unlock()

+	if nt.pricingProvider == nil {
+		// Normally this branch will never run; it is set for completeness reasons.
+		wrappedErr := fmt.Errorf("no pricing provider is set up; cannot calculate costs")
+		_ = controller.NewUnexpectedBehaviorError(wrappedErr)
+		return 0.0, 0.0, nil, wrappedErr
+	}
+
 	if nt.costs.lastUpdated.Before(nt.pricingProvider.LastUpdated()) {
 		nt.calculateCosts()
 	}
diff --git a/pkg/propertyprovider/azure/trackers/trackers_test.go b/pkg/propertyprovider/azure/trackers/trackers_test.go
index cc3208951..942781a41 100644
--- a/pkg/propertyprovider/azure/trackers/trackers_test.go
+++ b/pkg/propertyprovider/azure/trackers/trackers_test.go
@@ -388,6 +388,28 @@ func TestCalculateCosts(t *testing.T) {
 				fmt.Sprintf("the pricing data is stale (last updated at %v); the system might have issues connecting to the Azure Retail Prices API, or the current region is unsupported", currentTime.Add(-time.Hour*48)),
 			},
 		},
+		{
+			name: "no pricing provider",
+			nt: &NodeTracker{
+				totalCapacity: corev1.ResourceList{
+					corev1.ResourceCPU:    resource.MustParse("12"),
+					corev1.ResourceMemory: resource.MustParse("2Gi"),
+				},
+				totalAllocatable: corev1.ResourceList{
+					corev1.ResourceCPU:    resource.MustParse("8"),
+					corev1.ResourceMemory: resource.MustParse("1Gi"),
+				},
+				nodeSetBySKU: map[string]NodeSet{
+					nodeSKU1: {
+						nodeName1: true,
+					},
+				},
+				costs: &costInfo{
+					err: fmt.Errorf("costs have not been calculated yet"),
+				},
+			},
+			wantCostErrStrPrefix: "no pricing provider is set up; cannot calculate costs",
+		},
 	}

 	for _, tc := range testCases {
@@ -2098,6 +2120,99 @@ func TestNodeTrackerAllocatableCapacityFor(t *testing.T) {
 	}
 }

+// TestNodeTrackerCosts tests the Costs method of the NodeTracker.
+func TestNodeTrackerCosts(t *testing.T) { + testCases := []struct { + name string + nt *NodeTracker + wantPerCPUCoreCost float64 + wantPerGBMemoryCost float64 + wantWarnings []string + wantCostErrStrPrefix string + }{ + { + name: "fresh cost data, no warnings/errors", + nt: &NodeTracker{ + pricingProvider: &dummyPricingProvider{}, + costs: &costInfo{ + perCPUCoreHourlyCost: 1.0, + perGBMemoryHourlyCost: 1.0, + lastUpdated: time.Now(), + }, + }, + wantPerCPUCoreCost: 1.0, + wantPerGBMemoryCost: 1.0, + }, + { + name: "stale cost data, no warnings/errors", + nt: &NodeTracker{ + totalCapacity: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("12"), + corev1.ResourceMemory: resource.MustParse("24Gi"), + }, + nodeSetBySKU: map[string]NodeSet{ + nodeSKU1: {nodeName1: true, nodeName2: true}, + nodeSKU2: {nodeName3: true}, + }, + pricingProvider: &dummyPricingProvider{}, + costs: &costInfo{ + lastUpdated: currentTime.Add(-time.Hour * 48), + }, + }, + wantPerCPUCoreCost: 0.5833, + wantPerGBMemoryCost: 0.2917, + }, + { + name: "no pricing provider", + nt: &NodeTracker{ + totalCapacity: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("12"), + corev1.ResourceMemory: resource.MustParse("2Gi"), + }, + nodeSetBySKU: map[string]NodeSet{ + nodeSKU1: {nodeName1: true}, + }, + costs: &costInfo{ + err: fmt.Errorf("costs have not been calculated yet"), + }, + }, + wantCostErrStrPrefix: "no pricing provider is set up; cannot calculate costs", + }, + // TO-DO (chenyu1): add more test cases. + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + perCPUCoreCost, perGBMemoryCost, warnings, err := tc.nt.Costs() + + if tc.wantCostErrStrPrefix != "" { + if err == nil { + t.Fatalf("Costs() = nil, want error with prefix %s", tc.wantCostErrStrPrefix) + } + if !strings.HasPrefix(err.Error(), tc.wantCostErrStrPrefix) { + t.Fatalf("Costs() error = %s, want prefix %s", err.Error(), tc.wantCostErrStrPrefix) + } + return + } + + if err != nil { + t.Fatalf("Costs() = %s, want no error", err) + } + + if diff := cmp.Diff(warnings, tc.wantWarnings, cmpopts.EquateEmpty()); diff != "" { + t.Fatalf("Costs() warnings diff (-got, +want):\n%s", diff) + } + + if !cmp.Equal(perCPUCoreCost, tc.wantPerCPUCoreCost, cmpopts.EquateApprox(0.0, 0.01)) { + t.Fatalf("Costs() perCPUCoreCost = %f, want %f", perCPUCoreCost, tc.wantPerCPUCoreCost) + } + if !cmp.Equal(perGBMemoryCost, tc.wantPerGBMemoryCost, cmpopts.EquateApprox(0.0, 0.01)) { + t.Fatalf("Costs() perGBMemoryCost = %f, want %f", perGBMemoryCost, tc.wantPerGBMemoryCost) + } + }) + } +} + // TestNodeTrackerAddOrUpdate tests the AddOrUpdate method of the PodTracker. 
func TestPodTrackerAddOrUpdate(t *testing.T) { testCases := []struct { diff --git a/test/e2e/actuals_test.go b/test/e2e/actuals_test.go index 5c41abeda..32d790d12 100644 --- a/test/e2e/actuals_test.go +++ b/test/e2e/actuals_test.go @@ -554,6 +554,45 @@ func rpAppliedFailedConditions(generation int64) []metav1.Condition { } } +func rpDiffReportingFailedConditions(generation int64, hasOverride bool) []metav1.Condition { + overrideConditionReason := condition.OverrideNotSpecifiedReason + if hasOverride { + overrideConditionReason = condition.OverriddenSucceededReason + } + return []metav1.Condition{ + { + Type: string(placementv1beta1.ResourcePlacementScheduledConditionType), + Status: metav1.ConditionTrue, + Reason: scheduler.FullyScheduledReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ResourcePlacementRolloutStartedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.RolloutStartedReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ResourcePlacementOverriddenConditionType), + Status: metav1.ConditionTrue, + Reason: overrideConditionReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ResourcePlacementWorkSynchronizedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.WorkSynchronizedReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ResourcePlacementDiffReportedConditionType), + Status: metav1.ConditionFalse, + Reason: condition.DiffReportedStatusFalseReason, + ObservedGeneration: generation, + }, + } +} + func crpAppliedFailedConditions(generation int64) []metav1.Condition { return []metav1.Condition{ { diff --git a/test/e2e/placement_negative_cases_test.go b/test/e2e/placement_negative_cases_test.go index 842be3c88..5f476e60d 100644 --- a/test/e2e/placement_negative_cases_test.go +++ b/test/e2e/placement_negative_cases_test.go @@ -1,6 +1,17 @@ /* -Copyright (c) Microsoft Corporation. -Licensed under the MIT license. +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ package e2e @@ -29,7 +40,7 @@ var _ = Describe("handling errors and failures gracefully", func() { wrappedCMName2 := "app-2" cmDataKey := "foo" - cmDataVal1 := "bar" + cmDataVal1 := cmDataVal cmDataVal2 := "baz" // Many test specs below use envelopes for placement as it is a bit tricky to simulate @@ -485,7 +496,7 @@ var _ = Describe("handling errors and failures gracefully", func() { Consistently(crpStatusUpdatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "CRP status has changed unexpectedly") }) - It("should place some manifests on member clusters", func() { + It("should place the other manifests on member clusters", func() { Eventually(func() error { return validateWorkNamespaceOnCluster(memberCluster1EastProd, types.NamespacedName{Name: workNamespaceName}) }, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to apply the namespace object") diff --git a/test/e2e/resource_placement_negative_cases_test.go b/test/e2e/resource_placement_negative_cases_test.go new file mode 100644 index 000000000..5da96f9c3 --- /dev/null +++ b/test/e2e/resource_placement_negative_cases_test.go @@ -0,0 +1,539 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +package e2e + +import ( + "encoding/json" + "fmt" + + "github.com/google/go-cmp/cmp" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/utils/ptr" + + placementv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" + "go.goms.io/fleet/pkg/controllers/workapplier" + "go.goms.io/fleet/test/e2e/framework" +) + +var _ = Describe("handling errors and failures gracefully for resource placement", Label("resourceplacement"), func() { + envelopeName := "wrapper" + wrappedCMName1 := "app-1" + wrappedCMName2 := "app-2" + + cmDataKey := "foo" + cmDataVal1 := cmDataVal + cmDataVal2 := "baz" + + crpName := fmt.Sprintf(crpNameTemplate, GinkgoParallelProcess()) + rpName := fmt.Sprintf(rpNameTemplate, GinkgoParallelProcess()) + workNamespaceName := fmt.Sprintf(workNamespaceNameTemplate, GinkgoParallelProcess()) + + BeforeEach(OncePerOrdered, func() { + // Create the resources. + createNamespace() + + // Create the CRP with Namespace-only selector. + createNamespaceOnlyCRP(crpName) + + By("should update CRP status as expected") + crpStatusUpdatedActual := crpStatusUpdatedActual(workNamespaceIdentifiers(), allMemberClusterNames, nil, "0") + Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") + }) + + AfterEach(OncePerOrdered, func() { + ensureCRPAndRelatedResourcesDeleted(crpName, allMemberClusters) + }) + + // Many test specs below use envelopes for placement as it is a bit tricky to simulate + // decoding errors with resources created directly in the hub cluster. 
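+	// (Wrapped manifests are stored as raw bytes in the envelope, so a bogus apiVersion/kind only
+	// surfaces when the work applier attempts to decode them.)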
+ // + // TO-DO (chenyu1): reserve an API group exclusively on the hub cluster so that + // envelopes do not need to be used for this test spec. + Context("pre-processing failure in apply ops (decoding errors)", Ordered, func() { + BeforeAll(func() { + // Use an envelope to create duplicate resource entries. + // Create an envelope resource to wrap the configMaps. + resourceEnvelope := &placementv1beta1.ResourceEnvelope{ + ObjectMeta: metav1.ObjectMeta{ + Name: envelopeName, + Namespace: workNamespaceName, + }, + Data: map[string]runtime.RawExtension{}, + } + + // Create configMaps as wrapped resources. + configMap := &corev1.ConfigMap{ + TypeMeta: metav1.TypeMeta{ + APIVersion: corev1.SchemeGroupVersion.String(), + Kind: "ConfigMap", + }, + ObjectMeta: metav1.ObjectMeta{ + Namespace: workNamespaceName, + Name: wrappedCMName1, + }, + Data: map[string]string{ + cmDataKey: cmDataVal1, + }, + } + // Prepare a malformed config map. + badConfigMap := configMap.DeepCopy() + badConfigMap.TypeMeta = metav1.TypeMeta{ + APIVersion: "dummy/v10", + Kind: "Fake", + } + badCMBytes, err := json.Marshal(badConfigMap) + Expect(err).To(BeNil(), "Failed to marshal configMap %s", badConfigMap.Name) + resourceEnvelope.Data["cm1.yaml"] = runtime.RawExtension{Raw: badCMBytes} + + // Prepare a regular config map. + wrappedCM2 := configMap.DeepCopy() + wrappedCM2.Name = wrappedCMName2 + wrappedCM2.Data[cmDataKey] = cmDataVal2 + wrappedCM2Bytes, err := json.Marshal(wrappedCM2) + Expect(err).To(BeNil(), "Failed to marshal configMap %s", wrappedCM2.Name) + resourceEnvelope.Data["cm2.yaml"] = runtime.RawExtension{Raw: wrappedCM2Bytes} + + Expect(hubClient.Create(ctx, resourceEnvelope)).To(Succeed(), "Failed to create resource envelope %s", resourceEnvelope.Name) + + // Create a ResourcePlacement. + rp := &placementv1beta1.ResourcePlacement{ + ObjectMeta: metav1.ObjectMeta{ + Name: rpName, + Namespace: workNamespaceName, + // Add a custom finalizer; this would allow us to better observe + // the behavior of the controllers. 
+ Finalizers: []string{customDeletionBlockerFinalizer}, + }, + Spec: placementv1beta1.PlacementSpec{ + ResourceSelectors: []placementv1beta1.ResourceSelectorTerm{ + { + Group: placementv1beta1.GroupVersion.Group, + Kind: placementv1beta1.ResourceEnvelopeKind, + Version: placementv1beta1.GroupVersion.Version, + Name: envelopeName, + }, + }, + Policy: &placementv1beta1.PlacementPolicy{ + PlacementType: placementv1beta1.PickFixedPlacementType, + ClusterNames: []string{ + memberCluster1EastProdName, + }, + }, + Strategy: placementv1beta1.RolloutStrategy{ + Type: placementv1beta1.RollingUpdateRolloutStrategyType, + RollingUpdate: &placementv1beta1.RollingUpdateConfig{ + UnavailablePeriodSeconds: ptr.To(2), + }, + }, + }, + } + Expect(hubClient.Create(ctx, rp)).To(Succeed(), "Failed to create ResourcePlacement") + }) + + It("should update ResourcePlacement status as expected", func() { + Eventually(func() error { + rp := &placementv1beta1.ResourcePlacement{} + if err := hubClient.Get(ctx, types.NamespacedName{Name: rpName, Namespace: workNamespaceName}, rp); err != nil { + return err + } + + wantStatus := placementv1beta1.PlacementStatus{ + Conditions: rpAppliedFailedConditions(rp.Generation), + PerClusterPlacementStatuses: []placementv1beta1.PerClusterPlacementStatus{ + { + ClusterName: memberCluster1EastProdName, + ObservedResourceIndex: "0", + FailedPlacements: []placementv1beta1.FailedResourcePlacement{ + { + ResourceIdentifier: placementv1beta1.ResourceIdentifier{ + Group: "dummy", + Version: "v10", + Kind: "Fake", + Namespace: workNamespaceName, + Name: wrappedCMName1, + Envelope: &placementv1beta1.EnvelopeIdentifier{ + Name: envelopeName, + Namespace: workNamespaceName, + Type: placementv1beta1.ResourceEnvelopeType, + }, + }, + Condition: metav1.Condition{ + Type: placementv1beta1.WorkConditionTypeApplied, + Status: metav1.ConditionFalse, + Reason: string(workapplier.ApplyOrReportDiffResTypeDecodingErred), + ObservedGeneration: 0, + }, + }, + }, + Conditions: perClusterApplyFailedConditions(rp.Generation), + }, + }, + SelectedResources: []placementv1beta1.ResourceIdentifier{ + { + Group: placementv1beta1.GroupVersion.Group, + Kind: placementv1beta1.ResourceEnvelopeKind, + Version: placementv1beta1.GroupVersion.Version, + Name: envelopeName, + Namespace: workNamespaceName, + }, + }, + ObservedResourceIndex: "0", + } + if diff := cmp.Diff(rp.Status, wantStatus, placementStatusCmpOptions...); diff != "" { + return fmt.Errorf("ResourcePlacement status diff (-got, +want): %s", diff) + } + return nil + }, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update ResourcePlacement status as expected") + }) + + It("should place some manifests on member clusters", func() { + Eventually(func() error { + cm := &corev1.ConfigMap{} + if err := memberCluster1EastProdClient.Get(ctx, types.NamespacedName{Name: wrappedCMName2, Namespace: workNamespaceName}, cm); err != nil { + return err + } + + wantCM := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: wrappedCMName2, + Namespace: workNamespaceName, + }, + Data: map[string]string{ + cmDataKey: cmDataVal2, + }, + } + // Rebuild the configMap for ease of comparison. 
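+				// Only the name, namespace, and data fields are compared; cluster-managed metadata
+				// (UID, resourceVersion, managed fields, etc.) is ignored.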
+				rebuiltGotCM := &corev1.ConfigMap{
+					ObjectMeta: metav1.ObjectMeta{
+						Name:      cm.Name,
+						Namespace: cm.Namespace,
+					},
+					Data: cm.Data,
+				}
+
+				if diff := cmp.Diff(rebuiltGotCM, wantCM); diff != "" {
+					return fmt.Errorf("configMap diff (-got, +want): %s", diff)
+				}
+				return nil
+			}, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to apply the configMap object #2")
+		})
+
+		AfterAll(func() {
+			// Remove the ResourcePlacement from the hub cluster.
+			ensureRPAndRelatedResourcesDeleted(types.NamespacedName{Name: rpName, Namespace: workNamespaceName}, []*framework.Cluster{memberCluster1EastProd})
+		})
+	})
+
+	Context("pre-processing failure in report diff mode (decoding errors)", Ordered, func() {
+		BeforeAll(func() {
+			// Create an envelope resource to wrap the configMaps.
+			resourceEnvelope := &placementv1beta1.ResourceEnvelope{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      envelopeName,
+					Namespace: workNamespaceName,
+				},
+				Data: map[string]runtime.RawExtension{},
+			}
+
+			// Create a malformed config map as a wrapped resource.
+			badConfigMap := &corev1.ConfigMap{
+				TypeMeta: metav1.TypeMeta{
+					APIVersion: "malformed/v10",
+					Kind:       "Unknown",
+				},
+				ObjectMeta: metav1.ObjectMeta{
+					Namespace: workNamespaceName,
+					Name:      wrappedCMName1,
+				},
+				Data: map[string]string{
+					cmDataKey: cmDataVal1,
+				},
+			}
+			badCMBytes, err := json.Marshal(badConfigMap)
+			Expect(err).To(BeNil(), "Failed to marshal configMap %s", badConfigMap.Name)
+			resourceEnvelope.Data["cm1.yaml"] = runtime.RawExtension{Raw: badCMBytes}
+			Expect(hubClient.Create(ctx, resourceEnvelope)).To(Succeed(), "Failed to create resource envelope %s", resourceEnvelope.Name)
+
+			// Create a ResourcePlacement.
+			rp := &placementv1beta1.ResourcePlacement{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      rpName,
+					Namespace: workNamespaceName,
+					// Add a custom finalizer; this would allow us to better observe
+					// the behavior of the controllers.
+ Finalizers: []string{customDeletionBlockerFinalizer}, + }, + Spec: placementv1beta1.PlacementSpec{ + ResourceSelectors: []placementv1beta1.ResourceSelectorTerm{ + { + Group: placementv1beta1.GroupVersion.Group, + Kind: placementv1beta1.ResourceEnvelopeKind, + Version: placementv1beta1.GroupVersion.Version, + Name: envelopeName, + }, + }, + Policy: &placementv1beta1.PlacementPolicy{ + PlacementType: placementv1beta1.PickFixedPlacementType, + ClusterNames: []string{ + memberCluster1EastProdName, + }, + }, + Strategy: placementv1beta1.RolloutStrategy{ + Type: placementv1beta1.RollingUpdateRolloutStrategyType, + RollingUpdate: &placementv1beta1.RollingUpdateConfig{ + UnavailablePeriodSeconds: ptr.To(2), + }, + ApplyStrategy: &placementv1beta1.ApplyStrategy{ + Type: placementv1beta1.ApplyStrategyTypeReportDiff, + }, + }, + }, + } + Expect(hubClient.Create(ctx, rp)).To(Succeed(), "Failed to create ResourcePlacement") + }) + + It("should update ResourcePlacement status as expected", func() { + Eventually(func() error { + rp := &placementv1beta1.ResourcePlacement{} + if err := hubClient.Get(ctx, types.NamespacedName{Name: rpName, Namespace: workNamespaceName}, rp); err != nil { + return err + } + + wantStatus := placementv1beta1.PlacementStatus{ + Conditions: rpDiffReportingFailedConditions(rp.Generation, false), + PerClusterPlacementStatuses: []placementv1beta1.PerClusterPlacementStatus{ + { + ClusterName: memberCluster1EastProdName, + ObservedResourceIndex: "0", + Conditions: perClusterDiffReportingFailedConditions(rp.Generation), + }, + }, + SelectedResources: []placementv1beta1.ResourceIdentifier{ + { + Group: placementv1beta1.GroupVersion.Group, + Kind: placementv1beta1.ResourceEnvelopeKind, + Version: placementv1beta1.GroupVersion.Version, + Name: envelopeName, + Namespace: workNamespaceName, + }, + }, + ObservedResourceIndex: "0", + } + if diff := cmp.Diff(rp.Status, wantStatus, placementStatusCmpOptions...); diff != "" { + return fmt.Errorf("ResourcePlacement status diff (-got, +want): %s", diff) + } + return nil + }, eventuallyDuration*20, eventuallyInterval).Should(Succeed(), "Failed to update ResourcePlacement status as expected") + }) + + It("should not apply any resource", func() { + Consistently(func() error { + cm := &corev1.ConfigMap{} + if err := memberCluster1EastProdClient.Get(ctx, types.NamespacedName{Name: wrappedCMName1, Namespace: workNamespaceName}, cm); !errors.IsNotFound(err) { + return fmt.Errorf("the config map exists, or an unexpected error has occurred: %w", err) + } + return nil + }, consistentlyDuration, consistentlyInterval).Should(Succeed(), "The malformed configMap has been applied unexpectedly") + }) + + AfterAll(func() { + // Remove the ResourcePlacement from the hub cluster. + ensureRPAndRelatedResourcesDeleted(types.NamespacedName{Name: rpName, Namespace: workNamespaceName}, []*framework.Cluster{memberCluster1EastProd}) + }) + }) + + Context("pre-processing failure in apply ops (duplicated)", Ordered, func() { + BeforeAll(func() { + // Use an envelope to create duplicate resource entries. + // Create an envelope resource to wrap the configMaps. + resourceEnvelope := &placementv1beta1.ResourceEnvelope{ + ObjectMeta: metav1.ObjectMeta{ + Name: envelopeName, + Namespace: workNamespaceName, + }, + Data: map[string]runtime.RawExtension{}, + } + + // Create configMaps as wrapped resources. 
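+			// The base config map below is wrapped twice under different envelope keys but with the
+			// same name, so the generated Work carries two manifests for the same object and the
+			// work applier rejects one of them as a duplicate.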
+ configMap := &corev1.ConfigMap{ + TypeMeta: metav1.TypeMeta{ + APIVersion: corev1.SchemeGroupVersion.String(), + Kind: "ConfigMap", + }, + ObjectMeta: metav1.ObjectMeta{ + Namespace: workNamespaceName, + Name: wrappedCMName1, + }, + Data: map[string]string{ + cmDataKey: cmDataVal1, + }, + } + // Prepare a regular config map and a duplicate. + wrappedCM := configMap.DeepCopy() + wrappedCM.Name = wrappedCMName1 + wrappedCM.Data[cmDataKey] = cmDataVal1 + wrappedCMBytes, err := json.Marshal(wrappedCM) + Expect(err).To(BeNil(), "Failed to marshal configMap %s", wrappedCM.Name) + resourceEnvelope.Data["cm1.yaml"] = runtime.RawExtension{Raw: wrappedCMBytes} + + // Note: due to how work generator sorts manifests, the CM below will actually be + // applied first. + duplicatedCM := configMap.DeepCopy() + duplicatedCM.Name = wrappedCMName1 + duplicatedCM.Data[cmDataKey] = cmDataVal2 + duplicatedCMBytes, err := json.Marshal(duplicatedCM) + Expect(err).To(BeNil(), "Failed to marshal configMap %s", duplicatedCM.Name) + resourceEnvelope.Data["cm2.yaml"] = runtime.RawExtension{Raw: duplicatedCMBytes} + + Expect(hubClient.Create(ctx, resourceEnvelope)).To(Succeed(), "Failed to create resource envelope %s", resourceEnvelope.Name) + + // Create a ResourcePlacement. + rp := &placementv1beta1.ResourcePlacement{ + ObjectMeta: metav1.ObjectMeta{ + Name: rpName, + Namespace: workNamespaceName, + // Add a custom finalizer; this would allow us to better observe + // the behavior of the controllers. + Finalizers: []string{customDeletionBlockerFinalizer}, + }, + Spec: placementv1beta1.PlacementSpec{ + ResourceSelectors: []placementv1beta1.ResourceSelectorTerm{ + { + Group: placementv1beta1.GroupVersion.Group, + Kind: placementv1beta1.ResourceEnvelopeKind, + Version: placementv1beta1.GroupVersion.Version, + Name: envelopeName, + }, + }, + Policy: &placementv1beta1.PlacementPolicy{ + PlacementType: placementv1beta1.PickFixedPlacementType, + ClusterNames: []string{ + memberCluster1EastProdName, + }, + }, + Strategy: placementv1beta1.RolloutStrategy{ + Type: placementv1beta1.RollingUpdateRolloutStrategyType, + RollingUpdate: &placementv1beta1.RollingUpdateConfig{ + UnavailablePeriodSeconds: ptr.To(2), + }, + }, + }, + } + Expect(hubClient.Create(ctx, rp)).To(Succeed(), "Failed to create ResourcePlacement") + }) + + It("should update ResourcePlacement status as expected", func() { + rpStatusUpdatedActual := func() error { + rp := &placementv1beta1.ResourcePlacement{} + if err := hubClient.Get(ctx, types.NamespacedName{Name: rpName, Namespace: workNamespaceName}, rp); err != nil { + return err + } + + wantStatus := placementv1beta1.PlacementStatus{ + Conditions: rpAppliedFailedConditions(rp.Generation), + PerClusterPlacementStatuses: []placementv1beta1.PerClusterPlacementStatus{ + { + ClusterName: memberCluster1EastProdName, + ObservedResourceIndex: "0", + FailedPlacements: []placementv1beta1.FailedResourcePlacement{ + { + ResourceIdentifier: placementv1beta1.ResourceIdentifier{ + Group: "", + Version: "v1", + Kind: "ConfigMap", + Namespace: workNamespaceName, + Name: wrappedCMName1, + Envelope: &placementv1beta1.EnvelopeIdentifier{ + Name: envelopeName, + Namespace: workNamespaceName, + Type: placementv1beta1.ResourceEnvelopeType, + }, + }, + Condition: metav1.Condition{ + Type: placementv1beta1.WorkConditionTypeApplied, + Status: metav1.ConditionFalse, + Reason: string(workapplier.ApplyOrReportDiffResTypeDuplicated), + ObservedGeneration: 0, + }, + }, + }, + Conditions: perClusterApplyFailedConditions(rp.Generation), + 
}, + }, + SelectedResources: []placementv1beta1.ResourceIdentifier{ + { + Group: placementv1beta1.GroupVersion.Group, + Kind: placementv1beta1.ResourceEnvelopeKind, + Version: placementv1beta1.GroupVersion.Version, + Name: envelopeName, + Namespace: workNamespaceName, + }, + }, + ObservedResourceIndex: "0", + } + if diff := cmp.Diff(rp.Status, wantStatus, placementStatusCmpOptions...); diff != "" { + return fmt.Errorf("ResourcePlacement status diff (-got, +want): %s", diff) + } + return nil + } + Eventually(rpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update ResourcePlacement status as expected") + Consistently(rpStatusUpdatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "ResourcePlacement status has changed unexpectedly") + }) + + It("should place the other manifests on member clusters", func() { + Eventually(func() error { + cm := &corev1.ConfigMap{} + if err := memberCluster1EastProdClient.Get(ctx, types.NamespacedName{Name: wrappedCMName1, Namespace: workNamespaceName}, cm); err != nil { + return err + } + + wantCM := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: wrappedCMName1, + Namespace: workNamespaceName, + }, + Data: map[string]string{ + cmDataKey: cmDataVal2, + }, + } + // Rebuild the configMap for ease of comparison. + rebuiltGotCM := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: cm.Name, + Namespace: cm.Namespace, + }, + Data: cm.Data, + } + + if diff := cmp.Diff(rebuiltGotCM, wantCM); diff != "" { + return fmt.Errorf("configMap diff (-got, +want): %s", diff) + } + return nil + }, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to apply the configMap object #1") + }) + + AfterAll(func() { + // Remove the ResourcePlacement from the hub cluster. + ensureRPAndRelatedResourcesDeleted(types.NamespacedName{Name: rpName, Namespace: workNamespaceName}, []*framework.Cluster{memberCluster1EastProd}) + }) + }) +})