
Commit 0b75a70

Merge pull request #6474 from zhzhuang-zju/binding
Fix workload scale up bypass FederatedResourceQuota check issue
2 parents b20715e + 7b9bf2f commit 0b75a70

File tree

4 files changed: +118 additions, -20 deletions


pkg/controllers/binding/common.go

Lines changed: 28 additions & 18 deletions

@@ -48,26 +48,30 @@ func ensureWork(
 ) error {
 	bindingSpec := getBindingSpec(binding, scope)
 	targetClusters := mergeTargetClusters(bindingSpec.Clusters, bindingSpec.RequiredBy)
+	var err error
+	var errs []error

 	var jobCompletions []workv1alpha2.TargetCluster
-	var err error
-	if workload.GetKind() == util.JobKind {
+	if workload.GetKind() == util.JobKind && needReviseJobCompletions(bindingSpec.Replicas, bindingSpec.Placement) {
 		jobCompletions, err = divideReplicasByJobCompletions(workload, targetClusters)
 		if err != nil {
 			return err
 		}
 	}

-	var errs []error
 	for i := range targetClusters {
 		targetCluster := targetClusters[i]
 		clonedWorkload := workload.DeepCopy()

 		workNamespace := names.GenerateExecutionSpaceName(targetCluster.Name)

-		// If and only if the resource template has replicas, and the replica scheduling policy is divided,
-		// we need to revise replicas.
-		if needReviseReplicas(bindingSpec.Replicas, bindingSpec.Placement) {
+		// When syncing workloads to member clusters, the controller MUST strictly adhere to the scheduling results
+		// specified in bindingSpec.Clusters for replica allocation, rather than using the replicas declared in the
+		// workload's resource template.
+		// This rule applies regardless of whether the workload distribution mode is "Divided" or "Duplicated".
+		// Failing to do so could allow workloads to bypass the quota checks performed by the scheduler
+		// (especially during scale-up operations) or skip queue validation when scheduling is suspended.
+		if needReviseReplicas(bindingSpec.Replicas) {
 			if resourceInterpreter.HookEnabled(clonedWorkload.GroupVersionKind(), configv1alpha1.InterpreterOperationReviseReplica) {
 				clonedWorkload, err = resourceInterpreter.ReviseReplica(clonedWorkload, int64(targetCluster.Replicas))
 				if err != nil {
@@ -77,18 +81,22 @@ func ensureWork(
 					continue
 				}
 			}
+		}

+		// jobSpec.Completions specifies the desired number of successfully finished pods the job should be run with.
+		// When the replica scheduling policy is set to "divided", jobSpec.Completions should also be divided accordingly.
+		// The weight assigned to each cluster roughly equals that cluster's jobSpec.Parallelism value. This approach helps
+		// balance the execution time of the job across member clusters.
+		if len(jobCompletions) > 0 {
 			// Set allocated completions for Job only when the '.spec.completions' field not omitted from resource template.
 			// For jobs running with a 'work queue' usually leaves '.spec.completions' unset, in that case we skip
 			// setting this field as well.
 			// Refer to: https://kubernetes.io/docs/concepts/workloads/controllers/job/#parallel-jobs.
-			if len(jobCompletions) > 0 {
-				if err = helper.ApplyReplica(clonedWorkload, int64(jobCompletions[i].Replicas), util.CompletionsField); err != nil {
-					klog.Errorf("Failed to apply Completions for %s/%s/%s in cluster %s, err is: %v",
-						clonedWorkload.GetKind(), clonedWorkload.GetNamespace(), clonedWorkload.GetName(), targetCluster.Name, err)
-					errs = append(errs, err)
-					continue
-				}
+			if err = helper.ApplyReplica(clonedWorkload, int64(jobCompletions[i].Replicas), util.CompletionsField); err != nil {
+				klog.Errorf("Failed to apply Completions for %s/%s/%s in cluster %s, err is: %v",
+					clonedWorkload.GetKind(), clonedWorkload.GetNamespace(), clonedWorkload.GetName(), targetCluster.Name, err)
+				errs = append(errs, err)
+				continue
+			}
 		}
@@ -137,10 +145,8 @@ func ensureWork(
 			continue
 		}
 	}
-	if len(errs) > 0 {
-		return errors.NewAggregate(errs)
-	}
-	return nil
+
+	return errors.NewAggregate(errs)
 }

 func getBindingSpec(binding metav1.Object, scope apiextensionsv1.ResourceScope) workv1alpha2.ResourceBindingSpec {
@@ -312,7 +318,11 @@ func divideReplicasByJobCompletions(workload *unstructured.Unstructured, cluster
 	return targetClusters, nil
 }

-func needReviseReplicas(replicas int32, placement *policyv1alpha1.Placement) bool {
+func needReviseReplicas(replicas int32) bool {
+	return replicas > 0
+}
+
+func needReviseJobCompletions(replicas int32, placement *policyv1alpha1.Placement) bool {
 	return replicas > 0 && placement != nil && placement.ReplicaSchedulingType() == policyv1alpha1.ReplicaSchedulingTypeDivided
 }

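The net effect of the hunks above: the old needReviseReplicas predicate is split in two, so replica revision now depends only on whether the resource template declares replicas, while Job .spec.completions is still divided only under the "Divided" replica scheduling policy. Below is a minimal, self-contained sketch of that decision split; the stand-in placement type is illustrative only and is not the Karmada policyv1alpha1 API.

package main

import "fmt"

// replicaSchedulingType stands in for policyv1alpha1.ReplicaSchedulingType.
type replicaSchedulingType string

const divided replicaSchedulingType = "Divided"

// placement stands in for *policyv1alpha1.Placement; only the field needed here is modeled.
type placement struct {
	replicaSchedulingType replicaSchedulingType
}

// needReviseReplicas mirrors the new signature: the placement argument is gone, so the
// replicas chosen by the scheduler (bindingSpec.Clusters) are applied to the cloned
// workload in both "Divided" and "Duplicated" modes.
func needReviseReplicas(replicas int32) bool {
	return replicas > 0
}

// needReviseJobCompletions keeps the old condition: .spec.completions is divided only
// when the template declares replicas and the replica scheduling policy is "Divided".
func needReviseJobCompletions(replicas int32, p *placement) bool {
	return replicas > 0 && p != nil && p.replicaSchedulingType == divided
}

func main() {
	duplicated := &placement{replicaSchedulingType: "Duplicated"}
	fmt.Println(needReviseReplicas(2))                   // true: replicas are revised even for Duplicated workloads
	fmt.Println(needReviseJobCompletions(2, duplicated)) // false: completions stay untouched for Duplicated
	fmt.Println(needReviseJobCompletions(2, &placement{replicaSchedulingType: divided})) // true
}

Because the "Duplicated" path now also goes through ReviseReplica with the scheduler-approved targetCluster.Replicas, a scale-up rejected by the FederatedResourceQuota check can no longer reach member clusters through the resource template's replica count.
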
pkg/controllers/binding/common_test.go

Lines changed: 30 additions & 2 deletions

@@ -382,6 +382,34 @@ func Test_shouldSuspendDispatching(t *testing.T) {
 }

 func Test_needReviseReplicas(t *testing.T) {
+	tests := []struct {
+		name      string
+		replicas  int32
+		placement *policyv1alpha1.Placement
+		want      bool
+	}{
+		{
+			name:     "replicas is zero",
+			replicas: 0,
+			want:     false,
+		},
+		{
+			name:     "replicas is greater than zero",
+			replicas: 1,
+			want:     true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := needReviseReplicas(tt.replicas); got != tt.want {
+				t.Errorf("needReviseReplicas() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
+
+func Test_needReviseJobCompletions(t *testing.T) {
 	tests := []struct {
 		name      string
 		replicas  int32
@@ -428,8 +456,8 @@ func Test_needReviseReplicas(t *testing.T) {

 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			if got := needReviseReplicas(tt.replicas, tt.placement); got != tt.want {
-				t.Errorf("needReviseReplicas() = %v, want %v", got, tt.want)
+			if got := needReviseJobCompletions(tt.replicas, tt.placement); got != tt.want {
+				t.Errorf("needReviseJobCompletions() = %v, want %v", got, tt.want)
 			}
 		})
 	}

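A hypothetical extra case (not part of this commit, and relying on the test file's existing imports) that pins down the "Divided" branch of the renamed Test_needReviseJobCompletions explicitly; the ReplicaScheduling field names are assumed from Karmada's public policy/v1alpha1 API.

// Hypothetical additional coverage for the "Divided" branch (field names assumed
// from Karmada's policy/v1alpha1 API; not part of this commit).
func Test_needReviseJobCompletions_dividedPlacement(t *testing.T) {
	placement := &policyv1alpha1.Placement{
		ReplicaScheduling: &policyv1alpha1.ReplicaSchedulingStrategy{
			ReplicaSchedulingType: policyv1alpha1.ReplicaSchedulingTypeDivided,
		},
	}
	if got := needReviseJobCompletions(2, placement); !got {
		t.Errorf("needReviseJobCompletions() = %v, want true", got)
	}
}
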
test/e2e/suites/base/federatedresourcequota_test.go

Lines changed: 55 additions & 0 deletions
@@ -28,6 +28,7 @@ import (
 	appsv1 "k8s.io/api/apps/v1"
 	corev1 "k8s.io/api/core/v1"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	"k8s.io/apimachinery/pkg/api/meta"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
@@ -275,6 +276,9 @@ var _ = ginkgo.Describe("FederatedResourceQuota enforcement testing", func() {
 			deployNamespace = fmt.Sprintf("karmadatest-%s", rand.String(RandomStrLength))
 			err := setupTestNamespace(deployNamespace, kubeClient)
 			gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
+			ginkgo.DeferCleanup(func() {
+				framework.RemoveNamespace(kubeClient, deployNamespace)
+			})
 			frqNamespace = deployNamespace
 			frqName = federatedResourceQuotaPrefix + rand.String(RandomStrLength)
 			clusterNames = framework.ClusterNames()[:1]
@@ -417,11 +421,62 @@ var _ = ginkgo.Describe("FederatedResourceQuota enforcement testing", func() {
 				framework.WaitEventFitWith(kubeClient, newDeploymentNamespace, newRB, func(event corev1.Event) bool {
 					return event.Reason == events.EventReasonScheduleBindingFailed && strings.Contains(event.Message, admissionWebhookDenyMsgPrefix)
 				})
+				framework.WaitEventFitWith(kubeClient, newDeploymentNamespace, newDeploymentName, func(event corev1.Event) bool {
+					return event.Reason == events.EventReasonScheduleBindingFailed && strings.Contains(event.Message, admissionWebhookDenyMsgPrefix)
+				})
+
+				gomega.Eventually(func() bool {
+					rb, err := karmadaClient.WorkV1alpha2().ResourceBindings(newDeploymentNamespace).Get(context.TODO(), newRB, metav1.GetOptions{})
+					if err != nil {
+						return false
+					}
+					return rb != nil && meta.IsStatusConditionPresentAndEqual(rb.Status.Conditions, workv1alpha2.Scheduled, metav1.ConditionFalse)
+				}, pollTimeout, pollInterval).Should(gomega.Equal(true))
 				framework.WaitResourceBindingFitWith(karmadaClient, newDeploymentNamespace, newRB, func(resourceBinding *workv1alpha2.ResourceBinding) bool {
 					return resourceBinding.Spec.Clusters == nil
 				})
 				framework.WaitDeploymentDisappearOnClusters(clusterNames, newDeploymentNamespace, newDeploymentName)
 			})
 		})
+
+		ginkgo.It("When the quota is insufficient, scaling up will be blocked.", func() {
+			ginkgo.By("update the replicas of the deployment", func() {
+				mutateFunc := func(deploy *appsv1.Deployment) {
+					deploy.Spec.Replicas = ptr.To[int32](2)
+				}
+
+				framework.UpdateDeploymentWith(kubeClient, deploymentNamespace, deploymentName, mutateFunc)
+			})
+
+			ginkgo.By("the quota has been exceed, so the update request for the spec.clusters in the resourcebinding will be intercepted.", func() {
+				framework.WaitEventFitWith(kubeClient, rbNamespace, rbName, func(event corev1.Event) bool {
+					return event.Reason == events.EventReasonScheduleBindingFailed && strings.Contains(event.Message, admissionWebhookDenyMsgPrefix)
+				})
+				framework.WaitEventFitWith(kubeClient, deploymentNamespace, deploymentName, func(event corev1.Event) bool {
+					return event.Reason == events.EventReasonScheduleBindingFailed && strings.Contains(event.Message, admissionWebhookDenyMsgPrefix)
+				})
+
+				gomega.Eventually(func() bool {
+					rb, err := karmadaClient.WorkV1alpha2().ResourceBindings(rbNamespace).Get(context.TODO(), rbName, metav1.GetOptions{})
+					if err != nil {
+						return false
+					}
+					return rb != nil && meta.IsStatusConditionPresentAndEqual(rb.Status.Conditions, workv1alpha2.Scheduled, metav1.ConditionFalse)
+				}, pollTimeout, pollInterval).Should(gomega.Equal(true))
+			})
+
+			ginkgo.By("The spec.clusters of resourcebinding was not updated.", func() {
+				rb, err := karmadaClient.WorkV1alpha2().ResourceBindings(rbNamespace).Get(context.TODO(), rbName, metav1.GetOptions{})
+				gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
+				for i := range rb.Spec.Clusters {
+					gomega.Expect(rb.Spec.Clusters[i].Replicas).Should(gomega.Equal(int32(1)))
+				}
+
+				time.Sleep(waitTimeout)
+				framework.WaitDeploymentPresentOnClustersFitWith(clusterNames, deploymentNamespace, deploymentName, func(deployment *appsv1.Deployment) bool {
+					return *deployment.Spec.Replicas == 1
+				})
+			})
+		})
 	})
 })

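Both new assertions poll the ResourceBinding with the same gomega.Eventually pattern until it reports Scheduled=False. A small helper in that spirit is sketched below; the helper name, package placement, and clientset import path are assumptions, while the condition check itself mirrors the diff.

package framework // hypothetical placement alongside the e2e helpers

import (
	"context"
	"time"

	"github.com/onsi/gomega"
	"k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	workv1alpha2 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2"
	karmada "github.com/karmada-io/karmada/pkg/generated/clientset/versioned"
)

// WaitResourceBindingUnscheduled polls until the named ResourceBinding carries a
// Scheduled condition with status False, i.e. the scheduler (or the quota webhook
// behind it) refused to place the workload.
func WaitResourceBindingUnscheduled(client karmada.Interface, namespace, name string, timeout, interval time.Duration) {
	gomega.Eventually(func() bool {
		rb, err := client.WorkV1alpha2().ResourceBindings(namespace).Get(context.TODO(), name, metav1.GetOptions{})
		if err != nil {
			return false
		}
		return meta.IsStatusConditionPresentAndEqual(rb.Status.Conditions, workv1alpha2.Scheduled, metav1.ConditionFalse)
	}, timeout, interval).Should(gomega.BeTrue())
}

The final ginkgo.By then checks the other side of the contract: spec.clusters keeps one replica per cluster and the member-cluster Deployments never scale to 2 replicas, which is exactly the bypass this commit closes.
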
test/e2e/suites/base/scheduling_test.go

Lines changed: 5 additions & 0 deletions
@@ -262,6 +262,11 @@ var _ = ginkgo.Describe("propagation with label and group constraints testing",
 			jobNamespace = testNamespace
 			jobName = policyName
 			job = helper.NewJob(jobNamespace, jobName)
+			// For fixed completion count Jobs, the actual number of pods running in parallel will not exceed the number of remaining completions.
+			// Higher values of .spec.parallelism are effectively ignored.
+			// Since .spec.parallelism will be updated to updateParallelism in the subsequent testing, .spec.completions is set to updateParallelism here to make the update of .spec.parallelism take effect.
+			// More info: https://kubernetes.io/docs/concepts/workloads/controllers/job/
+			job.Spec.Completions = ptr.To[int32](updateParallelism)
 			maxGroups = rand.Intn(2) + 1
 			minGroups = maxGroups

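The added comment restates the Kubernetes Job rule that effective parallelism is capped by the remaining completions, which is why the test raises .spec.completions up front so that the later .spec.parallelism update becomes observable. A tiny standalone illustration follows (not part of the commit; the concrete numbers are arbitrary, and updateParallelism is the test's own constant).

package main

import (
	"fmt"

	batchv1 "k8s.io/api/batch/v1"
	"k8s.io/utils/ptr"
)

func main() {
	// Mirrors job.Spec.Completions = ptr.To[int32](updateParallelism) from the test,
	// with illustrative numbers: completions=2, requested parallelism=4.
	job := batchv1.Job{
		Spec: batchv1.JobSpec{
			Completions: ptr.To[int32](2),
			Parallelism: ptr.To[int32](4),
		},
	}

	// The Job controller never runs more pods in parallel than the remaining
	// completions, so the effective parallelism is min(parallelism, completions).
	effective := min(*job.Spec.Parallelism, *job.Spec.Completions)
	fmt.Printf("requested parallelism=%d, effective parallelism=%d\n",
		*job.Spec.Parallelism, effective)
}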