
Commit d22c463

Author: huyuanfeng
Parent: 670bfcf

[FLINK-36527][autoscaler] Refactor JobVertexScaler to implement the parallelism adjustment logic in a separate component

File tree: 2 files changed, +121 −62 lines


flink-autoscaler/src/main/java/org/apache/flink/autoscaler/JobVertexScaler.java
12 additions & 62 deletions
@@ -53,7 +53,6 @@
 import static org.apache.flink.autoscaler.metrics.ScalingMetric.NUM_SOURCE_PARTITIONS;
 import static org.apache.flink.autoscaler.metrics.ScalingMetric.PARALLELISM;
 import static org.apache.flink.autoscaler.metrics.ScalingMetric.TRUE_PROCESSING_RATE;
-import static org.apache.flink.autoscaler.topology.ShipStrategy.HASH;
 import static org.apache.flink.util.Preconditions.checkArgument;
 
 /** Component responsible for computing vertex parallelism based on the scaling metrics. */
@@ -355,12 +354,9 @@ private boolean detectIneffectiveScaleUp(
      * But we limit newParallelism between parallelismLowerLimit and min(parallelismUpperLimit,
      * maxParallelism).
      *
-     * <p>Also, in order to ensure the data is evenly spread across subtasks, we try to adjust the
-     * parallelism for source and keyed vertex such that it divides the maxParallelism without a
-     * remainder.
-     *
-     * <p>This method also attempts to adjust the parallelism to ensure it aligns well with the
-     * number of source partitions if a vertex has a known source partition count.
+     * <p>Also, if we know the number of partitions or key groups corresponding to the current
+     * vertex, the parallelism will be adjusted accordingly. For the specific logic, please
+     * refer to {@link ParallelismAdjuster}.
      */
     @VisibleForTesting
     protected static <KEY, Context extends JobAutoScalerContext<KEY>> int scale(
@@ -403,62 +399,16 @@ protected static <KEY, Context extends JobAutoScalerContext<KEY>> int scale(
         // Apply min/max parallelism
         newParallelism = Math.min(Math.max(parallelismLowerLimit, newParallelism), upperBound);
 
-        var adjustByMaxParallelismOrPartitions =
-                numSourcePartitions > 0 || inputShipStrategies.contains(HASH);
-        if (!adjustByMaxParallelismOrPartitions) {
-            return newParallelism;
-        }
-
-        var numKeyGroupsOrPartitions =
-                numSourcePartitions <= 0 ? maxParallelism : numSourcePartitions;
-        var upperBoundForAlignment =
-                Math.min(
-                        // Optimize the case where newParallelism <= maxParallelism / 2
-                        newParallelism > numKeyGroupsOrPartitions / 2
-                                ? numKeyGroupsOrPartitions
-                                : numKeyGroupsOrPartitions / 2,
-                        upperBound);
-
-        // When the shuffle type of the vertex inputs contains keyBy, or the vertex is a source,
-        // we try to adjust the parallelism such that it divides
-        // numKeyGroupsOrPartitions without a remainder => data is evenly spread across subtasks
-        for (int p = newParallelism; p <= upperBoundForAlignment; p++) {
-            if (numKeyGroupsOrPartitions % p == 0) {
-                return p;
-            }
-        }
-
-        // When the parallelism after rounding up cannot evenly divide
-        // numKeyGroupsOrPartitions, try to find the smallest parallelism that can satisfy the
-        // current consumption rate.
-        int p = newParallelism;
-        for (; p > 0; p--) {
-            if (numKeyGroupsOrPartitions / p > numKeyGroupsOrPartitions / newParallelism) {
-                if (numKeyGroupsOrPartitions % p != 0) {
-                    p++;
-                }
-                break;
-            }
-        }
-
-        p = Math.max(p, parallelismLowerLimit);
-        var message =
-                String.format(
-                        SCALE_LIMITED_MESSAGE_FORMAT,
-                        vertex,
-                        newParallelism,
-                        p,
-                        numKeyGroupsOrPartitions,
-                        upperBound,
-                        parallelismLowerLimit);
-        eventHandler.handleEvent(
+        return ParallelismAdjuster.adjust(
+                vertex,
                 context,
-                AutoScalerEventHandler.Type.Warning,
-                SCALING_LIMITED,
-                message,
-                SCALING_LIMITED + vertex + (scaleFactor * currentParallelism),
-                context.getConfiguration().get(SCALING_EVENT_INTERVAL));
-        return p;
+                eventHandler,
+                maxParallelism,
+                numSourcePartitions,
+                newParallelism,
+                upperBound,
+                parallelismLowerLimit,
+                inputShipStrategies);
     }
 
     @VisibleForTesting
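
The Javadoc in the hunk above describes the only bounding that stays in scale() itself; everything else is delegated. A minimal illustrative snippet of that clamp, with all numbers made up (not part of the commit):

    // Hypothetical values, for illustration only: newParallelism is clamped
    // into [parallelismLowerLimit, min(parallelismUpperLimit, maxParallelism)]
    // before ParallelismAdjuster takes over.
    int parallelismLowerLimit = 2;
    int upperBound = Math.min(200, 128); // min(parallelismUpperLimit, maxParallelism)
    int newParallelism = Math.min(Math.max(parallelismLowerLimit, 150), upperBound);
    // newParallelism == 128; any further adjustment is ParallelismAdjuster's job.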
flink-autoscaler/src/main/java/org/apache/flink/autoscaler/ParallelismAdjuster.java
109 additions & 0 deletions (new file)

@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.autoscaler;
+
+import org.apache.flink.autoscaler.event.AutoScalerEventHandler;
+import org.apache.flink.autoscaler.topology.ShipStrategy;
+import org.apache.flink.runtime.jobgraph.JobVertexID;
+
+import java.util.Collection;
+
+import static org.apache.flink.autoscaler.JobVertexScaler.SCALE_LIMITED_MESSAGE_FORMAT;
+import static org.apache.flink.autoscaler.JobVertexScaler.SCALING_LIMITED;
+import static org.apache.flink.autoscaler.config.AutoScalerOptions.SCALING_EVENT_INTERVAL;
+import static org.apache.flink.autoscaler.topology.ShipStrategy.HASH;
+
+/**
+ * Component responsible for adjusting the parallelism of a vertex.
+ *
+ * <p>When the input {@link ShipStrategy} of a vertex is {@link ShipStrategy#HASH}, or the
+ * number of partitions of the vertex is known, we try to adjust the parallelism of the current
+ * vertex according to the number of key groups or partitions, so that data is evenly
+ * distributed among subtasks and utilization is maximized.
+ */
+public class ParallelismAdjuster {
+
+    public static <KEY, Context extends JobAutoScalerContext<KEY>> int adjust(
+            JobVertexID vertex,
+            Context context,
+            AutoScalerEventHandler<KEY, Context> eventHandler,
+            int maxParallelism,
+            int numSourcePartitions,
+            int newParallelism,
+            int upperBound,
+            int parallelismLowerLimit,
+            Collection<ShipStrategy> inputShipStrategies) {
+
+        var adjustByMaxParallelismOrPartitions =
+                numSourcePartitions > 0 || inputShipStrategies.contains(HASH);
+        if (!adjustByMaxParallelismOrPartitions) {
+            return newParallelism;
+        }
+
+        var numKeyGroupsOrPartitions =
+                numSourcePartitions <= 0 ? maxParallelism : numSourcePartitions;
+        var upperBoundForAlignment =
+                Math.min(
+                        // Optimize the case where newParallelism <= maxParallelism / 2
+                        newParallelism > numKeyGroupsOrPartitions / 2
+                                ? numKeyGroupsOrPartitions
+                                : numKeyGroupsOrPartitions / 2,
+                        upperBound);
+
+        // When the shuffle type of the vertex inputs contains keyBy, or the vertex is a source,
+        // we try to adjust the parallelism such that it divides
+        // numKeyGroupsOrPartitions without a remainder => data is evenly spread across subtasks
+        for (int p = newParallelism; p <= upperBoundForAlignment; p++) {
+            if (numKeyGroupsOrPartitions % p == 0) {
+                return p;
+            }
+        }
+
+        // When the parallelism after rounding up cannot evenly divide
+        // numKeyGroupsOrPartitions, try to find the smallest parallelism that can satisfy the
+        // current consumption rate.
+        int p = newParallelism;
+        for (; p > 0; p--) {
+            if (numKeyGroupsOrPartitions / p > numKeyGroupsOrPartitions / newParallelism) {
+                if (numKeyGroupsOrPartitions % p != 0) {
+                    p++;
+                }
+                break;
+            }
+        }
+
+        p = Math.max(p, parallelismLowerLimit);
+        var message =
+                String.format(
+                        SCALE_LIMITED_MESSAGE_FORMAT,
+                        vertex,
+                        newParallelism,
+                        p,
+                        numKeyGroupsOrPartitions,
+                        upperBound,
+                        parallelismLowerLimit);
+        eventHandler.handleEvent(
+                context,
+                AutoScalerEventHandler.Type.Warning,
+                SCALING_LIMITED,
+                message,
+                SCALING_LIMITED + vertex + newParallelism,
+                context.getConfiguration().get(SCALING_EVENT_INTERVAL));
+        return p;
+    }
+}
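
To make the new component's arithmetic concrete, here is a minimal, self-contained sketch (not part of the commit; the class and method names and all sample numbers are invented for illustration) that reproduces the divisor search and the shrink-down fallback with the bounds checks and event reporting stripped away:

    // Illustrative sketch of ParallelismAdjuster's core arithmetic.
    public class AlignmentSketch {

        static int align(int numKeyGroupsOrPartitions, int newParallelism,
                         int upperBound, int parallelismLowerLimit) {
            int upperBoundForAlignment =
                    Math.min(
                            newParallelism > numKeyGroupsOrPartitions / 2
                                    ? numKeyGroupsOrPartitions
                                    : numKeyGroupsOrPartitions / 2,
                            upperBound);

            // Prefer the first parallelism >= newParallelism that divides the
            // key-group/partition count evenly.
            for (int p = newParallelism; p <= upperBoundForAlignment; p++) {
                if (numKeyGroupsOrPartitions % p == 0) {
                    return p;
                }
            }

            // Fallback: shrink to the smallest parallelism whose per-subtask
            // load ceiling matches that of newParallelism.
            int p = newParallelism;
            for (; p > 0; p--) {
                if (numKeyGroupsOrPartitions / p > numKeyGroupsOrPartitions / newParallelism) {
                    if (numKeyGroupsOrPartitions % p != 0) {
                        p++;
                    }
                    break;
                }
            }
            return Math.max(p, parallelismLowerLimit);
        }

        public static void main(String[] args) {
            // 128 key groups, proposed parallelism 7: scan finds 8 (128 % 8 == 0).
            System.out.println(align(128, 7, 64, 1)); // prints 8
            // 7 partitions, proposed parallelism 5, upper bound 6: no divisor
            // of 7 exists in [5, 6], so the fallback settles on 4.
            System.out.println(align(7, 5, 6, 1)); // prints 4
        }
    }

In the first call the forward scan returns 8, the first value at or above the proposed parallelism that divides 128 evenly. In the second there is no divisor of 7 within [5, 6], so the fallback shrinks to 4, which keeps the same per-subtask ceiling as 5 (ceil(7/4) == ceil(7/5) == 2) while using one less slot.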
