apache
diff --git a/flink-autoscaler/src/main/java/org/apache/flink/autoscaler/DelayedScaleDown.java b/flink-autoscaler/src/main/java/org/apache/flink/autoscaler/DelayedScaleDown.java
Lines changed: 94 additions & 15 deletions
diff --git a/flink-autoscaler/src/main/java/org/apache/flink/autoscaler/JobVertexScaler.java b/flink-autoscaler/src/main/java/org/apache/flink/autoscaler/JobVertexScaler.java
Lines changed: 35 additions & 15 deletions
diff --git a/flink-autoscaler/src/main/java/org/apache/flink/autoscaler/ScalingExecutor.java b/flink-autoscaler/src/main/java/org/apache/flink/autoscaler/ScalingExecutor.java
Lines changed: 35 additions & 22 deletions
@@ -30,23 +30,95 @@
 
 import java.time.Instant;
 import java.util.HashMap;
+import java.util.LinkedList;
 import java.util.Map;
 
+import static org.apache.flink.util.Preconditions.checkState;
+
 /** All delayed scale down requests. */
 public class DelayedScaleDown {
 
+    @Data
+    public static class RecommendedParallelism {
+        @Nonnull private final Instant triggerTime;
+        private final int parallelism;
+        private final boolean outsideUtilizationBound;
+
+        @JsonCreator
+        public RecommendedParallelism(
+                @Nonnull @JsonProperty("triggerTime") Instant triggerTime,
+                @JsonProperty("parallelism") int parallelism,
+                @JsonProperty("outsideUtilizationBound") boolean outsideUtilizationBound) {
+            this.triggerTime = triggerTime;
+            this.parallelism = parallelism;
+            this.outsideUtilizationBound = outsideUtilizationBound;
+        }
+    }
+
     /** The delayed scale down info for vertex. */
     @Data
     public static class VertexDelayedScaleDownInfo {
         private final Instant firstTriggerTime;
-        private int maxRecommendedParallelism;
+
+        /**
+         * In theory, it maintains all recommended parallelisms at each time within the past
+         * `scale-down.interval` window period, so all recommended parallelisms before the window
+         * start time will be evicted.
+         *
+         * <p>Also, once the latest parallelism is greater than or equal to a past parallelism,
+         * that past parallelism can never be the max recommended parallelism again, so it is
+         * evicted too. This is the general optimization for computing the max value of a
+         * sliding window. So we only need to maintain a list with monotonically decreasing
+         * parallelism within the past window, and the first entry is the max recommended
+         * parallelism within the past `scale-down.interval` window period.
+         */
+        private final LinkedList<RecommendedParallelism> recommendedParallelisms;
+
+        public VertexDelayedScaleDownInfo(Instant firstTriggerTime) {
+            this.firstTriggerTime = firstTriggerTime;
+            this.recommendedParallelisms = new LinkedList<>();
+        }
 
         @JsonCreator
         public VertexDelayedScaleDownInfo(
                 @JsonProperty("firstTriggerTime") Instant firstTriggerTime,
-                @JsonProperty("maxRecommendedParallelism") int maxRecommendedParallelism) {
+                @JsonProperty("recommendedParallelisms")
+                        LinkedList<RecommendedParallelism> recommendedParallelisms) {
             this.firstTriggerTime = firstTriggerTime;
-            this.maxRecommendedParallelism = maxRecommendedParallelism;
+            this.recommendedParallelisms = recommendedParallelisms;
+        }
+
+        /** Record current recommended parallelism. */
+        public void recordRecommendedParallelism(
+                Instant triggerTime, int parallelism, boolean outsideUtilizationBound) {
+            // Evict all recommended parallelisms that are lower than or equal to the latest
+            // parallelism. When the past parallelism is equal to the latest parallelism,
+            // triggerTime needs to be updated, so it also needs to be evicted.
+            while (!recommendedParallelisms.isEmpty()
+                    && recommendedParallelisms.peekLast().getParallelism() <= parallelism) {
+                recommendedParallelisms.pollLast();
+            }
+
+            recommendedParallelisms.addLast(
+                    new RecommendedParallelism(triggerTime, parallelism, outsideUtilizationBound));
+        }
+
+        @JsonIgnore
+        public RecommendedParallelism getMaxRecommendedParallelism(Instant windowStartTime) {
+            // Evict all recommended parallelisms before the window start time.
+            while (!recommendedParallelisms.isEmpty()
+                    && recommendedParallelisms
+                            .peekFirst()
+                            .getTriggerTime()
+                            .isBefore(windowStartTime)) {
+                recommendedParallelisms.pollFirst();
+            }
+
+            var maxRecommendedParallelism = recommendedParallelisms.peekFirst();
+            checkState(
+                    maxRecommendedParallelism != null,
+                    "The getMaxRecommendedParallelism should be called after triggering a scale down, it may be a bug.");
+            return maxRecommendedParallelism;
         }
     }
 
@@ -60,21 +132,28 @@ public DelayedScaleDown() {
         this.delayedVertices = new HashMap<>();
     }
 
+    // TODO: Remove this overload and refactor the tests.
+    public VertexDelayedScaleDownInfo triggerScaleDown(
+            JobVertexID vertex, Instant triggerTime, int parallelism) {
+        return triggerScaleDown(vertex, triggerTime, parallelism, false);
+    }
+
     /** Trigger a scale down, and return the corresponding {@link VertexDelayedScaleDownInfo}. */
     @Nonnull
     public VertexDelayedScaleDownInfo triggerScaleDown(
-            JobVertexID vertex, Instant triggerTime, int parallelism) {
-        var vertexDelayedScaleDownInfo = delayedVertices.get(vertex);
-        if (vertexDelayedScaleDownInfo == null) {
-            // It's the first trigger
-            vertexDelayedScaleDownInfo = new VertexDelayedScaleDownInfo(triggerTime, parallelism);
-            delayedVertices.put(vertex, vertexDelayedScaleDownInfo);
-            updated = true;
-        } else if (parallelism > vertexDelayedScaleDownInfo.getMaxRecommendedParallelism()) {
-            // Not the first trigger, but the maxRecommendedParallelism needs to be updated.
-            vertexDelayedScaleDownInfo.setMaxRecommendedParallelism(parallelism);
-            updated = true;
-        }
+            JobVertexID vertex,
+            Instant triggerTime,
+            int parallelism,
+            boolean outsideUtilizationBound) {
+        // Every trigger records a recommendation with a fresh triggerTime, so the delayed scale
+        // down state always changes and must be marked as updated.
+        updated = true;
+
+        var vertexDelayedScaleDownInfo =
+                delayedVertices.computeIfAbsent(
+                        vertex, k -> new VertexDelayedScaleDownInfo(triggerTime));
+        vertexDelayedScaleDownInfo.recordRecommendedParallelism(
+                triggerTime, parallelism, outsideUtilizationBound);
 
         return vertexDelayedScaleDownInfo;
     }
 
@@ -44,6 +44,7 @@
 import java.util.SortedMap;
 
 import static org.apache.flink.autoscaler.JobVertexScaler.KeyGroupOrPartitionsAdjustMode.MAXIMIZE_UTILISATION;
+import static org.apache.flink.autoscaler.ScalingExecutor.outsideUtilizationBound;
 import static org.apache.flink.autoscaler.config.AutoScalerOptions.MAX_SCALE_DOWN_FACTOR;
 import static org.apache.flink.autoscaler.config.AutoScalerOptions.MAX_SCALE_UP_FACTOR;
 import static org.apache.flink.autoscaler.config.AutoScalerOptions.SCALE_DOWN_INTERVAL;
@@ -92,12 +93,15 @@ public JobVertexScaler(AutoScalerEventHandler<KEY, Context> autoScalerEventHandl
     @Getter
     public static class ParallelismChange {
 
-        private static final ParallelismChange NO_CHANGE = new ParallelismChange(-1);
+        private static final ParallelismChange NO_CHANGE = new ParallelismChange(-1, false);
 
         private final int newParallelism;
 
-        private ParallelismChange(int newParallelism) {
+        private final boolean outsideUtilizationBound;
+
+        private ParallelismChange(int newParallelism, boolean outsideUtilizationBound) {
             this.newParallelism = newParallelism;
+            this.outsideUtilizationBound = outsideUtilizationBound;
         }
 
         public boolean isNoChange() {
@@ -113,24 +117,29 @@ public boolean equals(Object o) {
                 return false;
             }
             ParallelismChange that = (ParallelismChange) o;
-            return newParallelism == that.newParallelism;
+            return newParallelism == that.newParallelism
+                    && outsideUtilizationBound == that.outsideUtilizationBound;
         }
 
         @Override
         public int hashCode() {
-            return Objects.hash(newParallelism);
+            return Objects.hash(newParallelism, outsideUtilizationBound);
         }
 
         @Override
         public String toString() {
             return isNoChange()
                     ? "NoParallelismChange"
-                    : "ParallelismChange{newParallelism=" + newParallelism + '}';
+                    : "ParallelismChange{newParallelism="
+                            + newParallelism
+                            + ", outsideUtilizationBound="
+                            + outsideUtilizationBound
+                            + "}";
         }
 
-        public static ParallelismChange build(int newParallelism) {
+        public static ParallelismChange build(int newParallelism, boolean outsideUtilizationBound) {
             checkArgument(newParallelism > 0, "The parallelism should be greater than 0.");
-            return new ParallelismChange(newParallelism);
+            return new ParallelismChange(newParallelism, outsideUtilizationBound);
         }
 
         public static ParallelismChange noChange() {
@@ -239,6 +248,8 @@ private ParallelismChange detectBlockScaling(
                 currentParallelism != newParallelism,
                 "The newParallelism is equal to currentParallelism, no scaling is needed. This is probably a bug.");
 
+        var outsideUtilizationBound = outsideUtilizationBound(vertex, evaluatedMetrics);
+
         var scaledUp = currentParallelism < newParallelism;
 
         if (scaledUp) {
@@ -248,7 +259,7 @@ private ParallelismChange detectBlockScaling(
 
             // If we don't have past scaling actions for this vertex, don't block scale up.
             if (history.isEmpty()) {
-                return ParallelismChange.build(newParallelism);
+                return ParallelismChange.build(newParallelism, outsideUtilizationBound);
             }
 
             var lastSummary = history.get(history.lastKey());
@@ -260,28 +271,33 @@ && detectIneffectiveScaleUp(
                 return ParallelismChange.noChange();
             }
 
-            return ParallelismChange.build(newParallelism);
+            return ParallelismChange.build(newParallelism, outsideUtilizationBound);
         } else {
-            return applyScaleDownInterval(delayedScaleDown, vertex, conf, newParallelism);
+            return applyScaleDownInterval(
+                    delayedScaleDown, vertex, conf, newParallelism, outsideUtilizationBound);
         }
     }
 
     private ParallelismChange applyScaleDownInterval(
             DelayedScaleDown delayedScaleDown,
             JobVertexID vertex,
             Configuration conf,
-            int newParallelism) {
+            int newParallelism,
+            boolean outsideUtilizationBound) {
         var scaleDownInterval = conf.get(SCALE_DOWN_INTERVAL);
         if (scaleDownInterval.toMillis() <= 0) {
             // The scale down interval is disable, so don't block scaling.
-            return ParallelismChange.build(newParallelism);
+            return ParallelismChange.build(newParallelism, outsideUtilizationBound);
         }
 
         var now = clock.instant();
-        var delayedScaleDownInfo = delayedScaleDown.triggerScaleDown(vertex, now, newParallelism);
+        var windowStartTime = now.minus(scaleDownInterval);
+        var delayedScaleDownInfo =
+                delayedScaleDown.triggerScaleDown(
+                        vertex, now, newParallelism, outsideUtilizationBound);
 
         // Never scale down within scale down interval
-        if (now.isBefore(delayedScaleDownInfo.getFirstTriggerTime().plus(scaleDownInterval))) {
+        if (windowStartTime.isBefore(delayedScaleDownInfo.getFirstTriggerTime())) {
             if (now.equals(delayedScaleDownInfo.getFirstTriggerTime())) {
                 LOG.info("The scale down of {} is delayed by {}.", vertex, scaleDownInterval);
             } else {
@@ -293,7 +309,11 @@ private ParallelismChange applyScaleDownInterval(
         } else {
             // Using the maximum parallelism within the scale down interval window instead of the
             // latest parallelism when scaling down
-            return ParallelismChange.build(delayedScaleDownInfo.getMaxRecommendedParallelism());
+            var maxRecommendedParallelism =
+                    delayedScaleDownInfo.getMaxRecommendedParallelism(windowStartTime);
+            return ParallelismChange.build(
+                    maxRecommendedParallelism.getParallelism(),
+                    maxRecommendedParallelism.isOutsideUtilizationBound());
         }
     }
 
 
@@ -49,6 +49,7 @@
 import java.util.Map;
 import java.util.Set;
 import java.util.SortedMap;
+import java.util.concurrent.atomic.AtomicBoolean;
 
 import static org.apache.flink.autoscaler.config.AutoScalerOptions.EXCLUDED_PERIODS;
 import static org.apache.flink.autoscaler.config.AutoScalerOptions.SCALING_ENABLED;
@@ -178,6 +179,7 @@ private void updateRecommendedParallelism(
                                                 scalingSummary.getNewParallelism())));
     }
 
+    // TODO: Decide how to support the existing tests of this method.
     @VisibleForTesting
     static boolean allChangedVerticesWithinUtilizationTarget(
             Map<JobVertexID, Map<ScalingMetric, EvaluatedScalingMetric>> evaluatedMetrics,
@@ -190,32 +192,39 @@ static boolean allChangedVerticesWithinUtilizationTarget(
         for (JobVertexID vertex : changedVertices) {
             var metrics = evaluatedMetrics.get(vertex);
 
-            double trueProcessingRate = metrics.get(TRUE_PROCESSING_RATE).getAverage();
-            double scaleUpRateThreshold = metrics.get(SCALE_UP_RATE_THRESHOLD).getCurrent();
-            double scaleDownRateThreshold = metrics.get(SCALE_DOWN_RATE_THRESHOLD).getCurrent();
-
-            if (trueProcessingRate < scaleUpRateThreshold
-                    || trueProcessingRate > scaleDownRateThreshold) {
-                LOG.debug(
-                        "Vertex {} processing rate {} is outside ({}, {})",
-                        vertex,
-                        trueProcessingRate,
-                        scaleUpRateThreshold,
-                        scaleDownRateThreshold);
+            if (outsideUtilizationBound(vertex, metrics)) {
                 return false;
-            } else {
-                LOG.debug(
-                        "Vertex {} processing rate {} is within target ({}, {})",
-                        vertex,
-                        trueProcessingRate,
-                        scaleUpRateThreshold,
-                        scaleDownRateThreshold);
             }
         }
-        LOG.info("All vertex processing rates are within target.");
         return true;
     }
 
+    public static boolean outsideUtilizationBound(
+            JobVertexID vertex, Map<ScalingMetric, EvaluatedScalingMetric> metrics) {
+        double trueProcessingRate = metrics.get(TRUE_PROCESSING_RATE).getAverage();
+        double scaleUpRateThreshold = metrics.get(SCALE_UP_RATE_THRESHOLD).getCurrent();
+        double scaleDownRateThreshold = metrics.get(SCALE_DOWN_RATE_THRESHOLD).getCurrent();
+
+        if (trueProcessingRate < scaleUpRateThreshold
+                || trueProcessingRate > scaleDownRateThreshold) {
+            LOG.debug(
+                    "Vertex {} processing rate {} is outside ({}, {})",
+                    vertex,
+                    trueProcessingRate,
+                    scaleUpRateThreshold,
+                    scaleDownRateThreshold);
+            return true;
+        } else {
+            LOG.debug(
+                    "Vertex {} processing rate {} is within target ({}, {})",
+                    vertex,
+                    trueProcessingRate,
+                    scaleUpRateThreshold,
+                    scaleDownRateThreshold);
+        }
+        return false;
+    }
+
     @VisibleForTesting
     Map<JobVertexID, ScalingSummary> computeScalingSummary(
             Context context,
@@ -235,6 +244,7 @@ Map<JobVertexID, ScalingSummary> computeScalingSummary(
 
         var excludeVertexIdList =
                 context.getConfiguration().get(AutoScalerOptions.VERTEX_EXCLUDE_IDS);
+        AtomicBoolean anyVertexOutsideBound = new AtomicBoolean(false);
         evaluatedMetrics
                 .getVertexMetrics()
                 .forEach(
@@ -260,6 +270,9 @@ Map<JobVertexID, ScalingSummary> computeScalingSummary(
                                 if (parallelismChange.isNoChange()) {
                                     return;
                                 }
+                                if (parallelismChange.isOutsideUtilizationBound()) {
+                                    anyVertexOutsideBound.set(true);
+                                }
                                 out.put(
                                         v,
                                         new ScalingSummary(
@@ -270,8 +283,8 @@ Map<JobVertexID, ScalingSummary> computeScalingSummary(
                         });
 
         // If the Utilization of all tasks is within range, we can skip scaling.
-        if (allChangedVerticesWithinUtilizationTarget(
-                evaluatedMetrics.getVertexMetrics(), out.keySet())) {
+        if (!anyVertexOutsideBound.get()) {
+            LOG.info("All vertex processing rates are within target.");
             return Map.of();
         }