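1. Updated the scaling coefficient computation to use plain (unweighted) least squares, removing the recency/parallelism weighting. 2. Added a threshold check on the computed scaling coefficient before returning it; values below the threshold fall back to linear scaling (1.0). 3. Refactored tests to cover points [1] and [2].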

pchoudhury22 · pchoudhury22 · commit ee9a953e869f · 2025-04-30T14:47:02.000+05:30
diff --git a/flink-autoscaler/src/main/java/org/apache/flink/autoscaler/JobVertexScaler.java b/flink-autoscaler/src/main/java/org/apache/flink/autoscaler/JobVertexScaler.java
@@ -185,9 +185,9 @@ public ParallelismChange computeScaleTargetParallelism(
         LOG.debug("Target processing capacity for {} is {}", vertex, targetCapacity);
         double scaleFactor = targetCapacity / averageTrueProcessingRate;
         if (conf.get(OBSERVED_SCALABILITY_ENABLED)) {
+
             double scalingCoefficient =
-                    JobVertexScaler.calculateObservedScalingCoefficient(
-                            history, conf.get(OBSERVED_SCALABILITY_MIN_OBSERVATIONS));
+                    JobVertexScaler.calculateObservedScalingCoefficient(history, conf);
             scaleFactor = scaleFactor / scalingCoefficient;
         }
         double minScaleFactor = 1 - conf.get(MAX_SCALE_DOWN_FACTOR);
@@ -251,22 +251,19 @@ public ParallelismChange computeScaleTargetParallelism(
     /**
      * Calculates the scaling coefficient based on historical scaling data.
      *
-     * <p>The scaling coefficient is computed using a weighted least squares approach, where more
-     * recent data points and those with higher parallelism are given higher weights. If there are
-     * not enough observations, or if the computed coefficient is invalid, a default value of {@code
+     * <p>The scaling coefficient is computed using least squares. With too few observations, or
+     * an invalid or below-threshold computed coefficient, a default value of {@code
      * 1.0} is returned, assuming linear scaling.
      *
      * @param history A {@code SortedMap} of {@code Instant} timestamps to {@code ScalingSummary}
-     * @param minObservations The minimum number of observations required to compute the scaling
-     *     coefficient. If the number of historical entries is less than this threshold, a default
-     *     coefficient of {@code 1.0} is returned.
+     * @param conf Configuration providing the minimum observation count and threshold settings.
      * @return The computed scaling coefficient.
      */
     @VisibleForTesting
     protected static double calculateObservedScalingCoefficient(
-            SortedMap<Instant, ScalingSummary> history, int minObservations) {
+            SortedMap<Instant, ScalingSummary> history, Configuration conf) {
         /*
-         * The scaling coefficient is computed using a **weighted least squares** approach
+         * The scaling coefficient is computed using the least squares approach
          * to fit a linear model:
          *
          *      R_i = β * P_i * α
@@ -277,18 +274,21 @@ protected static double calculateObservedScalingCoefficient(
          * - β   = baseline processing rate
          * - α   = scaling coefficient to optimize
          *
-         * The optimization minimizes the **weighted sum of squared errors**:
+         * The optimization minimizes the **sum of squared errors**:
          *
-         *      Loss = ∑ w_i * (R_i - β * α * P_i)^2
+         *      Loss = ∑ (R_i - β * α * P_i)^2
          *
          * Differentiating w.r.t. α and solving for α:
          *
-         *      α = ∑ (w_i * P_i * R_i) / (∑ (w_i * P_i^2) * β)
+         *      α = ∑ (P_i * R_i) / (∑ (P_i^2) * β)
          *
-         * We keep the system conservative for higher returns scenario by clamping computed α within 1.0.
+         * To stay conservative when the observations suggest better-than-linear scaling, the computed α is clamped to an upper bound of 1.0.
+         * If the computed coefficient falls below the threshold (the scaling effectiveness threshold when that detection is enabled, otherwise 0.5), the system falls back to assuming linear scaling (α = 1.0).
          */
 
-        // not enough data to compute scaling coefficient. we assume linear scaling.
+        var minObservations = conf.get(OBSERVED_SCALABILITY_MIN_OBSERVATIONS);
+
+        // not enough data to compute scaling coefficient; we assume linear scaling.
         if (history.isEmpty() || history.size() < minObservations) {
             return 1.0;
         }
@@ -299,14 +299,10 @@ protected static double calculateObservedScalingCoefficient(
             return 1.0;
         }
 
-        Instant latestTimestamp = history.lastKey();
-
         List<Double> parallelismList = new ArrayList<>();
         List<Double> processingRateList = new ArrayList<>();
-        List<Double> weightList = new ArrayList<>();
 
         for (Map.Entry<Instant, ScalingSummary> entry : history.entrySet()) {
-            Instant timestamp = entry.getKey();
             ScalingSummary summary = entry.getValue();
             double parallelism = summary.getCurrentParallelism();
             double processingRate = summary.getMetrics().get(TRUE_PROCESSING_RATE).getAverage();
@@ -317,25 +313,24 @@ protected static double calculateObservedScalingCoefficient(
                 return 1.0;
             }
 
-            // Compute weight based on recency & parallelism
-            double timeDiff =
-                    Duration.between(timestamp, latestTimestamp).getSeconds()
-                            + 1; // Avoid division by zero
-            double weight = parallelism / timeDiff;
-
             parallelismList.add(parallelism);
             processingRateList.add(processingRate);
-            weightList.add(weight);
         }
 
         var coefficient =
                 AutoScalerUtils.optimizeLinearScalingCoefficient(
-                        parallelismList,
-                        processingRateList,
-                        weightList,
-                        baselineProcessingRate,
-                        1,
-                        0.01);
+                        parallelismList, processingRateList, baselineProcessingRate, 1, 0.01);
+
+        double threshold =
+                conf.get(AutoScalerOptions.SCALING_EFFECTIVENESS_DETECTION_ENABLED)
+                        ? conf.get(AutoScalerOptions.SCALING_EFFECTIVENESS_THRESHOLD)
+                        : 0.5;
+
+        if (coefficient < threshold) {
+            LOG.warn("Scaling coefficient is below threshold. Falling back to linear scaling.");
+            return 1.0;
+        }
+
         return BigDecimal.valueOf(coefficient).setScale(2, RoundingMode.CEILING).doubleValue();
     }
 
diff --git a/flink-autoscaler/src/main/java/org/apache/flink/autoscaler/config/AutoScalerOptions.java b/flink-autoscaler/src/main/java/org/apache/flink/autoscaler/config/AutoScalerOptions.java
@@ -394,8 +394,15 @@ private static ConfigOptions.OptionBuilder autoScalerConfig(String key) {
     public static final ConfigOption<Integer> OBSERVED_SCALABILITY_MIN_OBSERVATIONS =
             autoScalerConfig("observed-scalability.min-observations")
                     .intType()
-                    .defaultValue(5)
+                    .defaultValue(3)
                     .withFallbackKeys(oldOperatorConfigKey("observed-scalability.min-observations"))
                     .withDescription(
-                            "Defines the minimum number of historical scaling observations required to estimate the scalability coefficient. If the number of available observations is below this threshold, the system falls back to assuming linear scaling.");
+                            "Defines the minimum number of historical scaling observations required to estimate the scalability coefficient. "
+                                    + "If the number of available observations is below this threshold, the system falls back to assuming linear scaling. "
+                                    + "Note: To effectively use a higher minimum observation count, you need to increase "
+                                    + VERTEX_SCALING_HISTORY_COUNT.key()
+                                    + ". Avoid setting "
+                                    + VERTEX_SCALING_HISTORY_COUNT.key()
+                                    + " to a very high value, as the number of retained data points is limited by the size of the state store—"
+                                    + "particularly when using Kubernetes-based state store.");
 }
diff --git a/flink-autoscaler/src/main/java/org/apache/flink/autoscaler/utils/AutoScalerUtils.java b/flink-autoscaler/src/main/java/org/apache/flink/autoscaler/utils/AutoScalerUtils.java
@@ -101,19 +101,16 @@ public static boolean excludeVerticesFromScaling(
     }
 
     /**
-     * Computes the optimized linear scaling coefficient (α) by minimizing the weighted least
-     * squares error.
+     * Computes the optimized linear scaling coefficient (α) by minimizing the least squares error.
      *
      * <p>This method estimates the scaling coefficient in a linear scaling model by fitting
-     * observed processing rates and parallelism levels while applying weights to account for
-     * recency and significance.
+     * observed processing rates and parallelism levels.
      *
      * <p>The computed coefficient is clamped within the specified lower and upper bounds to ensure
      * stability and prevent extreme scaling adjustments.
      *
      * @param parallelismLevels List of parallelism levels.
      * @param processingRates List of observed processing rates.
-     * @param weights List of weights for each observation.
      * @param baselineProcessingRate Baseline processing rate.
      * @param upperBound Maximum allowable value for the scaling coefficient.
      * @param lowerBound Minimum allowable value for the scaling coefficient.
@@ -123,28 +120,26 @@ public static boolean excludeVerticesFromScaling(
     public static double optimizeLinearScalingCoefficient(
             List<Double> parallelismLevels,
             List<Double> processingRates,
-            List<Double> weights,
             double baselineProcessingRate,
             double upperBound,
             double lowerBound) {
 
-        double weightedSum = 0.0;
-        double weightedSquaredSum = 0.0;
+        double sum = 0.0;
+        double squaredSum = 0.0;
 
         for (int i = 0; i < parallelismLevels.size(); i++) {
             double parallelism = parallelismLevels.get(i);
             double processingRate = processingRates.get(i);
-            double weight = weights.get(i);
 
-            weightedSum += weight * parallelism * processingRate;
-            weightedSquaredSum += weight * parallelism * parallelism;
+            sum += parallelism * processingRate;
+            squaredSum += parallelism * parallelism;
         }
 
-        if (weightedSquaredSum == 0.0) {
+        if (squaredSum == 0.0) {
             return 1.0; // Fallback to linear scaling if denominator is zero
         }
 
-        double alpha = weightedSum / (weightedSquaredSum * baselineProcessingRate);
+        double alpha = sum / (squaredSum * baselineProcessingRate);
 
         return Math.max(lowerBound, Math.min(upperBound, alpha));
     }
diff --git a/flink-autoscaler/src/test/java/org/apache/flink/autoscaler/JobVertexScalerTest.java b/flink-autoscaler/src/test/java/org/apache/flink/autoscaler/JobVertexScalerTest.java