-
Notifications
You must be signed in to change notification settings - Fork 501
[FLINK-30571] Estimate scalability coefficient from past scaling history using linear regression #966
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[FLINK-30571] Estimate scalability coefficient from past scaling history using linear regression #966
Changes from 1 commit
6f01454
ee9a953
6385389
3adc6b3
dcd815b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -34,18 +34,24 @@ | |
| import org.slf4j.Logger; | ||
| import org.slf4j.LoggerFactory; | ||
|
|
||
| import java.math.BigDecimal; | ||
| import java.math.RoundingMode; | ||
| import java.time.Clock; | ||
| import java.time.Duration; | ||
| import java.time.Instant; | ||
| import java.time.ZoneId; | ||
| import java.util.ArrayList; | ||
| import java.util.Collection; | ||
| import java.util.List; | ||
| import java.util.Map; | ||
| import java.util.Objects; | ||
| import java.util.SortedMap; | ||
|
|
||
| import static org.apache.flink.autoscaler.JobVertexScaler.KeyGroupOrPartitionsAdjustMode.MAXIMIZE_UTILISATION; | ||
| import static org.apache.flink.autoscaler.config.AutoScalerOptions.MAX_SCALE_DOWN_FACTOR; | ||
| import static org.apache.flink.autoscaler.config.AutoScalerOptions.MAX_SCALE_UP_FACTOR; | ||
| import static org.apache.flink.autoscaler.config.AutoScalerOptions.OBSERVED_SCALABILITY_ENABLED; | ||
| import static org.apache.flink.autoscaler.config.AutoScalerOptions.OBSERVED_SCALABILITY_MIN_OBSERVATIONS; | ||
| import static org.apache.flink.autoscaler.config.AutoScalerOptions.SCALE_DOWN_INTERVAL; | ||
| import static org.apache.flink.autoscaler.config.AutoScalerOptions.SCALING_EVENT_INTERVAL; | ||
| import static org.apache.flink.autoscaler.config.AutoScalerOptions.SCALING_KEY_GROUP_PARTITIONS_ADJUST_MODE; | ||
|
|
@@ -178,6 +184,12 @@ public ParallelismChange computeScaleTargetParallelism( | |
|
|
||
| LOG.debug("Target processing capacity for {} is {}", vertex, targetCapacity); | ||
| double scaleFactor = targetCapacity / averageTrueProcessingRate; | ||
| if (conf.get(OBSERVED_SCALABILITY_ENABLED)) { | ||
| double scalingCoefficient = | ||
| JobVertexScaler.calculateObservedScalingCoefficient( | ||
| history, conf.get(OBSERVED_SCALABILITY_MIN_OBSERVATIONS)); | ||
| scaleFactor = scaleFactor / scalingCoefficient; | ||
| } | ||
| double minScaleFactor = 1 - conf.get(MAX_SCALE_DOWN_FACTOR); | ||
| double maxScaleFactor = 1 + conf.get(MAX_SCALE_UP_FACTOR); | ||
| if (scaleFactor < minScaleFactor) { | ||
|
|
@@ -236,6 +248,97 @@ public ParallelismChange computeScaleTargetParallelism( | |
| delayedScaleDown); | ||
| } | ||
|
|
||
| /** | ||
| * Calculates the scaling coefficient based on historical scaling data. | ||
| * | ||
| * <p>The scaling coefficient is computed using a weighted least squares approach, where more | ||
| * recent data points and those with higher parallelism are given higher weights. If there are | ||
| * not enough observations, or if the computed coefficient is invalid, a default value of {@code | ||
| * 1.0} is returned, assuming linear scaling. | ||
| * | ||
| * @param history A {@code SortedMap} of {@code Instant} timestamps to {@code ScalingSummary} | ||
| * @param minObservations The minimum number of observations required to compute the scaling | ||
| * coefficient. If the number of historical entries is less than this threshold, a default | ||
| * coefficient of {@code 1.0} is returned. | ||
| * @return The computed scaling coefficient. | ||
| */ | ||
| @VisibleForTesting | ||
| protected static double calculateObservedScalingCoefficient( | ||
| SortedMap<Instant, ScalingSummary> history, int minObservations) { | ||
| /* | ||
| * The scaling coefficient is computed using a **weighted least squares** approach | ||
| * to fit a linear model: | ||
| * | ||
| * R_i = β * P_i * α | ||
| * | ||
| * where: | ||
| * - R_i = observed processing rate | ||
| * - P_i = parallelism | ||
| * - β = baseline processing rate | ||
| * - α = scaling coefficient to optimize | ||
| * | ||
| * The optimization minimizes the **weighted sum of squared errors**: | ||
| * | ||
| * Loss = ∑ w_i * (R_i - β * α * P_i)^2 | ||
| * | ||
| * Differentiating w.r.t. α and solving for α: | ||
| * | ||
| * α = ∑ (w_i * P_i * R_i) / (∑ (w_i * P_i^2) * β) | ||
| * | ||
| * We keep the system conservative for higher returns scenario by clamping computed α within 1.0. | ||
| */ | ||
|
|
||
| // not enough data to compute scaling coefficient. we assume linear scaling. | ||
| if (history.isEmpty() || history.size() < minObservations) { | ||
| return 1.0; | ||
| } | ||
|
|
||
| var baselineProcessingRate = AutoScalerUtils.computeBaselineProcessingRate(history); | ||
|
|
||
| if (Double.isNaN(baselineProcessingRate)) { | ||
| return 1.0; | ||
| } | ||
|
|
||
| Instant latestTimestamp = history.lastKey(); | ||
|
|
||
| List<Double> parallelismList = new ArrayList<>(); | ||
| List<Double> processingRateList = new ArrayList<>(); | ||
| List<Double> weightList = new ArrayList<>(); | ||
|
|
||
| for (Map.Entry<Instant, ScalingSummary> entry : history.entrySet()) { | ||
| Instant timestamp = entry.getKey(); | ||
| ScalingSummary summary = entry.getValue(); | ||
| double parallelism = summary.getCurrentParallelism(); | ||
| double processingRate = summary.getMetrics().get(TRUE_PROCESSING_RATE).getAverage(); | ||
|
|
||
| if (Double.isNaN(processingRate)) { | ||
| LOG.warn( | ||
| "True processing rate is not available in scaling history. Cannot compute scaling coefficient."); | ||
| return 1.0; | ||
| } | ||
|
|
||
| // Compute weight based on recency & parallelism | ||
| double timeDiff = | ||
| Duration.between(timestamp, latestTimestamp).getSeconds() | ||
| + 1; // Avoid division by zero | ||
| double weight = parallelism / timeDiff; | ||
|
||
|
|
||
| parallelismList.add(parallelism); | ||
| processingRateList.add(processingRate); | ||
| weightList.add(weight); | ||
| } | ||
|
|
||
| var coefficient = | ||
| AutoScalerUtils.optimizeLinearScalingCoefficient( | ||
| parallelismList, | ||
| processingRateList, | ||
| weightList, | ||
| baselineProcessingRate, | ||
| 1, | ||
| 0.01); | ||
| return BigDecimal.valueOf(coefficient).setScale(2, RoundingMode.CEILING).doubleValue(); | ||
| } | ||
|
|
||
| private ParallelismChange detectBlockScaling( | ||
| Context context, | ||
| JobVertexID vertex, | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.