56
56
import static org .apache .flink .autoscaler .metrics .ScalingMetric .MAX_PARALLELISM ;
57
57
import static org .apache .flink .autoscaler .metrics .ScalingMetric .NUM_SOURCE_PARTITIONS ;
58
58
import static org .apache .flink .autoscaler .metrics .ScalingMetric .PARALLELISM ;
59
+ import static org .apache .flink .autoscaler .metrics .ScalingMetric .SCALE_DOWN_RATE_THRESHOLD ;
60
+ import static org .apache .flink .autoscaler .metrics .ScalingMetric .SCALE_UP_RATE_THRESHOLD ;
59
61
import static org .apache .flink .autoscaler .metrics .ScalingMetric .TRUE_PROCESSING_RATE ;
60
62
import static org .apache .flink .autoscaler .topology .ShipStrategy .HASH ;
61
63
import static org .apache .flink .configuration .description .TextElement .text ;
@@ -92,12 +94,15 @@ public JobVertexScaler(AutoScalerEventHandler<KEY, Context> autoScalerEventHandl
92
94
@ Getter
93
95
public static class ParallelismChange {
94
96
95
- private static final ParallelismChange NO_CHANGE = new ParallelismChange (-1 );
97
+ private static final ParallelismChange NO_CHANGE = new ParallelismChange (-1 , false );
96
98
97
99
private final int newParallelism ;
98
100
99
- private ParallelismChange (int newParallelism ) {
101
+ private final boolean outsideUtilizationBound ;
102
+
103
// Private constructor: stores the target parallelism together with the
// outsideUtilizationBound flag. Within this view it is invoked only for the
// NO_CHANGE constant (with -1, false) and from the static build(...) factory.
+ private ParallelismChange (int newParallelism , boolean outsideUtilizationBound ) {
100
104
this .newParallelism = newParallelism ;
105
+ this .outsideUtilizationBound = outsideUtilizationBound ;
101
106
}
102
107
103
108
public boolean isNoChange () {
@@ -113,24 +118,29 @@ public boolean equals(Object o) {
113
118
return false ;
114
119
}
115
120
ParallelismChange that = (ParallelismChange ) o ;
116
- return newParallelism == that .newParallelism ;
121
+ return newParallelism == that .newParallelism
122
+ && outsideUtilizationBound == that .outsideUtilizationBound ;
117
123
}
118
124
119
125
// Hash is derived from the same two fields that equals() compares
// (newParallelism and outsideUtilizationBound), keeping the two methods
// consistent. The '-' line is the pre-change, single-field version.
@ Override
120
126
public int hashCode () {
121
- return Objects .hash (newParallelism );
127
+ return Objects .hash (newParallelism , outsideUtilizationBound );
122
128
}
123
129
124
130
// Returns the fixed text "NoParallelismChange" when isNoChange() is true;
// otherwise renders both fields (newParallelism and outsideUtilizationBound).
// The '-' line is the pre-change rendering that printed newParallelism only.
@ Override
125
131
public String toString () {
126
132
return isNoChange ()
127
133
? "NoParallelismChange"
128
- : "ParallelismChange{newParallelism=" + newParallelism + '}' ;
134
+ : "ParallelismChange{newParallelism="
135
+ + newParallelism
136
+ + ", outsideUtilizationBound="
137
+ + outsideUtilizationBound
138
+ + "}" ;
129
139
}
130
140
131
// Static factory for a real parallelism change. The new-side signature takes
// the outsideUtilizationBound flag in addition to the target parallelism and
// passes both straight to the private constructor.
- public static ParallelismChange build (int newParallelism ) {
141
+ public static ParallelismChange build (int newParallelism , boolean outsideUtilizationBound ) {
132
142
// Only strictly positive values are accepted here; the NO_CHANGE constant
// (built with -1) goes through the constructor directly, not this factory.
// NOTE(review): checkArgument is presumably Flink's Preconditions helper that
// throws IllegalArgumentException on failure - confirm against the imports.
checkArgument (newParallelism > 0 , "The parallelism should be greater than 0." );
133
- return new ParallelismChange (newParallelism );
143
+ return new ParallelismChange (newParallelism , outsideUtilizationBound );
134
144
}
135
145
136
146
public static ParallelismChange noChange () {
@@ -239,6 +249,8 @@ private ParallelismChange detectBlockScaling(
239
249
currentParallelism != newParallelism ,
240
250
"The newParallelism is equal to currentParallelism, no scaling is needed. This is probably a bug." );
241
251
252
+ var outsideUtilizationBound = outsideUtilizationBound (vertex , evaluatedMetrics );
253
+
242
254
var scaledUp = currentParallelism < newParallelism ;
243
255
244
256
if (scaledUp ) {
@@ -248,7 +260,7 @@ private ParallelismChange detectBlockScaling(
248
260
249
261
// If we don't have past scaling actions for this vertex, don't block scale up.
250
262
if (history .isEmpty ()) {
251
- return ParallelismChange .build (newParallelism );
263
+ return ParallelismChange .build (newParallelism , outsideUtilizationBound );
252
264
}
253
265
254
266
var lastSummary = history .get (history .lastKey ());
@@ -260,28 +272,59 @@ && detectIneffectiveScaleUp(
260
272
return ParallelismChange .noChange ();
261
273
}
262
274
263
- return ParallelismChange .build (newParallelism );
275
+ return ParallelismChange .build (newParallelism , outsideUtilizationBound );
276
+ } else {
277
+ return applyScaleDownInterval (
278
+ delayedScaleDown , vertex , conf , newParallelism , outsideUtilizationBound );
279
+ }
280
+ }
281
+
282
// Reports whether the vertex's true processing rate falls outside the range
// bounded by the scale-up and scale-down rate thresholds.
// Returns true when the rate is below the scale-up threshold or above the
// scale-down threshold, false otherwise; both outcomes are logged at debug level.
// NOTE(review): assumes all three metrics are present in the map; a missing
// entry would presumably make metrics.get(...) return null - confirm callers
// always populate them.
+ private static boolean outsideUtilizationBound (
283
+ JobVertexID vertex , Map <ScalingMetric , EvaluatedScalingMetric > metrics ) {
284
// The processing rate is read as an average, the two thresholds as current values.
+ double trueProcessingRate = metrics .get (TRUE_PROCESSING_RATE ).getAverage ();
285
+ double scaleUpRateThreshold = metrics .get (SCALE_UP_RATE_THRESHOLD ).getCurrent ();
286
+ double scaleDownRateThreshold = metrics .get (SCALE_DOWN_RATE_THRESHOLD ).getCurrent ();
287
+
288
// Strict comparisons: a rate exactly equal to either threshold counts as within bounds.
+ if (trueProcessingRate < scaleUpRateThreshold
289
+ || trueProcessingRate > scaleDownRateThreshold ) {
290
+ LOG .debug (
291
+ "Vertex {} processing rate {} is outside ({}, {})" ,
292
+ vertex ,
293
+ trueProcessingRate ,
294
+ scaleUpRateThreshold ,
295
+ scaleDownRateThreshold );
296
+ return true ;
264
297
} else {
265
// The '-' line below is a removed pre-change statement shown by the diff at
// this position; it is not part of the new-side method body.
- return applyScaleDownInterval (delayedScaleDown , vertex , conf , newParallelism );
298
+ LOG .debug (
299
+ "Vertex {} processing rate {} is within target ({}, {})" ,
300
+ vertex ,
301
+ trueProcessingRate ,
302
+ scaleUpRateThreshold ,
303
+ scaleDownRateThreshold );
266
304
}
305
+ return false ;
267
306
}
268
307
269
308
private ParallelismChange applyScaleDownInterval (
270
309
DelayedScaleDown delayedScaleDown ,
271
310
JobVertexID vertex ,
272
311
Configuration conf ,
273
- int newParallelism ) {
312
+ int newParallelism ,
313
+ boolean outsideUtilizationBound ) {
274
314
var scaleDownInterval = conf .get (SCALE_DOWN_INTERVAL );
275
315
if (scaleDownInterval .toMillis () <= 0 ) {
276
316
// The scale down interval is disabled, so don't block scaling.
277
- return ParallelismChange .build (newParallelism );
317
+ return ParallelismChange .build (newParallelism , outsideUtilizationBound );
278
318
}
279
319
280
320
var now = clock .instant ();
281
- var delayedScaleDownInfo = delayedScaleDown .triggerScaleDown (vertex , now , newParallelism );
321
+ var windowStartTime = now .minus (scaleDownInterval );
322
+ var delayedScaleDownInfo =
323
+ delayedScaleDown .triggerScaleDown (
324
+ vertex , now , newParallelism , outsideUtilizationBound );
282
325
283
326
// Never scale down within scale down interval
284
- if (now .isBefore (delayedScaleDownInfo .getFirstTriggerTime (). plus ( scaleDownInterval ))) {
327
+ if (windowStartTime .isBefore (delayedScaleDownInfo .getFirstTriggerTime ())) {
285
328
if (now .equals (delayedScaleDownInfo .getFirstTriggerTime ())) {
286
329
LOG .info ("The scale down of {} is delayed by {}." , vertex , scaleDownInterval );
287
330
} else {
@@ -293,7 +336,11 @@ private ParallelismChange applyScaleDownInterval(
293
336
} else {
294
337
// Using the maximum parallelism within the scale down interval window instead of the
295
338
// latest parallelism when scaling down
296
- return ParallelismChange .build (delayedScaleDownInfo .getMaxRecommendedParallelism ());
339
+ var maxRecommendedParallelism =
340
+ delayedScaleDownInfo .getMaxRecommendedParallelism (windowStartTime );
341
+ return ParallelismChange .build (
342
+ maxRecommendedParallelism .getParallelism (),
343
+ maxRecommendedParallelism .isOutsideUtilizationBound ());
297
344
}
298
345
}
299
346
0 commit comments