@@ -71,21 +71,31 @@ public JobVertexScaler(AutoScalerEventHandler<KEY, Context> autoScalerEventHandl
         this.autoScalerEventHandler = autoScalerEventHandler;
     }

-    public int computeScaleTargetParallelism(
+    public VertexScalingResult computeScaleTargetParallelism(
             Context context,
             JobVertexID vertex,
             Collection<ShipStrategy> inputShipStrategies,
             Map<ScalingMetric, EvaluatedScalingMetric> evaluatedMetrics,
             SortedMap<Instant, ScalingSummary> history,
-            Duration restartTime) {
+            Duration restartTime,
+            double backpropagationScaleFactor) {
         var conf = context.getConfiguration();
+
+        boolean excluded =
+                conf.get(AutoScalerOptions.VERTEX_EXCLUDE_IDS).contains(vertex.toHexString());
+        if (excluded) {
+            LOG.debug(
88+ "Vertex {} is part of `vertex.exclude.ids` config, Check for bottleneck but not scale" ,
+                    vertex);
+        }
+
         var currentParallelism = (int) evaluatedMetrics.get(PARALLELISM).getCurrent();
         double averageTrueProcessingRate = evaluatedMetrics.get(TRUE_PROCESSING_RATE).getAverage();
         if (Double.isNaN(averageTrueProcessingRate)) {
             LOG.warn(
                     "True processing rate is not available for {}, cannot compute new parallelism",
                     vertex);
-            return currentParallelism;
+            return VertexScalingResult.normalScaling(currentParallelism);
         }

         double targetCapacity =
@@ -95,9 +105,11 @@ public int computeScaleTargetParallelism(
             LOG.warn(
                     "Target data rate is not available for {}, cannot compute new parallelism",
                     vertex);
-            return currentParallelism;
+            return VertexScalingResult.normalScaling(currentParallelism);
         }

+        targetCapacity *= backpropagationScaleFactor;
+
         LOG.debug("Target processing capacity for {} is {}", vertex, targetCapacity);
         double scaleFactor = targetCapacity / averageTrueProcessingRate;
         double minScaleFactor = 1 - conf.get(MAX_SCALE_DOWN_FACTOR);
@@ -122,32 +134,44 @@ public int computeScaleTargetParallelism(
         double cappedTargetCapacity = averageTrueProcessingRate * scaleFactor;
         LOG.debug("Capped target processing capacity for {} is {}", vertex, cappedTargetCapacity);

-        int newParallelism =
+        int parallelismLowerLimit =
+                excluded
+                        ? currentParallelism
+                        : Math.min(currentParallelism, conf.getInteger(VERTEX_MIN_PARALLELISM));
+        int parallelismUpperLimit =
+                excluded
+                        ? currentParallelism
+                        : Math.max(currentParallelism, conf.getInteger(VERTEX_MAX_PARALLELISM));
+
+        var scalingResult =
                 scale(
                         currentParallelism,
                         inputShipStrategies,
                         (int) evaluatedMetrics.get(MAX_PARALLELISM).getCurrent(),
                         scaleFactor,
-                        Math.min(currentParallelism, conf.getInteger(VERTEX_MIN_PARALLELISM)),
-                        Math.max(currentParallelism, conf.getInteger(VERTEX_MAX_PARALLELISM)));
+                        parallelismLowerLimit,
+                        parallelismUpperLimit);

-        if (newParallelism == currentParallelism
+        if (scalingResult.getParallelism() == currentParallelism
                 || blockScalingBasedOnPastActions(
                         context,
                         vertex,
                         conf,
                         evaluatedMetrics,
                         history,
                         currentParallelism,
-                        newParallelism)) {
-            return currentParallelism;
+                        scalingResult.getParallelism())) {
+            return new VertexScalingResult(
+                    currentParallelism,
+                    scalingResult.getBottleneckScaleFactor(),
+                    scalingResult.isBottleneck());
         }

         // We record our expectations for this scaling operation
         evaluatedMetrics.put(
                 ScalingMetric.EXPECTED_PROCESSING_RATE,
                 EvaluatedScalingMetric.of(cappedTargetCapacity));
-        return newParallelism;
+        return scalingResult;
     }

     private boolean blockScalingBasedOnPastActions(
@@ -249,9 +273,12 @@ private boolean detectIneffectiveScaleUp(
      * <p>Also, in order to ensure the data is evenly spread across subtasks, we try to adjust the
      * parallelism for source and keyed vertex such that it divides the maxParallelism without a
      * remainder.
+     *
+     * <p>If newParallelism exceeds min(parallelismUpperLimit, maxParallelism), the job vertex is
+     * considered to be a bottleneck.
      */
     @VisibleForTesting
-    protected static int scale(
+    protected static VertexScalingResult scale(
             int currentParallelism,
             Collection<ShipStrategy> inputShipStrategies,
             int maxParallelism,
@@ -284,26 +311,36 @@ protected static int scale(
         // parallelism upper limit
         final int upperBound = Math.min(maxParallelism, parallelismUpperLimit);

+        boolean isBottleneck = false;
+        double bottleneckScaleFactor = 1.0;
+
+        // If the required parallelism is higher than the upper bound, the vertex is a bottleneck
+        if (newParallelism > upperBound) {
+            isBottleneck = true;
+            bottleneckScaleFactor = (double) upperBound / newParallelism;
+            newParallelism = upperBound;
+        }
+
         // Apply min/max parallelism
-        newParallelism = Math.min(Math.max(parallelismLowerLimit, newParallelism), upperBound);
+        newParallelism = Math.max(parallelismLowerLimit, newParallelism);

         var adjustByMaxParallelism =
                 inputShipStrategies.isEmpty() || inputShipStrategies.contains(HASH);
         if (!adjustByMaxParallelism) {
-            return newParallelism;
+            return new VertexScalingResult(newParallelism, bottleneckScaleFactor, isBottleneck);
         }

         // When the shuffle type of vertex inputs contains keyBy or vertex is a source, we try to
         // adjust the parallelism such that it divides the maxParallelism without a remainder
         // => data is evenly spread across subtasks
         for (int p = newParallelism; p <= maxParallelism / 2 && p <= upperBound; p++) {
             if (maxParallelism % p == 0) {
-                return p;
+                return new VertexScalingResult(p, bottleneckScaleFactor, isBottleneck);
             }
         }

         // If parallelism adjustment fails, use originally computed parallelism
-        return new VertexScalingResult(newParallelism, bottleneckScaleFactor, isBottleneck);
     }

     @VisibleForTesting
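
The VertexScalingResult type returned above is not defined in this hunk. A minimal sketch that is consistent with the calls visible in the diff (the three-argument constructor, normalScaling, getParallelism, getBottleneckScaleFactor, isBottleneck) could look like the following; the field names and the semantics of normalScaling (scale factor 1.0, not a bottleneck) are assumptions, not the PR's actual definition.

/** Hypothetical sketch of the result type used above; the actual class in the PR may differ. */
public final class VertexScalingResult {

    private final int parallelism;
    private final double bottleneckScaleFactor;
    private final boolean bottleneck;

    public VertexScalingResult(int parallelism, double bottleneckScaleFactor, boolean bottleneck) {
        this.parallelism = parallelism;
        this.bottleneckScaleFactor = bottleneckScaleFactor;
        this.bottleneck = bottleneck;
    }

    /** Assumed factory for a vertex that is scaled normally and is not a bottleneck. */
    public static VertexScalingResult normalScaling(int parallelism) {
        return new VertexScalingResult(parallelism, 1.0, false);
    }

    public int getParallelism() {
        return parallelism;
    }

    public double getBottleneckScaleFactor() {
        return bottleneckScaleFactor;
    }

    public boolean isBottleneck() {
        return bottleneck;
    }
}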
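
To make the new bottleneck handling in scale() concrete, here is a hypothetical walk-through with made-up numbers (they are illustrative only and do not come from the PR):

// Hypothetical numbers, for illustration only.
int maxParallelism = 128;        // max parallelism of the vertex
int parallelismUpperLimit = 32;  // from VERTEX_MAX_PARALLELISM, or currentParallelism if excluded
int newParallelism = 50;         // parallelism demanded by the computed scale factor

int upperBound = Math.min(maxParallelism, parallelismUpperLimit); // 32

// The vertex cannot reach the required parallelism, so it is a bottleneck.
boolean isBottleneck = newParallelism > upperBound;                  // true
double bottleneckScaleFactor = (double) upperBound / newParallelism; // 32 / 50 = 0.64
newParallelism = upperBound;                                         // capped at 32

// For keyed or source vertices the parallelism is then adjusted to divide maxParallelism evenly;
// 32 already divides 128, so it stays. The caller can feed bottleneckScaleFactor back into
// computeScaleTargetParallelism of upstream vertices as backpropagationScaleFactor, reducing
// their target capacity proportionally.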