Skip to content

Commit de150fc

Browse files
committed
adaptive allocations: reset time interval with zero requests upon starting an allocation (#115400)
1 parent 30485d4 commit de150fc

File tree

3 files changed

+46
-3
lines changed

3 files changed

+46
-3
lines changed

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/adaptiveallocations/AdaptiveAllocationsScaler.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ public class AdaptiveAllocationsScaler {
3333
private final String deploymentId;
3434
private final KalmanFilter1d requestRateEstimator;
3535
private final KalmanFilter1d inferenceTimeEstimator;
36+
private final long scaleToZeroAfterNoRequestsSeconds;
3637
private double timeWithoutRequestsSeconds;
3738

3839
private int numberOfAllocations;
@@ -44,10 +45,11 @@ public class AdaptiveAllocationsScaler {
4445
private Double lastMeasuredRequestRate;
4546
private Double lastMeasuredInferenceTime;
4647
private Long lastMeasuredQueueSize;
47-
private long scaleToZeroAfterNoRequestsSeconds;
4848

4949
AdaptiveAllocationsScaler(String deploymentId, int numberOfAllocations, long scaleToZeroAfterNoRequestsSeconds) {
5050
this.deploymentId = deploymentId;
51+
this.scaleToZeroAfterNoRequestsSeconds = scaleToZeroAfterNoRequestsSeconds;
52+
5153
// A smoothing factor of 100 roughly means the last 100 measurements have an effect
5254
// on the estimated values. The sampling time is 10 seconds, so approximately the
5355
// last 15 minutes are taken into account.
@@ -67,7 +69,6 @@ public class AdaptiveAllocationsScaler {
6769
lastMeasuredRequestRate = null;
6870
lastMeasuredInferenceTime = null;
6971
lastMeasuredQueueSize = null;
70-
this.scaleToZeroAfterNoRequestsSeconds = scaleToZeroAfterNoRequestsSeconds;
7172
}
7273

7374
void setMinMaxNumberOfAllocations(Integer minNumberOfAllocations, Integer maxNumberOfAllocations) {
@@ -117,6 +118,10 @@ void process(AdaptiveAllocationsScalerService.Stats stats, double timeIntervalSe
117118
dynamicsChanged = false;
118119
}
119120

121+
/**
 * Resets the accumulated time without requests back to zero.
 *
 * The service calls this from {@code maybeStartAllocation} when a scaler exists
 * for the deployment, so the scale-to-zero countdown starts over whenever an
 * allocation is being started.
 */
void resetTimeWithoutRequests() {
122+
timeWithoutRequestsSeconds = 0;
123+
}
124+
120125
double getLoadLower() {
121126
double requestRateLower = Math.max(0.0, requestRateEstimator.lower());
122127
double inferenceTimeLower = Math.max(0.0, inferenceTimeEstimator.hasValue() ? inferenceTimeEstimator.lower() : 1.0);

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/adaptiveallocations/AdaptiveAllocationsScalerService.java

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,10 @@ Collection<DoubleWithAttributes> observeDouble(Function<AdaptiveAllocationsScale
188188

189189
/**
190190
* The time interval without any requests that has to pass, before scaling down
191-
* to zero allocations (in case min_allocations = 0).
191+
* to zero allocations (in case min_allocations = 0). After this time interval
192+
* without requests, the number of allocations is set to zero. When this time
193+
* interval hasn't passed, the minimum number of allocations will always be
194+
* larger than zero.
192195
*/
193196
private static final long SCALE_TO_ZERO_AFTER_NO_REQUESTS_TIME_SECONDS = TimeValue.timeValueMinutes(15).getSeconds();
194197

@@ -447,6 +450,12 @@ public boolean maybeStartAllocation(TrainedModelAssignment assignment) {
447450
deploymentIdsWithInFlightScaleFromZeroRequests.add(assignment.getDeploymentId());
448451
updateNumberOfAllocations(assignment.getDeploymentId(), 1, cleanUpListener);
449452
}
453+
454+
AdaptiveAllocationsScaler scaler = scalers.get(assignment.getDeploymentId());
455+
if (scaler != null) {
456+
scaler.resetTimeWithoutRequests();
457+
}
458+
450459
return true;
451460
}
452461
return false;

x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/adaptiveallocations/AdaptiveAllocationsScalerTests.java

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,35 @@ public void testAutoscaling_scaleDownToZeroAllocations() {
195195
}
196196
}
197197

198+
/**
 * Verifies that {@code resetTimeWithoutRequests()} restarts the scale-to-zero
 * countdown: as long as the reset is called more often than the 15 minute
 * inactivity threshold, no scaling is suggested; once a full threshold plus one
 * second passes without a reset, the scaler suggests zero allocations.
 */
public void testAutoscaling_resetTimeWithoutRequests() {
199+
int scaleDownAfterInactivitySeconds = 60 * 15; // scale down to 0 after 15 minutes
200+
AdaptiveAllocationsScaler adaptiveAllocationsScaler = new AdaptiveAllocationsScaler(
201+
"test-deployment",
202+
0,
203+
scaleDownAfterInactivitySeconds
204+
);
205+
206+
// 1 hour without requests (360 steps of 10 seconds), but "reset" is called every
// 10 minutes (every 60th step), which is below the 15 minute threshold, so don't scale.
207+
for (int i = 0; i < 360; i++) {
208+
adaptiveAllocationsScaler.process(new AdaptiveAllocationsScalerService.Stats(0, 0, 0, 0.05), 10, 0);
209+
assertThat(adaptiveAllocationsScaler.scale(), nullValue());
210+
if (i % 60 == 0) {
211+
adaptiveAllocationsScaler.resetTimeWithoutRequests();
212+
}
213+
}
214+
215+
// Start the final countdown from a clean state: the loop above leaves time
// accumulated since its last reset.
adaptiveAllocationsScaler.resetTimeWithoutRequests();
216+
// 15 minutes with no requests (90 steps of 10 seconds), which reaches but does not
// exceed the threshold, so don't scale.
217+
for (int i = 0; i < 90; i++) {
218+
adaptiveAllocationsScaler.process(new AdaptiveAllocationsScalerService.Stats(0, 0, 0, 0.05), 10, 1);
219+
assertThat(adaptiveAllocationsScaler.scale(), nullValue());
220+
}
221+
222+
// another second with no requests, so scale to zero allocations.
223+
adaptiveAllocationsScaler.process(new AdaptiveAllocationsScalerService.Stats(0, 0, 0, 0.05), 1, 1);
224+
assertThat(adaptiveAllocationsScaler.scale(), equalTo(0));
225+
}
226+
198227
public void testAutoscaling_dontScaleDownToZeroAllocationsWhenMinAllocationsIsSet() {
199228
AdaptiveAllocationsScaler adaptiveAllocationsScaler = new AdaptiveAllocationsScaler("test-deployment", 1, 60);
200229
adaptiveAllocationsScaler.setMinMaxNumberOfAllocations(1, null);

0 commit comments

Comments
 (0)