From de07a7dbafcce8b1616c8ebf8886762f30900240 Mon Sep 17 00:00:00 2001 From: Jan Kuipers <148754765+jan-elastic@users.noreply.github.com> Date: Wed, 23 Oct 2024 15:07:04 +0200 Subject: [PATCH] adaptive allocations: reset time interval with zero requests upon starting an allocation (#115400) --- .../AdaptiveAllocationsScaler.java | 9 ++++-- .../AdaptiveAllocationsScalerService.java | 11 ++++++- .../AdaptiveAllocationsScalerTests.java | 29 +++++++++++++++++++ 3 files changed, 46 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/adaptiveallocations/AdaptiveAllocationsScaler.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/adaptiveallocations/AdaptiveAllocationsScaler.java index bbd63e0d3bfe9..0dec99a9b9bb9 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/adaptiveallocations/AdaptiveAllocationsScaler.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/adaptiveallocations/AdaptiveAllocationsScaler.java @@ -33,6 +33,7 @@ public class AdaptiveAllocationsScaler { private final String deploymentId; private final KalmanFilter1d requestRateEstimator; private final KalmanFilter1d inferenceTimeEstimator; + private final long scaleToZeroAfterNoRequestsSeconds; private double timeWithoutRequestsSeconds; private int numberOfAllocations; @@ -44,10 +45,11 @@ public class AdaptiveAllocationsScaler { private Double lastMeasuredRequestRate; private Double lastMeasuredInferenceTime; private Long lastMeasuredQueueSize; - private long scaleToZeroAfterNoRequestsSeconds; AdaptiveAllocationsScaler(String deploymentId, int numberOfAllocations, long scaleToZeroAfterNoRequestsSeconds) { this.deploymentId = deploymentId; + this.scaleToZeroAfterNoRequestsSeconds = scaleToZeroAfterNoRequestsSeconds; + // A smoothing factor of 100 roughly means the last 100 measurements have an effect // on the estimated values. The sampling time is 10 seconds, so approximately the // last 15 minutes are taken into account. @@ -67,7 +69,6 @@ public class AdaptiveAllocationsScaler { lastMeasuredRequestRate = null; lastMeasuredInferenceTime = null; lastMeasuredQueueSize = null; - this.scaleToZeroAfterNoRequestsSeconds = scaleToZeroAfterNoRequestsSeconds; } void setMinMaxNumberOfAllocations(Integer minNumberOfAllocations, Integer maxNumberOfAllocations) { @@ -117,6 +118,10 @@ void process(AdaptiveAllocationsScalerService.Stats stats, double timeIntervalSe dynamicsChanged = false; } + void resetTimeWithoutRequests() { + timeWithoutRequestsSeconds = 0; + } + double getLoadLower() { double requestRateLower = Math.max(0.0, requestRateEstimator.lower()); double inferenceTimeLower = Math.max(0.0, inferenceTimeEstimator.hasValue() ? inferenceTimeEstimator.lower() : 1.0); diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/adaptiveallocations/AdaptiveAllocationsScalerService.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/adaptiveallocations/AdaptiveAllocationsScalerService.java index 770e890512935..16ec3ee9b468c 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/adaptiveallocations/AdaptiveAllocationsScalerService.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/adaptiveallocations/AdaptiveAllocationsScalerService.java @@ -188,7 +188,10 @@ Collection observeDouble(Function