From 8cbd216720b3c86b7f71921475248e6dbf671dc1 Mon Sep 17 00:00:00 2001 From: Jan Kuipers Date: Tue, 15 Oct 2024 09:27:48 +0200 Subject: [PATCH] Revert "[ML] Remove scale to zero feature flag (#114323)" This reverts commit 3a83fcdef969fdd376720ff0b57953623ca0d8b4. --- .../test/cluster/FeatureFlag.java | 1 + .../AdaptiveAllocationsScaler.java | 3 ++- .../AdaptiveAllocationsScalerService.java | 3 ++- .../ScaleToZeroFeatureFlag.java | 20 +++++++++++++++++++ .../AdaptiveAllocationsScalerTests.java | 4 ++++ 5 files changed, 29 insertions(+), 2 deletions(-) create mode 100644 x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/adaptiveallocations/ScaleToZeroFeatureFlag.java diff --git a/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/FeatureFlag.java b/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/FeatureFlag.java index ca2300611b4fd..0c9ad749c050b 100644 --- a/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/FeatureFlag.java +++ b/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/FeatureFlag.java @@ -20,6 +20,7 @@ public enum FeatureFlag { FAILURE_STORE_ENABLED("es.failure_store_feature_flag_enabled=true", Version.fromString("8.12.0"), null), SUB_OBJECTS_AUTO_ENABLED("es.sub_objects_auto_feature_flag_enabled=true", Version.fromString("8.16.0"), null), CHUNKING_SETTINGS_ENABLED("es.inference_chunking_settings_feature_flag_enabled=true", Version.fromString("8.16.0"), null), + INFERENCE_SCALE_TO_ZERO("es.inference_scale_to_zero_feature_flag_enabled=true", Version.fromString("8.16.0"), null), INFERENCE_DEFAULT_ELSER("es.inference_default_elser_feature_flag_enabled=true", Version.fromString("8.16.0"), null); public final String systemProperty; diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/adaptiveallocations/AdaptiveAllocationsScaler.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/adaptiveallocations/AdaptiveAllocationsScaler.java index 
58259b87c6b00..05e7202b8efe9 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/adaptiveallocations/AdaptiveAllocationsScaler.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/adaptiveallocations/AdaptiveAllocationsScaler.java @@ -170,7 +170,8 @@ Integer scale() { if (maxNumberOfAllocations != null) { numberOfAllocations = Math.min(numberOfAllocations, maxNumberOfAllocations); } - if ((minNumberOfAllocations == null || minNumberOfAllocations == 0) + if (ScaleToZeroFeatureFlag.isEnabled() + && (minNumberOfAllocations == null || minNumberOfAllocations == 0) && timeWithoutRequestsSeconds > SCALE_TO_ZERO_AFTER_NO_REQUESTS_TIME_SECONDS) { logger.debug("[{}] adaptive allocations scaler: scaling down to zero, because of no requests.", deploymentId); numberOfAllocations = 0; diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/adaptiveallocations/AdaptiveAllocationsScalerService.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/adaptiveallocations/AdaptiveAllocationsScalerService.java index 8f43044a465c2..659f709f8eaeb 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/adaptiveallocations/AdaptiveAllocationsScalerService.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/adaptiveallocations/AdaptiveAllocationsScalerService.java @@ -410,7 +410,8 @@ private void processDeploymentStats(GetDeploymentStatsAction.Response statsRespo } public boolean maybeStartAllocation(TrainedModelAssignment assignment) { - if (assignment.getAdaptiveAllocationsSettings() != null + if (ScaleToZeroFeatureFlag.isEnabled() + && assignment.getAdaptiveAllocationsSettings() != null && assignment.getAdaptiveAllocationsSettings().getEnabled() == Boolean.TRUE) { lastScaleUpTimesMillis.put(assignment.getDeploymentId(), System.currentTimeMillis()); updateNumberOfAllocations(assignment.getDeploymentId(), 1); diff --git 
a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/adaptiveallocations/ScaleToZeroFeatureFlag.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/adaptiveallocations/ScaleToZeroFeatureFlag.java new file mode 100644 index 0000000000000..072b8c5593c93 --- /dev/null +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/adaptiveallocations/ScaleToZeroFeatureFlag.java @@ -0,0 +1,20 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.ml.inference.adaptiveallocations; + +import org.elasticsearch.common.util.FeatureFlag; +/** Static-only accessor (private constructor, static members) for the "inference_scale_to_zero" feature flag. Elsewhere in this patch, AdaptiveAllocationsScaler#scale checks isEnabled() before setting the number of allocations to zero, and AdaptiveAllocationsScalerService#maybeStartAllocation checks it before updating a deployment to one allocation. */ +public class ScaleToZeroFeatureFlag { + private ScaleToZeroFeatureFlag() {} + /** Backing flag, created with the name "inference_scale_to_zero". NOTE(review): presumably toggled by the es.inference_scale_to_zero_feature_flag_enabled system property that this patch adds to the test-cluster FeatureFlag enum; confirm against org.elasticsearch.common.util.FeatureFlag. */ + private static final FeatureFlag FEATURE_FLAG = new FeatureFlag("inference_scale_to_zero"); + /** @return the enabled state reported by the backing FeatureFlag instance. */ + public static boolean isEnabled() { + return FEATURE_FLAG.isEnabled(); + } +} diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/adaptiveallocations/AdaptiveAllocationsScalerTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/adaptiveallocations/AdaptiveAllocationsScalerTests.java index 1887ebe8050e0..7d98aaf67a7f3 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/adaptiveallocations/AdaptiveAllocationsScalerTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/adaptiveallocations/AdaptiveAllocationsScalerTests.java @@ -148,6 +148,8 @@ public void testAutoscaling_maxAllocationsSafeguard() { } public void testAutoscaling_scaleDownToZeroAllocations() { + assumeTrue("Should only run if adaptive allocations feature flag is enabled", ScaleToZeroFeatureFlag.isEnabled()); + AdaptiveAllocationsScaler adaptiveAllocationsScaler = new 
AdaptiveAllocationsScaler("test-deployment", 1); // 1 hour with 1 request per 1 seconds, so don't scale. for (int i = 0; i < 3600; i++) { @@ -178,6 +180,8 @@ public void testAutoscaling_scaleDownToZeroAllocations() { } public void testAutoscaling_dontScaleDownToZeroAllocationsWhenMinAllocationsIsSet() { + assumeTrue("Should only run if adaptive allocations feature flag is enabled", ScaleToZeroFeatureFlag.isEnabled()); + AdaptiveAllocationsScaler adaptiveAllocationsScaler = new AdaptiveAllocationsScaler("test-deployment", 1); adaptiveAllocationsScaler.setMinMaxNumberOfAllocations(1, null);