Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ public enum FeatureFlag {
FAILURE_STORE_ENABLED("es.failure_store_feature_flag_enabled=true", Version.fromString("8.12.0"), null),
SUB_OBJECTS_AUTO_ENABLED("es.sub_objects_auto_feature_flag_enabled=true", Version.fromString("8.16.0"), null),
CHUNKING_SETTINGS_ENABLED("es.inference_chunking_settings_feature_flag_enabled=true", Version.fromString("8.16.0"), null),
INFERENCE_SCALE_TO_ZERO("es.inference_scale_to_zero_feature_flag_enabled=true", Version.fromString("8.16.0"), null),
INFERENCE_DEFAULT_ELSER("es.inference_default_elser_feature_flag_enabled=true", Version.fromString("8.16.0"), null);

public final String systemProperty;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,8 @@ Integer scale() {
if (maxNumberOfAllocations != null) {
numberOfAllocations = Math.min(numberOfAllocations, maxNumberOfAllocations);
}
if ((minNumberOfAllocations == null || minNumberOfAllocations == 0)
if (ScaleToZeroFeatureFlag.isEnabled()
&& (minNumberOfAllocations == null || minNumberOfAllocations == 0)
&& timeWithoutRequestsSeconds > SCALE_TO_ZERO_AFTER_NO_REQUESTS_TIME_SECONDS) {
logger.debug("[{}] adaptive allocations scaler: scaling down to zero, because of no requests.", deploymentId);
numberOfAllocations = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,8 @@ private void processDeploymentStats(GetDeploymentStatsAction.Response statsRespo
}

public boolean maybeStartAllocation(TrainedModelAssignment assignment) {
if (assignment.getAdaptiveAllocationsSettings() != null
if (ScaleToZeroFeatureFlag.isEnabled()
&& assignment.getAdaptiveAllocationsSettings() != null
&& assignment.getAdaptiveAllocationsSettings().getEnabled() == Boolean.TRUE) {
lastScaleUpTimesMillis.put(assignment.getDeploymentId(), System.currentTimeMillis());
updateNumberOfAllocations(assignment.getDeploymentId(), 1);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.ml.inference.adaptiveallocations;

import org.elasticsearch.common.util.FeatureFlag;

/**
 * Static accessor for the {@code inference_scale_to_zero} feature flag.
 *
 * <p>Holds one shared {@link FeatureFlag} instance and exposes its state through
 * {@link #isEnabled()}. The class has only static members and cannot be instantiated.
 */
public class ScaleToZeroFeatureFlag {

    /** The single flag instance, created once when this class is initialized. */
    private static final FeatureFlag SCALE_TO_ZERO = new FeatureFlag("inference_scale_to_zero");

    // Static-only holder: the private constructor prevents instantiation.
    private ScaleToZeroFeatureFlag() {
    }

    /**
     * Reports the current state of the scale-to-zero flag.
     *
     * @return the value returned by the underlying {@link FeatureFlag#isEnabled()}
     */
    public static boolean isEnabled() {
        final boolean enabled = SCALE_TO_ZERO.isEnabled();
        return enabled;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,8 @@ public void testAutoscaling_maxAllocationsSafeguard() {
}

public void testAutoscaling_scaleDownToZeroAllocations() {
assumeTrue("Should only run if adaptive allocations feature flag is enabled", ScaleToZeroFeatureFlag.isEnabled());

AdaptiveAllocationsScaler adaptiveAllocationsScaler = new AdaptiveAllocationsScaler("test-deployment", 1);
// 1 hour with 1 request per 1 seconds, so don't scale.
for (int i = 0; i < 3600; i++) {
Expand Down Expand Up @@ -178,6 +180,8 @@ public void testAutoscaling_scaleDownToZeroAllocations() {
}

public void testAutoscaling_dontScaleDownToZeroAllocationsWhenMinAllocationsIsSet() {
assumeTrue("Should only run if adaptive allocations feature flag is enabled", ScaleToZeroFeatureFlag.isEnabled());

AdaptiveAllocationsScaler adaptiveAllocationsScaler = new AdaptiveAllocationsScaler("test-deployment", 1);
adaptiveAllocationsScaler.setMinMaxNumberOfAllocations(1, null);

Expand Down
Loading