Clean up debug logging, remove the scale-to-zero interval setting and shorten the node selection method name

davidkyle · davidkyle · commit 6931a3a108af · 2024-10-15T12:31:46.000+01:00
# Conflicts:
#	test/test-clusters/src/main/java/org/elasticsearch/test/cluster/FeatureFlag.java
diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/assignment/TrainedModelAssignment.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/assignment/TrainedModelAssignment.java
@@ -224,10 +224,7 @@ public boolean hasStartedRoutes() {
         return nodeRoutingTable.values().stream().anyMatch(routeInfo -> routeInfo.getState() == RoutingState.STARTED);
     }
 
-    public List<Tuple<String, Integer>> selectRandomNodesWeighedOnAllocationsForNRequestsAndState(
-        int numberOfRequests,
-        RoutingState... acceptableStates
-    ) {
+    public List<Tuple<String, Integer>> selectRandomNodesWeighedOnAllocations(int numberOfRequests, RoutingState... acceptableStates) {
         List<String> nodeIds = new ArrayList<>(nodeRoutingTable.size());
         List<Integer> cumulativeAllocations = new ArrayList<>(nodeRoutingTable.size());
         int allocationSum = 0;
diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/inference/assignment/TrainedModelAssignmentTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/inference/assignment/TrainedModelAssignmentTests.java
@@ -195,15 +195,15 @@ public void testselectRandomStartedNodeWeighedOnAllocationsForNRequests_GivenNoS
         builder.addRoutingEntry("node-2", new RoutingInfo(1, 1, RoutingState.STOPPED, ""));
         TrainedModelAssignment assignment = builder.build();
 
-        assertThat(assignment.selectRandomNodesWeighedOnAllocationsForNRequestsAndState(1, RoutingState.STARTED).isEmpty(), is(true));
+        assertThat(assignment.selectRandomNodesWeighedOnAllocations(1, RoutingState.STARTED).isEmpty(), is(true));
     }
 
     public void testselectRandomStartedNodeWeighedOnAllocationsForNRequests_GivenSingleStartedNode() {
         TrainedModelAssignment.Builder builder = TrainedModelAssignment.Builder.empty(randomTaskParams(5), null);
         builder.addRoutingEntry("node-1", new RoutingInfo(4, 4, RoutingState.STARTED, ""));
         TrainedModelAssignment assignment = builder.build();
 
-        var nodes = assignment.selectRandomNodesWeighedOnAllocationsForNRequestsAndState(1, RoutingState.STARTED);
+        var nodes = assignment.selectRandomNodesWeighedOnAllocations(1, RoutingState.STARTED);
 
         assertThat(nodes, contains(new Tuple<>("node-1", 1)));
     }
@@ -213,7 +213,7 @@ public void testselectRandomStartedNodeWeighedOnAllocationsForNRequests_GivenASh
         builder.addRoutingEntry("node-1", new RoutingInfo(4, 4, RoutingState.STARTED, ""));
         TrainedModelAssignment assignment = builder.build();
 
-        var nodes = assignment.selectRandomNodesWeighedOnAllocationsForNRequestsAndState(1, RoutingState.STOPPING);
+        var nodes = assignment.selectRandomNodesWeighedOnAllocations(1, RoutingState.STOPPING);
 
         assertThat(nodes, empty());
     }
@@ -223,7 +223,7 @@ public void testselectRandomStartedNodeWeighedOnAllocationsForNRequests_GivenASh
         builder.addRoutingEntry("node-1", new RoutingInfo(4, 4, RoutingState.STOPPING, ""));
         TrainedModelAssignment assignment = builder.build();
 
-        var nodes = assignment.selectRandomNodesWeighedOnAllocationsForNRequestsAndState(1, RoutingState.STOPPING);
+        var nodes = assignment.selectRandomNodesWeighedOnAllocations(1, RoutingState.STOPPING);
 
         assertThat(nodes, contains(new Tuple<>("node-1", 1)));
     }
@@ -234,7 +234,7 @@ public void testSingleRequestWith2Nodes() {
         builder.addRoutingEntry("node-2", new RoutingInfo(1, 1, RoutingState.STARTED, ""));
         TrainedModelAssignment assignment = builder.build();
 
-        var nodes = assignment.selectRandomNodesWeighedOnAllocationsForNRequestsAndState(1, RoutingState.STARTED);
+        var nodes = assignment.selectRandomNodesWeighedOnAllocations(1, RoutingState.STARTED);
         assertThat(nodes, hasSize(1));
         assertEquals(nodes.get(0).v2(), Integer.valueOf(1));
     }
@@ -248,7 +248,7 @@ public void testSelectRandomStartedNodeWeighedOnAllocationsForNRequests_GivenMul
 
         final int selectionCount = 10000;
         final CountAccumulator countsPerNodeAccumulator = new CountAccumulator();
-        var nodes = assignment.selectRandomNodesWeighedOnAllocationsForNRequestsAndState(selectionCount, RoutingState.STARTED);
+        var nodes = assignment.selectRandomNodesWeighedOnAllocations(selectionCount, RoutingState.STARTED);
 
         assertThat(nodes, hasSize(3));
         assertThat(nodes.stream().mapToInt(Tuple::v2).sum(), equalTo(selectionCount));
@@ -269,7 +269,7 @@ public void testselectRandomStartedNodeWeighedOnAllocationsForNRequests_GivenMul
         builder.addRoutingEntry("node-3", new RoutingInfo(0, 0, RoutingState.STARTED, ""));
         TrainedModelAssignment assignment = builder.build();
         final int selectionCount = 1000;
-        var nodeCounts = assignment.selectRandomNodesWeighedOnAllocationsForNRequestsAndState(selectionCount, RoutingState.STARTED);
+        var nodeCounts = assignment.selectRandomNodesWeighedOnAllocations(selectionCount, RoutingState.STARTED);
         assertThat(nodeCounts, hasSize(3));
 
         var selectedNodes = new HashSet<String>();
diff --git a/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/AdaptiveAllocationsScaleFromZeroIT.java b/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/AdaptiveAllocationsScaleFromZeroIT.java
@@ -7,15 +7,13 @@
 
 package org.elasticsearch.xpack.ml.integration;
 
-import org.elasticsearch.client.Request;
+import org.apache.lucene.tests.util.LuceneTestCase;
 import org.elasticsearch.client.Response;
 import org.elasticsearch.client.ResponseListener;
 import org.elasticsearch.common.xcontent.support.XContentMapValues;
 import org.elasticsearch.core.TimeValue;
 import org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings;
-import org.junit.Before;
 
-import java.io.IOException;
 import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
@@ -29,22 +27,9 @@
 import static org.hamcrest.Matchers.not;
 import static org.hamcrest.Matchers.nullValue;
 
+@LuceneTestCase.AwaitsFix(bugUrl = "Cannot test without setting the scale to zero period to a small value")
 public class AdaptiveAllocationsScaleFromZeroIT extends PyTorchModelRestTestCase {
 
-    @Before
-    public void setShortScaleToZeroPeriod() throws IOException {
-        logger.info("setting time");
-        Request scaleToZeroTime = new Request("PUT", "_cluster/settings");
-        scaleToZeroTime.setJsonEntity("""
-            {
-              "persistent": {
-                "xpack.ml.adaptive_allocations_scale_to_zero_interval": "2s"
-              }
-            }""");
-
-        client().performRequest(scaleToZeroTime);
-    }
-
     @SuppressWarnings("unchecked")
     public void testScaleFromZero() throws Exception {
         String modelId = "test_scale_from_zero";
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java
@@ -758,18 +758,6 @@ public void loadExtensions(ExtensionLoader loader) {
      */
     public static final int MAX_LOW_PRIORITY_MODELS_PER_NODE = 100;
 
-    /**
-     * The time interval without any requests that has to pass, before scaling down
-     * to zero allocations.
-     */
-    public static final Setting<TimeValue> ADAPTIVE_ALLOCATIONS_SCALE_TO_ZERO_INTERVAL = Setting.timeSetting(
-        "xpack.ml.adaptive_allocations_scale_to_zero_interval",
-        TimeValue.timeValueMinutes(15),
-        TimeValue.timeValueSeconds(1),
-        Property.Dynamic,
-        Setting.Property.NodeScope
-    );
-
     private static final Logger logger = LogManager.getLogger(MachineLearning.class);
     private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(MachineLearning.class);
 
@@ -829,8 +817,7 @@ public List<Setting<?>> getSettings() {
             MAX_ML_NODE_SIZE,
             DELAYED_DATA_CHECK_FREQ,
             DUMMY_ENTITY_MEMORY,
-            DUMMY_ENTITY_PROCESSORS,
-            ADAPTIVE_ALLOCATIONS_SCALE_TO_ZERO_INTERVAL
+            DUMMY_ENTITY_PROCESSORS
         );
     }
 
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportInternalInferModelAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportInternalInferModelAction.java
@@ -264,22 +264,19 @@ private void inferAgainstAllocatedModel(
 
         // Get a list of nodes to send the requests to and the number of
         // documents for each node.
-        var nodes = assignment.selectRandomNodesWeighedOnAllocationsForNRequestsAndState(request.numberOfDocuments(), RoutingState.STARTED);
+        var nodes = assignment.selectRandomNodesWeighedOnAllocations(request.numberOfDocuments(), RoutingState.STARTED);
 
         // We couldn't find any nodes in the started state so let's look for ones that are stopping in case we're shutting down some nodes
         if (nodes.isEmpty()) {
-            nodes = assignment.selectRandomNodesWeighedOnAllocationsForNRequestsAndState(
-                request.numberOfDocuments(),
-                RoutingState.STOPPING
-            );
+            nodes = assignment.selectRandomNodesWeighedOnAllocations(request.numberOfDocuments(), RoutingState.STOPPING);
         }
 
         if (nodes.isEmpty()) {
             String message = "Trained model deployment [" + request.getId() + "] is not allocated to any nodes";
             boolean starting = adaptiveAllocationsScalerService.maybeStartAllocation(assignment);
             if (starting) {
                 message += "; starting deployment of one allocation";
-                logger.info(message);
+                logger.debug(message);
                 waitForAllocation.waitForAssignment(
                     new InferenceWaitForAllocation.WaitingRequest(request, responseBuilder, parentTaskId, listener)
                 );
@@ -299,10 +296,7 @@ private void inferAgainstAllocatedModel(
     private void inferOnBlockedRequest(InferenceWaitForAllocation.WaitingRequest request, TrainedModelAssignment assignment) {
         threadPool.executor(MachineLearning.UTILITY_THREAD_POOL_NAME).execute(() -> {
 
-            var nodes = assignment.selectRandomNodesWeighedOnAllocationsForNRequestsAndState(
-                request.request().numberOfDocuments(),
-                RoutingState.STARTED
-            );
+            var nodes = assignment.selectRandomNodesWeighedOnAllocations(request.request().numberOfDocuments(), RoutingState.STARTED);
 
             if (nodes.isEmpty()) {
                 request.listener()
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/InferenceWaitForAllocation.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/InferenceWaitForAllocation.java
@@ -84,7 +84,6 @@ public InferenceWaitForAllocation(
      * @param request The inference request details
      */
     public synchronized void waitForAssignment(WaitingRequest request) {
-        logger.info("waitForAssignment will wait for condition");
         if (pendingRequestCount.incrementAndGet() >= MAX_PENDING_REQUEST_COUNT) {
             pendingRequestCount.decrementAndGet();
             request.listener.onFailure(
@@ -103,7 +102,7 @@ public synchronized void waitForAssignment(WaitingRequest request) {
             request.deploymentId(),
             predicate,
             request.request().getInferenceTimeout(),
-            new WaitingListener(request.deploymentId(), request, predicate)
+            new WaitingListener(request, predicate)
         );
     }
 
@@ -118,14 +117,20 @@ private static class DeploymentHasAtLeastOneAllocation implements Predicate<Clus
 
         @Override
         public boolean test(ClusterState clusterState) {
-            logger.info("predicate test");
             TrainedModelAssignment trainedModelAssignment = TrainedModelAssignmentMetadata.assignmentForDeploymentId(
                 clusterState,
                 deploymentId
             ).orElse(null);
             if (trainedModelAssignment == null) {
                 logger.info(() -> format("[%s] assignment was null while waiting to scale up", deploymentId));
-                return false;
+                exception.set(
+                    new ElasticsearchStatusException(
+                        "[{}] Error waiting for a model allocation, model assignment has been removed",
+                        RestStatus.CONFLICT,
+                        deploymentId
+                    )
+                );
+                return true; // don't try again
             }
 
             Map<String, String> nodeFailuresAndReasons = new HashMap<>();
@@ -151,24 +156,16 @@ public boolean test(ClusterState clusterState) {
             }
 
             var routable = trainedModelAssignment.getNodeRoutingTable().values().stream().filter(RoutingInfo::isRoutable).findFirst();
-            if (routable.isPresent()) {
-                logger.info("first route " + routable.get() + ", state" + trainedModelAssignment.calculateAllocationStatus());
-            } else {
-                logger.info("no routes");
-            }
-
             return routable.isPresent();
         }
     }
 
     private class WaitingListener implements TrainedModelAssignmentService.WaitForAssignmentListener {
 
-        private final String deploymentId;
         private final WaitingRequest request;
         private final DeploymentHasAtLeastOneAllocation predicate;
 
-        private WaitingListener(String deploymentId, WaitingRequest request, DeploymentHasAtLeastOneAllocation predicate) {
-            this.deploymentId = deploymentId;
+        private WaitingListener(WaitingRequest request, DeploymentHasAtLeastOneAllocation predicate) {
             this.request = request;
             this.predicate = predicate;
         }
@@ -183,13 +180,11 @@ public void onResponse(TrainedModelAssignment assignment) {
                 return;
             }
 
-            logger.info("sending waited request");
             queuedConsumer.accept(request, assignment);
         }
 
         @Override
         public void onFailure(Exception e) {
-            logger.info("failed waiting", e);
             pendingRequestCount.decrementAndGet();
             request.listener().onFailure(e);
         }
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/adaptiveallocations/AdaptiveAllocationsScaler.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/adaptiveallocations/AdaptiveAllocationsScaler.java
@@ -75,11 +75,6 @@ void setMinMaxNumberOfAllocations(Integer minNumberOfAllocations, Integer maxNum
         this.maxNumberOfAllocations = maxNumberOfAllocations;
     }
 
-    void setScaleToZeroPeriod(long inactivitySeconds) {
-        logger.info("setting scale to zero " + inactivitySeconds);
-        this.scaleToZeroAfterNoRequestsSeconds = inactivitySeconds;
-    }
-
     void process(AdaptiveAllocationsScalerService.Stats stats, double timeIntervalSeconds, int numberOfAllocations) {
         lastMeasuredQueueSize = stats.pendingCount();
         if (stats.requestCount() > 0) {
@@ -144,9 +139,6 @@ Double getInferenceTimeEstimate() {
 
     Integer scale() {
 
-        logger.info("[{}] checking scale.", deploymentId);
-        logger.info("[{}] to zero", scaleToZeroAfterNoRequestsSeconds);
-
         if (requestRateEstimator.hasValue() == false) {
             return null;
         }
@@ -180,7 +172,7 @@ Integer scale() {
 
             if (oldNumberOfAllocations != 0) {
                 // avoid logging this message if there is no change
-                logger.info("[{}] adaptive allocations scaler: scaling down to zero, because of no requests.", deploymentId);
+                logger.debug("[{}] adaptive allocations scaler: scaling down to zero, because of no requests.", deploymentId);
             }
             numberOfAllocations = 0;
             neededNumberOfAllocations = 0;
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/adaptiveallocations/AdaptiveAllocationsScalerService.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/adaptiveallocations/AdaptiveAllocationsScalerService.java
@@ -43,11 +43,8 @@
 import java.util.Set;
 import java.util.concurrent.ConcurrentSkipListSet;
 import java.util.concurrent.atomic.AtomicBoolean;
-import java.util.concurrent.atomic.AtomicLong;
 import java.util.function.Function;
 
-import static org.elasticsearch.xpack.ml.MachineLearning.ADAPTIVE_ALLOCATIONS_SCALE_TO_ZERO_INTERVAL;
-
 /**
  * Periodically schedules adaptive allocations scaling. This process consists
  * of calling the trained model stats API, processing the results, determining
@@ -188,6 +185,12 @@ Collection<DoubleWithAttributes> observeDouble(Function<AdaptiveAllocationsScale
      */
     private static final long SCALE_UP_COOLDOWN_TIME_MILLIS = TimeValue.timeValueMinutes(5).getMillis();
 
+    /**
+     * The time interval without any requests that has to elapse before scaling
+     * down to zero allocations (only relevant when min_allocations = 0).
+     */
+    private static final long SCALE_TO_ZERO_AFTER_NO_REQUESTS_TIME_SECONDS = TimeValue.timeValueMinutes(15).getSeconds();
+
     private static final Logger logger = LogManager.getLogger(AdaptiveAllocationsScalerService.class);
 
     private final int timeIntervalSeconds;
@@ -206,7 +209,7 @@ Collection<DoubleWithAttributes> observeDouble(Function<AdaptiveAllocationsScale
     private volatile Scheduler.Cancellable cancellable;
     private final AtomicBoolean busy;
 
-    private final AtomicLong scaleToZeroAfterNoRequestsSeconds = new AtomicLong();
+    private final long scaleToZeroAfterNoRequestsSeconds;
 
     private final Set<String> deploymentIdsWithInFlightScaleFromZeroRequests = new ConcurrentSkipListSet<>();
 
@@ -245,10 +248,7 @@ public AdaptiveAllocationsScalerService(
         scalers = new HashMap<>();
         metrics = new Metrics();
         busy = new AtomicBoolean(false);
-
-        setScaleToZeroPeriod(ADAPTIVE_ALLOCATIONS_SCALE_TO_ZERO_INTERVAL.get(clusterService.getSettings()));
-        clusterService.getClusterSettings()
-            .addSettingsUpdateConsumer(ADAPTIVE_ALLOCATIONS_SCALE_TO_ZERO_INTERVAL, this::setScaleToZeroPeriod);
+        scaleToZeroAfterNoRequestsSeconds = SCALE_TO_ZERO_AFTER_NO_REQUESTS_TIME_SECONDS;
     }
 
     public synchronized void start() {
@@ -295,7 +295,7 @@ private synchronized void updateAutoscalers(ClusterState state) {
                     key -> new AdaptiveAllocationsScaler(
                         assignment.getDeploymentId(),
                         assignment.totalTargetAllocations(),
-                        scaleToZeroAfterNoRequestsSeconds.get()
+                        scaleToZeroAfterNoRequestsSeconds
                     )
                 );
                 adaptiveAllocationsScaler.setMinMaxNumberOfAllocations(
@@ -336,8 +336,6 @@ private synchronized void stopScheduling() {
     }
 
     private void trigger() {
-        logger.info("trigger adaptive");
-
         if (busy.getAndSet(true)) {
             logger.debug("Skipping inference adaptive allocations scaling, because it's still busy.");
             return;
@@ -350,7 +348,6 @@ private void trigger() {
     }
 
     private void getDeploymentStats(ActionListener<GetDeploymentStatsAction.Response> processDeploymentStats) {
-        logger.info("get deployment stats");
         String deploymentIds = String.join(",", scalers.keySet());
         ClientHelper.executeAsyncWithOrigin(
             client,
@@ -472,17 +469,12 @@ private void updateNumberOfAllocations(
         );
     }
 
-    private void setScaleToZeroPeriod(TimeValue timeValue) {
-        logger.info("setting scaler service to zero " + timeValue);
-        scaleToZeroAfterNoRequestsSeconds.set(timeValue.seconds());
-    }
-
     private ActionListener<CreateTrainedModelAssignmentAction.Response> updateAssigmentListener(
         String deploymentId,
         int numberOfAllocations
     ) {
         return ActionListener.wrap(updateResponse -> {
-            logger.info("adaptive allocations scaler: scaled [{}] to [{}] allocations.", deploymentId, numberOfAllocations);
+            logger.debug("adaptive allocations scaler: scaled [{}] to [{}] allocations.", deploymentId, numberOfAllocations);
             threadPool.executor(MachineLearning.UTILITY_THREAD_POOL_NAME)
                 .execute(
                     () -> inferenceAuditor.info(
diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/adaptiveallocations/AdaptiveAllocationsScalerServiceTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/adaptiveallocations/AdaptiveAllocationsScalerServiceTests.java