
Commit e9f1723

Commit message: review
1 parent e649ffb commit e9f1723

File tree

3 files changed, +7 -8 lines changed


x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/AdaptiveAllocationsScaleFromZeroIT.java

Lines changed: 2 additions & 3 deletions

@@ -23,6 +23,7 @@
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.TimeUnit;
 
+import static org.hamcrest.Matchers.empty;
 import static org.hamcrest.Matchers.hasSize;
 import static org.hamcrest.Matchers.is;
 import static org.hamcrest.Matchers.not;
@@ -93,9 +94,7 @@ public void onFailure(Exception exception) {
         }
 
         latch.await();
-        if (failures.isEmpty() == false) {
-            fail(failures.getFirst());
-        }
+        assertThat(failures, empty());
     }
 
     @SuppressWarnings("unchecked")
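
The switch to the Hamcrest matcher also improves failure reporting: assertThat(failures, empty()) prints every collected failure in the assertion message, whereas fail(failures.getFirst()) surfaced only the first one. Below is a standalone sketch of that behaviour (hypothetical class name and data, not part of this commit; assumes Hamcrest is on the classpath):

import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.empty;

import java.util.List;

public class EmptyMatcherSketch {
    public static void main(String[] args) {
        // Pretend two failures were collected by concurrent listeners.
        List<String> failures = List.of("timeout waiting for allocation", "503 from node-1");

        try {
            // The matcher's mismatch description lists the whole collection, e.g.
            //   Expected: an empty collection
            //        but: <[timeout waiting for allocation, 503 from node-1]>
            assertThat(failures, empty());
        } catch (AssertionError e) {
            System.out.println(e.getMessage());
        }
    }
}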

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportInternalInferModelAction.java

Lines changed: 3 additions & 3 deletions

@@ -71,7 +71,7 @@ public class TransportInternalInferModelAction extends HandledTransportAction<Re
     private final XPackLicenseState licenseState;
     private final TrainedModelProvider trainedModelProvider;
     private final AdaptiveAllocationsScalerService adaptiveAllocationsScalerService;
-    private final InferenceWaitForAllocation scalingInference;
+    private final InferenceWaitForAllocation waitForAllocation;
     private final ThreadPool threadPool;
 
     TransportInternalInferModelAction(
@@ -94,7 +94,7 @@ public class TransportInternalInferModelAction extends HandledTransportAction<Re
         this.licenseState = licenseState;
         this.trainedModelProvider = trainedModelProvider;
         this.adaptiveAllocationsScalerService = adaptiveAllocationsScalerService;
-        this.scalingInference = new InferenceWaitForAllocation(assignmentService, this::inferOnBlockedRequest);
+        this.waitForAllocation = new InferenceWaitForAllocation(assignmentService, this::inferOnBlockedRequest);
         this.threadPool = threadPool;
     }
 
@@ -280,7 +280,7 @@ private void inferAgainstAllocatedModel(
         if (starting) {
            message += "; starting deployment of one allocation";
            logger.info(message);
-           scalingInference.waitForAssignment(
+           waitForAllocation.waitForAssignment(
                new InferenceWaitForAllocation.WaitingRequest(request, responseBuilder, parentTaskId, listener)
            );
            return;

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/InferenceWaitForAllocation.java

Lines changed: 2 additions & 2 deletions

@@ -85,7 +85,8 @@ public InferenceWaitForAllocation(
     */
    public synchronized void waitForAssignment(WaitingRequest request) {
        logger.info("waitForAssignment will wait for condition");
-       if (pendingRequestCount.get() > MAX_PENDING_REQUEST_COUNT) {
+       if (pendingRequestCount.incrementAndGet() >= MAX_PENDING_REQUEST_COUNT) {
+           pendingRequestCount.decrementAndGet();
            request.listener.onFailure(
                new ElasticsearchStatusException(
                    "Rejected inference request waiting for an allocation of deployment [{}]. Too many pending requests",
@@ -96,7 +97,6 @@ public synchronized void waitForAssignment(WaitingRequest request) {
            return;
        }
 
-       pendingRequestCount.incrementAndGet();
        var predicate = new DeploymentHasAtLeastOneAllocation(request.deploymentId());
 
        assignmentService.waitForAssignmentCondition(
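
The waitForAssignment change reserves a slot before checking the limit: the counter is incremented, compared against the maximum, and decremented again on the rejection path, so the reservation and the check happen as one step instead of a get() followed by a later incrementAndGet(). A minimal sketch of that reserve-then-release pattern (class and method names are hypothetical, not the Elasticsearch code):

import java.util.concurrent.atomic.AtomicInteger;

/**
 * Sketch of "increment first, check, undo on rejection" with an AtomicInteger.
 * MAX_PENDING, tryAdmit and release are illustrative names only.
 */
public class PendingRequestLimiter {
    private static final int MAX_PENDING = 100;
    private final AtomicInteger pending = new AtomicInteger();

    /** Reserve a slot; returns false (leaving the counter unchanged) if the limit is hit. */
    public boolean tryAdmit() {
        // Incrementing before the comparison ties the reservation to the check,
        // so the counter can never silently drift past the limit.
        if (pending.incrementAndGet() >= MAX_PENDING) {
            pending.decrementAndGet(); // roll back the reservation before rejecting
            return false;
        }
        return true;
    }

    /** Call when an admitted request completes. */
    public void release() {
        pending.decrementAndGet();
    }
}

A caller would reject the request when tryAdmit() returns false and invoke release() once an admitted request finishes, mirroring how the diff decrements pendingRequestCount on the rejection path.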
