diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/RequestExecutorService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/RequestExecutorService.java index e3fff14bf95d7..cbf823d96d8d9 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/RequestExecutorService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/RequestExecutorService.java @@ -449,10 +449,6 @@ public synchronized TimeValue executeEnqueuedTask() { } private TimeValue executeEnqueuedTaskInternal() { - var timeBeforeAvailableToken = rateLimiter.timeToReserve(1); - if (shouldExecuteImmediately(timeBeforeAvailableToken) == false) { - return timeBeforeAvailableToken; - } var task = queue.poll(); @@ -460,16 +456,16 @@ private TimeValue executeEnqueuedTaskInternal() { // So we'll need to check for null and call a helper method executePreparedTasks() if (shouldExecuteTask(task) == false) { - return NO_TASKS_AVAILABLE; + logger.warn( + "not executing task [{}] because it is null or has already completed", + task == null ? "null" : task.getRequestManager().inferenceEntityId() + ); + return TimeValue.ZERO; } - // We should never have to wait because we checked above - var reserveRes = rateLimiter.reserve(1); - assert shouldExecuteImmediately(reserveRes) : "Reserving request tokens required a sleep when it should not have"; - task.getRequestManager() .execute(task.getInferenceInputs(), requestSender, task.getRequestCompletedFunction(), task.getListener()); - return EXECUTED_A_TASK; + return TimeValue.ZERO; } private static boolean shouldExecuteTask(RejectableTask task) {