Changes from 5 commits
5 changes: 5 additions & 0 deletions docs/changelog/115668.yaml
@@ -0,0 +1,5 @@
pr: 115668
summary: Limit the number of tasks that a single search can submit
area: Search
type: bug
issues: []
DefaultSearchContext.java
@@ -87,6 +87,7 @@
import java.util.TreeSet;
import java.util.concurrent.Executor;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.LongSupplier;
import java.util.function.ToLongFunction;

@@ -202,7 +203,7 @@ final class DefaultSearchContext extends SearchContext {
engineSearcher.getQueryCache(),
engineSearcher.getQueryCachingPolicy(),
lowLevelCancellation,
executor,
wrapExecutor(executor),
maximumNumberOfSlices,
minimumDocsPerSlice
);
@@ -229,6 +230,32 @@ final class DefaultSearchContext extends SearchContext {
}
}

private static Executor wrapExecutor(Executor executor) {
if (executor instanceof ThreadPoolExecutor tpe) {
// let this searcher fork to a limited maximum number of tasks, to protect against situations where Lucene may
// submit too many segment-level tasks. With enough parallel search requests and segments per shard, they may all see
// an empty queue and start parallelizing, filling up the queue very quickly and causing rejections, because the
// queue holds many small tasks that become no-ops once the active caller thread executes them instead.
// Note that even though all tasks complete, TaskExecutor#invokeAll leaves the leftover no-op tasks in the queue,
// hence they contribute to the queue size until they are removed from it.
AtomicInteger segmentLevelTasks = new AtomicInteger(0);
return command -> {
if (segmentLevelTasks.incrementAndGet() > tpe.getMaximumPoolSize()) {
Member Author:
I am open to opinions on the threshold; it is quite conservative. For operations like knn query rewrite that parallelize on the number of segments, we end up creating far fewer tasks on small nodes. Still, it is probably a good idea not to create more tasks than there are available threads, and there could be multiple shard-level requests happening at the same time on the same node, deriving from the same search or from others, so the total number of tasks is still potentially higher than the max pool size anyway.

We should probably improve this in Lucene as a follow-up, but this provides some protection, mostly for 8.x, which is based on Lucene 9.12.

command.run();
Contributor:
Don't you need to decrement the counter on this code path as well?

} else {
executor.execute(() -> {
try {
command.run();
} finally {
segmentLevelTasks.decrementAndGet();
}
});
}
Contributor:
Should we decrement the counter when the task is done executing in order to allow search parallelism again later on?

Member Author:
Good point, I've been going back and forth on it.

The current impl removes the need for a CAS once we have used up the budget. It is also much simpler to test. These are maybe minor points, though. The current solution may look too conservative around the number of tasks that get created: if they are executed fast enough, we could indeed create more tasks than the total number of threads, although not all at the same time. I wonder how likely that is as a real scenario, given that TaskExecutor submits all tasks at once, not gradually. That is why I think this simple solution provides the most value, assuming that all tasks are submitted at once. I guess this impl may hurt latency a little in favor of throughput. Also, we already apply the same limit to the number of slices, statically. We would effectively just apply the same limit to knn query rewrite and to degenerate cases where we end up parallelizing from a segment-level task, which seems wrong anyway and is something we should protect against.
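As a standalone illustration of the budget-once idea (all names here are hypothetical; this is a reduction of the approach under discussion, not the committed code), the wrapper boils down to the following. The plain read before the increment is an optional fast path, not part of the change itself, that avoids even the increment's CAS once the budget is spent:

import java.util.concurrent.Executor;
import java.util.concurrent.atomic.AtomicInteger;

final class BudgetedExecutor {
    // Fork at most `budget` tasks over the lifetime of this searcher; once the
    // budget is spent, every subsequent command runs on the caller thread,
    // mirroring what Lucene does when a task is rejected.
    static Executor wrapWithBudget(Executor executor, int budget) {
        AtomicInteger forked = new AtomicInteger(0);
        return command -> {
            // The get() check is a fast path: once the budget is exhausted it
            // avoids the CAS that incrementAndGet would otherwise perform.
            if (forked.get() >= budget || forked.incrementAndGet() > budget) {
                command.run(); // over budget: execute on the caller
            } else {
                executor.execute(command);
            }
        };
    }
}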

Additional thoughts?

Contributor:
I guess my main concern is that if a query parallelizes at rewrite time (or createWeight time) and then again at collection time, and the query rewrite uses up the whole parallelism budget, then you won't get any parallelism at collection time.
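To make the concern concrete, a hypothetical two-stage sequence; Lucene's TaskExecutor is real, while BudgetedExecutor reuses the illustrative sketch above and everything else is made up for the example:

import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import org.apache.lucene.search.TaskExecutor;

public class BudgetExhaustionDemo {
    public static void main(String[] args) throws Exception {
        ExecutorService pool = Executors.newFixedThreadPool(4);
        // Budget equal to the pool size, as in the change under review.
        TaskExecutor taskExecutor = new TaskExecutor(BudgetedExecutor.wrapWithBudget(pool, 4));

        // Stage 1 (think rewrite): these tasks may fork to pool threads and they
        // consume the entire budget. The caller can also pick some of them up.
        List<Callable<String>> rewrite = List.of(
            () -> Thread.currentThread().getName(),
            () -> Thread.currentThread().getName(),
            () -> Thread.currentThread().getName(),
            () -> Thread.currentThread().getName()
        );
        System.out.println("rewrite ran on: " + taskExecutor.invokeAll(rewrite));

        // Stage 2 (think collection): the budget is spent, so every task runs
        // sequentially on the caller thread ("main").
        List<Callable<String>> collect = List.of(
            () -> Thread.currentThread().getName(),
            () -> Thread.currentThread().getName()
        );
        System.out.println("collect ran on: " + taskExecutor.invokeAll(collect));
        pool.shutdown();
    }
}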

Member Author:
Yes, my comment above does not take into account that invokeAll may be called multiple times at different stages of a search, for instance at rewrite and later at collection. It is also true that when we do parallelize at rewrite, we care less about parallelizing during collection, but that is an assumption that may only hold in the short term. I do think that we should eventually adjust things in Lucene and remove this conditional in ES in the long run.

I will look further into how we can make this a little better without over-complicating things.

Contributor:
I was trying the same thing at one point, and it's somewhat tricky to avoid a serious penalty for tiny tasks if you start increasing the amount of ref-counting that is done overall like this.
The beauty of this solution is that it's pretty much a single thread doing all the counting; the CAS cost is far from trivial here. I couldn't make any scheme that added another CAS (on top of the existing CAS we do when enqueuing work in Lucene) work without a measurable regression.
My vote would be to see if we can find a more helpful API on the Lucene end to deal with the various contention/scheduling tradeoffs and issues we have today, rather than add complexity here => I'd go with Luca's solution as is, I think :)

Member Author:
I pushed a new commit that does the decrement. Testing has become much less predictable, as I expected. I do also worry about the CAS for each task. We should probably benchmark both solutions and see what the difference is: what do we lose by e.g. only parallelizing the knn query rewrite and not collection when both happen, compared to what we lose by performing the CAS every time?
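For reference, a minimal sketch of the decrementing variant under discussion, with the counter released on both paths so that later invokeAll rounds regain their budget (names are illustrative; this is not necessarily the committed code):

import java.util.concurrent.Executor;
import java.util.concurrent.atomic.AtomicInteger;

final class ReleasingBudgetExecutor {
    static Executor wrap(Executor executor, int maxOutstanding) {
        AtomicInteger outstanding = new AtomicInteger(0);
        return command -> {
            if (outstanding.incrementAndGet() > maxOutstanding) {
                try {
                    command.run(); // over budget: run on the caller
                } finally {
                    outstanding.decrementAndGet(); // release on this path too
                }
            } else {
                executor.execute(() -> {
                    try {
                        command.run();
                    } finally {
                        outstanding.decrementAndGet(); // release once the forked task completes
                    }
                });
            }
        };
    }
}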

};
}
return executor;
}

static long getFieldCardinality(String field, IndexService indexService, DirectoryReader directoryReader) {
MappedFieldType mappedFieldType = indexService.mapperService().fieldType(field);
if (mappedFieldType == null) {
@@ -290,6 +317,8 @@ static int determineMaximumNumberOfSlices(
boolean enableQueryPhaseParallelCollection,
ToLongFunction<String> fieldCardinality
) {
// Note: although this method refers to parallel collection, it affects any kind of parallelism, including query rewrite,
// given that if the returned value is 1, no executor is provided to the searcher.
return executor instanceof ThreadPoolExecutor tpe
&& tpe.getQueue().size() <= tpe.getMaximumPoolSize()
&& isParallelCollectionSupportedForResults(resultsType, request.source(), fieldCardinality, enableQueryPhaseParallelCollection)
DefaultSearchContextTests.java
@@ -35,6 +35,7 @@
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.concurrent.EsExecutors;
import org.elasticsearch.common.util.concurrent.ThreadContext;
import org.elasticsearch.core.SuppressForbidden;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.IndexSettings;
@@ -78,17 +79,33 @@
import org.elasticsearch.xcontent.XContentBuilder;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.UUID;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Executor;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.FutureTask;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.RunnableFuture;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.function.ToLongFunction;

import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.lessThan;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.ArgumentMatchers.eq;
@@ -959,11 +976,161 @@ public void testGetFieldCardinalityRuntimeField() {
assertEquals(-1, DefaultSearchContext.getFieldCardinality("field", indexService, null));
}

public void testSingleThreadNoSearchConcurrency() throws IOException, ExecutionException, InterruptedException {
// with a single thread in the pool the max number of slices will always be 1, hence we won't provide the executor to the searcher
int executorPoolSize = 1;
int numIters = randomIntBetween(10, 50);
int numSegmentTasks = randomIntBetween(50, 100);
AtomicInteger completedTasks = new AtomicInteger(0);
ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(executorPoolSize);
try {
doTestSearchConcurrency(executor, numIters, numSegmentTasks, completedTasks);
} finally {
terminate(executor);
}
// Tasks are still created, but the internal executor is a direct one, hence there is no parallelism in practice
assertEquals((long) numIters * numSegmentTasks + numIters, completedTasks.get());
assertEquals(numIters, executor.getCompletedTaskCount());
}

@SuppressForbidden(reason = "need to provide queue to ThreadPoolExecutor")
public void testNoSearchConcurrencyWhenQueueing() throws IOException, ExecutionException, InterruptedException {
// with multiple threads, but constant queueing, the max number of slices will always be 1, hence we won't provide the
// executor to the searcher
int executorPoolSize = randomIntBetween(2, 5);
int numIters = randomIntBetween(10, 50);
int numSegmentTasks = randomIntBetween(50, 100);
AtomicInteger completedTasks = new AtomicInteger(0);
final AtomicBoolean terminating = new AtomicBoolean(false);
LinkedBlockingQueue<Runnable> queue = new LinkedBlockingQueue<>() {
@Override
public int size() {
// for the purpose of this test we pretend that we always have more items in the queue than threads, but we need to revert
// to normal behaviour to ensure graceful shutdown
if (terminating.get()) {
return super.size();
}
return randomIntBetween(executorPoolSize + 1, Integer.MAX_VALUE);
}
};
ThreadPoolExecutor executor = new ThreadPoolExecutor(executorPoolSize, executorPoolSize, 0L, TimeUnit.MILLISECONDS, queue);
try {
doTestSearchConcurrency(executor, numIters, numSegmentTasks, completedTasks);
terminating.set(true);
} finally {
terminate(executor);
}
// Tasks are still created, but the internal executor is a direct one, hence there is no parallelism in practice
assertEquals((long) numIters * numSegmentTasks + numIters, completedTasks.get());
assertEquals(numIters, executor.getCompletedTaskCount());
}

@SuppressForbidden(reason = "need to provide queue to ThreadPoolExecutor")
public void testSearchConcurrencyDoesNotCreateMoreTasksThanThreads() throws Exception {
// with multiple threads, but not enough queueing to disable parallelism, we will provide the executor to the searcher
int executorPoolSize = randomIntBetween(2, 5);
int numIters = randomIntBetween(10, 50);
int numSegmentTasks = randomIntBetween(50, 100);
AtomicInteger completedTasks = new AtomicInteger(0);
final AtomicBoolean terminating = new AtomicBoolean(false);
LinkedBlockingQueue<Runnable> queue = new LinkedBlockingQueue<>() {
@Override
public int size() {
int size = super.size();
// for the purpose of this test we pretend that we only ever have at most as many items in the queue as there are
// threads, but we need to revert to normal behaviour to ensure graceful shutdown
if (size <= executorPoolSize || terminating.get()) {
return size;
}
return randomIntBetween(0, executorPoolSize);
}
};
ThreadPoolExecutor executor = new ThreadPoolExecutor(executorPoolSize, executorPoolSize, 0L, TimeUnit.MILLISECONDS, queue);
try {
doTestSearchConcurrency(executor, numIters, numSegmentTasks, completedTasks);
terminating.set(true);
} finally {
terminate(executor);
}
// make sure that we do parallelize execution: each operation will use at minimum as many tasks as there are threads available
assertThat(executor.getCompletedTaskCount(), greaterThanOrEqualTo((long) numIters * executorPoolSize));
// while we parallelize we also limit the number of tasks that each searcher submits
assertThat(executor.getCompletedTaskCount(), lessThan((long) numIters * numSegmentTasks));
// *2 is just a wild guess to account for tasks that get executed while we are still submitting
assertThat(executor.getCompletedTaskCount(), lessThan((long) numIters * executorPoolSize * 2));
}

private void doTestSearchConcurrency(ThreadPoolExecutor executor, int numIters, int numSegmentTasks, AtomicInteger completedTasks)
throws IOException, ExecutionException, InterruptedException {
DefaultSearchContext[] contexts = new DefaultSearchContext[numIters];
for (int i = 0; i < numIters; i++) {
contexts[i] = createDefaultSearchContext(executor, randomFrom(SearchService.ResultsType.DFS, SearchService.ResultsType.QUERY));
}
List<Future<?>> futures = new ArrayList<>(numIters);
try {
for (int i = 0; i < numIters; i++) {
// simulate multiple concurrent search operations that each parallelize their execution across many segment-level tasks
// via Lucene's TaskExecutor. Segment-level tasks are never rejected (they execute on the caller upon rejection), but
// the top-level execute call is subject to rejection once the queue is filled with segment-level tasks. That is why
// we want to limit the number of tasks that each search can parallelize to.
// NOTE: DefaultSearchContext does not provide the executor to the searcher once it sees maxPoolSize items in the queue.
DefaultSearchContext searchContext = contexts[i];
AtomicInteger segmentTasksCompleted = new AtomicInteger(0);
RunnableFuture<Void> task = new FutureTask<>(() -> {
Collection<Callable<Void>> tasks = new ArrayList<>();
for (int j = 0; j < numSegmentTasks; j++) {
tasks.add(() -> {
segmentTasksCompleted.incrementAndGet();
completedTasks.incrementAndGet();
return null;
});
}
try {
searchContext.searcher().getTaskExecutor().invokeAll(tasks);
// TODO additional calls to invokeAll

// invokeAll is blocking, hence at this point we are done executing all the sub-tasks, but the queue may
// still be filled up with no-op leftover tasks
assertEquals(numSegmentTasks, segmentTasksCompleted.get());
} catch (IOException e) {
throw new UncheckedIOException(e);
} finally {
completedTasks.incrementAndGet();
}
return null;
});
futures.add(task);
executor.execute(task);
}
for (Future<?> future : futures) {
future.get();
}
} finally {
for (DefaultSearchContext searchContext : contexts) {
searchContext.indexShard().getThreadPool().shutdown();
searchContext.close();
}
}
}

private DefaultSearchContext createDefaultSearchContext(Executor executor, SearchService.ResultsType resultsType) throws IOException {
return createDefaultSearchContext(Settings.EMPTY, null, executor, resultsType);
}

private DefaultSearchContext createDefaultSearchContext(Settings providedIndexSettings) throws IOException {
return createDefaultSearchContext(providedIndexSettings, null);
}

private DefaultSearchContext createDefaultSearchContext(Settings providedIndexSettings, XContentBuilder mappings) throws IOException {
return createDefaultSearchContext(providedIndexSettings, mappings, null, randomFrom(SearchService.ResultsType.values()));
}

private DefaultSearchContext createDefaultSearchContext(
Settings providedIndexSettings,
XContentBuilder mappings,
Executor executor,
SearchService.ResultsType resultsType
) throws IOException {
TimeValue timeout = new TimeValue(randomIntBetween(1, 100));
ShardSearchRequest shardSearchRequest = mock(ShardSearchRequest.class);
when(shardSearchRequest.searchType()).thenReturn(SearchType.DEFAULT);
@@ -1047,9 +1214,9 @@ protected Engine.Searcher acquireSearcherInternal(String source) {
timeout,
null,
false,
null,
randomFrom(SearchService.ResultsType.values()),
randomBoolean(),
executor,
resultsType,
executor != null || randomBoolean(),
randomInt()
);
}