
Commit c20d2fc

javanna and jpountz authored
[8.x] Limit the number of tasks that a single search can submit (#115932)
Since we removed the search workers thread pool in #111099, we execute many more tasks in the search thread pool, given that each shard search request parallelizes across slices or even segments (knn query rewrite). There are also rare situations where segment-level tasks may parallelize further (e.g. createWeight), which causes the creation of very many tasks for a single top-level request. These are rather small tasks that previously queued up in the unbounded search workers queue. With recent improvements in Lucene, these tasks queue up in the search queue, yet they get executed by the caller thread while they are still in the queue, and they remain in the queue as no-ops until they are pulled out of it.

We have protection against rejections based on turning off search concurrency when there are more than maxPoolSize items in the queue, yet that is not enough if enough parallel requests see an empty queue and manage to submit enough tasks to fill the queue at once. That causes rejections for top-level searches that should not be rejected.

This commit introduces a wrapper for the executor that limits the number of tasks a single search instance can submit, to prevent the situation where a single search submits far more tasks than there are available threads.

Co-authored-by: Adrien Grand <[email protected]>
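To make the failure mode concrete, here is a minimal standalone sketch (plain java.util.concurrent, not Elasticsearch code; the pool, queue, and task counts are arbitrary assumptions): a bounded queue plus one "search" that forks many small tasks is enough to trigger rejections for anything submitted afterwards.

```java
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

public class QueueFloodSketch {
    public static void main(String[] args) {
        // a small pool with a bounded queue, standing in for the search thread pool
        ThreadPoolExecutor pool = new ThreadPoolExecutor(2, 2, 0L, TimeUnit.MILLISECONDS, new LinkedBlockingQueue<>(100));
        try {
            // one "search" forking many small segment-level tasks at once;
            // the sleep keeps workers busy so the queue actually fills up
            for (int i = 0; i < 1_000; i++) {
                pool.execute(() -> {
                    try {
                        Thread.sleep(1_000);
                    } catch (InterruptedException ignored) {
                    }
                });
            }
        } catch (RejectedExecutionException e) {
            // with no cap on forking, later submissions are rejected, including
            // unrelated top-level searches that should never have been rejected
            System.out.println("rejected once the queue filled up");
        } finally {
            pool.shutdownNow();
        }
    }
}
```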
1 parent 37b286d commit c20d2fc

3 files changed: +209 −4 lines changed


docs/changelog/115932.yaml

Lines changed: 5 additions & 0 deletions

@@ -0,0 +1,5 @@
+pr: 115932
+summary: "[8.x] Limit the number of tasks that a single search can submit"
+area: Search
+type: bug
+issues: []

server/src/main/java/org/elasticsearch/search/DefaultSearchContext.java

Lines changed: 34 additions & 1 deletion

@@ -87,6 +87,7 @@
 import java.util.TreeSet;
 import java.util.concurrent.Executor;
 import java.util.concurrent.ThreadPoolExecutor;
+import java.util.concurrent.atomic.AtomicInteger;
 import java.util.function.LongSupplier;
 import java.util.function.ToLongFunction;

@@ -202,7 +203,7 @@ final class DefaultSearchContext extends SearchContext
             engineSearcher.getQueryCache(),
             engineSearcher.getQueryCachingPolicy(),
             lowLevelCancellation,
-            executor,
+            wrapExecutor(executor),
             maximumNumberOfSlices,
             minimumDocsPerSlice
         );
@@ -229,6 +230,36 @@ final class DefaultSearchContext extends SearchContext
         }
     }

+    private static Executor wrapExecutor(Executor executor) {
+        if (executor instanceof ThreadPoolExecutor tpe) {
+            // let this searcher fork to a limited maximum number of tasks, to protect against situations where Lucene may
+            // submit too many segment-level tasks. With enough parallel search requests and segments per shard, they may all
+            // see an empty queue and start parallelizing, filling up the queue very quickly and causing rejections, due to
+            // many small tasks in the queue that become no-ops because the active caller thread executes them instead.
+            // Note that although all tasks get completed, TaskExecutor#invokeAll leaves the leftover no-op tasks in the
+            // queue, hence they contribute to the queue size until they are pulled out of it.
+            AtomicInteger segmentLevelTasks = new AtomicInteger(0);
+            return command -> {
+                if (segmentLevelTasks.incrementAndGet() > tpe.getMaximumPoolSize()) {
+                    try {
+                        command.run();
+                    } finally {
+                        segmentLevelTasks.decrementAndGet();
+                    }
+                } else {
+                    executor.execute(() -> {
+                        try {
+                            command.run();
+                        } finally {
+                            segmentLevelTasks.decrementAndGet();
+                        }
+                    });
+                }
+            };
+        }
+        return executor;
+    }
+
     static long getFieldCardinality(String field, IndexService indexService, DirectoryReader directoryReader) {
         MappedFieldType mappedFieldType = indexService.mapperService().fieldType(field);
         if (mappedFieldType == null) {
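To see the wrapper's effect in isolation, here is a runnable sketch (hypothetical class and method names; the real wrapExecutor above is private to DefaultSearchContext): with a flood of 1,000 commands, at most the pool's maximum size are pending on the pool at any moment, the overflow runs inline on the submitting thread, and nothing is rejected.

```java
import java.util.concurrent.Executor;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;

public class LimitedForkingSketch {
    // same shape as wrapExecutor in the hunk above
    static Executor limit(ThreadPoolExecutor pool) {
        AtomicInteger inFlight = new AtomicInteger(0);
        return command -> {
            if (inFlight.incrementAndGet() > pool.getMaximumPoolSize()) {
                try {
                    command.run(); // over the cap: run inline, nothing is queued
                } finally {
                    inFlight.decrementAndGet();
                }
            } else {
                pool.execute(() -> {
                    try {
                        command.run();
                    } finally {
                        inFlight.decrementAndGet();
                    }
                });
            }
        };
    }

    public static void main(String[] args) throws InterruptedException {
        ThreadPoolExecutor pool = (ThreadPoolExecutor) Executors.newFixedThreadPool(4);
        Executor limited = limit(pool);
        Thread caller = Thread.currentThread();
        AtomicInteger ranInline = new AtomicInteger();
        for (int i = 0; i < 1_000; i++) {
            limited.execute(() -> {
                if (Thread.currentThread() == caller) {
                    ranInline.incrementAndGet(); // executed by the submitter, not the pool
                }
            });
        }
        pool.shutdown();
        pool.awaitTermination(10, TimeUnit.SECONDS);
        // the pool never had more than 4 tasks pending; the rest ran inline
        System.out.println("ran inline on caller: " + ranInline.get());
    }
}
```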
@@ -290,6 +321,8 @@ static int determineMaximumNumberOfSlices(
         boolean enableQueryPhaseParallelCollection,
         ToLongFunction<String> fieldCardinality
     ) {
+        // Note: although this method refers to parallel collection, it affects any kind of parallelism, including query
+        // rewrite, given that if 1 is the returned value, no executor is provided to the searcher.
         return executor instanceof ThreadPoolExecutor tpe
             && tpe.getQueue().size() <= tpe.getMaximumPoolSize()
             && isParallelCollectionSupportedForResults(resultsType, request.source(), fieldCardinality, enableQueryPhaseParallelCollection)
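Read in isolation, the queue-depth gate at the top of that expression can be distilled as follows (a sketch with a hypothetical name, shouldParallelize): an executor is only offered to the searcher while the pool's backlog is no deeper than its thread count.

```java
import java.util.concurrent.Executor;
import java.util.concurrent.ThreadPoolExecutor;

final class ParallelismGate {
    // Hypothetical distillation of the check in determineMaximumNumberOfSlices:
    // returning false stands in for the method returning 1 slice, i.e. no executor
    // is handed to the searcher and the whole request runs sequentially.
    static boolean shouldParallelize(Executor executor) {
        return executor instanceof ThreadPoolExecutor tpe && tpe.getQueue().size() <= tpe.getMaximumPoolSize();
    }
}
```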

server/src/test/java/org/elasticsearch/search/DefaultSearchContextTests.java

Lines changed: 170 additions & 3 deletions

@@ -35,6 +35,7 @@
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.util.concurrent.EsExecutors;
 import org.elasticsearch.common.util.concurrent.ThreadContext;
+import org.elasticsearch.core.SuppressForbidden;
 import org.elasticsearch.core.TimeValue;
 import org.elasticsearch.index.IndexService;
 import org.elasticsearch.index.IndexSettings;
@@ -78,17 +79,33 @@
 import org.elasticsearch.xcontent.XContentBuilder;

 import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
 import java.util.UUID;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.Executor;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.FutureTask;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.RunnableFuture;
 import java.util.concurrent.ThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicInteger;
 import java.util.function.Function;
 import java.util.function.Supplier;
 import java.util.function.ToLongFunction;

 import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.greaterThanOrEqualTo;
 import static org.hamcrest.Matchers.instanceOf;
 import static org.hamcrest.Matchers.is;
+import static org.hamcrest.Matchers.lessThan;
 import static org.mockito.ArgumentMatchers.any;
 import static org.mockito.ArgumentMatchers.anyString;
 import static org.mockito.ArgumentMatchers.eq;
@@ -959,11 +976,161 @@ public void testGetFieldCardinalityRuntimeField() {
         assertEquals(-1, DefaultSearchContext.getFieldCardinality("field", indexService, null));
     }

+    public void testSingleThreadNoSearchConcurrency() throws IOException, ExecutionException, InterruptedException {
+        // with a single thread in the pool the max number of slices will always be 1, hence we won't provide the executor
+        // to the searcher
+        int executorPoolSize = 1;
+        int numIters = randomIntBetween(10, 50);
+        int numSegmentTasks = randomIntBetween(50, 100);
+        AtomicInteger completedTasks = new AtomicInteger(0);
+        ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(executorPoolSize);
+        try {
+            doTestSearchConcurrency(executor, numIters, numSegmentTasks, completedTasks);
+        } finally {
+            terminate(executor);
+        }
+        // Tasks are still created, but the internal executor is a direct one, hence there is no parallelism in practice
+        assertEquals((long) numIters * numSegmentTasks + numIters, completedTasks.get());
+        assertEquals(numIters, executor.getCompletedTaskCount());
+    }
+
+    @SuppressForbidden(reason = "need to provide queue to ThreadPoolExecutor")
+    public void testNoSearchConcurrencyWhenQueueing() throws IOException, ExecutionException, InterruptedException {
+        // with multiple threads, but constant queueing, the max number of slices will always be 1, hence we won't provide
+        // the executor to the searcher
+        int executorPoolSize = randomIntBetween(2, 5);
+        int numIters = randomIntBetween(10, 50);
+        int numSegmentTasks = randomIntBetween(50, 100);
+        AtomicInteger completedTasks = new AtomicInteger(0);
+        final AtomicBoolean terminating = new AtomicBoolean(false);
+        LinkedBlockingQueue<Runnable> queue = new LinkedBlockingQueue<>() {
+            @Override
+            public int size() {
+                // for the purpose of this test we pretend that we always have more items in the queue than threads, but we
+                // need to revert to normal behaviour to ensure graceful shutdown
+                if (terminating.get()) {
+                    return super.size();
+                }
+                return randomIntBetween(executorPoolSize + 1, Integer.MAX_VALUE);
+            }
+        };
+        ThreadPoolExecutor executor = new ThreadPoolExecutor(executorPoolSize, executorPoolSize, 0L, TimeUnit.MILLISECONDS, queue);
+        try {
+            doTestSearchConcurrency(executor, numIters, numSegmentTasks, completedTasks);
+            terminating.set(true);
+        } finally {
+            terminate(executor);
+        }
+        // Tasks are still created, but the internal executor is a direct one, hence there is no parallelism in practice
+        assertEquals((long) numIters * numSegmentTasks + numIters, completedTasks.get());
+        assertEquals(numIters, executor.getCompletedTaskCount());
+    }
+
+    @SuppressForbidden(reason = "need to provide queue to ThreadPoolExecutor")
+    public void testSearchConcurrencyDoesNotCreateMoreTasksThanThreads() throws Exception {
+        // with multiple threads, and not enough queueing to disable parallelism, we will provide the executor to the searcher
+        int executorPoolSize = randomIntBetween(2, 5);
+        int numIters = randomIntBetween(10, 50);
+        int numSegmentTasks = randomIntBetween(50, 100);
+        AtomicInteger completedTasks = new AtomicInteger(0);
+        final AtomicBoolean terminating = new AtomicBoolean(false);
+        LinkedBlockingQueue<Runnable> queue = new LinkedBlockingQueue<>() {
+            @Override
+            public int size() {
+                int size = super.size();
+                // for the purpose of this test we pretend that we only ever have as many items in the queue as there are
+                // threads, but we need to revert to normal behaviour to ensure graceful shutdown
+                if (size <= executorPoolSize || terminating.get()) {
+                    return size;
+                }
+                return randomIntBetween(0, executorPoolSize);
+            }
+        };
+        ThreadPoolExecutor executor = new ThreadPoolExecutor(executorPoolSize, executorPoolSize, 0L, TimeUnit.MILLISECONDS, queue);
+        try {
+            doTestSearchConcurrency(executor, numIters, numSegmentTasks, completedTasks);
+            terminating.set(true);
+        } finally {
+            terminate(executor);
+        }
+        // make sure that we do parallelize execution: each operation will use at minimum as many tasks as threads available
+        assertThat(executor.getCompletedTaskCount(), greaterThanOrEqualTo((long) numIters * executorPoolSize));
+        // while we parallelize, we also limit the number of tasks that each searcher submits
+        assertThat(executor.getCompletedTaskCount(), lessThan((long) numIters * numSegmentTasks));
+        // *3 is just a wild guess to account for tasks that get executed while we are still submitting
+        assertThat(executor.getCompletedTaskCount(), lessThan((long) numIters * executorPoolSize * 3));
+    }
+
+    private void doTestSearchConcurrency(ThreadPoolExecutor executor, int numIters, int numSegmentTasks, AtomicInteger completedTasks)
+        throws IOException, ExecutionException, InterruptedException {
+        DefaultSearchContext[] contexts = new DefaultSearchContext[numIters];
+        for (int i = 0; i < numIters; i++) {
+            contexts[i] = createDefaultSearchContext(executor, randomFrom(SearchService.ResultsType.DFS, SearchService.ResultsType.QUERY));
+        }
+        List<Future<?>> futures = new ArrayList<>(numIters);
+        try {
+            for (int i = 0; i < numIters; i++) {
+                // simulate multiple concurrent search operations that each parallelize their execution across many segment
+                // level tasks via Lucene's TaskExecutor. Segment level tasks are never rejected (they execute on the caller
+                // upon rejection), but the top-level execute call is subject to rejection once the queue is filled with
+                // segment level tasks. That is why we limit the number of tasks that each search can submit.
+                // NOTE: DefaultSearchContext does not provide the executor to the searcher once it sees maxPoolSize items
+                // in the queue.
+                DefaultSearchContext searchContext = contexts[i];
+                AtomicInteger segmentTasksCompleted = new AtomicInteger(0);
+                RunnableFuture<Void> task = new FutureTask<>(() -> {
+                    Collection<Callable<Void>> tasks = new ArrayList<>();
+                    for (int j = 0; j < numSegmentTasks; j++) {
+                        tasks.add(() -> {
+                            segmentTasksCompleted.incrementAndGet();
+                            completedTasks.incrementAndGet();
+                            return null;
+                        });
+                    }
+                    try {
+                        searchContext.searcher().getTaskExecutor().invokeAll(tasks);
+                        // TODO additional calls to invokeAll

+                        // invokeAll is blocking, hence at this point we are done executing all the sub-tasks, but the queue
+                        // may still be filled up with no-op leftover tasks
+                        assertEquals(numSegmentTasks, segmentTasksCompleted.get());
+                    } catch (IOException e) {
+                        throw new UncheckedIOException(e);
+                    } finally {
+                        completedTasks.incrementAndGet();
+                    }
+                    return null;
+                });
+                futures.add(task);
+                executor.execute(task);
+            }
+            for (Future<?> future : futures) {
+                future.get();
+            }
+        } finally {
+            for (DefaultSearchContext searchContext : contexts) {
+                searchContext.indexShard().getThreadPool().shutdown();
+                searchContext.close();
+            }
+        }
+    }
+
+    private DefaultSearchContext createDefaultSearchContext(Executor executor, SearchService.ResultsType resultsType) throws IOException {
+        return createDefaultSearchContext(Settings.EMPTY, null, executor, resultsType);
+    }
+
     private DefaultSearchContext createDefaultSearchContext(Settings providedIndexSettings) throws IOException {
         return createDefaultSearchContext(providedIndexSettings, null);
     }

     private DefaultSearchContext createDefaultSearchContext(Settings providedIndexSettings, XContentBuilder mappings) throws IOException {
+        return createDefaultSearchContext(providedIndexSettings, mappings, null, randomFrom(SearchService.ResultsType.values()));
+    }
+
+    private DefaultSearchContext createDefaultSearchContext(
+        Settings providedIndexSettings,
+        XContentBuilder mappings,
+        Executor executor,
+        SearchService.ResultsType resultsType
+    ) throws IOException {
         TimeValue timeout = new TimeValue(randomIntBetween(1, 100));
         ShardSearchRequest shardSearchRequest = mock(ShardSearchRequest.class);
         when(shardSearchRequest.searchType()).thenReturn(SearchType.DEFAULT);
@@ -1047,9 +1214,9 @@ protected Engine.Searcher acquireSearcherInternal(String source) {
             timeout,
             null,
             false,
-            null,
-            randomFrom(SearchService.ResultsType.values()),
-            randomBoolean(),
+            executor,
+            resultsType,
+            executor != null || randomBoolean(),
             randomInt()
         );
     }
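The tests above steer the queue-depth gate by lying about queue depth rather than by actually queueing work. Condensed into a standalone sketch (plain java.util.concurrent, hypothetical names): overriding LinkedBlockingQueue#size() makes the pool look permanently backed up, while reporting the real size again during shutdown so termination can proceed.

```java
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;

final class FakeBacklogPool {
    // Build a pool whose queue claims to hold more items than there are threads,
    // so a queue-depth gate like the one above always disables parallelism.
    static ThreadPoolExecutor pretendBusyPool(int threads, AtomicBoolean terminating) {
        LinkedBlockingQueue<Runnable> queue = new LinkedBlockingQueue<>() {
            @Override
            public int size() {
                // report the real size once shutdown starts, so the pool can drain
                return terminating.get() ? super.size() : threads + 1;
            }
        };
        return new ThreadPoolExecutor(threads, threads, 0L, TimeUnit.MILLISECONDS, queue);
    }
}
```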
