From f4a1a81e51a4211b7943496e21cf9344c0a163bc Mon Sep 17 00:00:00 2001 From: Dimitris Rempapis Date: Tue, 9 Sep 2025 13:43:27 +0300 Subject: [PATCH 01/32] AsyncSearch: make MutableSearchResponse ref-counted --- ...MutableSearchResponseRefCountingTests.java | 160 ++++++++++++++++++ .../xpack/search/AsyncSearchTask.java | 8 +- .../xpack/search/MutableSearchResponse.java | 9 +- 3 files changed, 174 insertions(+), 3 deletions(-) create mode 100644 x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/MutableSearchResponseRefCountingTests.java diff --git a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/MutableSearchResponseRefCountingTests.java b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/MutableSearchResponseRefCountingTests.java new file mode 100644 index 0000000000000..ae3d0632606c0 --- /dev/null +++ b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/MutableSearchResponseRefCountingTests.java @@ -0,0 +1,160 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ +package org.elasticsearch.xpack.search; + +import org.elasticsearch.ElasticsearchStatusException; +import org.elasticsearch.ExceptionsHelper; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.action.search.ShardSearchFailure; +import org.elasticsearch.common.lucene.Lucene; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.rest.RestStatus; +import org.elasticsearch.search.SearchHits; +import org.elasticsearch.search.aggregations.AggregationReduceContext; +import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; +import org.elasticsearch.tasks.TaskId; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.client.NoOpClient; +import org.elasticsearch.threadpool.TestThreadPool; +import org.elasticsearch.xpack.core.async.AsyncExecutionId; +import org.elasticsearch.xpack.core.search.action.AsyncSearchResponse; +import org.junit.After; +import org.junit.Before; + +import java.lang.reflect.Field; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.util.Map; + +import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.is; + +public class MutableSearchResponseRefCountingTests extends ESTestCase { + + private TestThreadPool threadPool; + private NoOpClient client; + + @Before + public void setup() { + this.threadPool = new TestThreadPool(getTestName()); + this.client = new NoOpClient(threadPool); + } + + @After + public void cleanup() throws Exception { + terminate(threadPool); + } + + public void testBuildSucceedsIfAnotherThreadHoldsRef() { + final int totalShards = 1; + final int skippedShards = 0; + + // Build a SearchResponse (sr refCount -> 1) + SearchResponse searchResponse = createSearchResponse(totalShards, totalShards, skippedShards); + + // Take a ref - (msr refCount -> 1, sr refCount -> 2) + MutableSearchResponse msr = new MutableSearchResponse(threadPool.getThreadContext()); + msr.updateShardsAndClusters(totalShards, skippedShards, null); + msr.updateFinalResponse(searchResponse, false); + + searchResponse.decRef(); // sr refCount -> 1 + + //Simulate another thread : take a resource (msr refCount -> 2) + msr.incRef(); + // close resource (msr refCount -> 1) -> closeInternal not called yet + msr.decRef(); + + // Build a response + AsyncSearchResponse resp = msr.toAsyncSearchResponse(createAsyncSearchTask(), + System.currentTimeMillis() + 60_000, /*restoreResponseHeaders*/ + false); + try { + assertNotNull("Expect SearchResponse when a live ref prevents close", resp.getSearchResponse()); + assertNull("No failure expected while ref is held", resp.getFailure()); + assertFalse("Response should not be marked running", resp.isRunning()); + } finally { + resp.decRef(); + } + + // Release msr (msr refCount -> 0, sr refCount -> 0) -> now calling closeInternal + msr.decRef(); + } + + + public void testGetResponseAfterCloseReturnsGone () throws Exception { + final int totalShards = 1; + final int skippedShards = 0; + + // Build a SearchResponse (sr refCount -> 1) + SearchResponse searchResponse = createSearchResponse(totalShards, totalShards, skippedShards); + + // Create an AsyncSearchTask + AsyncSearchTask task = createAsyncSearchTask(); + + // Get response instance and method from task + Field f = AsyncSearchTask.class.getDeclaredField("searchResponse"); + f.setAccessible(true); + Method m = AsyncSearchTask.class.getDeclaredMethod("getResponseWithHeaders"); + m.setAccessible(true); + + // Take a ref - (msr refCount -> 1, sr refCount -> 2) + MutableSearchResponse msr = (MutableSearchResponse) f.get(task); + msr.updateShardsAndClusters(totalShards, skippedShards, null); + msr.updateFinalResponse(searchResponse, false); + + searchResponse.decRef(); // sr ref -> 1 + msr.decRef(); // msr ref -> 0 -> closeInternal() -> sr ref -> 0 + + // Invoke getResponseWithHeaders and expect GONE exception + InvocationTargetException ite = expectThrows(InvocationTargetException.class, () -> { + AsyncSearchResponse resp = (AsyncSearchResponse) m.invoke(task); + if (resp != null) { + resp.decRef(); + } + }); + + Throwable cause = ExceptionsHelper.unwrapCause(ite.getCause()); + assertThat(cause, instanceOf(ElasticsearchStatusException.class)); + assertThat(ExceptionsHelper.status(cause), is(RestStatus.GONE)); + } + + private AsyncSearchTask createAsyncSearchTask() { + return new AsyncSearchTask( + 1L, + "search", + "indices:data/read/search", + TaskId.EMPTY_TASK_ID, + () -> "debug", + TimeValue.timeValueMinutes(1), + Map.of(), + Map.of(), + new AsyncExecutionId("debug", new TaskId("node", 1L)), + client, + threadPool, + isCancelled -> () -> new AggregationReduceContext.ForFinal(null, null, null, null, null, PipelineAggregator.PipelineTree.EMPTY) + ); + } + + private SearchResponse createSearchResponse(int totalShards, int successfulShards, int skippedShards) { + return new SearchResponse( + SearchHits.empty(Lucene.TOTAL_HITS_GREATER_OR_EQUAL_TO_ZERO, Float.NaN), + null, + null, + false, + false, + null, + 0, + null, + totalShards, + successfulShards, + skippedShards, + 1L, + ShardSearchFailure.EMPTY_ARRAY, + null + ); + } +} diff --git a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java index d14b60f7b77f8..abc83416bbce4 100644 --- a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java +++ b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java @@ -26,6 +26,7 @@ import org.elasticsearch.core.Releasable; import org.elasticsearch.core.Releasables; import org.elasticsearch.core.TimeValue; +import org.elasticsearch.rest.RestStatus; import org.elasticsearch.search.SearchShardTarget; import org.elasticsearch.search.aggregations.AggregationReduceContext; import org.elasticsearch.search.aggregations.InternalAggregations; @@ -344,6 +345,9 @@ private AsyncSearchResponse getResponse(boolean restoreResponseHeaders) { assert mutableSearchResponse != null; checkCancellation(); AsyncSearchResponse asyncSearchResponse; + if (mutableSearchResponse.tryIncRef() == false) { + throw new ElasticsearchStatusException("async-search result, no longer available", RestStatus.GONE); + } try { asyncSearchResponse = mutableSearchResponse.toAsyncSearchResponse(this, expirationTimeMillis, restoreResponseHeaders); } catch (Exception e) { @@ -353,6 +357,8 @@ private AsyncSearchResponse getResponse(boolean restoreResponseHeaders) { e ); asyncSearchResponse = mutableSearchResponse.toAsyncSearchResponse(this, expirationTimeMillis, exception); + } finally { + mutableSearchResponse.decRef(); } return asyncSearchResponse; } @@ -381,7 +387,7 @@ public static AsyncStatusResponse getStatusResponse(AsyncSearchTask asyncTask) { @Override public void close() { - Releasables.close(searchResponse); + searchResponse.decRef(); } class Listener extends SearchProgressActionListener { diff --git a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/MutableSearchResponse.java b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/MutableSearchResponse.java index 11ff403237888..3f0bc28997946 100644 --- a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/MutableSearchResponse.java +++ b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/MutableSearchResponse.java @@ -17,6 +17,7 @@ import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.util.concurrent.AtomicArray; import org.elasticsearch.common.util.concurrent.ThreadContext; +import org.elasticsearch.core.AbstractRefCounted; import org.elasticsearch.core.Releasable; import org.elasticsearch.core.TimeValue; import org.elasticsearch.search.SearchHits; @@ -38,7 +39,7 @@ * creating an async response concurrently. This limits the number of final reduction that can * run concurrently to 1 and ensures that we pause the search progress when an {@link AsyncSearchResponse} is built. */ -class MutableSearchResponse implements Releasable { +class MutableSearchResponse extends AbstractRefCounted { private int totalShards; private int skippedShards; private Clusters clusters; @@ -85,6 +86,7 @@ class MutableSearchResponse implements Releasable { * @param threadContext The thread context to retrieve the final response headers. */ MutableSearchResponse(ThreadContext threadContext) { + super(); this.isPartial = true; this.threadContext = threadContext; this.totalHits = Lucene.TOTAL_HITS_GREATER_OR_EQUAL_TO_ZERO; @@ -487,14 +489,17 @@ private String getShardsInResponseMismatchInfo(SearchResponse response, boolean } @Override - public synchronized void close() { + protected synchronized void closeInternal() { if (finalResponse != null) { finalResponse.decRef(); + finalResponse = null; } if (clusterResponses != null) { for (SearchResponse clusterResponse : clusterResponses) { clusterResponse.decRef(); } + clusterResponses.clear(); + clusterResponses = null; } } } From 025e9e32dbe509f20cb79d7e471a5905952b93ca Mon Sep 17 00:00:00 2001 From: Dimitris Rempapis Date: Tue, 9 Sep 2025 13:52:25 +0300 Subject: [PATCH 02/32] Update docs/changelog/134359.yaml --- docs/changelog/134359.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 docs/changelog/134359.yaml diff --git a/docs/changelog/134359.yaml b/docs/changelog/134359.yaml new file mode 100644 index 0000000000000..89cdcb63704d2 --- /dev/null +++ b/docs/changelog/134359.yaml @@ -0,0 +1,6 @@ +pr: 134359 +summary: Make `MutableSearchResponse` ref-counted to prevent use-after-close in async + search +area: Search +type: bug +issues: [] From 47f554e70e11b123aa96c72bc988c27c06d8a9ec Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 9 Sep 2025 10:59:01 +0000 Subject: [PATCH 03/32] [CI] Auto commit changes from spotless --- .../MutableSearchResponseRefCountingTests.java | 13 +++++++------ .../elasticsearch/xpack/search/AsyncSearchTask.java | 1 - .../xpack/search/MutableSearchResponse.java | 1 - 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/MutableSearchResponseRefCountingTests.java b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/MutableSearchResponseRefCountingTests.java index ae3d0632606c0..89ac4d41af51b 100644 --- a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/MutableSearchResponseRefCountingTests.java +++ b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/MutableSearchResponseRefCountingTests.java @@ -63,15 +63,17 @@ public void testBuildSucceedsIfAnotherThreadHoldsRef() { searchResponse.decRef(); // sr refCount -> 1 - //Simulate another thread : take a resource (msr refCount -> 2) + // Simulate another thread : take a resource (msr refCount -> 2) msr.incRef(); // close resource (msr refCount -> 1) -> closeInternal not called yet msr.decRef(); // Build a response - AsyncSearchResponse resp = msr.toAsyncSearchResponse(createAsyncSearchTask(), + AsyncSearchResponse resp = msr.toAsyncSearchResponse( + createAsyncSearchTask(), System.currentTimeMillis() + 60_000, /*restoreResponseHeaders*/ - false); + false + ); try { assertNotNull("Expect SearchResponse when a live ref prevents close", resp.getSearchResponse()); assertNull("No failure expected while ref is held", resp.getFailure()); @@ -84,8 +86,7 @@ public void testBuildSucceedsIfAnotherThreadHoldsRef() { msr.decRef(); } - - public void testGetResponseAfterCloseReturnsGone () throws Exception { + public void testGetResponseAfterCloseReturnsGone() throws Exception { final int totalShards = 1; final int skippedShards = 0; @@ -112,7 +113,7 @@ public void testGetResponseAfterCloseReturnsGone () throws Exception { // Invoke getResponseWithHeaders and expect GONE exception InvocationTargetException ite = expectThrows(InvocationTargetException.class, () -> { AsyncSearchResponse resp = (AsyncSearchResponse) m.invoke(task); - if (resp != null) { + if (resp != null) { resp.decRef(); } }); diff --git a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java index abc83416bbce4..42935837b7c99 100644 --- a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java +++ b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java @@ -24,7 +24,6 @@ import org.elasticsearch.action.search.TransportSearchAction; import org.elasticsearch.client.internal.Client; import org.elasticsearch.core.Releasable; -import org.elasticsearch.core.Releasables; import org.elasticsearch.core.TimeValue; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.search.SearchShardTarget; diff --git a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/MutableSearchResponse.java b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/MutableSearchResponse.java index 3f0bc28997946..7545d82f6b32e 100644 --- a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/MutableSearchResponse.java +++ b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/MutableSearchResponse.java @@ -18,7 +18,6 @@ import org.elasticsearch.common.util.concurrent.AtomicArray; import org.elasticsearch.common.util.concurrent.ThreadContext; import org.elasticsearch.core.AbstractRefCounted; -import org.elasticsearch.core.Releasable; import org.elasticsearch.core.TimeValue; import org.elasticsearch.search.SearchHits; import org.elasticsearch.search.aggregations.InternalAggregations; From 4a2cdc0903b7f7864cd248cf401fd3d19cc9b87a Mon Sep 17 00:00:00 2001 From: Dimitris Rempapis Date: Tue, 9 Sep 2025 14:12:40 +0300 Subject: [PATCH 04/32] apply spot --- .../MutableSearchResponseRefCountingTests.java | 13 +++++++------ .../elasticsearch/xpack/search/AsyncSearchTask.java | 1 - .../xpack/search/MutableSearchResponse.java | 1 - 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/MutableSearchResponseRefCountingTests.java b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/MutableSearchResponseRefCountingTests.java index ae3d0632606c0..89ac4d41af51b 100644 --- a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/MutableSearchResponseRefCountingTests.java +++ b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/MutableSearchResponseRefCountingTests.java @@ -63,15 +63,17 @@ public void testBuildSucceedsIfAnotherThreadHoldsRef() { searchResponse.decRef(); // sr refCount -> 1 - //Simulate another thread : take a resource (msr refCount -> 2) + // Simulate another thread : take a resource (msr refCount -> 2) msr.incRef(); // close resource (msr refCount -> 1) -> closeInternal not called yet msr.decRef(); // Build a response - AsyncSearchResponse resp = msr.toAsyncSearchResponse(createAsyncSearchTask(), + AsyncSearchResponse resp = msr.toAsyncSearchResponse( + createAsyncSearchTask(), System.currentTimeMillis() + 60_000, /*restoreResponseHeaders*/ - false); + false + ); try { assertNotNull("Expect SearchResponse when a live ref prevents close", resp.getSearchResponse()); assertNull("No failure expected while ref is held", resp.getFailure()); @@ -84,8 +86,7 @@ public void testBuildSucceedsIfAnotherThreadHoldsRef() { msr.decRef(); } - - public void testGetResponseAfterCloseReturnsGone () throws Exception { + public void testGetResponseAfterCloseReturnsGone() throws Exception { final int totalShards = 1; final int skippedShards = 0; @@ -112,7 +113,7 @@ public void testGetResponseAfterCloseReturnsGone () throws Exception { // Invoke getResponseWithHeaders and expect GONE exception InvocationTargetException ite = expectThrows(InvocationTargetException.class, () -> { AsyncSearchResponse resp = (AsyncSearchResponse) m.invoke(task); - if (resp != null) { + if (resp != null) { resp.decRef(); } }); diff --git a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java index abc83416bbce4..42935837b7c99 100644 --- a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java +++ b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java @@ -24,7 +24,6 @@ import org.elasticsearch.action.search.TransportSearchAction; import org.elasticsearch.client.internal.Client; import org.elasticsearch.core.Releasable; -import org.elasticsearch.core.Releasables; import org.elasticsearch.core.TimeValue; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.search.SearchShardTarget; diff --git a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/MutableSearchResponse.java b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/MutableSearchResponse.java index 3f0bc28997946..7545d82f6b32e 100644 --- a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/MutableSearchResponse.java +++ b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/MutableSearchResponse.java @@ -18,7 +18,6 @@ import org.elasticsearch.common.util.concurrent.AtomicArray; import org.elasticsearch.common.util.concurrent.ThreadContext; import org.elasticsearch.core.AbstractRefCounted; -import org.elasticsearch.core.Releasable; import org.elasticsearch.core.TimeValue; import org.elasticsearch.search.SearchHits; import org.elasticsearch.search.aggregations.InternalAggregations; From 0c7e61d352788404409687532330aaf5e7d894be Mon Sep 17 00:00:00 2001 From: Dimitris Rempapis Date: Tue, 14 Oct 2025 11:04:29 +0300 Subject: [PATCH 05/32] update code --- .../xpack/search/AsyncSearchTask.java | 71 +++++++++++++++++-- 1 file changed, 65 insertions(+), 6 deletions(-) diff --git a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java index 42935837b7c99..5906b375efac6 100644 --- a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java +++ b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java @@ -22,6 +22,7 @@ import org.elasticsearch.action.search.SearchTask; import org.elasticsearch.action.search.ShardSearchFailure; import org.elasticsearch.action.search.TransportSearchAction; +import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.client.internal.Client; import org.elasticsearch.core.Releasable; import org.elasticsearch.core.TimeValue; @@ -36,6 +37,7 @@ import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.xpack.core.async.AsyncExecutionId; import org.elasticsearch.xpack.core.async.AsyncTask; +import org.elasticsearch.xpack.core.async.AsyncTaskIndexService; import org.elasticsearch.xpack.core.search.action.AsyncSearchResponse; import org.elasticsearch.xpack.core.search.action.AsyncStatusResponse; @@ -74,6 +76,10 @@ final class AsyncSearchTask extends SearchTask implements AsyncTask, Releasable private final MutableSearchResponse searchResponse; + private final AsyncTaskIndexService store; + + private static final TimeValue STORE_FETCH_TIMEOUT = TimeValue.timeValueSeconds(30); + /** * Creates an instance of {@link AsyncSearchTask}. * @@ -100,7 +106,8 @@ final class AsyncSearchTask extends SearchTask implements AsyncTask, Releasable AsyncExecutionId searchId, Client client, ThreadPool threadPool, - Function, Supplier> aggReduceContextSupplierFactory + Function, Supplier> aggReduceContextSupplierFactory, + AsyncTaskIndexService store ) { super(id, type, action, () -> "async_search{" + descriptionSupplier.get() + "}", parentTaskId, taskHeaders); this.expirationTimeMillis = getStartTime() + keepAlive.getMillis(); @@ -112,6 +119,7 @@ final class AsyncSearchTask extends SearchTask implements AsyncTask, Releasable this.progressListener = new Listener(); setProgressListener(progressListener); searchResponse = new MutableSearchResponse(threadPool.getThreadContext()); + this.store = store; } /** @@ -203,7 +211,12 @@ public boolean addCompletionListener(ActionListener listene } } if (executeImmediately) { - ActionListener.respondAndRelease(listener, getResponseWithHeaders()); + try { + AsyncSearchResponse resp = getResponseWithHeaders(); + ActionListener.respondAndRelease(listener, resp); + } catch (Exception e) { + listener.onFailure(e); + } } return true; // unused } @@ -246,7 +259,13 @@ private void internalAddCompletionListener(ActionListener l if (hasRun.compareAndSet(false, true)) { // timeout occurred before completion removeCompletionListener(id); - ActionListener.respondAndRelease(listener, getResponseWithHeaders()); + + try { + AsyncSearchResponse resp = getResponseWithHeaders(); + ActionListener.respondAndRelease(listener, resp); + } catch (Exception e) { + listener.onFailure(e); + } } }, waitForCompletion, threadPool.generic()); } catch (Exception exc) { @@ -263,7 +282,12 @@ private void internalAddCompletionListener(ActionListener l } } if (executeImmediately) { - ActionListener.respondAndRelease(listener, getResponseWithHeaders()); + try { + AsyncSearchResponse resp = getResponseWithHeaders(); + ActionListener.respondAndRelease(listener, resp); + } catch (Exception e) { + listener.onFailure(e); + } } } @@ -314,7 +338,24 @@ private void executeCompletionListeners() { } // we don't need to restore the response headers, they should be included in the current // context since we are called by the search action listener. - AsyncSearchResponse finalResponse = getResponse(); + AsyncSearchResponse finalResponse; + + try { + // do NOT restore headers here; we’re on the search action thread already + finalResponse = getResponse(); // must NOT propagate + } catch (Exception e) { + ElasticsearchException ex = (e instanceof ElasticsearchException) + ? (ElasticsearchException) e + : ExceptionsHelper.convertToElastic(e); + + finalResponse = AsyncSearchResponse.failure( + searchId.getEncoded(), + getStartTime(), + expirationTimeMillis, + ex + ); + } + try { for (Consumer consumer : completionsListenersCopy.values()) { consumer.accept(finalResponse); @@ -344,9 +385,27 @@ private AsyncSearchResponse getResponse(boolean restoreResponseHeaders) { assert mutableSearchResponse != null; checkCancellation(); AsyncSearchResponse asyncSearchResponse; + + // Fallback: in-memory state was released → read from .async-search if (mutableSearchResponse.tryIncRef() == false) { - throw new ElasticsearchStatusException("async-search result, no longer available", RestStatus.GONE); + PlainActionFuture future = new PlainActionFuture<>(); + try { + store.getResponse(searchId, restoreResponseHeaders, future); + } catch (Exception e) { + throw ExceptionsHelper.convertToElastic(e); + } + + try { + return future.actionGet(STORE_FETCH_TIMEOUT); + } catch (Exception e) { + RestStatus st = ExceptionsHelper.status(ExceptionsHelper.unwrapCause(e)); + if (st == RestStatus.NOT_FOUND || st == RestStatus.GONE) { + throw new ElasticsearchStatusException("async-search result no longer available", RestStatus.GONE, e); + } + throw ExceptionsHelper.convertToElastic(e); + } } + try { asyncSearchResponse = mutableSearchResponse.toAsyncSearchResponse(this, expirationTimeMillis, restoreResponseHeaders); } catch (Exception e) { From b02ce526df1d43ef1dbf849c53d47af5543f7835 Mon Sep 17 00:00:00 2001 From: Dimitris Rempapis Date: Tue, 14 Oct 2025 11:04:42 +0300 Subject: [PATCH 06/32] update code --- .../org/elasticsearch/xpack/search/MutableSearchResponse.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/MutableSearchResponse.java b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/MutableSearchResponse.java index 7545d82f6b32e..ae8420d2f8d7d 100644 --- a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/MutableSearchResponse.java +++ b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/MutableSearchResponse.java @@ -233,7 +233,7 @@ private SearchResponse buildResponse(long taskStartTimeNanos, InternalAggregatio * This method is synchronized to ensure that we don't perform final reduces concurrently. * This method also restores the response headers in the current thread context when requested, if the final response is available. */ - synchronized AsyncSearchResponse toAsyncSearchResponse(AsyncSearchTask task, long expirationTime, boolean restoreResponseHeaders) { + synchronized AsyncSearchResponse toAsyncSearchResponse(AsyncSearchTask task, long expirationTime, boolean restoreResponseHeaders) { if (restoreResponseHeaders && responseHeaders != null) { restoreResponseHeadersContext(threadContext, responseHeaders); } @@ -243,6 +243,7 @@ synchronized AsyncSearchResponse toAsyncSearchResponse(AsyncSearchTask task, lon // We have a final response, use it. searchResponse = finalResponse; searchResponse.mustIncRef(); + //System.out.println("Thread:" + Thread.currentThread().getName() + " finalResponse=" + finalResponse); } else if (clusters == null) { // An error occurred before we got the shard list searchResponse = null; From 987434ed96f412d99130201470a1604e91f50f2d Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 14 Oct 2025 08:10:21 +0000 Subject: [PATCH 07/32] [CI] Auto commit changes from spotless --- .../org/elasticsearch/xpack/search/MutableSearchResponse.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/MutableSearchResponse.java b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/MutableSearchResponse.java index b9ab0eef57442..4d8577bf676f0 100644 --- a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/MutableSearchResponse.java +++ b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/MutableSearchResponse.java @@ -6,7 +6,6 @@ */ package org.elasticsearch.xpack.search; -import org.apache.logging.log4j.Logger; import org.apache.lucene.search.TotalHits; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.ExceptionsHelper; @@ -15,7 +14,6 @@ import org.elasticsearch.action.search.SearchResponseMerger; import org.elasticsearch.action.search.ShardSearchFailure; import org.elasticsearch.common.Strings; -import org.elasticsearch.common.logging.Loggers; import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.util.concurrent.AtomicArray; import org.elasticsearch.common.util.concurrent.ThreadContext; From 91eac660f08c9ba9b0636fe1faac0063d6f2bca4 Mon Sep 17 00:00:00 2001 From: Dimitris Rempapis Date: Tue, 14 Oct 2025 11:54:43 +0300 Subject: [PATCH 08/32] update AsyncSearchTask to fallback from the store if entry available --- .../xpack/search/AsyncSearchGoneIT.java | 270 ++++++++++++++++++ .../xpack/search/AsyncSearchRefCountIT.java | 247 ++++++++++++++++ ...MutableSearchResponseRefCountingTests.java | 108 ++++++- .../xpack/search/AsyncSearchTask.java | 18 +- .../xpack/search/MutableSearchResponse.java | 16 +- .../TransportSubmitAsyncSearchAction.java | 3 +- .../xpack/search/AsyncSearchTaskTests.java | 9 +- 7 files changed, 646 insertions(+), 25 deletions(-) create mode 100644 x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchGoneIT.java create mode 100644 x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchRefCountIT.java diff --git a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchGoneIT.java b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchGoneIT.java new file mode 100644 index 0000000000000..ffd851f7e097b --- /dev/null +++ b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchGoneIT.java @@ -0,0 +1,270 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.search; + +import com.carrotsearch.randomizedtesting.annotations.Repeat; + +import org.elasticsearch.ElasticsearchStatusException; +import org.elasticsearch.ExceptionsHelper; +import org.elasticsearch.action.index.IndexRequestBuilder; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.rest.RestStatus; +import org.elasticsearch.search.aggregations.AggregationBuilders; +import org.elasticsearch.search.builder.SearchSourceBuilder; +import org.elasticsearch.test.ESIntegTestCase.SuiteScopeTestCase; +import org.elasticsearch.xpack.core.search.action.AsyncSearchResponse; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.CancellationException; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; +import java.util.concurrent.atomic.LongAdder; + +@SuiteScopeTestCase +public class AsyncSearchGoneIT extends AsyncSearchIntegTestCase { + private static String indexName; + private static int numShards; + + private static int numKeywords; + private static Map keywordFreqs; + private static float maxMetric = Float.NEGATIVE_INFINITY; + private static float minMetric = Float.POSITIVE_INFINITY; + + @Override + public void setupSuiteScopeCluster() throws InterruptedException { + indexName = "test-async"; + numShards = randomIntBetween(1, 20); + int numDocs = randomIntBetween(100, 1000); + createIndex(indexName, Settings.builder().put("index.number_of_shards", numShards).build()); + numKeywords = randomIntBetween(50, 100); + keywordFreqs = new HashMap<>(); + Set keywordSet = new HashSet<>(); + for (int i = 0; i < numKeywords; i++) { + keywordSet.add(randomAlphaOfLengthBetween(10, 20)); + } + numKeywords = keywordSet.size(); + String[] keywords = keywordSet.toArray(String[]::new); + List reqs = new ArrayList<>(); + for (int i = 0; i < numDocs; i++) { + float metric = randomFloat(); + maxMetric = Math.max(metric, maxMetric); + minMetric = Math.min(metric, minMetric); + String keyword = keywords[randomIntBetween(0, numKeywords - 1)]; + keywordFreqs.compute(keyword, (k, v) -> { + if (v == null) { + return new AtomicInteger(1); + } + v.incrementAndGet(); + return v; + }); + reqs.add(prepareIndex(indexName).setSource("terms", keyword, "metric", metric)); + } + indexRandom(true, true, reqs); + } + + @Repeat(iterations = 10000) + public void testConcurrentStatusFetchWhileTaskCloses() throws Exception { + CountDownLatch beforeClose = new CountDownLatch(1); + + final SearchSourceBuilder source = new SearchSourceBuilder() + .aggregation(AggregationBuilders.terms("terms").field("terms.keyword").size(Math.max(1, numKeywords))); + + final int progressStep = (numShards > 2) ? randomIntBetween(2, numShards) : 2; + try (SearchResponseIterator it = assertBlockingIterator(indexName, numShards, source, 0, progressStep)) { + setBeforeCloseLatch(beforeClose); + + String id = getAsyncId(it); + + PollStats stats = new PollStats(); + StartableThreadGroup pollers = startGetStatusThreadsHot(id, 32, stats); + pollers.startHotThreads.countDown(); // release workers + + //Thread.sleep(100); + + // Finish consumption on a separate thread and capture errors + var finisherExec = Executors.newSingleThreadExecutor(); + var finisherError = new AtomicReference(); + Future finisher = finisherExec.submit(() -> { + try { + consumeAllResponses(it, "terms"); + } catch (Throwable t) { + finisherError.set(t); + } + }); + + // Align with the exact completion edge + beforeClose.await(); + + // Keep pollers hot long enough to land inside the small GONE window + pollers.stopAndAwait(TimeValue.timeValueSeconds(3)); + + // Join finisher & surface errors + try { + finisher.get(); + } catch (Exception ignored) { + } finally { finisherExec.shutdown(); } + + if (finisherError.get() != null) { + fail("consumeAllResponses failed: " + finisherError.get()); + } + + printPollerStats(stats); + assertGoneSeen(stats); + } + } + + private void printPollerStats(PollStats stats) { + System.out.println("Total:" + stats.totalCalls.sum()); + System.out.println("Completed:" + stats.completedResponses.sum()); + System.out.println("Running:" + stats.runningResponses.sum()); + System.out.println("Exceptions:" + stats.exceptions.sum()); + System.out.println("Gone:" + stats.gone410.sum()); + } + + private void assertGoneSeen(PollStats stats) { + assertFalse("Expected at least one GONE (410) from concurrent STATUS during task close; totals=" + stats.totalCalls.sum(), + stats.gone410.sum() > 0L); + } + + private String getAsyncId(SearchResponseIterator it) { + AsyncSearchResponse response = it.next(); + try { + assertNotNull(response.getId()); + return response.getId(); + } finally { + response.decRef(); + } + } + + private void consumeAllResponses(SearchResponseIterator it, String aggName) throws Exception { + while (it.hasNext()) { + AsyncSearchResponse response = it.next(); + try { + if (response.getSearchResponse() != null && response.getSearchResponse().getAggregations() != null) { + assertNotNull(response.getSearchResponse().getAggregations().get(aggName)); + } + } finally { + response.decRef(); + } + } + } + + private StartableThreadGroup startGetStatusThreadsHot(String id, int threads, PollStats stats) { + final ExecutorService exec = Executors.newFixedThreadPool(threads); + final List> futures = new ArrayList<>(threads); + final AtomicBoolean running = new AtomicBoolean(true); + final CountDownLatch start = new CountDownLatch(1); + + for (int i = 0; i < threads; i++) { + futures.add(exec.submit(() -> { + start.await(); + while (running.get()) { + AsyncSearchResponse resp = null; + try { + resp = getAsyncSearch(id); + stats.totalCalls.increment(); + + if (resp.isRunning()) { + stats.runningResponses.increment(); + } else { + stats.completedResponses.increment(); + } + } catch (Exception e) { + Throwable cause = ExceptionsHelper.unwrapCause(e); + if (cause instanceof ElasticsearchStatusException) { + RestStatus st = ExceptionsHelper.status(cause); + if (st == RestStatus.GONE) { + stats.gone410.increment(); + continue; + } + if (st == RestStatus.NOT_FOUND) { + continue; + } + } + continue; + } finally { + if (resp != null) { + resp.decRef(); + } + } + } + return null; + })); + } + return new StartableThreadGroup(exec, futures, running, start); + } + + // (Result pollers unchanged/not used for GONE case) + + static final class PollStats { + final LongAdder totalCalls = new LongAdder(); + final LongAdder runningResponses = new LongAdder(); + final LongAdder completedResponses = new LongAdder(); + final LongAdder hadSearchResponse = new LongAdder(); // not used by STATUS + final LongAdder exceptions = new LongAdder(); + final LongAdder gone410 = new LongAdder(); + } + + private final class StartableThreadGroup extends ThreadGroup { + CountDownLatch startHotThreads; + StartableThreadGroup(ExecutorService exec, List> futures, AtomicBoolean running, CountDownLatch startHotThreads) { + super(exec, futures, running); + this.startHotThreads = startHotThreads; + } + } + + class ThreadGroup { + private final ExecutorService exec; + private final List> futures; + private final AtomicBoolean running; + + private ThreadGroup(ExecutorService exec, List> futures, AtomicBoolean running) { + this.exec = exec; + this.futures = futures; + this.running = running; + } + + void stopAndAwait(TimeValue timeout) throws InterruptedException { + running.set(false); + exec.shutdown(); + if (exec.awaitTermination(timeout.millis(), TimeUnit.MILLISECONDS) == false) { + exec.shutdownNow(); + exec.awaitTermination(timeout.millis(), TimeUnit.MILLISECONDS); + } + } + + List getFailures() { + List failures = new ArrayList<>(); + for (Future f : futures) { + try { + f.get(); + } catch (CancellationException ignored) { + } catch (ExecutionException ee) { + failures.add(ExceptionsHelper.unwrapCause(ee.getCause())); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + if (failures.isEmpty()) failures.add(ie); + } + } + return failures; + } + } +} diff --git a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchRefCountIT.java b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchRefCountIT.java new file mode 100644 index 0000000000000..cd23900240743 --- /dev/null +++ b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchRefCountIT.java @@ -0,0 +1,247 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.search; + +import com.carrotsearch.randomizedtesting.annotations.Repeat; + +import org.apache.lucene.store.AlreadyClosedException; +import org.elasticsearch.ExceptionsHelper; +import org.elasticsearch.ResourceNotFoundException; +import org.elasticsearch.TransportVersion; +import org.elasticsearch.action.index.IndexRequestBuilder; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.index.query.MatchAllQueryBuilder; +import org.elasticsearch.indices.SystemIndices; +import org.elasticsearch.rest.RestStatus; +import org.elasticsearch.search.DummyQueryBuilder; +import org.elasticsearch.search.SearchService; +import org.elasticsearch.search.aggregations.AggregationBuilders; +import org.elasticsearch.search.aggregations.bucket.terms.InternalTerms; +import org.elasticsearch.search.aggregations.bucket.terms.StringTerms; +import org.elasticsearch.search.aggregations.metrics.Max; +import org.elasticsearch.search.aggregations.metrics.Min; +import org.elasticsearch.search.builder.SearchSourceBuilder; +import org.elasticsearch.search.query.ThrowingQueryBuilder; +import org.elasticsearch.test.ESIntegTestCase.SuiteScopeTestCase; +import org.elasticsearch.test.TransportVersionUtils; +import org.elasticsearch.test.junit.annotations.TestLogging; +import org.elasticsearch.xpack.core.XPackPlugin; +import org.elasticsearch.xpack.core.search.action.AsyncSearchResponse; +import org.elasticsearch.xpack.core.search.action.AsyncStatusResponse; +import org.elasticsearch.xpack.core.search.action.SubmitAsyncSearchAction; +import org.elasticsearch.xpack.core.search.action.SubmitAsyncSearchRequest; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.CancellationException; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.elasticsearch.search.SearchService.MAX_ASYNC_SEARCH_RESPONSE_SIZE_SETTING; +import static org.elasticsearch.search.aggregations.AggregationBuilders.terms; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.lessThan; +import static org.hamcrest.Matchers.lessThanOrEqualTo; + +@TestLogging( + reason = "testing debug log output to identify race condition", + value = "org.elasticsearch.xpack.search.MutableSearchResponse:DEBUG,org.elasticsearch.xpack.search.AsyncSearchTask:DEBUG" +) +@SuiteScopeTestCase +public class AsyncSearchRefCountIT extends AsyncSearchIntegTestCase { + private static String indexName; + private static int numShards; + + private static int numKeywords; + private static Map keywordFreqs; + private static float maxMetric = Float.NEGATIVE_INFINITY; + private static float minMetric = Float.POSITIVE_INFINITY; + + @Override + public void setupSuiteScopeCluster() throws InterruptedException { + indexName = "test-async"; + numShards = randomIntBetween(1, 20); + int numDocs = randomIntBetween(100, 1000); + createIndex(indexName, Settings.builder().put("index.number_of_shards", numShards).build()); + numKeywords = randomIntBetween(50, 100); + keywordFreqs = new HashMap<>(); + Set keywordSet = new HashSet<>(); + for (int i = 0; i < numKeywords; i++) { + keywordSet.add(randomAlphaOfLengthBetween(10, 20)); + } + numKeywords = keywordSet.size(); + String[] keywords = keywordSet.toArray(String[]::new); + List reqs = new ArrayList<>(); + for (int i = 0; i < numDocs; i++) { + float metric = randomFloat(); + maxMetric = Math.max(metric, maxMetric); + minMetric = Math.min(metric, minMetric); + String keyword = keywords[randomIntBetween(0, numKeywords - 1)]; + keywordFreqs.compute(keyword, (k, v) -> { + if (v == null) { + return new AtomicInteger(1); + } + v.incrementAndGet(); + return v; + }); + reqs.add(prepareIndex(indexName).setSource("terms", keyword, "metric", metric)); + } + indexRandom(true, true, reqs); + } + + /** + * Category 1, point 2 : We receive the final underlying search response, then the AsyncSearchTask#close() + * is invoked when we finish consuming the iterator. + * Category 2, point 1 : We execute repeatedly GET _async_search/{id} + */ + //@Repeat(iterations = 1000) + public void testConcurrentStatusFetchWhileTaskCloses() throws Exception { + final TimeValue WAIT = TimeValue.timeValueMillis(100); + + final SearchSourceBuilder source = new SearchSourceBuilder() + .aggregation(AggregationBuilders.terms("terms").field("terms.keyword").size(Math.max(1, numKeywords))); + + final int progressStep = (numShards > 2) ? randomIntBetween(2, numShards) : 2; + try (SearchResponseIterator it = assertBlockingIterator(indexName, numShards, source, 0, progressStep)) { + String id = getAsyncId(it); + ThreadGroup getThreads = startGetResultThreads(id, 10); + + Thread.sleep(WAIT.millis()); + consumeAllResponses(it, "terms"); + Thread.sleep(WAIT.millis()); + + getThreads.stopAndAwait(WAIT); + cleanupAsyncSearch(id); + + assertError(getThreads.getFailures()); + } + } + + private void assertError( List fails ) { + if (fails.isEmpty() == false) { + for (Throwable failure : fails) { + final String msg = String.valueOf(failure.getMessage()); + if (failure instanceof AssertionError && msg.contains("already closed, can't increment ref count")) { + fail("Race detected in poller thread:\n" + ExceptionsHelper.stackTrace(failure)); + } else { + fail("Unexpected exception in poller thread:\n" + ExceptionsHelper.stackTrace(failure)); + } + } + } + } + + private String getAsyncId(SearchResponseIterator it) { + AsyncSearchResponse response = it.next(); + try { + assertNotNull(response.getId()); + return response.getId(); + } finally { + response.decRef(); + } + } + + private void consumeAllResponses(SearchResponseIterator it, String aggName) throws Exception { + while (it.hasNext()) { + AsyncSearchResponse response = it.next(); + try { + if (response.getSearchResponse() != null && response.getSearchResponse().getAggregations() != null) { + assertNotNull(response.getSearchResponse().getAggregations().get(aggName)); + } + System.out.println("Async response: " + response.getSearchResponse().toString()); + } finally { + response.decRef(); + } + } + } + + private void cleanupAsyncSearch(String id) throws Exception { + ensureTaskCompletion(id); + deleteAsyncSearch(id); + ensureTaskRemoval(id); + } + + private ThreadGroup startGetResultThreads(String id, int threads) { + final ExecutorService exec = Executors.newFixedThreadPool(threads); + final List> futures = new ArrayList<>(threads); + final AtomicBoolean running = new AtomicBoolean(true); + + for (int i = 0; i < threads; i++) { + futures.add(exec.submit(() -> { + while (running.get()) { + AsyncSearchResponse resp = null; + try { + resp = getAsyncSearch(id); + System.out.println("Thread" + Thread.currentThread().getName() + " isRunning: " + resp.isRunning()); + if (resp.isRunning()) { } + } catch (Exception e) { + System.out.println(e.getMessage()); + throw new IllegalArgumentException(); + } finally { + if (resp != null) { + resp.decRef(); + } + } + } + })); + } + return new ThreadGroup(exec, futures, running); + } + + + private final class ThreadGroup { + + private final ExecutorService exec; + private final List> futures; + private final AtomicBoolean running; + + private ThreadGroup(ExecutorService exec, List> futures, AtomicBoolean running) { + this.exec = exec; + this.futures = futures; + this.running = running; + } + + void stopAndAwait(TimeValue timeoutMillis) throws InterruptedException { + running.set(false); + exec.shutdown(); + if (exec.awaitTermination(timeoutMillis.millis(), java.util.concurrent.TimeUnit.MILLISECONDS) == false) { + exec.shutdownNow(); + exec.awaitTermination(timeoutMillis.millis(), java.util.concurrent.TimeUnit.MILLISECONDS); + } + } + + List getFailures() { + List failures = new ArrayList<>(); + for (Future f : futures) { + try { + f.get(); + } catch (CancellationException ignored) { + } catch (ExecutionException ee) { + failures.add(ee.getCause()); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + if (failures.isEmpty()) failures.add(ie); + } + } + return failures; + } + } +} diff --git a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/MutableSearchResponseRefCountingTests.java b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/MutableSearchResponseRefCountingTests.java index 89ac4d41af51b..daa62cada0f2b 100644 --- a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/MutableSearchResponseRefCountingTests.java +++ b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/MutableSearchResponseRefCountingTests.java @@ -8,19 +8,26 @@ import org.elasticsearch.ElasticsearchStatusException; import org.elasticsearch.ExceptionsHelper; +import org.elasticsearch.action.DocWriteResponse; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.action.search.ShardSearchFailure; +import org.elasticsearch.action.support.PlainActionFuture; +import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.lucene.Lucene; +import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.core.SuppressForbidden; import org.elasticsearch.core.TimeValue; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.search.SearchHits; import org.elasticsearch.search.aggregations.AggregationReduceContext; import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; import org.elasticsearch.tasks.TaskId; -import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.ESSingleNodeTestCase; import org.elasticsearch.test.client.NoOpClient; import org.elasticsearch.threadpool.TestThreadPool; +import org.elasticsearch.transport.TransportService; import org.elasticsearch.xpack.core.async.AsyncExecutionId; +import org.elasticsearch.xpack.core.async.AsyncTaskIndexService; import org.elasticsearch.xpack.core.search.action.AsyncSearchResponse; import org.junit.After; import org.junit.Before; @@ -33,7 +40,10 @@ import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.is; -public class MutableSearchResponseRefCountingTests extends ESTestCase { +public class MutableSearchResponseRefCountingTests extends ESSingleNodeTestCase { + + private AsyncTaskIndexService store; + public String index = ".async-search"; private TestThreadPool threadPool; private NoOpClient client; @@ -42,10 +52,24 @@ public class MutableSearchResponseRefCountingTests extends ESTestCase { public void setup() { this.threadPool = new TestThreadPool(getTestName()); this.client = new NoOpClient(threadPool); + + ClusterService clusterService = getInstanceFromNode(ClusterService.class); + TransportService transportService = getInstanceFromNode(TransportService.class); + BigArrays bigArrays = getInstanceFromNode(BigArrays.class); + this.store = new AsyncTaskIndexService<>( + index, + clusterService, + transportService.getThreadPool().getThreadContext(), + client(), + "test_origin", + AsyncSearchResponse::new, + writableRegistry(), + bigArrays + ); } @After - public void cleanup() throws Exception { + public void cleanup() { terminate(threadPool); } @@ -69,11 +93,7 @@ public void testBuildSucceedsIfAnotherThreadHoldsRef() { msr.decRef(); // Build a response - AsyncSearchResponse resp = msr.toAsyncSearchResponse( - createAsyncSearchTask(), - System.currentTimeMillis() + 60_000, /*restoreResponseHeaders*/ - false - ); + AsyncSearchResponse resp = msr.toAsyncSearchResponse(createAsyncSearchTask(), System.currentTimeMillis() + 60_000, false); try { assertNotNull("Expect SearchResponse when a live ref prevents close", resp.getSearchResponse()); assertNull("No failure expected while ref is held", resp.getFailure()); @@ -86,12 +106,69 @@ public void testBuildSucceedsIfAnotherThreadHoldsRef() { msr.decRef(); } - public void testGetResponseAfterCloseReturnsGone() throws Exception { + @SuppressForbidden(reason = "access violation required in order to read private field for this test") + public void testFallbackToStoreWhenInMemoryResponseReleased() throws Exception { final int totalShards = 1; final int skippedShards = 0; - // Build a SearchResponse (sr refCount -> 1) - SearchResponse searchResponse = createSearchResponse(totalShards, totalShards, skippedShards); + // Create an AsyncSearchTask + AsyncSearchTask task = createAsyncSearchTask(); + + // Get response instance and method from task + Field f = AsyncSearchTask.class.getDeclaredField("searchResponse"); + f.setAccessible(true); + Method m = AsyncSearchTask.class.getDeclaredMethod("getResponseWithHeaders"); + m.setAccessible(true); + + // Build a SearchResponse (ssr refCount -> 1) to be stored in the index + SearchResponse storedSearchResponse = createSearchResponse(totalShards, totalShards, skippedShards); + + // Take a ref - (msr refCount -> 1, ssr refCount -> 2) + MutableSearchResponse msr = (MutableSearchResponse) f.get(task); + msr.updateShardsAndClusters(totalShards, skippedShards, null); + msr.updateFinalResponse(storedSearchResponse, false); + + // Build the AsyncSearchResponse to persist + AsyncSearchResponse asyncSearchResponse = null; + try { + long now = System.currentTimeMillis(); + asyncSearchResponse = new AsyncSearchResponse( + task.getExecutionId().getEncoded(), + storedSearchResponse, + null, + false, + false, + now, + now + TimeValue.timeValueMinutes(1).getMillis() + ); + } finally { + if (asyncSearchResponse != null) { + // (ssr refCount -> 2, asr refCount -> 0) + asyncSearchResponse.decRef(); + } + } + + PlainActionFuture write = new PlainActionFuture<>(); + store.createResponse(task.getExecutionId().getDocId(), Map.of(), asyncSearchResponse, write); + write.actionGet(); + + storedSearchResponse.decRef(); // sr -> 1 + msr.decRef(); // msr -> 0 (closeInternal runs, releasing its ssr) → tryIncRef() will now fail + + // Invoke getResponseWithHeaders -> should read from store and return 200 + AsyncSearchResponse resp = (AsyncSearchResponse) m.invoke(task); + assertNotNull("Expected response loaded from store", resp); + assertNull("No failure expected when loaded from store", resp.getFailure()); + assertNotNull("SearchResponse must be present", resp.getSearchResponse()); + assertFalse("Should not be running", resp.isRunning()); + assertFalse("Should not be partial", resp.isPartial()); + assertEquals(RestStatus.OK, resp.status()); + } + + @SuppressForbidden(reason = "access violation required in order to read private field for this test") + public void testReturnsGoneWhenInMemoryResponseReleased() throws Exception { + final int totalShards = 1; + final int skippedShards = 0; // Create an AsyncSearchTask AsyncSearchTask task = createAsyncSearchTask(); @@ -102,6 +179,9 @@ public void testGetResponseAfterCloseReturnsGone() throws Exception { Method m = AsyncSearchTask.class.getDeclaredMethod("getResponseWithHeaders"); m.setAccessible(true); + // Build a SearchResponse (sr refCount -> 1) + SearchResponse searchResponse = createSearchResponse(totalShards, totalShards, skippedShards); + // Take a ref - (msr refCount -> 1, sr refCount -> 2) MutableSearchResponse msr = (MutableSearchResponse) f.get(task); msr.updateShardsAndClusters(totalShards, skippedShards, null); @@ -136,11 +216,13 @@ private AsyncSearchTask createAsyncSearchTask() { new AsyncExecutionId("debug", new TaskId("node", 1L)), client, threadPool, - isCancelled -> () -> new AggregationReduceContext.ForFinal(null, null, null, null, null, PipelineAggregator.PipelineTree.EMPTY) + isCancelled -> () -> new AggregationReduceContext.ForFinal(null, null, null, null, null, PipelineAggregator.PipelineTree.EMPTY), + store ); } private SearchResponse createSearchResponse(int totalShards, int successfulShards, int skippedShards) { + SearchResponse.Clusters clusters = new SearchResponse.Clusters(1, 1, 0); return new SearchResponse( SearchHits.empty(Lucene.TOTAL_HITS_GREATER_OR_EQUAL_TO_ZERO, Float.NaN), null, @@ -155,7 +237,7 @@ private SearchResponse createSearchResponse(int totalShards, int successfulShard skippedShards, 1L, ShardSearchFailure.EMPTY_ARRAY, - null + clusters ); } } diff --git a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java index 19f545dbbd1f4..f59ca281feed9 100644 --- a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java +++ b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java @@ -351,13 +351,9 @@ private void executeCompletionListeners() { ElasticsearchException ex = (e instanceof ElasticsearchException) ? (ElasticsearchException) e : ExceptionsHelper.convertToElastic(e); + ex.setStackTrace(new StackTraceElement[0]); // keep stored payloads small - finalResponse = AsyncSearchResponse.failure( - searchId.getEncoded(), - getStartTime(), - expirationTimeMillis, - ex - ); + finalResponse = new AsyncSearchResponse(searchId.getEncoded(), null, ex, false, false, getStartTime(), expirationTimeMillis); } try { @@ -449,6 +445,16 @@ public static AsyncStatusResponse getStatusResponse(AsyncSearchTask asyncTask) { @Override public void close() { + if (logger.isDebugEnabled()) { + logger.debug( + "AsyncSearchTask.close(): byThread={}, asyncId={}, taskId={}, hasCompleted={}, stack={}", + Thread.currentThread().getName(), + searchId != null ? searchId.getEncoded() : "", + getId(), + hasCompleted, + new Exception().getStackTrace() + ); + } searchResponse.decRef(); } diff --git a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/MutableSearchResponse.java b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/MutableSearchResponse.java index 384d156bc25b5..eb1489f335434 100644 --- a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/MutableSearchResponse.java +++ b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/MutableSearchResponse.java @@ -42,6 +42,8 @@ */ class MutableSearchResponse extends AbstractRefCounted { + private final Logger logger = Loggers.getLogger(getClass(), "async"); + private int totalShards; private int skippedShards; private Clusters clusters; @@ -236,7 +238,7 @@ private SearchResponse buildResponse(long taskStartTimeNanos, InternalAggregatio * This method is synchronized to ensure that we don't perform final reduces concurrently. * This method also restores the response headers in the current thread context when requested, if the final response is available. */ - synchronized AsyncSearchResponse toAsyncSearchResponse(AsyncSearchTask task, long expirationTime, boolean restoreResponseHeaders) { + synchronized AsyncSearchResponse toAsyncSearchResponse(AsyncSearchTask task, long expirationTime, boolean restoreResponseHeaders) { if (restoreResponseHeaders && responseHeaders != null) { restoreResponseHeadersContext(threadContext, responseHeaders); } @@ -246,7 +248,7 @@ synchronized AsyncSearchResponse toAsyncSearchResponse(AsyncSearchTask task, lo // We have a final response, use it. searchResponse = finalResponse; searchResponse.mustIncRef(); - //System.out.println("Thread:" + Thread.currentThread().getName() + " finalResponse=" + finalResponse); + // System.out.println("Thread:" + Thread.currentThread().getName() + " finalResponse=" + finalResponse); } else if (clusters == null) { // An error occurred before we got the shard list searchResponse = null; @@ -494,6 +496,16 @@ private String getShardsInResponseMismatchInfo(SearchResponse response, boolean @Override protected synchronized void closeInternal() { + if (logger.isDebugEnabled()) { + logger.debug( + "MutableSearchResponse.close(): byThread={}, finalResponsePresent={}, clusterResponsesCount={}, stack={}", + Thread.currentThread().getName(), + finalResponse != null, + clusterResponses != null ? clusterResponses.size() : 0, + new Exception().getStackTrace() + ); + } + if (finalResponse != null) { finalResponse.decRef(); finalResponse = null; diff --git a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/TransportSubmitAsyncSearchAction.java b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/TransportSubmitAsyncSearchAction.java index ad648af2c5571..16aaac68754a6 100644 --- a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/TransportSubmitAsyncSearchAction.java +++ b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/TransportSubmitAsyncSearchAction.java @@ -207,7 +207,8 @@ public AsyncSearchTask createTask(long id, String type, String action, TaskId pa store.getClientWithOrigin(), nodeClient.threadPool(), isCancelled -> () -> searchService.aggReduceContextBuilder(isCancelled, originalSearchRequest.source().aggregations()) - .forFinalReduction() + .forFinalReduction(), + store ); } }; diff --git a/x-pack/plugin/async-search/src/test/java/org/elasticsearch/xpack/search/AsyncSearchTaskTests.java b/x-pack/plugin/async-search/src/test/java/org/elasticsearch/xpack/search/AsyncSearchTaskTests.java index ab1b6189c0133..70be3a506ddba 100644 --- a/x-pack/plugin/async-search/src/test/java/org/elasticsearch/xpack/search/AsyncSearchTaskTests.java +++ b/x-pack/plugin/async-search/src/test/java/org/elasticsearch/xpack/search/AsyncSearchTaskTests.java @@ -91,7 +91,8 @@ private AsyncSearchTask createAsyncSearchTask() { new AsyncExecutionId("0", new TaskId("node1", 1)), new NoOpClient(threadPool), threadPool, - (t) -> () -> null + (t) -> () -> null, + null ); } @@ -112,7 +113,8 @@ public void testTaskDescription() { new AsyncExecutionId("0", new TaskId("node1", 1)), new NoOpClient(threadPool), threadPool, - (t) -> () -> null + (t) -> () -> null, + null ) ) { assertEquals(""" @@ -135,7 +137,8 @@ public void testWaitForInit() throws InterruptedException { new AsyncExecutionId("0", new TaskId("node1", 1)), new NoOpClient(threadPool), threadPool, - (t) -> () -> null + (t) -> () -> null, + null ) ) { int numShards = randomIntBetween(0, 10); From dd9567d383dc6fb8aa446c6d756ea8ae9a367c54 Mon Sep 17 00:00:00 2001 From: Dimitris Rempapis Date: Tue, 14 Oct 2025 11:56:26 +0300 Subject: [PATCH 09/32] remove files --- .../xpack/search/AsyncSearchGoneIT.java | 270 ------------------ .../xpack/search/AsyncSearchRefCountIT.java | 247 ---------------- 2 files changed, 517 deletions(-) delete mode 100644 x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchGoneIT.java delete mode 100644 x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchRefCountIT.java diff --git a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchGoneIT.java b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchGoneIT.java deleted file mode 100644 index ffd851f7e097b..0000000000000 --- a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchGoneIT.java +++ /dev/null @@ -1,270 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -package org.elasticsearch.xpack.search; - -import com.carrotsearch.randomizedtesting.annotations.Repeat; - -import org.elasticsearch.ElasticsearchStatusException; -import org.elasticsearch.ExceptionsHelper; -import org.elasticsearch.action.index.IndexRequestBuilder; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.core.TimeValue; -import org.elasticsearch.rest.RestStatus; -import org.elasticsearch.search.aggregations.AggregationBuilders; -import org.elasticsearch.search.builder.SearchSourceBuilder; -import org.elasticsearch.test.ESIntegTestCase.SuiteScopeTestCase; -import org.elasticsearch.xpack.core.search.action.AsyncSearchResponse; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.CancellationException; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicReference; -import java.util.concurrent.atomic.LongAdder; - -@SuiteScopeTestCase -public class AsyncSearchGoneIT extends AsyncSearchIntegTestCase { - private static String indexName; - private static int numShards; - - private static int numKeywords; - private static Map keywordFreqs; - private static float maxMetric = Float.NEGATIVE_INFINITY; - private static float minMetric = Float.POSITIVE_INFINITY; - - @Override - public void setupSuiteScopeCluster() throws InterruptedException { - indexName = "test-async"; - numShards = randomIntBetween(1, 20); - int numDocs = randomIntBetween(100, 1000); - createIndex(indexName, Settings.builder().put("index.number_of_shards", numShards).build()); - numKeywords = randomIntBetween(50, 100); - keywordFreqs = new HashMap<>(); - Set keywordSet = new HashSet<>(); - for (int i = 0; i < numKeywords; i++) { - keywordSet.add(randomAlphaOfLengthBetween(10, 20)); - } - numKeywords = keywordSet.size(); - String[] keywords = keywordSet.toArray(String[]::new); - List reqs = new ArrayList<>(); - for (int i = 0; i < numDocs; i++) { - float metric = randomFloat(); - maxMetric = Math.max(metric, maxMetric); - minMetric = Math.min(metric, minMetric); - String keyword = keywords[randomIntBetween(0, numKeywords - 1)]; - keywordFreqs.compute(keyword, (k, v) -> { - if (v == null) { - return new AtomicInteger(1); - } - v.incrementAndGet(); - return v; - }); - reqs.add(prepareIndex(indexName).setSource("terms", keyword, "metric", metric)); - } - indexRandom(true, true, reqs); - } - - @Repeat(iterations = 10000) - public void testConcurrentStatusFetchWhileTaskCloses() throws Exception { - CountDownLatch beforeClose = new CountDownLatch(1); - - final SearchSourceBuilder source = new SearchSourceBuilder() - .aggregation(AggregationBuilders.terms("terms").field("terms.keyword").size(Math.max(1, numKeywords))); - - final int progressStep = (numShards > 2) ? randomIntBetween(2, numShards) : 2; - try (SearchResponseIterator it = assertBlockingIterator(indexName, numShards, source, 0, progressStep)) { - setBeforeCloseLatch(beforeClose); - - String id = getAsyncId(it); - - PollStats stats = new PollStats(); - StartableThreadGroup pollers = startGetStatusThreadsHot(id, 32, stats); - pollers.startHotThreads.countDown(); // release workers - - //Thread.sleep(100); - - // Finish consumption on a separate thread and capture errors - var finisherExec = Executors.newSingleThreadExecutor(); - var finisherError = new AtomicReference(); - Future finisher = finisherExec.submit(() -> { - try { - consumeAllResponses(it, "terms"); - } catch (Throwable t) { - finisherError.set(t); - } - }); - - // Align with the exact completion edge - beforeClose.await(); - - // Keep pollers hot long enough to land inside the small GONE window - pollers.stopAndAwait(TimeValue.timeValueSeconds(3)); - - // Join finisher & surface errors - try { - finisher.get(); - } catch (Exception ignored) { - } finally { finisherExec.shutdown(); } - - if (finisherError.get() != null) { - fail("consumeAllResponses failed: " + finisherError.get()); - } - - printPollerStats(stats); - assertGoneSeen(stats); - } - } - - private void printPollerStats(PollStats stats) { - System.out.println("Total:" + stats.totalCalls.sum()); - System.out.println("Completed:" + stats.completedResponses.sum()); - System.out.println("Running:" + stats.runningResponses.sum()); - System.out.println("Exceptions:" + stats.exceptions.sum()); - System.out.println("Gone:" + stats.gone410.sum()); - } - - private void assertGoneSeen(PollStats stats) { - assertFalse("Expected at least one GONE (410) from concurrent STATUS during task close; totals=" + stats.totalCalls.sum(), - stats.gone410.sum() > 0L); - } - - private String getAsyncId(SearchResponseIterator it) { - AsyncSearchResponse response = it.next(); - try { - assertNotNull(response.getId()); - return response.getId(); - } finally { - response.decRef(); - } - } - - private void consumeAllResponses(SearchResponseIterator it, String aggName) throws Exception { - while (it.hasNext()) { - AsyncSearchResponse response = it.next(); - try { - if (response.getSearchResponse() != null && response.getSearchResponse().getAggregations() != null) { - assertNotNull(response.getSearchResponse().getAggregations().get(aggName)); - } - } finally { - response.decRef(); - } - } - } - - private StartableThreadGroup startGetStatusThreadsHot(String id, int threads, PollStats stats) { - final ExecutorService exec = Executors.newFixedThreadPool(threads); - final List> futures = new ArrayList<>(threads); - final AtomicBoolean running = new AtomicBoolean(true); - final CountDownLatch start = new CountDownLatch(1); - - for (int i = 0; i < threads; i++) { - futures.add(exec.submit(() -> { - start.await(); - while (running.get()) { - AsyncSearchResponse resp = null; - try { - resp = getAsyncSearch(id); - stats.totalCalls.increment(); - - if (resp.isRunning()) { - stats.runningResponses.increment(); - } else { - stats.completedResponses.increment(); - } - } catch (Exception e) { - Throwable cause = ExceptionsHelper.unwrapCause(e); - if (cause instanceof ElasticsearchStatusException) { - RestStatus st = ExceptionsHelper.status(cause); - if (st == RestStatus.GONE) { - stats.gone410.increment(); - continue; - } - if (st == RestStatus.NOT_FOUND) { - continue; - } - } - continue; - } finally { - if (resp != null) { - resp.decRef(); - } - } - } - return null; - })); - } - return new StartableThreadGroup(exec, futures, running, start); - } - - // (Result pollers unchanged/not used for GONE case) - - static final class PollStats { - final LongAdder totalCalls = new LongAdder(); - final LongAdder runningResponses = new LongAdder(); - final LongAdder completedResponses = new LongAdder(); - final LongAdder hadSearchResponse = new LongAdder(); // not used by STATUS - final LongAdder exceptions = new LongAdder(); - final LongAdder gone410 = new LongAdder(); - } - - private final class StartableThreadGroup extends ThreadGroup { - CountDownLatch startHotThreads; - StartableThreadGroup(ExecutorService exec, List> futures, AtomicBoolean running, CountDownLatch startHotThreads) { - super(exec, futures, running); - this.startHotThreads = startHotThreads; - } - } - - class ThreadGroup { - private final ExecutorService exec; - private final List> futures; - private final AtomicBoolean running; - - private ThreadGroup(ExecutorService exec, List> futures, AtomicBoolean running) { - this.exec = exec; - this.futures = futures; - this.running = running; - } - - void stopAndAwait(TimeValue timeout) throws InterruptedException { - running.set(false); - exec.shutdown(); - if (exec.awaitTermination(timeout.millis(), TimeUnit.MILLISECONDS) == false) { - exec.shutdownNow(); - exec.awaitTermination(timeout.millis(), TimeUnit.MILLISECONDS); - } - } - - List getFailures() { - List failures = new ArrayList<>(); - for (Future f : futures) { - try { - f.get(); - } catch (CancellationException ignored) { - } catch (ExecutionException ee) { - failures.add(ExceptionsHelper.unwrapCause(ee.getCause())); - } catch (InterruptedException ie) { - Thread.currentThread().interrupt(); - if (failures.isEmpty()) failures.add(ie); - } - } - return failures; - } - } -} diff --git a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchRefCountIT.java b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchRefCountIT.java deleted file mode 100644 index cd23900240743..0000000000000 --- a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchRefCountIT.java +++ /dev/null @@ -1,247 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -package org.elasticsearch.xpack.search; - -import com.carrotsearch.randomizedtesting.annotations.Repeat; - -import org.apache.lucene.store.AlreadyClosedException; -import org.elasticsearch.ExceptionsHelper; -import org.elasticsearch.ResourceNotFoundException; -import org.elasticsearch.TransportVersion; -import org.elasticsearch.action.index.IndexRequestBuilder; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.core.TimeValue; -import org.elasticsearch.index.query.MatchAllQueryBuilder; -import org.elasticsearch.indices.SystemIndices; -import org.elasticsearch.rest.RestStatus; -import org.elasticsearch.search.DummyQueryBuilder; -import org.elasticsearch.search.SearchService; -import org.elasticsearch.search.aggregations.AggregationBuilders; -import org.elasticsearch.search.aggregations.bucket.terms.InternalTerms; -import org.elasticsearch.search.aggregations.bucket.terms.StringTerms; -import org.elasticsearch.search.aggregations.metrics.Max; -import org.elasticsearch.search.aggregations.metrics.Min; -import org.elasticsearch.search.builder.SearchSourceBuilder; -import org.elasticsearch.search.query.ThrowingQueryBuilder; -import org.elasticsearch.test.ESIntegTestCase.SuiteScopeTestCase; -import org.elasticsearch.test.TransportVersionUtils; -import org.elasticsearch.test.junit.annotations.TestLogging; -import org.elasticsearch.xpack.core.XPackPlugin; -import org.elasticsearch.xpack.core.search.action.AsyncSearchResponse; -import org.elasticsearch.xpack.core.search.action.AsyncStatusResponse; -import org.elasticsearch.xpack.core.search.action.SubmitAsyncSearchAction; -import org.elasticsearch.xpack.core.search.action.SubmitAsyncSearchRequest; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.CancellationException; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.concurrent.ThreadLocalRandom; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicInteger; - -import static org.elasticsearch.search.SearchService.MAX_ASYNC_SEARCH_RESPONSE_SIZE_SETTING; -import static org.elasticsearch.search.aggregations.AggregationBuilders.terms; -import static org.hamcrest.Matchers.containsString; -import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.greaterThan; -import static org.hamcrest.Matchers.greaterThanOrEqualTo; -import static org.hamcrest.Matchers.instanceOf; -import static org.hamcrest.Matchers.lessThan; -import static org.hamcrest.Matchers.lessThanOrEqualTo; - -@TestLogging( - reason = "testing debug log output to identify race condition", - value = "org.elasticsearch.xpack.search.MutableSearchResponse:DEBUG,org.elasticsearch.xpack.search.AsyncSearchTask:DEBUG" -) -@SuiteScopeTestCase -public class AsyncSearchRefCountIT extends AsyncSearchIntegTestCase { - private static String indexName; - private static int numShards; - - private static int numKeywords; - private static Map keywordFreqs; - private static float maxMetric = Float.NEGATIVE_INFINITY; - private static float minMetric = Float.POSITIVE_INFINITY; - - @Override - public void setupSuiteScopeCluster() throws InterruptedException { - indexName = "test-async"; - numShards = randomIntBetween(1, 20); - int numDocs = randomIntBetween(100, 1000); - createIndex(indexName, Settings.builder().put("index.number_of_shards", numShards).build()); - numKeywords = randomIntBetween(50, 100); - keywordFreqs = new HashMap<>(); - Set keywordSet = new HashSet<>(); - for (int i = 0; i < numKeywords; i++) { - keywordSet.add(randomAlphaOfLengthBetween(10, 20)); - } - numKeywords = keywordSet.size(); - String[] keywords = keywordSet.toArray(String[]::new); - List reqs = new ArrayList<>(); - for (int i = 0; i < numDocs; i++) { - float metric = randomFloat(); - maxMetric = Math.max(metric, maxMetric); - minMetric = Math.min(metric, minMetric); - String keyword = keywords[randomIntBetween(0, numKeywords - 1)]; - keywordFreqs.compute(keyword, (k, v) -> { - if (v == null) { - return new AtomicInteger(1); - } - v.incrementAndGet(); - return v; - }); - reqs.add(prepareIndex(indexName).setSource("terms", keyword, "metric", metric)); - } - indexRandom(true, true, reqs); - } - - /** - * Category 1, point 2 : We receive the final underlying search response, then the AsyncSearchTask#close() - * is invoked when we finish consuming the iterator. - * Category 2, point 1 : We execute repeatedly GET _async_search/{id} - */ - //@Repeat(iterations = 1000) - public void testConcurrentStatusFetchWhileTaskCloses() throws Exception { - final TimeValue WAIT = TimeValue.timeValueMillis(100); - - final SearchSourceBuilder source = new SearchSourceBuilder() - .aggregation(AggregationBuilders.terms("terms").field("terms.keyword").size(Math.max(1, numKeywords))); - - final int progressStep = (numShards > 2) ? randomIntBetween(2, numShards) : 2; - try (SearchResponseIterator it = assertBlockingIterator(indexName, numShards, source, 0, progressStep)) { - String id = getAsyncId(it); - ThreadGroup getThreads = startGetResultThreads(id, 10); - - Thread.sleep(WAIT.millis()); - consumeAllResponses(it, "terms"); - Thread.sleep(WAIT.millis()); - - getThreads.stopAndAwait(WAIT); - cleanupAsyncSearch(id); - - assertError(getThreads.getFailures()); - } - } - - private void assertError( List fails ) { - if (fails.isEmpty() == false) { - for (Throwable failure : fails) { - final String msg = String.valueOf(failure.getMessage()); - if (failure instanceof AssertionError && msg.contains("already closed, can't increment ref count")) { - fail("Race detected in poller thread:\n" + ExceptionsHelper.stackTrace(failure)); - } else { - fail("Unexpected exception in poller thread:\n" + ExceptionsHelper.stackTrace(failure)); - } - } - } - } - - private String getAsyncId(SearchResponseIterator it) { - AsyncSearchResponse response = it.next(); - try { - assertNotNull(response.getId()); - return response.getId(); - } finally { - response.decRef(); - } - } - - private void consumeAllResponses(SearchResponseIterator it, String aggName) throws Exception { - while (it.hasNext()) { - AsyncSearchResponse response = it.next(); - try { - if (response.getSearchResponse() != null && response.getSearchResponse().getAggregations() != null) { - assertNotNull(response.getSearchResponse().getAggregations().get(aggName)); - } - System.out.println("Async response: " + response.getSearchResponse().toString()); - } finally { - response.decRef(); - } - } - } - - private void cleanupAsyncSearch(String id) throws Exception { - ensureTaskCompletion(id); - deleteAsyncSearch(id); - ensureTaskRemoval(id); - } - - private ThreadGroup startGetResultThreads(String id, int threads) { - final ExecutorService exec = Executors.newFixedThreadPool(threads); - final List> futures = new ArrayList<>(threads); - final AtomicBoolean running = new AtomicBoolean(true); - - for (int i = 0; i < threads; i++) { - futures.add(exec.submit(() -> { - while (running.get()) { - AsyncSearchResponse resp = null; - try { - resp = getAsyncSearch(id); - System.out.println("Thread" + Thread.currentThread().getName() + " isRunning: " + resp.isRunning()); - if (resp.isRunning()) { } - } catch (Exception e) { - System.out.println(e.getMessage()); - throw new IllegalArgumentException(); - } finally { - if (resp != null) { - resp.decRef(); - } - } - } - })); - } - return new ThreadGroup(exec, futures, running); - } - - - private final class ThreadGroup { - - private final ExecutorService exec; - private final List> futures; - private final AtomicBoolean running; - - private ThreadGroup(ExecutorService exec, List> futures, AtomicBoolean running) { - this.exec = exec; - this.futures = futures; - this.running = running; - } - - void stopAndAwait(TimeValue timeoutMillis) throws InterruptedException { - running.set(false); - exec.shutdown(); - if (exec.awaitTermination(timeoutMillis.millis(), java.util.concurrent.TimeUnit.MILLISECONDS) == false) { - exec.shutdownNow(); - exec.awaitTermination(timeoutMillis.millis(), java.util.concurrent.TimeUnit.MILLISECONDS); - } - } - - List getFailures() { - List failures = new ArrayList<>(); - for (Future f : futures) { - try { - f.get(); - } catch (CancellationException ignored) { - } catch (ExecutionException ee) { - failures.add(ee.getCause()); - } catch (InterruptedException ie) { - Thread.currentThread().interrupt(); - if (failures.isEmpty()) failures.add(ie); - } - } - return failures; - } - } -} From c643cdad04f77263b83afaf1cd396d78a11fbfcc Mon Sep 17 00:00:00 2001 From: Dimitris Rempapis Date: Tue, 14 Oct 2025 11:57:49 +0300 Subject: [PATCH 10/32] update code --- .../java/org/elasticsearch/xpack/search/AsyncSearchTask.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java index f59ca281feed9..6058aa758ac10 100644 --- a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java +++ b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java @@ -345,13 +345,12 @@ private void executeCompletionListeners() { AsyncSearchResponse finalResponse; try { - // do NOT restore headers here; we’re on the search action thread already - finalResponse = getResponse(); // must NOT propagate + finalResponse = getResponse(); } catch (Exception e) { ElasticsearchException ex = (e instanceof ElasticsearchException) ? (ElasticsearchException) e : ExceptionsHelper.convertToElastic(e); - ex.setStackTrace(new StackTraceElement[0]); // keep stored payloads small + ex.setStackTrace(new StackTraceElement[0]); finalResponse = new AsyncSearchResponse(searchId.getEncoded(), null, ex, false, false, getStartTime(), expirationTimeMillis); } From dd092517bbd2f31f735d8b6ea434bf9fcdb5d47a Mon Sep 17 00:00:00 2001 From: Dimitris Rempapis Date: Tue, 14 Oct 2025 12:00:22 +0300 Subject: [PATCH 11/32] Delete System.out from debugging --- .../java/org/elasticsearch/xpack/search/AsyncSearchTask.java | 2 +- .../org/elasticsearch/xpack/search/MutableSearchResponse.java | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java index 6058aa758ac10..73f27574a0acb 100644 --- a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java +++ b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java @@ -345,7 +345,7 @@ private void executeCompletionListeners() { AsyncSearchResponse finalResponse; try { - finalResponse = getResponse(); + finalResponse = getResponse(); } catch (Exception e) { ElasticsearchException ex = (e instanceof ElasticsearchException) ? (ElasticsearchException) e diff --git a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/MutableSearchResponse.java b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/MutableSearchResponse.java index 500f373e4d54a..45f97add18a2f 100644 --- a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/MutableSearchResponse.java +++ b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/MutableSearchResponse.java @@ -246,7 +246,6 @@ synchronized AsyncSearchResponse toAsyncSearchResponse(AsyncSearchTask task, lon // We have a final response, use it. searchResponse = finalResponse; searchResponse.mustIncRef(); - // System.out.println("Thread:" + Thread.currentThread().getName() + " finalResponse=" + finalResponse); } else if (clusters == null) { // An error occurred before we got the shard list searchResponse = null; From 497a736b16f1a65de9a3004eb82711785376aa95 Mon Sep 17 00:00:00 2001 From: Dimitris Rempapis Date: Tue, 14 Oct 2025 12:01:13 +0300 Subject: [PATCH 12/32] Add forgotten imports --- .../org/elasticsearch/xpack/search/MutableSearchResponse.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/MutableSearchResponse.java b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/MutableSearchResponse.java index 45f97add18a2f..e09a4722b31f4 100644 --- a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/MutableSearchResponse.java +++ b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/MutableSearchResponse.java @@ -6,6 +6,7 @@ */ package org.elasticsearch.xpack.search; +import org.apache.logging.log4j.Logger; import org.apache.lucene.search.TotalHits; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.ExceptionsHelper; @@ -14,6 +15,7 @@ import org.elasticsearch.action.search.SearchResponseMerger; import org.elasticsearch.action.search.ShardSearchFailure; import org.elasticsearch.common.Strings; +import org.elasticsearch.common.logging.Loggers; import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.util.concurrent.AtomicArray; import org.elasticsearch.common.util.concurrent.ThreadContext; From 85ec60fea9148ec2d54c2823768753bc843aaa9e Mon Sep 17 00:00:00 2001 From: Dimitris Rempapis Date: Wed, 15 Oct 2025 15:07:13 +0300 Subject: [PATCH 13/32] make the store getResponse async --- ... AsyncSearchRefcountAndFallbackTests.java} | 193 ++++++++++++++---- .../xpack/search/AsyncSearchTask.java | 184 +++++++++-------- 2 files changed, 255 insertions(+), 122 deletions(-) rename x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/{MutableSearchResponseRefCountingTests.java => AsyncSearchRefcountAndFallbackTests.java} (55%) diff --git a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/MutableSearchResponseRefCountingTests.java b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchRefcountAndFallbackTests.java similarity index 55% rename from x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/MutableSearchResponseRefCountingTests.java rename to x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchRefcountAndFallbackTests.java index daa62cada0f2b..642b0f34f54e3 100644 --- a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/MutableSearchResponseRefCountingTests.java +++ b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchRefcountAndFallbackTests.java @@ -8,7 +8,9 @@ import org.elasticsearch.ElasticsearchStatusException; import org.elasticsearch.ExceptionsHelper; +import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.DocWriteResponse; +import org.elasticsearch.action.delete.DeleteResponse; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.action.search.ShardSearchFailure; import org.elasticsearch.action.support.PlainActionFuture; @@ -33,14 +35,17 @@ import org.junit.Before; import java.lang.reflect.Field; -import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; import java.util.Map; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.is; -public class MutableSearchResponseRefCountingTests extends ESSingleNodeTestCase { +public class AsyncSearchRefcountAndFallbackTests extends ESSingleNodeTestCase { private AsyncTaskIndexService store; public String index = ".async-search"; @@ -48,6 +53,9 @@ public class MutableSearchResponseRefCountingTests extends ESSingleNodeTestCase private TestThreadPool threadPool; private NoOpClient client; + final int totalShards = 1; + final int skippedShards = 0; + @Before public void setup() { this.threadPool = new TestThreadPool(getTestName()); @@ -73,10 +81,11 @@ public void cleanup() { terminate(threadPool); } + /** + * If another thread still holds a ref to the in-memory MutableSearchResponse, + * building an AsyncSearchResponse succeeds (final SearchResponse is still live). + */ public void testBuildSucceedsIfAnotherThreadHoldsRef() { - final int totalShards = 1; - final int skippedShards = 0; - // Build a SearchResponse (sr refCount -> 1) SearchResponse searchResponse = createSearchResponse(totalShards, totalShards, skippedShards); @@ -106,40 +115,42 @@ public void testBuildSucceedsIfAnotherThreadHoldsRef() { msr.decRef(); } + /** + * When the in-memory MutableSearchResponse has been released (tryIncRef == false), + * the task falls back to the async-search store and returns the persisted response (200 OK). + */ @SuppressForbidden(reason = "access violation required in order to read private field for this test") public void testFallbackToStoreWhenInMemoryResponseReleased() throws Exception { - final int totalShards = 1; - final int skippedShards = 0; - // Create an AsyncSearchTask AsyncSearchTask task = createAsyncSearchTask(); + assertNotNull(task); // Get response instance and method from task Field f = AsyncSearchTask.class.getDeclaredField("searchResponse"); f.setAccessible(true); - Method m = AsyncSearchTask.class.getDeclaredMethod("getResponseWithHeaders"); - m.setAccessible(true); + Method getResponse = AsyncSearchTask.class.getDeclaredMethod("getResponse", boolean.class, ActionListener.class); + getResponse.setAccessible(true); // Build a SearchResponse (ssr refCount -> 1) to be stored in the index SearchResponse storedSearchResponse = createSearchResponse(totalShards, totalShards, skippedShards); // Take a ref - (msr refCount -> 1, ssr refCount -> 2) MutableSearchResponse msr = (MutableSearchResponse) f.get(task); - msr.updateShardsAndClusters(totalShards, skippedShards, null); - msr.updateFinalResponse(storedSearchResponse, false); + msr.updateShardsAndClusters(totalShards, skippedShards, /*clusters*/ null); + msr.updateFinalResponse(storedSearchResponse, /*ccsMinimizeRoundtrips*/ false); - // Build the AsyncSearchResponse to persist + // Build the AsyncSearchResponse to persist in the store AsyncSearchResponse asyncSearchResponse = null; try { long now = System.currentTimeMillis(); asyncSearchResponse = new AsyncSearchResponse( task.getExecutionId().getEncoded(), storedSearchResponse, - null, - false, - false, - now, - now + TimeValue.timeValueMinutes(1).getMillis() + /*failure*/ null, + /*isPartial*/ false, + /*isRunning*/ false, + task.getStartTime(), + now + TimeValue.timeValueMinutes(1).millis() ); } finally { if (asyncSearchResponse != null) { @@ -148,15 +159,22 @@ public void testFallbackToStoreWhenInMemoryResponseReleased() throws Exception { } } + // Persist initial/final response to the store while we still hold a ref PlainActionFuture write = new PlainActionFuture<>(); store.createResponse(task.getExecutionId().getDocId(), Map.of(), asyncSearchResponse, write); write.actionGet(); - storedSearchResponse.decRef(); // sr -> 1 - msr.decRef(); // msr -> 0 (closeInternal runs, releasing its ssr) → tryIncRef() will now fail + // Release the in-memory objects so the task path must fall back to the store + // - drop our extra ref to the SearchResponse that updateFinalResponse() took (sr -> 1) + storedSearchResponse.decRef(); + // - drop the in-memory MutableSearchResponse so mutableSearchResponse.tryIncRef() == false + // msr -> 0 (closeInternal runs, releasing its ssr) → tryIncRef() will now fail + msr.decRef(); + + PlainActionFuture future = new PlainActionFuture<>(); + getResponse.invoke(task, true, future); - // Invoke getResponseWithHeaders -> should read from store and return 200 - AsyncSearchResponse resp = (AsyncSearchResponse) m.invoke(task); + AsyncSearchResponse resp = future.actionGet(); assertNotNull("Expected response loaded from store", resp); assertNull("No failure expected when loaded from store", resp.getFailure()); assertNotNull("SearchResponse must be present", resp.getSearchResponse()); @@ -165,44 +183,135 @@ public void testFallbackToStoreWhenInMemoryResponseReleased() throws Exception { assertEquals(RestStatus.OK, resp.status()); } + /** + * When both the in-memory MutableSearchResponse has been released AND the stored + * document has been deleted or not found, the task returns GONE (410). + */ @SuppressForbidden(reason = "access violation required in order to read private field for this test") - public void testReturnsGoneWhenInMemoryResponseReleased() throws Exception { - final int totalShards = 1; - final int skippedShards = 0; - - // Create an AsyncSearchTask + public void testGoneWhenInMemoryReleasedAndStoreMissing() throws Exception { AsyncSearchTask task = createAsyncSearchTask(); - // Get response instance and method from task Field f = AsyncSearchTask.class.getDeclaredField("searchResponse"); f.setAccessible(true); - Method m = AsyncSearchTask.class.getDeclaredMethod("getResponseWithHeaders"); - m.setAccessible(true); + Method getResponse = AsyncSearchTask.class.getDeclaredMethod("getResponse", boolean.class, ActionListener.class); + getResponse.setAccessible(true); - // Build a SearchResponse (sr refCount -> 1) SearchResponse searchResponse = createSearchResponse(totalShards, totalShards, skippedShards); - // Take a ref - (msr refCount -> 1, sr refCount -> 2) MutableSearchResponse msr = (MutableSearchResponse) f.get(task); msr.updateShardsAndClusters(totalShards, skippedShards, null); msr.updateFinalResponse(searchResponse, false); - searchResponse.decRef(); // sr ref -> 1 - msr.decRef(); // msr ref -> 0 -> closeInternal() -> sr ref -> 0 + long now = System.currentTimeMillis(); + AsyncSearchResponse asr = new AsyncSearchResponse( + task.getExecutionId().getEncoded(), + searchResponse, + null, + false, + false, + task.getStartTime(), + now + TimeValue.timeValueMinutes(1).millis() + ); + asr.decRef(); - // Invoke getResponseWithHeaders and expect GONE exception - InvocationTargetException ite = expectThrows(InvocationTargetException.class, () -> { - AsyncSearchResponse resp = (AsyncSearchResponse) m.invoke(task); - if (resp != null) { - resp.decRef(); - } - }); + PlainActionFuture write = new PlainActionFuture<>(); + store.createResponse(task.getExecutionId().getDocId(), Map.of(), asr, write); + write.actionGet(); - Throwable cause = ExceptionsHelper.unwrapCause(ite.getCause()); + searchResponse.decRef(); + msr.decRef(); + + // Delete the doc from store + PlainActionFuture del = new PlainActionFuture<>(); + store.deleteResponse(task.getExecutionId(), del); + del.actionGet(); + + // Now the task must surface GONE + PlainActionFuture fut = new PlainActionFuture<>(); + getResponse.invoke(task, /*restoreHeaders*/ true, fut); + Exception ex = expectThrows(Exception.class, fut::actionGet); + Throwable cause = ExceptionsHelper.unwrapCause(ex); assertThat(cause, instanceOf(ElasticsearchStatusException.class)); assertThat(ExceptionsHelper.status(cause), is(RestStatus.GONE)); } + /** + * Stresses concurrent GETs while the in-memory MutableSearchResponse is being closed. + * All callers either receive a valid response (from memory or store) or a GONE (410), + * and no unexpected failures occur. + */ + @SuppressForbidden(reason = "access violation required in order to read private field for this test") + public void testConcurrentGetResponseWhileClosing() throws Exception { + AsyncSearchTask task = createAsyncSearchTask(); + Field f = AsyncSearchTask.class.getDeclaredField("searchResponse"); + f.setAccessible(true); + MutableSearchResponse msr = (MutableSearchResponse) f.get(task); + + SearchResponse searchResponse = createSearchResponse(totalShards, totalShards, skippedShards); + msr.updateShardsAndClusters(totalShards, skippedShards, new SearchResponse.Clusters(1, 1, 0)); + msr.updateFinalResponse(searchResponse, false); + + // persist to store so fallback is available + AsyncSearchResponse asr = null; + try { + long now = System.currentTimeMillis(); + asr = new AsyncSearchResponse( + task.getExecutionId().getEncoded(), + searchResponse, + null, + false, + false, + task.getStartTime(), + now + TimeValue.timeValueMinutes(1).millis() + ); + } finally { + if(asr != null) { + asr.decRef(); + } + } + + PlainActionFuture write = new PlainActionFuture<>(); + store.createResponse(task.getExecutionId().getDocId(), Map.of(), asr, write); + write.actionGet(); + + int threads = 32, iterations = 200; + var exec = Executors.newFixedThreadPool(threads); + var failures = new ConcurrentLinkedQueue(); + var start = new CountDownLatch(1); + + for (int t = 0; t < threads; t++) { + exec.submit(() -> { + start.await(); + for (int i = 0; i < iterations; i++) { + PlainActionFuture future = new PlainActionFuture<>(); + + try { + AsyncSearchResponse resp = future.actionGet(); + resp.decRef(); + } catch (Exception e) { + Throwable cause = ExceptionsHelper.unwrapCause(e); + if ((cause instanceof ElasticsearchStatusException) == false + || ExceptionsHelper.status(cause) != RestStatus.GONE) { + failures.add(e); + } + } + } + return null; + }); + } + + start.countDown(); + searchResponse.decRef(); + msr.decRef(); + + exec.shutdown(); + if (exec.awaitTermination(10, TimeUnit.SECONDS) == false) { + exec.shutdownNow(); + } + + assertTrue("No unexpected failures", failures.isEmpty()); + } + private AsyncSearchTask createAsyncSearchTask() { return new AsyncSearchTask( 1L, diff --git a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java index 73f27574a0acb..10d3f9dc5613a 100644 --- a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java +++ b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java @@ -23,7 +23,6 @@ import org.elasticsearch.action.search.SearchTask; import org.elasticsearch.action.search.ShardSearchFailure; import org.elasticsearch.action.search.TransportSearchAction; -import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.client.internal.Client; import org.elasticsearch.common.logging.Loggers; import org.elasticsearch.core.Releasable; @@ -82,8 +81,6 @@ final class AsyncSearchTask extends SearchTask implements AsyncTask, Releasable private final AsyncTaskIndexService store; - private static final TimeValue STORE_FETCH_TIMEOUT = TimeValue.timeValueSeconds(30); - /** * Creates an instance of {@link AsyncSearchTask}. * @@ -215,12 +212,7 @@ public boolean addCompletionListener(ActionListener listene } } if (executeImmediately) { - try { - AsyncSearchResponse resp = getResponseWithHeaders(); - ActionListener.respondAndRelease(listener, resp); - } catch (Exception e) { - listener.onFailure(e); - } + getResponseWithHeaders(listener); } return true; // unused } @@ -239,12 +231,36 @@ public void addCompletionListener(Consumer listener) { } } if (executeImmediately) { - var response = getResponseWithHeaders(); - try { - listener.accept(response); - } finally { - response.decRef(); - } + getResponseWithHeaders( new ActionListener<>() { + @Override + public void onResponse(AsyncSearchResponse resp) { + // We don't decRef here, respondAndRelease will do it. + listener.accept(resp); + } + + @Override + public void onFailure(Exception e) { + ElasticsearchException ex = (e instanceof ElasticsearchException) + ? (ElasticsearchException) e + : ExceptionsHelper.convertToElastic(e); + ex.setStackTrace(new StackTraceElement[0]); + + AsyncSearchResponse failureResponse = new AsyncSearchResponse( + searchId.getEncoded(), + null, + ex, + false, + false, + getStartTime(), + expirationTimeMillis + ); + try { + listener.accept(failureResponse); + } finally { + failureResponse.decRef(); + } + } + }); } } @@ -263,13 +279,7 @@ private void internalAddCompletionListener(ActionListener l if (hasRun.compareAndSet(false, true)) { // timeout occurred before completion removeCompletionListener(id); - - try { - AsyncSearchResponse resp = getResponseWithHeaders(); - ActionListener.respondAndRelease(listener, resp); - } catch (Exception e) { - listener.onFailure(e); - } + getResponseWithHeaders(listener); } }, waitForCompletion, threadPool.generic()); } catch (Exception exc) { @@ -286,12 +296,7 @@ private void internalAddCompletionListener(ActionListener l } } if (executeImmediately) { - try { - AsyncSearchResponse resp = getResponseWithHeaders(); - ActionListener.respondAndRelease(listener, resp); - } catch (Exception e) { - listener.onFailure(e); - } + getResponseWithHeaders( listener); } } @@ -342,82 +347,101 @@ private void executeCompletionListeners() { } // we don't need to restore the response headers, they should be included in the current // context since we are called by the search action listener. - AsyncSearchResponse finalResponse; - - try { - finalResponse = getResponse(); - } catch (Exception e) { - ElasticsearchException ex = (e instanceof ElasticsearchException) - ? (ElasticsearchException) e - : ExceptionsHelper.convertToElastic(e); - ex.setStackTrace(new StackTraceElement[0]); - - finalResponse = new AsyncSearchResponse(searchId.getEncoded(), null, ex, false, false, getStartTime(), expirationTimeMillis); - } + getResponse( new ActionListener<>() { + @Override + public void onResponse(AsyncSearchResponse finalResponse) { + for (Consumer consumer : completionsListenersCopy.values()) { + consumer.accept(finalResponse); + } + } - try { - for (Consumer consumer : completionsListenersCopy.values()) { - consumer.accept(finalResponse); + @Override + public void onFailure(Exception e) { + ElasticsearchException ex = (e instanceof ElasticsearchException) + ? (ElasticsearchException) e + : ExceptionsHelper.convertToElastic(e); + ex.setStackTrace(new StackTraceElement[0]); + + AsyncSearchResponse failureResponse = new AsyncSearchResponse( + searchId.getEncoded(), + null, + ex, + false, + false, + getStartTime(), + expirationTimeMillis + ); + try { + for (Consumer consumer : completionsListenersCopy.values()) { + consumer.accept(failureResponse); + } + } finally { + failureResponse.decRef(); + } } - } finally { - finalResponse.decRef(); - } + }); } /** - * Returns the current {@link AsyncSearchResponse}. + * Invokes the listener with the current {@link AsyncSearchResponse} + * without restoring response headers into the calling thread context. */ - private AsyncSearchResponse getResponse() { - return getResponse(false); + private void getResponse(ActionListener listener) { + getResponse(false, listener); } /** - * Returns the current {@link AsyncSearchResponse} and restores the response headers - * in the local thread context. + * Invokes the listener with the current {@link AsyncSearchResponse}, + * restoring response headers into the calling thread context. */ - private AsyncSearchResponse getResponseWithHeaders() { - return getResponse(true); + private void getResponseWithHeaders(ActionListener listener) { + getResponse(true, listener); } - private AsyncSearchResponse getResponse(boolean restoreResponseHeaders) { - MutableSearchResponse mutableSearchResponse = searchResponse; + private void getResponse(boolean restoreResponseHeaders, ActionListener listener) { + final MutableSearchResponse mutableSearchResponse = searchResponse; assert mutableSearchResponse != null; checkCancellation(); - AsyncSearchResponse asyncSearchResponse; - // Fallback: in-memory state was released → read from .async-search + // Fallback: fetch from store asynchronously if (mutableSearchResponse.tryIncRef() == false) { - PlainActionFuture future = new PlainActionFuture<>(); - try { - store.getResponse(searchId, restoreResponseHeaders, future); - } catch (Exception e) { - throw ExceptionsHelper.convertToElastic(e); - } + store.getResponse(searchId, restoreResponseHeaders, new ActionListener<>() { + @Override + public void onResponse(AsyncSearchResponse resp) { + if (resp.tryIncRef() == false) { + listener.onFailure( + new ElasticsearchStatusException("async-search result no longer available", RestStatus.GONE)); + return; + } + ActionListener.respondAndRelease(listener, resp); + } - try { - return future.actionGet(STORE_FETCH_TIMEOUT); - } catch (Exception e) { - RestStatus st = ExceptionsHelper.status(ExceptionsHelper.unwrapCause(e)); - if (st == RestStatus.NOT_FOUND || st == RestStatus.GONE) { - throw new ElasticsearchStatusException("async-search result no longer available", RestStatus.GONE, e); + @Override + public void onFailure(Exception e) { + final Exception unwrapped = (Exception) ExceptionsHelper.unwrapCause(e); + listener.onFailure( + new ElasticsearchStatusException("async-search result no longer available", RestStatus.GONE, unwrapped)); } - throw ExceptionsHelper.convertToElastic(e); - } + }); + return; } try { - asyncSearchResponse = mutableSearchResponse.toAsyncSearchResponse(this, expirationTimeMillis, restoreResponseHeaders); - } catch (Exception e) { - ElasticsearchException exception = new ElasticsearchStatusException( - "Async search: error while reducing partial results", - ExceptionsHelper.status(e), - e - ); - asyncSearchResponse = mutableSearchResponse.toAsyncSearchResponse(this, expirationTimeMillis, exception); + AsyncSearchResponse asyncSearchResponse; + try { + asyncSearchResponse = mutableSearchResponse.toAsyncSearchResponse(this, expirationTimeMillis, restoreResponseHeaders); + } catch (Exception e) { + final ElasticsearchException ex = new ElasticsearchStatusException( + "Async search: error while reducing partial results", + ExceptionsHelper.status(e), + e + ); + asyncSearchResponse = mutableSearchResponse.toAsyncSearchResponse(this, expirationTimeMillis, ex); + } + ActionListener.respondAndRelease(listener, asyncSearchResponse); } finally { mutableSearchResponse.decRef(); } - return asyncSearchResponse; } // checks if the search task should be cancelled From 8758981c801e48b9bcaf5d21c0da499e504488dd Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 15 Oct 2025 12:15:17 +0000 Subject: [PATCH 14/32] [CI] Auto commit changes from spotless --- .../AsyncSearchRefcountAndFallbackTests.java | 5 ++--- .../xpack/search/AsyncSearchTask.java | 16 ++++++++-------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchRefcountAndFallbackTests.java b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchRefcountAndFallbackTests.java index 642b0f34f54e3..4ed753612e0c2 100644 --- a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchRefcountAndFallbackTests.java +++ b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchRefcountAndFallbackTests.java @@ -265,7 +265,7 @@ public void testConcurrentGetResponseWhileClosing() throws Exception { now + TimeValue.timeValueMinutes(1).millis() ); } finally { - if(asr != null) { + if (asr != null) { asr.decRef(); } } @@ -290,8 +290,7 @@ public void testConcurrentGetResponseWhileClosing() throws Exception { resp.decRef(); } catch (Exception e) { Throwable cause = ExceptionsHelper.unwrapCause(e); - if ((cause instanceof ElasticsearchStatusException) == false - || ExceptionsHelper.status(cause) != RestStatus.GONE) { + if ((cause instanceof ElasticsearchStatusException) == false || ExceptionsHelper.status(cause) != RestStatus.GONE) { failures.add(e); } } diff --git a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java index 10d3f9dc5613a..cae1c891b70e1 100644 --- a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java +++ b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java @@ -231,7 +231,7 @@ public void addCompletionListener(Consumer listener) { } } if (executeImmediately) { - getResponseWithHeaders( new ActionListener<>() { + getResponseWithHeaders(new ActionListener<>() { @Override public void onResponse(AsyncSearchResponse resp) { // We don't decRef here, respondAndRelease will do it. @@ -296,7 +296,7 @@ private void internalAddCompletionListener(ActionListener l } } if (executeImmediately) { - getResponseWithHeaders( listener); + getResponseWithHeaders(listener); } } @@ -347,7 +347,7 @@ private void executeCompletionListeners() { } // we don't need to restore the response headers, they should be included in the current // context since we are called by the search action listener. - getResponse( new ActionListener<>() { + getResponse(new ActionListener<>() { @Override public void onResponse(AsyncSearchResponse finalResponse) { for (Consumer consumer : completionsListenersCopy.values()) { @@ -387,7 +387,7 @@ public void onFailure(Exception e) { * without restoring response headers into the calling thread context. */ private void getResponse(ActionListener listener) { - getResponse(false, listener); + getResponse(false, listener); } /** @@ -395,7 +395,7 @@ private void getResponse(ActionListener listener) { * restoring response headers into the calling thread context. */ private void getResponseWithHeaders(ActionListener listener) { - getResponse(true, listener); + getResponse(true, listener); } private void getResponse(boolean restoreResponseHeaders, ActionListener listener) { @@ -409,8 +409,7 @@ private void getResponse(boolean restoreResponseHeaders, ActionListener Date: Wed, 15 Oct 2025 18:31:18 +0300 Subject: [PATCH 15/32] remove test --- .../AsyncSearchRefcountAndFallbackTests.java | 76 ------------------- 1 file changed, 76 deletions(-) diff --git a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchRefcountAndFallbackTests.java b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchRefcountAndFallbackTests.java index 4ed753612e0c2..6c1a6a5379379 100644 --- a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchRefcountAndFallbackTests.java +++ b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchRefcountAndFallbackTests.java @@ -235,82 +235,6 @@ public void testGoneWhenInMemoryReleasedAndStoreMissing() throws Exception { assertThat(ExceptionsHelper.status(cause), is(RestStatus.GONE)); } - /** - * Stresses concurrent GETs while the in-memory MutableSearchResponse is being closed. - * All callers either receive a valid response (from memory or store) or a GONE (410), - * and no unexpected failures occur. - */ - @SuppressForbidden(reason = "access violation required in order to read private field for this test") - public void testConcurrentGetResponseWhileClosing() throws Exception { - AsyncSearchTask task = createAsyncSearchTask(); - Field f = AsyncSearchTask.class.getDeclaredField("searchResponse"); - f.setAccessible(true); - MutableSearchResponse msr = (MutableSearchResponse) f.get(task); - - SearchResponse searchResponse = createSearchResponse(totalShards, totalShards, skippedShards); - msr.updateShardsAndClusters(totalShards, skippedShards, new SearchResponse.Clusters(1, 1, 0)); - msr.updateFinalResponse(searchResponse, false); - - // persist to store so fallback is available - AsyncSearchResponse asr = null; - try { - long now = System.currentTimeMillis(); - asr = new AsyncSearchResponse( - task.getExecutionId().getEncoded(), - searchResponse, - null, - false, - false, - task.getStartTime(), - now + TimeValue.timeValueMinutes(1).millis() - ); - } finally { - if (asr != null) { - asr.decRef(); - } - } - - PlainActionFuture write = new PlainActionFuture<>(); - store.createResponse(task.getExecutionId().getDocId(), Map.of(), asr, write); - write.actionGet(); - - int threads = 32, iterations = 200; - var exec = Executors.newFixedThreadPool(threads); - var failures = new ConcurrentLinkedQueue(); - var start = new CountDownLatch(1); - - for (int t = 0; t < threads; t++) { - exec.submit(() -> { - start.await(); - for (int i = 0; i < iterations; i++) { - PlainActionFuture future = new PlainActionFuture<>(); - - try { - AsyncSearchResponse resp = future.actionGet(); - resp.decRef(); - } catch (Exception e) { - Throwable cause = ExceptionsHelper.unwrapCause(e); - if ((cause instanceof ElasticsearchStatusException) == false || ExceptionsHelper.status(cause) != RestStatus.GONE) { - failures.add(e); - } - } - } - return null; - }); - } - - start.countDown(); - searchResponse.decRef(); - msr.decRef(); - - exec.shutdown(); - if (exec.awaitTermination(10, TimeUnit.SECONDS) == false) { - exec.shutdownNow(); - } - - assertTrue("No unexpected failures", failures.isEmpty()); - } - private AsyncSearchTask createAsyncSearchTask() { return new AsyncSearchTask( 1L, From a2f7f7a9e55b4d168bdaaf29a28e4982eb76724b Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 15 Oct 2025 15:42:38 +0000 Subject: [PATCH 16/32] [CI] Auto commit changes from spotless --- .../xpack/search/AsyncSearchRefcountAndFallbackTests.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchRefcountAndFallbackTests.java b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchRefcountAndFallbackTests.java index 6c1a6a5379379..1fd663b17c49c 100644 --- a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchRefcountAndFallbackTests.java +++ b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchRefcountAndFallbackTests.java @@ -37,10 +37,6 @@ import java.lang.reflect.Field; import java.lang.reflect.Method; import java.util.Map; -import java.util.concurrent.ConcurrentLinkedQueue; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.Executors; -import java.util.concurrent.TimeUnit; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.is; From aed9d1f200f32708aef7d01654ace680aa02ba1a Mon Sep 17 00:00:00 2001 From: Dimitris Rempapis Date: Thu, 16 Oct 2025 12:49:37 +0300 Subject: [PATCH 17/32] Integration test that validates the behavior of async search status pollingunder concurrent access while the underlying async search task is finishing and being cleaned up. --- .../search/AsyncSearchConcurrentStatusIT.java | 274 ++++++++++++++++++ 1 file changed, 274 insertions(+) create mode 100644 x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java diff --git a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java new file mode 100644 index 0000000000000..969f11a235592 --- /dev/null +++ b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java @@ -0,0 +1,274 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.search; + +import com.carrotsearch.randomizedtesting.annotations.Repeat; +import org.elasticsearch.ElasticsearchStatusException; +import org.elasticsearch.ExceptionsHelper; +import org.elasticsearch.action.index.IndexRequestBuilder; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.rest.RestStatus; +import org.elasticsearch.search.aggregations.AggregationBuilders; +import org.elasticsearch.search.builder.SearchSourceBuilder; +import org.elasticsearch.test.ESIntegTestCase.SuiteScopeTestCase; +import org.elasticsearch.xpack.core.search.action.AsyncSearchResponse; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.CancellationException; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; +import java.util.concurrent.atomic.LongAdder; + +@SuiteScopeTestCase +public class AsyncSearchConcurrentStatusIT extends AsyncSearchIntegTestCase { + private static String indexName; + private static int numShards; + + private static int numKeywords; + private static Map keywordFreqs; + private static float maxMetric = Float.NEGATIVE_INFINITY; + private static float minMetric = Float.POSITIVE_INFINITY; + + @Override + public void setupSuiteScopeCluster() { + indexName = "test-async"; + numShards = randomIntBetween(1, 20); + int numDocs = randomIntBetween(100, 1000); + createIndex(indexName, Settings.builder().put("index.number_of_shards", numShards).build()); + numKeywords = randomIntBetween(50, 100); + keywordFreqs = new HashMap<>(); + Set keywordSet = new HashSet<>(); + for (int i = 0; i < numKeywords; i++) { + keywordSet.add(randomAlphaOfLengthBetween(10, 20)); + } + numKeywords = keywordSet.size(); + String[] keywords = keywordSet.toArray(String[]::new); + List reqs = new ArrayList<>(); + for (int i = 0; i < numDocs; i++) { + float metric = randomFloat(); + maxMetric = Math.max(metric, maxMetric); + minMetric = Math.min(metric, minMetric); + String keyword = keywords[randomIntBetween(0, numKeywords - 1)]; + keywordFreqs.compute(keyword, (k, v) -> { + if (v == null) { + return new AtomicInteger(1); + } + v.incrementAndGet(); + return v; + }); + reqs.add(prepareIndex(indexName).setSource("terms", keyword, "metric", metric)); + } + indexRandom(true, true, reqs); + } + + /** + * Tests that concurrent async search status requests behave correctly + * while the underlying async search task is still executing and during its close/cleanup. + */ + public void testConcurrentStatusFetchWhileTaskCloses() throws Exception { + final String aggName = "terms"; + final SearchSourceBuilder source = new SearchSourceBuilder().aggregation( + AggregationBuilders.terms(aggName).field("terms.keyword").size(Math.max(1, numKeywords)) + ); + + final int progressStep = (numShards > 2) ? randomIntBetween(2, numShards) : 2; + try (SearchResponseIterator it = assertBlockingIterator(indexName, numShards, source, 0, progressStep)) { + String id = getAsyncId(it); + + PollStats stats = new PollStats(); + + int statusThreads = randomIntBetween(1, Math.max(2, 4 * numShards)); + StartableThreadGroup pollers = startGetStatusThreadsHot(id, statusThreads, aggName, stats); + pollers.startHotThreads.countDown(); // release pollers + + // Finish consumption on a separate thread and capture errors + var consumerExec = Executors.newSingleThreadExecutor(); + AtomicReference consumerError = new AtomicReference<>(); + Future consumer = consumerExec.submit(() -> { + try { + consumeAllResponses(it, aggName); + } catch (Throwable t) { + consumerError.set(t); + } + }); + + Thread.sleep(randomIntBetween(100, 200)); + pollers.stopAndAwait(TimeValue.timeValueMillis(randomIntBetween(500, 1000))); + + // Join consumer & surface errors + try { + consumer.get(); + } catch (Exception ignored) { + } finally { + consumerExec.shutdown(); + } + + assertNull(consumerError.get()); + assertNoWorkerFailures(pollers); + assertStats(stats); + } + } + + private void assertNoWorkerFailures(StartableThreadGroup pollers) { + List failures = pollers.getFailures(); + assertTrue( + "Unexpected worker failures:\n" + failures.stream() + .map(ExceptionsHelper::stackTrace) + .reduce("", (a, b) -> a + "\n---\n" + b), + failures.isEmpty() + ); + } + + private void assertStats(PollStats stats) { + assertEquals(stats.totalCalls.sum(), stats.runningResponses.sum() + stats.completedResponses.sum()); + assertEquals("There should be no exceptions other than GONE", 0, stats.exceptions.sum()); + assertTrue( + "Expected some completed STATUS responses with successful results; totals=" + stats.totalCalls.sum(), + stats.completedResponses.sum() > 0L + ); + } + + private String getAsyncId(SearchResponseIterator it) { + AsyncSearchResponse response = it.next(); + try { + assertNotNull(response.getId()); + return response.getId(); + } finally { + response.decRef(); + } + } + + private void consumeAllResponses(SearchResponseIterator it, String aggName) throws Exception { + while (it.hasNext()) { + AsyncSearchResponse response = it.next(); + try { + if (response.getSearchResponse() != null && response.getSearchResponse().getAggregations() != null) { + assertNotNull(response.getSearchResponse().getAggregations().get(aggName)); + } + } finally { + response.decRef(); + } + } + } + + private StartableThreadGroup startGetStatusThreadsHot(String id, int threads, String aggName, PollStats stats) { + final ExecutorService exec = Executors.newFixedThreadPool(threads); + final List> futures = new ArrayList<>(threads); + final AtomicBoolean running = new AtomicBoolean(true); + final CountDownLatch start = new CountDownLatch(1); + + for (int i = 0; i < threads; i++) { + futures.add(exec.submit(() -> { + start.await(); + while (running.get()) { + AsyncSearchResponse resp = null; + try { + resp = getAsyncSearch(id); + stats.totalCalls.increment(); + + if (resp.isRunning()) { + stats.runningResponses.increment(); + } else { + // Success-only assertions: if reported completed, we must have a proper search response + assertNull("Async search reported completed with failure", resp.getFailure()); + assertNotNull("Completed async search must carry a SearchResponse", resp.getSearchResponse()); + assertNotNull("Completed async search must have aggregations", resp.getSearchResponse().getAggregations()); + assertNotNull( + "Completed async search must contain the expected aggregation", + resp.getSearchResponse().getAggregations().get(aggName) + ); + stats.completedResponses.increment(); + } + } catch (Exception e) { + Throwable cause = ExceptionsHelper.unwrapCause(e); + if (cause instanceof ElasticsearchStatusException) { + RestStatus status = ExceptionsHelper.status(cause); + if (status == RestStatus.GONE) { + stats.gone410.increment(); + } + } else { + stats.exceptions.increment(); + } + } finally { + if (resp != null) { + resp.decRef(); + } + } + } + return null; + })); + } + return new StartableThreadGroup(exec, futures, running, start); + } + + static final class PollStats { + final LongAdder totalCalls = new LongAdder(); + final LongAdder runningResponses = new LongAdder(); + final LongAdder completedResponses = new LongAdder(); + final LongAdder exceptions = new LongAdder(); + final LongAdder gone410 = new LongAdder(); + } + + static class StartableThreadGroup extends ThreadGroup { + private final CountDownLatch startHotThreads; + + StartableThreadGroup(ExecutorService exec, List> futures, AtomicBoolean running, CountDownLatch startHotThreads) { + super(exec, futures, running); + this.startHotThreads = startHotThreads; + } + } + + static class ThreadGroup { + private final ExecutorService exec; + private final List> futures; + private final AtomicBoolean running; + + private ThreadGroup(ExecutorService exec, List> futures, AtomicBoolean running) { + this.exec = exec; + this.futures = futures; + this.running = running; + } + + void stopAndAwait(TimeValue timeout) throws InterruptedException { + running.set(false); + exec.shutdown(); + if (exec.awaitTermination(timeout.millis(), TimeUnit.MILLISECONDS) == false) { + exec.shutdownNow(); + exec.awaitTermination(timeout.millis(), TimeUnit.MILLISECONDS); + } + } + + List getFailures() { + List failures = new ArrayList<>(); + for (Future f : futures) { + try { + f.get(); + } catch (CancellationException ignored) {} catch (ExecutionException ee) { + failures.add(ExceptionsHelper.unwrapCause(ee.getCause())); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + if (failures.isEmpty()) failures.add(ie); + } + } + return failures; + } + } +} From b82dbe8cf8ed35b4e29cd5de271f2cc93d163be9 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 16 Oct 2025 09:56:24 +0000 Subject: [PATCH 18/32] [CI] Auto commit changes from spotless --- .../xpack/search/AsyncSearchConcurrentStatusIT.java | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java index 969f11a235592..1fe2f9e6b90fe 100644 --- a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java +++ b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java @@ -7,7 +7,6 @@ package org.elasticsearch.xpack.search; -import com.carrotsearch.randomizedtesting.annotations.Repeat; import org.elasticsearch.ElasticsearchStatusException; import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.action.index.IndexRequestBuilder; @@ -78,7 +77,7 @@ public void setupSuiteScopeCluster() { } indexRandom(true, true, reqs); } - + /** * Tests that concurrent async search status requests behave correctly * while the underlying async search task is still executing and during its close/cleanup. @@ -116,8 +115,7 @@ public void testConcurrentStatusFetchWhileTaskCloses() throws Exception { // Join consumer & surface errors try { consumer.get(); - } catch (Exception ignored) { - } finally { + } catch (Exception ignored) {} finally { consumerExec.shutdown(); } @@ -130,9 +128,7 @@ public void testConcurrentStatusFetchWhileTaskCloses() throws Exception { private void assertNoWorkerFailures(StartableThreadGroup pollers) { List failures = pollers.getFailures(); assertTrue( - "Unexpected worker failures:\n" + failures.stream() - .map(ExceptionsHelper::stackTrace) - .reduce("", (a, b) -> a + "\n---\n" + b), + "Unexpected worker failures:\n" + failures.stream().map(ExceptionsHelper::stackTrace).reduce("", (a, b) -> a + "\n---\n" + b), failures.isEmpty() ); } From 07825fd202f719cd67b23465fe14a612135eb099 Mon Sep 17 00:00:00 2001 From: Dimitris Rempapis Date: Thu, 16 Oct 2025 15:26:32 +0300 Subject: [PATCH 19/32] update after review --- .../search/AsyncSearchConcurrentStatusIT.java | 3 +- .../xpack/search/AsyncSearchTask.java | 83 ++++++++----------- 2 files changed, 38 insertions(+), 48 deletions(-) diff --git a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java index 969f11a235592..96060862032c0 100644 --- a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java +++ b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java @@ -78,11 +78,12 @@ public void setupSuiteScopeCluster() { } indexRandom(true, true, reqs); } - + /** * Tests that concurrent async search status requests behave correctly * while the underlying async search task is still executing and during its close/cleanup. */ + @Repeat(iterations = 1000) public void testConcurrentStatusFetchWhileTaskCloses() throws Exception { final String aggName = "terms"; final SearchSourceBuilder source = new SearchSourceBuilder().aggregation( diff --git a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java index cae1c891b70e1..4c02867e5ae08 100644 --- a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java +++ b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java @@ -36,6 +36,7 @@ import org.elasticsearch.tasks.TaskManager; import org.elasticsearch.threadpool.Scheduler.Cancellable; import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.transport.Transports; import org.elasticsearch.xpack.core.async.AsyncExecutionId; import org.elasticsearch.xpack.core.async.AsyncTask; import org.elasticsearch.xpack.core.async.AsyncTaskIndexService; @@ -231,36 +232,31 @@ public void addCompletionListener(Consumer listener) { } } if (executeImmediately) { - getResponseWithHeaders(new ActionListener<>() { - @Override - public void onResponse(AsyncSearchResponse resp) { - // We don't decRef here, respondAndRelease will do it. + getResponseWithHeaders(ActionListener.wrap( + resp -> { listener.accept(resp); - } + }, + e -> { + ElasticsearchException ex = (e instanceof ElasticsearchException) + ? (ElasticsearchException) e + : ExceptionsHelper.convertToElastic(e); + ex.setStackTrace(new StackTraceElement[0]); - @Override - public void onFailure(Exception e) { - ElasticsearchException ex = (e instanceof ElasticsearchException) - ? (ElasticsearchException) e - : ExceptionsHelper.convertToElastic(e); - ex.setStackTrace(new StackTraceElement[0]); - - AsyncSearchResponse failureResponse = new AsyncSearchResponse( - searchId.getEncoded(), - null, - ex, - false, - false, - getStartTime(), - expirationTimeMillis - ); - try { - listener.accept(failureResponse); - } finally { - failureResponse.decRef(); - } + AsyncSearchResponse failureResponse = new AsyncSearchResponse( + searchId.getEncoded(), + null, + ex, + false, + false, + getStartTime(), + expirationTimeMillis + ); + try { + listener.accept(failureResponse); + } finally { + failureResponse.decRef(); } - }); + })); } } @@ -347,16 +343,13 @@ private void executeCompletionListeners() { } // we don't need to restore the response headers, they should be included in the current // context since we are called by the search action listener. - getResponse(new ActionListener<>() { - @Override - public void onResponse(AsyncSearchResponse finalResponse) { + getResponse(ActionListener.wrap( + finalResponse -> { for (Consumer consumer : completionsListenersCopy.values()) { consumer.accept(finalResponse); } - } - - @Override - public void onFailure(Exception e) { + }, + e -> { ElasticsearchException ex = (e instanceof ElasticsearchException) ? (ElasticsearchException) e : ExceptionsHelper.convertToElastic(e); @@ -378,8 +371,7 @@ public void onFailure(Exception e) { } finally { failureResponse.decRef(); } - } - }); + })); } /** @@ -405,24 +397,21 @@ private void getResponse(boolean restoreResponseHeaders, ActionListener() { - @Override - public void onResponse(AsyncSearchResponse resp) { + store.getResponse(searchId, restoreResponseHeaders, ActionListener.wrap( + resp -> { if (resp.tryIncRef() == false) { - listener.onFailure(new ElasticsearchStatusException("async-search result no longer available", RestStatus.GONE)); + listener.onFailure( + new ElasticsearchStatusException("async-search result no longer available", RestStatus.GONE)); return; } ActionListener.respondAndRelease(listener, resp); - } - - @Override - public void onFailure(Exception e) { + }, + e -> { final Exception unwrapped = (Exception) ExceptionsHelper.unwrapCause(e); listener.onFailure( - new ElasticsearchStatusException("async-search result no longer available", RestStatus.GONE, unwrapped) - ); + new ElasticsearchStatusException("async-search result no longer available", RestStatus.GONE, unwrapped)); } - }); + )); return; } From e4fedfc923dfa254b835aaca236d0ab260a7cbc7 Mon Sep 17 00:00:00 2001 From: Dimitris Rempapis Date: Thu, 16 Oct 2025 15:27:13 +0300 Subject: [PATCH 20/32] update after review --- .../xpack/search/AsyncSearchConcurrentStatusIT.java | 1 - 1 file changed, 1 deletion(-) diff --git a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java index ab700b0342e2b..1fe2f9e6b90fe 100644 --- a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java +++ b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java @@ -82,7 +82,6 @@ public void setupSuiteScopeCluster() { * Tests that concurrent async search status requests behave correctly * while the underlying async search task is still executing and during its close/cleanup. */ - @Repeat(iterations = 1000) public void testConcurrentStatusFetchWhileTaskCloses() throws Exception { final String aggName = "terms"; final SearchSourceBuilder source = new SearchSourceBuilder().aggregation( From c26b15de9371270c9a5fca4a983cc35be2f6322a Mon Sep 17 00:00:00 2001 From: Dimitris Rempapis Date: Thu, 16 Oct 2025 15:31:36 +0300 Subject: [PATCH 21/32] apply spotless --- .../xpack/search/AsyncSearchTask.java | 83 ++++++++----------- 1 file changed, 36 insertions(+), 47 deletions(-) diff --git a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java index 4c02867e5ae08..a2e847963d1af 100644 --- a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java +++ b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java @@ -36,7 +36,6 @@ import org.elasticsearch.tasks.TaskManager; import org.elasticsearch.threadpool.Scheduler.Cancellable; import org.elasticsearch.threadpool.ThreadPool; -import org.elasticsearch.transport.Transports; import org.elasticsearch.xpack.core.async.AsyncExecutionId; import org.elasticsearch.xpack.core.async.AsyncTask; import org.elasticsearch.xpack.core.async.AsyncTaskIndexService; @@ -232,11 +231,8 @@ public void addCompletionListener(Consumer listener) { } } if (executeImmediately) { - getResponseWithHeaders(ActionListener.wrap( - resp -> { - listener.accept(resp); - }, - e -> { + getResponseWithHeaders(ActionListener.wrap(resp -> { + listener.accept(resp); }, e -> { ElasticsearchException ex = (e instanceof ElasticsearchException) ? (ElasticsearchException) e : ExceptionsHelper.convertToElastic(e); @@ -343,35 +339,33 @@ private void executeCompletionListeners() { } // we don't need to restore the response headers, they should be included in the current // context since we are called by the search action listener. - getResponse(ActionListener.wrap( - finalResponse -> { + getResponse(ActionListener.wrap(finalResponse -> { + for (Consumer consumer : completionsListenersCopy.values()) { + consumer.accept(finalResponse); + } + }, e -> { + ElasticsearchException ex = (e instanceof ElasticsearchException) + ? (ElasticsearchException) e + : ExceptionsHelper.convertToElastic(e); + ex.setStackTrace(new StackTraceElement[0]); + + AsyncSearchResponse failureResponse = new AsyncSearchResponse( + searchId.getEncoded(), + null, + ex, + false, + false, + getStartTime(), + expirationTimeMillis + ); + try { for (Consumer consumer : completionsListenersCopy.values()) { - consumer.accept(finalResponse); + consumer.accept(failureResponse); } - }, - e -> { - ElasticsearchException ex = (e instanceof ElasticsearchException) - ? (ElasticsearchException) e - : ExceptionsHelper.convertToElastic(e); - ex.setStackTrace(new StackTraceElement[0]); - - AsyncSearchResponse failureResponse = new AsyncSearchResponse( - searchId.getEncoded(), - null, - ex, - false, - false, - getStartTime(), - expirationTimeMillis - ); - try { - for (Consumer consumer : completionsListenersCopy.values()) { - consumer.accept(failureResponse); - } - } finally { - failureResponse.decRef(); - } - })); + } finally { + failureResponse.decRef(); + } + })); } /** @@ -397,21 +391,16 @@ private void getResponse(boolean restoreResponseHeaders, ActionListener { - if (resp.tryIncRef() == false) { - listener.onFailure( - new ElasticsearchStatusException("async-search result no longer available", RestStatus.GONE)); - return; - } - ActionListener.respondAndRelease(listener, resp); - }, - e -> { - final Exception unwrapped = (Exception) ExceptionsHelper.unwrapCause(e); - listener.onFailure( - new ElasticsearchStatusException("async-search result no longer available", RestStatus.GONE, unwrapped)); + store.getResponse(searchId, restoreResponseHeaders, ActionListener.wrap(resp -> { + if (resp.tryIncRef() == false) { + listener.onFailure(new ElasticsearchStatusException("async-search result no longer available", RestStatus.GONE)); + return; } - )); + ActionListener.respondAndRelease(listener, resp); + }, e -> { + final Exception unwrapped = (Exception) ExceptionsHelper.unwrapCause(e); + listener.onFailure(new ElasticsearchStatusException("async-search result no longer available", RestStatus.GONE, unwrapped)); + })); return; } From 770a902764b04e9e2dd3ae8aa6234371880cb44a Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 16 Oct 2025 12:38:29 +0000 Subject: [PATCH 22/32] [CI] Auto commit changes from spotless --- .../java/org/elasticsearch/xpack/search/AsyncSearchTask.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java index a2e847963d1af..c4c58e26fb2c8 100644 --- a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java +++ b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java @@ -231,8 +231,7 @@ public void addCompletionListener(Consumer listener) { } } if (executeImmediately) { - getResponseWithHeaders(ActionListener.wrap(resp -> { - listener.accept(resp); }, e -> { + getResponseWithHeaders(ActionListener.wrap(resp -> { listener.accept(resp); }, e -> { ElasticsearchException ex = (e instanceof ElasticsearchException) ? (ElasticsearchException) e : ExceptionsHelper.convertToElastic(e); From fc178a9c5668b8bb88b10b12232eba9783a3e466 Mon Sep 17 00:00:00 2001 From: Dimitris Rempapis Date: Thu, 16 Oct 2025 16:03:03 +0300 Subject: [PATCH 23/32] update --- .../xpack/search/AsyncSearchConcurrentStatusIT.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java index 1fe2f9e6b90fe..4cba1ee0bbdda 100644 --- a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java +++ b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java @@ -136,10 +136,6 @@ private void assertNoWorkerFailures(StartableThreadGroup pollers) { private void assertStats(PollStats stats) { assertEquals(stats.totalCalls.sum(), stats.runningResponses.sum() + stats.completedResponses.sum()); assertEquals("There should be no exceptions other than GONE", 0, stats.exceptions.sum()); - assertTrue( - "Expected some completed STATUS responses with successful results; totals=" + stats.totalCalls.sum(), - stats.completedResponses.sum() > 0L - ); } private String getAsyncId(SearchResponseIterator it) { From 9fa88b9cee9210a2323567932fd61612ee93ef55 Mon Sep 17 00:00:00 2001 From: Dimitris Rempapis Date: Tue, 21 Oct 2025 11:52:26 +0300 Subject: [PATCH 24/32] update after review --- .../search/AsyncSearchConcurrentStatusIT.java | 88 ++++++++++--------- 1 file changed, 48 insertions(+), 40 deletions(-) diff --git a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java index 4cba1ee0bbdda..a6d37d70f1ab4 100644 --- a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java +++ b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java @@ -7,6 +7,8 @@ package org.elasticsearch.xpack.search; +import com.carrotsearch.randomizedtesting.annotations.Repeat; + import org.elasticsearch.ElasticsearchStatusException; import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.action.index.IndexRequestBuilder; @@ -19,10 +21,8 @@ import org.elasticsearch.xpack.core.search.action.AsyncSearchResponse; import java.util.ArrayList; -import java.util.HashMap; import java.util.HashSet; import java.util.List; -import java.util.Map; import java.util.Set; import java.util.concurrent.CancellationException; import java.util.concurrent.CountDownLatch; @@ -31,8 +31,8 @@ import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.atomic.LongAdder; @@ -42,9 +42,6 @@ public class AsyncSearchConcurrentStatusIT extends AsyncSearchIntegTestCase { private static int numShards; private static int numKeywords; - private static Map keywordFreqs; - private static float maxMetric = Float.NEGATIVE_INFINITY; - private static float minMetric = Float.POSITIVE_INFINITY; @Override public void setupSuiteScopeCluster() { @@ -53,7 +50,6 @@ public void setupSuiteScopeCluster() { int numDocs = randomIntBetween(100, 1000); createIndex(indexName, Settings.builder().put("index.number_of_shards", numShards).build()); numKeywords = randomIntBetween(50, 100); - keywordFreqs = new HashMap<>(); Set keywordSet = new HashSet<>(); for (int i = 0; i < numKeywords; i++) { keywordSet.add(randomAlphaOfLengthBetween(10, 20)); @@ -63,24 +59,19 @@ public void setupSuiteScopeCluster() { List reqs = new ArrayList<>(); for (int i = 0; i < numDocs; i++) { float metric = randomFloat(); - maxMetric = Math.max(metric, maxMetric); - minMetric = Math.min(metric, minMetric); String keyword = keywords[randomIntBetween(0, numKeywords - 1)]; - keywordFreqs.compute(keyword, (k, v) -> { - if (v == null) { - return new AtomicInteger(1); - } - v.incrementAndGet(); - return v; - }); reqs.add(prepareIndex(indexName).setSource("terms", keyword, "metric", metric)); } indexRandom(true, true, reqs); } /** - * Tests that concurrent async search status requests behave correctly - * while the underlying async search task is still executing and during its close/cleanup. + * The test spins up a set of poller threads that repeatedly call {@code _async_search/{d]}} + * but hold them behind a latch. Once all pollers are ready, the latch is released so they + * begin hammering the async search endpoint at the same time. In parallel, a consumer thread + * drives the async search to completion using the blocking iterator. This coordinated overlap + * ensures we exercise the window where the task is closing and some status calls may return + * {@code 410 GONE}. */ public void testConcurrentStatusFetchWhileTaskCloses() throws Exception { final String aggName = "terms"; @@ -94,9 +85,10 @@ public void testConcurrentStatusFetchWhileTaskCloses() throws Exception { PollStats stats = new PollStats(); - int statusThreads = randomIntBetween(1, Math.max(2, 4 * numShards)); - StartableThreadGroup pollers = startGetStatusThreadsHot(id, statusThreads, aggName, stats); - pollers.startHotThreads.countDown(); // release pollers + // Pick a random number of status-poller threads, at least 1, up to (4×numShards) + int pollerThreads = randomIntBetween(1, 4 * numShards); + PollerGroup pollers = startGetStatusThreadsHot(id, pollerThreads, aggName, stats); + pollers.start(); // release pollers // Finish consumption on a separate thread and capture errors var consumerExec = Executors.newSingleThreadExecutor(); @@ -110,22 +102,42 @@ public void testConcurrentStatusFetchWhileTaskCloses() throws Exception { }); Thread.sleep(randomIntBetween(100, 200)); - pollers.stopAndAwait(TimeValue.timeValueMillis(randomIntBetween(500, 1000))); + + TimeValue timeout = TimeValue.timeValueSeconds(3); // Join consumer & surface errors try { - consumer.get(); + consumer.get(timeout.millis(), TimeUnit.MILLISECONDS); + + if (consumerError.get() != null) { + fail("consumeAllResponses failed: " + consumerError.get()); + } + } catch (TimeoutException e) { + consumer.cancel(true); + fail(e, "Consumer thread did not finish within timeout"); } catch (Exception ignored) {} finally { + + try { + pollers.stopAndAwait(timeout); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + fail("Interrupted while stopping pollers: " + e.getMessage()); + } + consumerExec.shutdown(); + try { + consumerExec.awaitTermination(timeout.millis(), TimeUnit.MILLISECONDS); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + } } - assertNull(consumerError.get()); assertNoWorkerFailures(pollers); assertStats(stats); } } - private void assertNoWorkerFailures(StartableThreadGroup pollers) { + private void assertNoWorkerFailures(PollerGroup pollers) { List failures = pollers.getFailures(); assertTrue( "Unexpected worker failures:\n" + failures.stream().map(ExceptionsHelper::stackTrace).reduce("", (a, b) -> a + "\n---\n" + b), @@ -161,15 +173,15 @@ private void consumeAllResponses(SearchResponseIterator it, String aggName) thro } } - private StartableThreadGroup startGetStatusThreadsHot(String id, int threads, String aggName, PollStats stats) { + private PollerGroup startGetStatusThreadsHot(String id, int threads, String aggName, PollStats stats) { final ExecutorService exec = Executors.newFixedThreadPool(threads); final List> futures = new ArrayList<>(threads); final AtomicBoolean running = new AtomicBoolean(true); - final CountDownLatch start = new CountDownLatch(1); + final CountDownLatch startGate = new CountDownLatch(1); for (int i = 0; i < threads; i++) { futures.add(exec.submit(() -> { - start.await(); + startGate.await(); while (running.get()) { AsyncSearchResponse resp = null; try { @@ -208,7 +220,7 @@ private StartableThreadGroup startGetStatusThreadsHot(String id, int threads, St return null; })); } - return new StartableThreadGroup(exec, futures, running, start); + return new PollerGroup(exec, futures, running, startGate); } static final class PollStats { @@ -219,26 +231,22 @@ static final class PollStats { final LongAdder gone410 = new LongAdder(); } - static class StartableThreadGroup extends ThreadGroup { - private final CountDownLatch startHotThreads; - - StartableThreadGroup(ExecutorService exec, List> futures, AtomicBoolean running, CountDownLatch startHotThreads) { - super(exec, futures, running); - this.startHotThreads = startHotThreads; - } - } - - static class ThreadGroup { + static class PollerGroup { private final ExecutorService exec; private final List> futures; + // The threads are created right away, but they immediately block on the latch private final AtomicBoolean running; + private final CountDownLatch startGate; - private ThreadGroup(ExecutorService exec, List> futures, AtomicBoolean running) { + private PollerGroup(ExecutorService exec, List> futures, AtomicBoolean running, CountDownLatch startGate) { this.exec = exec; this.futures = futures; this.running = running; + this.startGate = startGate; } + void start() { startGate.countDown(); } + void stopAndAwait(TimeValue timeout) throws InterruptedException { running.set(false); exec.shutdown(); From 286b6a90df661cbe5c32c7e34cfd11cdb80098e6 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 21 Oct 2025 08:58:17 +0000 Subject: [PATCH 25/32] [CI] Auto commit changes from spotless --- .../xpack/search/AsyncSearchConcurrentStatusIT.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java index a6d37d70f1ab4..b82f61705f5b2 100644 --- a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java +++ b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java @@ -7,8 +7,6 @@ package org.elasticsearch.xpack.search; -import com.carrotsearch.randomizedtesting.annotations.Repeat; - import org.elasticsearch.ElasticsearchStatusException; import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.action.index.IndexRequestBuilder; @@ -245,7 +243,9 @@ private PollerGroup(ExecutorService exec, List> futures, AtomicBoolean this.startGate = startGate; } - void start() { startGate.countDown(); } + void start() { + startGate.countDown(); + } void stopAndAwait(TimeValue timeout) throws InterruptedException { running.set(false); From 9b57b9976f1a4514740bf88de2be67421192b375 Mon Sep 17 00:00:00 2001 From: Dimitris Rempapis Date: Tue, 21 Oct 2025 12:26:41 +0300 Subject: [PATCH 26/32] update after review --- .../xpack/search/AsyncSearchConcurrentStatusIT.java | 6 +++--- .../org/elasticsearch/xpack/search/AsyncSearchTask.java | 9 +++++++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java index a6d37d70f1ab4..b82f61705f5b2 100644 --- a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java +++ b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java @@ -7,8 +7,6 @@ package org.elasticsearch.xpack.search; -import com.carrotsearch.randomizedtesting.annotations.Repeat; - import org.elasticsearch.ElasticsearchStatusException; import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.action.index.IndexRequestBuilder; @@ -245,7 +243,9 @@ private PollerGroup(ExecutorService exec, List> futures, AtomicBoolean this.startGate = startGate; } - void start() { startGate.countDown(); } + void start() { + startGate.countDown(); + } void stopAndAwait(TimeValue timeout) throws InterruptedException { running.set(false); diff --git a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java index c4c58e26fb2c8..b3815635836a0 100644 --- a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java +++ b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java @@ -392,17 +392,22 @@ private void getResponse(boolean restoreResponseHeaders, ActionListener { if (resp.tryIncRef() == false) { - listener.onFailure(new ElasticsearchStatusException("async-search result no longer available", RestStatus.GONE)); + listener.onFailure(new ElasticsearchStatusException("Async search: result no longer available", RestStatus.GONE)); return; } ActionListener.respondAndRelease(listener, resp); }, e -> { final Exception unwrapped = (Exception) ExceptionsHelper.unwrapCause(e); - listener.onFailure(new ElasticsearchStatusException("async-search result no longer available", RestStatus.GONE, unwrapped)); + listener.onFailure( + new ElasticsearchStatusException("Async search: result no longer available", RestStatus.GONE, unwrapped) + ); })); return; } + // At this point we successfully incremented the ref on the MutableSearchResponse. + // This guarantees that the underlying SearchResponse it wraps will not be closed + // while we are using it, so calling toAsyncSearchResponse(..) is safe. try { AsyncSearchResponse asyncSearchResponse; try { From 89b3f3f217670dcb9822e8691d849f7eeb0c0977 Mon Sep 17 00:00:00 2001 From: Dimitris Rempapis Date: Tue, 21 Oct 2025 15:17:30 +0300 Subject: [PATCH 27/32] update after review --- .../search/AsyncSearchConcurrentStatusIT.java | 54 ++++++++++--------- 1 file changed, 29 insertions(+), 25 deletions(-) diff --git a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java index b82f61705f5b2..c5c0430788d1e 100644 --- a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java +++ b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java @@ -7,6 +7,8 @@ package org.elasticsearch.xpack.search; +import com.carrotsearch.randomizedtesting.annotations.Repeat; + import org.elasticsearch.ElasticsearchStatusException; import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.action.index.IndexRequestBuilder; @@ -64,14 +66,17 @@ public void setupSuiteScopeCluster() { } /** - * The test spins up a set of poller threads that repeatedly call {@code _async_search/{d]}} - * but hold them behind a latch. Once all pollers are ready, the latch is released so they - * begin hammering the async search endpoint at the same time. In parallel, a consumer thread - * drives the async search to completion using the blocking iterator. This coordinated overlap - * ensures we exercise the window where the task is closing and some status calls may return - * {@code 410 GONE}. + * This test spins up a set of poller threads that repeatedly call + * {@code _async_search/{id}}. Each poller starts immediately, and once enough + * requests have been issued they signal a latch to indicate the group is "warmed up". + * The test waits on this latch to deterministically ensure pollers are active. + * In parallel, a consumer thread drives the async search to completion using the + * blocking iterator. This coordinated overlap exercises the window where the task + * is closing and some status calls may return {@code 410 GONE}. */ + @Repeat(iterations = 1000) public void testConcurrentStatusFetchWhileTaskCloses() throws Exception { + final TimeValue timeout = TimeValue.timeValueSeconds(3); final String aggName = "terms"; final SearchSourceBuilder source = new SearchSourceBuilder().aggregation( AggregationBuilders.terms(aggName).field("terms.keyword").size(Math.max(1, numKeywords)) @@ -85,10 +90,16 @@ public void testConcurrentStatusFetchWhileTaskCloses() throws Exception { // Pick a random number of status-poller threads, at least 1, up to (4×numShards) int pollerThreads = randomIntBetween(1, 4 * numShards); - PollerGroup pollers = startGetStatusThreadsHot(id, pollerThreads, aggName, stats); - pollers.start(); // release pollers - // Finish consumption on a separate thread and capture errors + // Wait for pollers to be active + CountDownLatch warmed = new CountDownLatch(1); + + PollerGroup pollers = createPollers(id, pollerThreads, aggName, stats, warmed); + + // Wait until pollers are issuing requests (warming period) + assertTrue("pollers did not warm up in time", warmed.await(timeout.millis(), TimeUnit.MILLISECONDS)); + + // Start consumer on a separate thread and capture errors var consumerExec = Executors.newSingleThreadExecutor(); AtomicReference consumerError = new AtomicReference<>(); Future consumer = consumerExec.submit(() -> { @@ -99,10 +110,6 @@ public void testConcurrentStatusFetchWhileTaskCloses() throws Exception { } }); - Thread.sleep(randomIntBetween(100, 200)); - - TimeValue timeout = TimeValue.timeValueSeconds(3); - // Join consumer & surface errors try { consumer.get(timeout.millis(), TimeUnit.MILLISECONDS); @@ -171,21 +178,24 @@ private void consumeAllResponses(SearchResponseIterator it, String aggName) thro } } - private PollerGroup startGetStatusThreadsHot(String id, int threads, String aggName, PollStats stats) { + private PollerGroup createPollers(String id, int threads, String aggName, PollStats stats, CountDownLatch warmed) { final ExecutorService exec = Executors.newFixedThreadPool(threads); final List> futures = new ArrayList<>(threads); final AtomicBoolean running = new AtomicBoolean(true); - final CountDownLatch startGate = new CountDownLatch(1); for (int i = 0; i < threads; i++) { futures.add(exec.submit(() -> { - startGate.await(); while (running.get()) { AsyncSearchResponse resp = null; try { resp = getAsyncSearch(id); stats.totalCalls.increment(); + // Once enough requests have been sent, consider pollers "warmed". + if (stats.totalCalls.sum() >= threads) { + warmed.countDown(); + } + if (resp.isRunning()) { stats.runningResponses.increment(); } else { @@ -218,7 +228,7 @@ private PollerGroup startGetStatusThreadsHot(String id, int threads, String aggN return null; })); } - return new PollerGroup(exec, futures, running, startGate); + return new PollerGroup(exec, futures, running); } static final class PollStats { @@ -232,19 +242,13 @@ static final class PollStats { static class PollerGroup { private final ExecutorService exec; private final List> futures; - // The threads are created right away, but they immediately block on the latch + // The threads are created and running right away private final AtomicBoolean running; - private final CountDownLatch startGate; - private PollerGroup(ExecutorService exec, List> futures, AtomicBoolean running, CountDownLatch startGate) { + private PollerGroup(ExecutorService exec, List> futures, AtomicBoolean running) { this.exec = exec; this.futures = futures; this.running = running; - this.startGate = startGate; - } - - void start() { - startGate.countDown(); } void stopAndAwait(TimeValue timeout) throws InterruptedException { From 30fe790de7aef9f58fa7bd2c51c1b42cefc03d4c Mon Sep 17 00:00:00 2001 From: Dimitris Rempapis Date: Tue, 21 Oct 2025 15:37:29 +0300 Subject: [PATCH 28/32] Remove Repeat annotation --- .../xpack/search/AsyncSearchConcurrentStatusIT.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java index c5c0430788d1e..177c964d3a9c3 100644 --- a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java +++ b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java @@ -7,8 +7,6 @@ package org.elasticsearch.xpack.search; -import com.carrotsearch.randomizedtesting.annotations.Repeat; - import org.elasticsearch.ElasticsearchStatusException; import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.action.index.IndexRequestBuilder; @@ -74,7 +72,6 @@ public void setupSuiteScopeCluster() { * blocking iterator. This coordinated overlap exercises the window where the task * is closing and some status calls may return {@code 410 GONE}. */ - @Repeat(iterations = 1000) public void testConcurrentStatusFetchWhileTaskCloses() throws Exception { final TimeValue timeout = TimeValue.timeValueSeconds(3); final String aggName = "terms"; From 547a05004550bc9eeb59f87078c4aa8b117ebdb6 Mon Sep 17 00:00:00 2001 From: Dimitris Rempapis Date: Wed, 22 Oct 2025 17:30:15 +0300 Subject: [PATCH 29/32] code udpated after review --- .../AsyncSearchRefcountAndFallbackTests.java | 31 +++++-------------- .../xpack/search/AsyncSearchTask.java | 9 +++++- 2 files changed, 15 insertions(+), 25 deletions(-) diff --git a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchRefcountAndFallbackTests.java b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchRefcountAndFallbackTests.java index 1fd663b17c49c..f17be5da89b70 100644 --- a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchRefcountAndFallbackTests.java +++ b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchRefcountAndFallbackTests.java @@ -8,7 +8,6 @@ import org.elasticsearch.ElasticsearchStatusException; import org.elasticsearch.ExceptionsHelper; -import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.DocWriteResponse; import org.elasticsearch.action.delete.DeleteResponse; import org.elasticsearch.action.search.SearchResponse; @@ -17,7 +16,6 @@ import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.util.BigArrays; -import org.elasticsearch.core.SuppressForbidden; import org.elasticsearch.core.TimeValue; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.search.SearchHits; @@ -34,8 +32,6 @@ import org.junit.After; import org.junit.Before; -import java.lang.reflect.Field; -import java.lang.reflect.Method; import java.util.Map; import static org.hamcrest.Matchers.instanceOf; @@ -115,23 +111,16 @@ public void testBuildSucceedsIfAnotherThreadHoldsRef() { * When the in-memory MutableSearchResponse has been released (tryIncRef == false), * the task falls back to the async-search store and returns the persisted response (200 OK). */ - @SuppressForbidden(reason = "access violation required in order to read private field for this test") public void testFallbackToStoreWhenInMemoryResponseReleased() throws Exception { // Create an AsyncSearchTask AsyncSearchTask task = createAsyncSearchTask(); assertNotNull(task); - // Get response instance and method from task - Field f = AsyncSearchTask.class.getDeclaredField("searchResponse"); - f.setAccessible(true); - Method getResponse = AsyncSearchTask.class.getDeclaredMethod("getResponse", boolean.class, ActionListener.class); - getResponse.setAccessible(true); - // Build a SearchResponse (ssr refCount -> 1) to be stored in the index SearchResponse storedSearchResponse = createSearchResponse(totalShards, totalShards, skippedShards); // Take a ref - (msr refCount -> 1, ssr refCount -> 2) - MutableSearchResponse msr = (MutableSearchResponse) f.get(task); + MutableSearchResponse msr = task.getSearchResponse(); msr.updateShardsAndClusters(totalShards, skippedShards, /*clusters*/ null); msr.updateFinalResponse(storedSearchResponse, /*ccsMinimizeRoundtrips*/ false); @@ -168,7 +157,7 @@ public void testFallbackToStoreWhenInMemoryResponseReleased() throws Exception { msr.decRef(); PlainActionFuture future = new PlainActionFuture<>(); - getResponse.invoke(task, true, future); + task.getResponse(future); AsyncSearchResponse resp = future.actionGet(); assertNotNull("Expected response loaded from store", resp); @@ -183,18 +172,11 @@ public void testFallbackToStoreWhenInMemoryResponseReleased() throws Exception { * When both the in-memory MutableSearchResponse has been released AND the stored * document has been deleted or not found, the task returns GONE (410). */ - @SuppressForbidden(reason = "access violation required in order to read private field for this test") public void testGoneWhenInMemoryReleasedAndStoreMissing() throws Exception { AsyncSearchTask task = createAsyncSearchTask(); - Field f = AsyncSearchTask.class.getDeclaredField("searchResponse"); - f.setAccessible(true); - Method getResponse = AsyncSearchTask.class.getDeclaredMethod("getResponse", boolean.class, ActionListener.class); - getResponse.setAccessible(true); - SearchResponse searchResponse = createSearchResponse(totalShards, totalShards, skippedShards); - - MutableSearchResponse msr = (MutableSearchResponse) f.get(task); + MutableSearchResponse msr = task.getSearchResponse(); msr.updateShardsAndClusters(totalShards, skippedShards, null); msr.updateFinalResponse(searchResponse, false); @@ -223,9 +205,10 @@ public void testGoneWhenInMemoryReleasedAndStoreMissing() throws Exception { del.actionGet(); // Now the task must surface GONE - PlainActionFuture fut = new PlainActionFuture<>(); - getResponse.invoke(task, /*restoreHeaders*/ true, fut); - Exception ex = expectThrows(Exception.class, fut::actionGet); + PlainActionFuture future = new PlainActionFuture<>(); + task.getResponse(future); + + Exception ex = expectThrows(Exception.class, future::actionGet); Throwable cause = ExceptionsHelper.unwrapCause(ex); assertThat(cause, instanceOf(ElasticsearchStatusException.class)); assertThat(ExceptionsHelper.status(cause), is(RestStatus.GONE)); diff --git a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java index b3815635836a0..9674d987a7bbe 100644 --- a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java +++ b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java @@ -370,8 +370,10 @@ private void executeCompletionListeners() { /** * Invokes the listener with the current {@link AsyncSearchResponse} * without restoring response headers into the calling thread context. + * + * Visible for testing */ - private void getResponse(ActionListener listener) { + void getResponse(ActionListener listener) { getResponse(false, listener); } @@ -426,6 +428,11 @@ private void getResponse(boolean restoreResponseHeaders, ActionListener Date: Wed, 22 Oct 2025 14:35:56 +0000 Subject: [PATCH 30/32] [CI] Auto commit changes from spotless --- .../java/org/elasticsearch/xpack/search/AsyncSearchTask.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java index 9674d987a7bbe..7e6e604397aa9 100644 --- a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java +++ b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java @@ -373,7 +373,7 @@ private void executeCompletionListeners() { * * Visible for testing */ - void getResponse(ActionListener listener) { + void getResponse(ActionListener listener) { getResponse(false, listener); } From 634e698dac6e397f16c2ee188c8a7ecb96c4c82d Mon Sep 17 00:00:00 2001 From: Dimitris Rempapis Date: Mon, 3 Nov 2025 12:02:27 +0200 Subject: [PATCH 31/32] update after review| --- .../search/AsyncSearchConcurrentStatusIT.java | 178 +++++++++--------- 1 file changed, 87 insertions(+), 91 deletions(-) diff --git a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java index 177c964d3a9c3..fb227d3b9f75e 100644 --- a/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java +++ b/x-pack/plugin/async-search/src/internalClusterTest/java/org/elasticsearch/xpack/search/AsyncSearchConcurrentStatusIT.java @@ -21,8 +21,10 @@ import java.util.ArrayList; import java.util.HashSet; import java.util.List; +import java.util.Queue; import java.util.Set; -import java.util.concurrent.CancellationException; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; @@ -33,6 +35,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.atomic.LongAdder; +import java.util.stream.IntStream; @SuiteScopeTestCase public class AsyncSearchConcurrentStatusIT extends AsyncSearchIntegTestCase { @@ -91,7 +94,12 @@ public void testConcurrentStatusFetchWhileTaskCloses() throws Exception { // Wait for pollers to be active CountDownLatch warmed = new CountDownLatch(1); - PollerGroup pollers = createPollers(id, pollerThreads, aggName, stats, warmed); + // Executor and coordination for pollers + ExecutorService pollerExec = Executors.newFixedThreadPool(pollerThreads); + AtomicBoolean running = new AtomicBoolean(true); + Queue failures = new ConcurrentLinkedQueue<>(); + + CompletableFuture pollers = createPollers(id, pollerThreads, stats, warmed, pollerExec, running, failures); // Wait until pollers are issuing requests (warming period) assertTrue("pollers did not warm up in time", warmed.await(timeout.millis(), TimeUnit.MILLISECONDS)); @@ -117,15 +125,30 @@ public void testConcurrentStatusFetchWhileTaskCloses() throws Exception { } catch (TimeoutException e) { consumer.cancel(true); fail(e, "Consumer thread did not finish within timeout"); - } catch (Exception ignored) {} finally { - + } catch (Exception ignored) { + // ignored + } finally { + // Stop pollers + running.set(false); try { - pollers.stopAndAwait(timeout); - } catch (InterruptedException e) { + pollers.get(timeout.millis(), TimeUnit.MILLISECONDS); + } catch (TimeoutException te) { + // The finally block will shut down the pollers forcibly + } catch (ExecutionException ee) { + failures.add(ExceptionsHelper.unwrapCause(ee.getCause())); + } catch (InterruptedException ie) { Thread.currentThread().interrupt(); - fail("Interrupted while stopping pollers: " + e.getMessage()); + } finally { + pollerExec.shutdownNow(); + try { + pollerExec.awaitTermination(timeout.millis(), TimeUnit.MILLISECONDS); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + fail("Interrupted while stopping pollers: " + ie.getMessage()); + } } + // Shut down the consumer executor consumerExec.shutdown(); try { consumerExec.awaitTermination(timeout.millis(), TimeUnit.MILLISECONDS); @@ -134,13 +157,12 @@ public void testConcurrentStatusFetchWhileTaskCloses() throws Exception { } } - assertNoWorkerFailures(pollers); + assertNoWorkerFailures(failures); assertStats(stats); } } - private void assertNoWorkerFailures(PollerGroup pollers) { - List failures = pollers.getFailures(); + private void assertNoWorkerFailures(Queue failures) { assertTrue( "Unexpected worker failures:\n" + failures.stream().map(ExceptionsHelper::stackTrace).reduce("", (a, b) -> a + "\n---\n" + b), failures.isEmpty() @@ -175,57 +197,68 @@ private void consumeAllResponses(SearchResponseIterator it, String aggName) thro } } - private PollerGroup createPollers(String id, int threads, String aggName, PollStats stats, CountDownLatch warmed) { - final ExecutorService exec = Executors.newFixedThreadPool(threads); - final List> futures = new ArrayList<>(threads); - final AtomicBoolean running = new AtomicBoolean(true); - - for (int i = 0; i < threads; i++) { - futures.add(exec.submit(() -> { - while (running.get()) { - AsyncSearchResponse resp = null; - try { - resp = getAsyncSearch(id); - stats.totalCalls.increment(); + private CompletableFuture createPollers( + String id, + int threads, + PollStats stats, + CountDownLatch warmed, + ExecutorService pollerExec, + AtomicBoolean running, + Queue failures + ) { + @SuppressWarnings("unchecked") + final CompletableFuture[] tasks = IntStream.range(0, threads).mapToObj(i -> CompletableFuture.runAsync(() -> { + while (running.get()) { + AsyncSearchResponse resp = null; + try { + resp = getAsyncSearch(id); + stats.totalCalls.increment(); - // Once enough requests have been sent, consider pollers "warmed". - if (stats.totalCalls.sum() >= threads) { - warmed.countDown(); - } + // Once enough requests have been sent, consider pollers "warmed". + if (stats.totalCalls.sum() >= threads) { + warmed.countDown(); + } - if (resp.isRunning()) { - stats.runningResponses.increment(); - } else { - // Success-only assertions: if reported completed, we must have a proper search response - assertNull("Async search reported completed with failure", resp.getFailure()); - assertNotNull("Completed async search must carry a SearchResponse", resp.getSearchResponse()); - assertNotNull("Completed async search must have aggregations", resp.getSearchResponse().getAggregations()); - assertNotNull( - "Completed async search must contain the expected aggregation", - resp.getSearchResponse().getAggregations().get(aggName) - ); - stats.completedResponses.increment(); - } - } catch (Exception e) { - Throwable cause = ExceptionsHelper.unwrapCause(e); - if (cause instanceof ElasticsearchStatusException) { - RestStatus status = ExceptionsHelper.status(cause); - if (status == RestStatus.GONE) { - stats.gone410.increment(); - } + if (resp.isRunning()) { + stats.runningResponses.increment(); + } else { + // Success-only assertions: if reported completed, we must have a proper search response + assertNull("Async search reported completed with failure", resp.getFailure()); + assertNotNull("Completed async search must carry a SearchResponse", resp.getSearchResponse()); + assertNotNull("Completed async search must have aggregations", resp.getSearchResponse().getAggregations()); + assertNotNull( + "Completed async search must contain the expected aggregation", + resp.getSearchResponse().getAggregations().get("terms") + ); + stats.completedResponses.increment(); + } + } catch (Exception e) { + Throwable cause = ExceptionsHelper.unwrapCause(e); + if (cause instanceof ElasticsearchStatusException) { + RestStatus status = ExceptionsHelper.status(cause); + if (status == RestStatus.GONE) { + stats.gone410.increment(); } else { stats.exceptions.increment(); + failures.add(cause); } - } finally { - if (resp != null) { - resp.decRef(); - } + } else { + stats.exceptions.increment(); + failures.add(cause); + } + } finally { + if (resp != null) { + resp.decRef(); } } - return null; - })); - } - return new PollerGroup(exec, futures, running); + } + }, pollerExec).whenComplete((v, ex) -> { + if (ex != null) { + failures.add(ExceptionsHelper.unwrapCause(ex)); + } + })).toArray(CompletableFuture[]::new); + + return CompletableFuture.allOf(tasks); } static final class PollStats { @@ -235,41 +268,4 @@ static final class PollStats { final LongAdder exceptions = new LongAdder(); final LongAdder gone410 = new LongAdder(); } - - static class PollerGroup { - private final ExecutorService exec; - private final List> futures; - // The threads are created and running right away - private final AtomicBoolean running; - - private PollerGroup(ExecutorService exec, List> futures, AtomicBoolean running) { - this.exec = exec; - this.futures = futures; - this.running = running; - } - - void stopAndAwait(TimeValue timeout) throws InterruptedException { - running.set(false); - exec.shutdown(); - if (exec.awaitTermination(timeout.millis(), TimeUnit.MILLISECONDS) == false) { - exec.shutdownNow(); - exec.awaitTermination(timeout.millis(), TimeUnit.MILLISECONDS); - } - } - - List getFailures() { - List failures = new ArrayList<>(); - for (Future f : futures) { - try { - f.get(); - } catch (CancellationException ignored) {} catch (ExecutionException ee) { - failures.add(ExceptionsHelper.unwrapCause(ee.getCause())); - } catch (InterruptedException ie) { - Thread.currentThread().interrupt(); - if (failures.isEmpty()) failures.add(ie); - } - } - return failures; - } - } } From 063b5a8ee5cab13f0e75ca75111ff542f4522362 Mon Sep 17 00:00:00 2001 From: Dimitris Rempapis Date: Tue, 4 Nov 2025 15:55:27 +0200 Subject: [PATCH 32/32] Add comments for the given choice --- .../xpack/search/AsyncSearchTask.java | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java index 7e6e604397aa9..7eaf906ab13cf 100644 --- a/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java +++ b/x-pack/plugin/async-search/src/main/java/org/elasticsearch/xpack/search/AsyncSearchTask.java @@ -237,6 +237,17 @@ public void addCompletionListener(Consumer listener) { : ExceptionsHelper.convertToElastic(e); ex.setStackTrace(new StackTraceElement[0]); + // We are in the failure path where no MutableSearchResponse can be produced or retained. + // This happens only after the in-memory reference has been released and the stored response + // cannot be retrieved or retained (tryIncRef() failed or the doc is unavailable). At this point + // it is unsafe to call searchResponse.toAsyncSearchResponse(...): the underlying + // MutableSearchResponse is ref-counted and may already be closed (refcount == 0). + // + // Instead, we synthesize a minimal AsyncSearchResponse that carries the exception back to + // the caller. We set isRunning=false because both the in-memory state and stored document + // are unavailable and no further work can be observed. For isPartial we use false as a + // conservative choice: partial results may have existed earlier, but they are no longer + // accessible and we must not imply that any partial data is included. AsyncSearchResponse failureResponse = new AsyncSearchResponse( searchId.getEncoded(), null, @@ -348,6 +359,17 @@ private void executeCompletionListeners() { : ExceptionsHelper.convertToElastic(e); ex.setStackTrace(new StackTraceElement[0]); + // We are in the failure path where no MutableSearchResponse can be produced or retained. + // This happens only after the in-memory reference has been released and the stored response + // cannot be retrieved or retained (tryIncRef() failed or the doc is unavailable). At this point + // it is unsafe to call searchResponse.toAsyncSearchResponse(...): the underlying + // MutableSearchResponse is ref-counted and may already be closed (refcount == 0). + // + // Instead, we synthesize a minimal AsyncSearchResponse that carries the exception back to + // the caller. We set isRunning=false because both the in-memory state and stored document + // are unavailable and no further work can be observed. For isPartial we use false as a + // conservative choice: partial results may have existed earlier, but they are no longer + // accessible and we must not imply that any partial data is included. AsyncSearchResponse failureResponse = new AsyncSearchResponse( searchId.getEncoded(), null,