Merged

38 commits
73a174f
Improve PIT context relocation
cbuescher Sep 22, 2025
730efd4
Update docs/changelog/135231.yaml
cbuescher Sep 22, 2025
ad4219c
Add keepalive to second and third query
cbuescher Sep 25, 2025
fb526d4
Merge branch 'main' into improve-pit-relocation
cbuescher Sep 25, 2025
6bcb45f
Addressing review comments
cbuescher Sep 26, 2025
841c5b3
Rework failure usage in updating PIT id
cbuescher Oct 8, 2025
b76a959
[CI] Auto commit changes from spotless
Oct 8, 2025
237ea4c
Merge branch 'main' into improve-pit-relocation
cbuescher Oct 9, 2025
9085194
Adding feature flag
cbuescher Oct 9, 2025
d7166cd
Merge branch 'main' into improve-pit-relocation
cbuescher Oct 13, 2025
cbab660
Merge branch 'main' into improve-pit-relocation
cbuescher Oct 16, 2025
df9d875
Addressing review comments
cbuescher Oct 17, 2025
613cf73
Fix AbstractSearchAsyncActionTests.testMaybeReEncode
cbuescher Oct 19, 2025
0528e1d
Add new ActiveReaders class encapsulating active readers and relocati…
cbuescher Oct 19, 2025
07fad27
Merge branch 'main' into improve-pit-relocation
cbuescher Oct 21, 2025
76340ca
Minor change in node lookup
cbuescher Oct 21, 2025
bb0f265
Update PIT ids only when we saw SearchContextMissingException as reas…
cbuescher Oct 21, 2025
c3d1b65
Merge branch 'main' into improve-pit-relocation
cbuescher Oct 21, 2025
033e445
Merge branch 'main' into improve-pit-relocation
cbuescher Oct 22, 2025
5bf1853
minor logging change
cbuescher Oct 22, 2025
cc3c6c5
Modify updating PIT id conditions
cbuescher Oct 22, 2025
67872f6
Addressing review comments
cbuescher Oct 23, 2025
308c366
Merge branch 'main' into improve-pit-relocation
cbuescher Oct 23, 2025
5011068
Correct condition for updating pit
cbuescher Oct 23, 2025
b64a366
Merge branch 'main' into improve-pit-relocation
cbuescher Oct 27, 2025
8ea193d
Going back to always close contexts if nodeId changed
cbuescher Oct 27, 2025
df1b1c7
Close context when concurrently retrying same id twice
cbuescher Oct 27, 2025
9f5a1ad
small cleanup
cbuescher Oct 28, 2025
bee8400
more logger cleanups
cbuescher Oct 28, 2025
9fe9542
Merge branch 'main' into improve-pit-relocation
cbuescher Oct 28, 2025
3dca6e0
Separate normal and relocated putContext code paths
cbuescher Oct 29, 2025
0a06acf
minor cleanups
cbuescher Oct 29, 2025
1a302ac
Exempt context from current session from relocation handling
cbuescher Oct 30, 2025
b9f6f3d
Add RetrySearchIntegTests test for removed PIT context
cbuescher Oct 30, 2025
7a7aeef
Addressing review comments
cbuescher Oct 31, 2025
0edfa87
Merge branch 'main' into improve-pit-relocation
cbuescher Oct 31, 2025
65747c4
Adapt changelog
cbuescher Nov 1, 2025
d2618d8
Merge branch 'main' into improve-pit-relocation
cbuescher Nov 1, 2025
5 changes: 5 additions & 0 deletions docs/changelog/135231.yaml
@@ -0,0 +1,5 @@
pr: 135231
summary: Improve retrying PIT contexts for read-only indices
area: Search
type: enhancement
issues: []
@@ -11,11 +11,12 @@

import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.action.search.ClearScrollResponse;
import org.elasticsearch.action.search.ParsedScrollId;
import org.elasticsearch.action.search.SearchPhaseExecutionException;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchScrollRequestBuilder;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.action.search.ShardSearchFailure;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.common.Priority;
import org.elasticsearch.common.bytes.BytesReference;
@@ -28,6 +29,7 @@
import org.elasticsearch.index.query.RangeQueryBuilder;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.internal.ShardSearchContextId;
import org.elasticsearch.search.sort.FieldSortBuilder;
import org.elasticsearch.search.sort.SortOrder;
import org.elasticsearch.test.ESIntegTestCase;
@@ -703,13 +705,15 @@ public void testRestartDataNodesDuringScrollSearch() throws Exception {
} finally {
respFromProdIndex.decRef();
}
SearchPhaseExecutionException error = expectThrows(
SearchPhaseExecutionException.class,
client().prepareSearchScroll(respFromDemoIndexScrollId)
SearchScrollRequestBuilder searchScrollRequestBuilder = client().prepareSearchScroll(respFromDemoIndexScrollId);
SearchPhaseExecutionException error = expectThrows(SearchPhaseExecutionException.class, searchScrollRequestBuilder);
assertEquals(1, error.shardFailures().length);
ParsedScrollId parsedScrollId = searchScrollRequestBuilder.request().parseScrollId();
ShardSearchContextId shardSearchContextId = parsedScrollId.getContext()[0].getSearchContextId();
assertThat(
error.shardFailures()[0].getCause().getMessage(),
containsString("No search context found for id [" + shardSearchContextId + "]")
);
for (ShardSearchFailure shardSearchFailure : error.shardFailures()) {
assertThat(shardSearchFailure.getCause().getMessage(), containsString("No search context found for id [1]"));
}
client().prepareSearchScroll(respFromProdIndexScrollId).get().decRef();
}

@@ -13,6 +13,7 @@
import org.apache.lucene.util.SetOnce;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.TransportVersion;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.NoShardAvailableActionException;
import org.elasticsearch.action.OriginalIndices;
@@ -21,6 +22,7 @@
import org.elasticsearch.action.support.SubscribableListener;
import org.elasticsearch.action.support.TransportActions;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.node.DiscoveryNodes;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.util.Maps;
@@ -30,8 +32,10 @@
import org.elasticsearch.rest.action.search.SearchResponseMetrics;
import org.elasticsearch.search.SearchContextMissingException;
import org.elasticsearch.search.SearchPhaseResult;
import org.elasticsearch.search.SearchService;
import org.elasticsearch.search.SearchShardTarget;
import org.elasticsearch.search.builder.PointInTimeBuilder;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.internal.AliasFilter;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.internal.ShardSearchContextId;
@@ -40,6 +44,8 @@

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
@@ -53,6 +59,7 @@
import java.util.function.Supplier;
import java.util.stream.Collectors;

import static org.elasticsearch.action.search.TransportClosePointInTimeAction.closeContexts;
import static org.elasticsearch.core.Strings.format;

/**
@@ -94,11 +101,13 @@ abstract class AbstractSearchAsyncAction<Result extends SearchPhaseResult> exten
private final Map<String, PendingExecutions> pendingExecutionsPerNode;
private final AtomicBoolean requestCancelled = new AtomicBoolean();
private final int skippedCount;
private final TransportVersion mintransportVersion;
protected final SearchResponseMetrics searchResponseMetrics;
protected long phaseStartTimeInNanos;

// protected for tests
protected final SubscribableListener<Void> doneFuture = new SubscribableListener<>();
private final Supplier<DiscoveryNodes> discoveryNodes;

AbstractSearchAsyncAction(
String name,
@@ -153,6 +162,8 @@ abstract class AbstractSearchAsyncAction<Result extends SearchPhaseResult> exten
this.nodeIdToConnection = nodeIdToConnection;
this.concreteIndexBoosts = concreteIndexBoosts;
this.clusterStateVersion = clusterState.version();
this.mintransportVersion = clusterState.getMinTransportVersion();
this.discoveryNodes = clusterState::nodes;
this.aliasFilter = aliasFilter;
this.results = resultConsumer;
// register the release of the query consumer to free up the circuit breaker memory
@@ -422,6 +433,7 @@ protected final void onShardFailure(final int shardIndex, SearchShardTarget shar
onShardGroupFailure(shardIndex, shard, e);
}
if (lastShard == false) {
logger.debug("Retrying shard [{}] with target [{}]", shard.getShardId(), nextShard);
performPhaseOnShard(shardIndex, shardIt, nextShard);
} else {
// count down outstanding shards, we're done with this shard as there's no more copies to try
@@ -613,10 +625,87 @@ public void sendSearchResponse(SearchResponseSections internalSearchResponse, At
}

protected BytesReference buildSearchContextId(ShardSearchFailure[] failures) {
Member: This should be async so we wait for old contexts to close before responding. We've seen ML not wait for closing PIT and overload the cluster. But we can do this in a follow-up since this change is behind a feature flag.

Member Author: I considered waiting here, but if I understand correctly, that would potentially slow down the search that is triggering the id update. I assumed that the fire-and-forget approach should be okay. I'm still not 100% clear under which circumstances we'd need to close a PIT context that is now on a new node, because the reason we are opening a part of the PIT on a new node is likely that the context is missing in its original location. So nothing to clean up? Or what am I missing here?

> We've seen ML not wait for closing PIT and overload the cluster.

Do you remember the details of that scenario? Both for checking how it relates to the situation here and for my own learning, to avoid such scenarios in future work.

Member (@dnhatn, Oct 17, 2025): A shard-level request can fail for various reasons, such as a circuit breaking exception on a data node. In such cases, we try another node, and if successful, we replace the old context with the new one here. Alternatively, we could avoid replacing the search context, which means we do not need to close it. We can maintain a list of failure types (SearchContextMissingException or ...) that indicate the search contexts are no longer available and should be replaced. What do you think?

Member Author: Thanks for the explanation. I like the idea of conditioning the id rewrite on failure types. However, when looking at how to do this here in AbstractSearchAsyncAction, I noticed that we clear earlier shard failures when another shard responds with a result. I'm not sure yet how to make this possible here, or whether there are better locations in the code to do so. Need to do some digging.

Member Author: I added a different solution to this than we talked about, directly in AbstractSearchAsyncAction, without having to modify anything in SearchPhaseResult. Let me know if this looks okay to you; otherwise I'll change it.

var source = request.source();
return source != null && source.pointInTimeBuilder() != null && source.pointInTimeBuilder().singleSession() == false
? source.pointInTimeBuilder().getEncodedId()
: null;
SearchSourceBuilder source = request.source();
// only (re-)build a search context id if we are running a long-lived point-in-time request
if (source != null && source.pointInTimeBuilder() != null && source.pointInTimeBuilder().singleSession() == false) {
if (SearchService.PIT_RELOCATION_FEATURE_FLAG.isEnabled()) {
// we want to change node ids in the PIT id if any shards and its PIT context have moved
return maybeReEncodeNodeIds(
source.pointInTimeBuilder(),
results.getAtomicArray().asList(),
namedWriteableRegistry,
mintransportVersion,
searchTransportService,
discoveryNodes.get(),
logger
);
} else {
return source.pointInTimeBuilder().getEncodedId();
}
} else {
return null;
}
}
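The failure-type gating discussed in the review thread above can be pictured with a minimal sketch. This is an illustration, not code from this PR; the helper name is hypothetical, while ExceptionsHelper and SearchContextMissingException are the classes already imported in this file:

// Hypothetical helper: treat a shard's PIT entry as replaceable only when the
// recorded failure shows the old context is gone for good. A
// SearchContextMissingException means the context no longer exists on the
// original node, so re-pointing the PIT id at its new location is safe.
private static boolean indicatesLostContext(Exception shardFailure) {
    return ExceptionsHelper.unwrapCause(shardFailure) instanceof SearchContextMissingException;
}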

Member: Question: I haven't looked into how we handle partial results with re-encoding. Have you considered this?

Member Author: I assume you are referring to the functionality added with #111516? If I understand that PR correctly, we will have SearchContextIdForNode entries in the PIT with a "null" node entry. I think in that case we won't add that shard to the shard iterators of any subsequent search, so we won't get a Result for that shard. That is one reason why I added copying PIT id entries for everything that has no Result from the old to the new encoded id without change. Is that what you mean?

Member Author: I looked again at how we handle partial results. The way I see it, when opening the PIT we can tolerate partial results, i.e. failures from certain shards when they are not available. The result is an entry in the PIT id that has a ShardId but a SearchContextIdForNode with an empty node/contextId. We should not change any of these entries, which is what already happens in this method. In addition, the re-encoding step here doesn't change any entries for shards that failed in the last search: they shouldn't be included in the "results" list and therefore don't update the related part of the updated PIT id. In cases where these failures are temporary, subsequent searches with the updated id will try to hit the "old" shard context locations; if any of those can be retried, we will update that part of the PIT in a later call.

static <Result extends SearchPhaseResult> BytesReference maybeReEncodeNodeIds(

PointInTimeBuilder originalPit,
List<Result> results,
NamedWriteableRegistry namedWriteableRegistry,
TransportVersion mintransportVersion,
SearchTransportService searchTransportService,
DiscoveryNodes nodes,
Logger logger
) {
SearchContextId original = originalPit.getSearchContextId(namedWriteableRegistry);
// only create the following two collections if we detect an id change
Map<ShardId, SearchContextIdForNode> updatedShardMap = null;
Collection<SearchContextIdForNode> contextsToClose = null;
logger.debug("checking search result shards to detect PIT node changes");
for (Result result : results) {
SearchShardTarget searchShardTarget = result.getSearchShardTarget();
ShardId shardId = searchShardTarget.getShardId();
SearchContextIdForNode originalShard = original.shards().get(shardId);
if (originalShard != null && originalShard.getSearchContextId() != null && originalShard.getSearchContextId().isRetryable()) {
// check if the node is different, if so we need to re-encode the PIT
String originalNode = originalShard.getNode();
if (originalNode != null && originalNode.equals(searchShardTarget.getNodeId()) == false) {
// the target node for this shard entry in the PIT has changed, we need to update it
if (updatedShardMap == null) {
// initialize the map with entries from the old map to keep ids for shards that have not responded in these results
updatedShardMap = new HashMap<>(original.shards());
contextsToClose = new ArrayList<>();
}
SearchContextIdForNode updatedId = new SearchContextIdForNode(
searchShardTarget.getClusterAlias(),
searchShardTarget.getNodeId(),
result.getContextId()
);

logger.debug("changing node for PIT shard id from [{}] to [{}]", originalShard, updatedId);
updatedShardMap.put(shardId, updatedId);
contextsToClose.add(original.shards().get(shardId));

}
}
}
if (updatedShardMap != null) {
// we free all old contexts that have moved, just in case we have re-tried them elsewhere
// but they still exist in the old location
closeContexts(nodes, searchTransportService, contextsToClose, new ActionListener<Integer>() {
@Override
public void onResponse(Integer integer) {
// ignore
}

@Override
public void onFailure(Exception e) {
logger.trace("Failure while freeing old point in time contexts", e);
}
});
return SearchContextId.encode(updatedShardMap, original.aliasFilter(), mintransportVersion, ShardSearchFailure.EMPTY_ARRAY);
} else {
return originalPit.getEncodedId();
}
}
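Reduced to its essentials, the per-shard rewrite condition in the loop above amounts to the following predicate. This is a paraphrase for illustration, using only accessors that appear in this diff, not additional PR code:

// Rewrite a PIT entry only if it is retryable and the shard responded from a
// different node than the one recorded in the original PIT id.
static boolean needsReEncoding(SearchContextIdForNode recorded, SearchShardTarget current) {
    return recorded.getSearchContextId() != null
        && recorded.getSearchContextId().isRetryable()
        && recorded.getNode() != null
        && recorded.getNode().equals(current.getNodeId()) == false;
}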

/**
@@ -13,20 +13,14 @@
import org.elasticsearch.action.support.RefCountingRunnable;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.node.DiscoveryNodes;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.util.concurrent.ListenableFuture;
import org.elasticsearch.transport.Transport;
import org.elasticsearch.transport.TransportResponse;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.BiFunction;
import java.util.stream.Collectors;

import static org.elasticsearch.action.search.TransportSearchHelper.parseScrollId;

@@ -143,53 +137,4 @@ private void onFailedFreedContext(Throwable e, DiscoveryNode node) {
private void finish() {
listener.onResponse(new ClearScrollResponse(hasFailed.get() == false, freedSearchContexts.get()));
}

/**
* Closes the given context id and reports the number of freed contexts via the listener
*/
public static void closeContexts(
DiscoveryNodes nodes,
SearchTransportService searchTransportService,
Collection<SearchContextIdForNode> contextIds,
ActionListener<Integer> listener
) {
final Set<String> clusters = contextIds.stream()
.map(SearchContextIdForNode::getClusterAlias)
.filter(clusterAlias -> Strings.isEmpty(clusterAlias) == false)
.collect(Collectors.toSet());
final ListenableFuture<BiFunction<String, String, DiscoveryNode>> lookupListener = new ListenableFuture<>();
if (clusters.isEmpty()) {
lookupListener.onResponse((cluster, nodeId) -> nodes.get(nodeId));
} else {
searchTransportService.getRemoteClusterService().collectNodes(clusters, lookupListener);
}
lookupListener.addListener(listener.delegateFailure((l, nodeLookup) -> {
final var successes = new AtomicInteger();
try (RefCountingRunnable refs = new RefCountingRunnable(() -> l.onResponse(successes.get()))) {
for (SearchContextIdForNode contextId : contextIds) {
if (contextId.getNode() == null) {
// the shard was missing when creating the PIT, ignore.
continue;
}
final DiscoveryNode node = nodeLookup.apply(contextId.getClusterAlias(), contextId.getNode());
if (node != null) {
try {
searchTransportService.sendFreeContext(
searchTransportService.getConnection(contextId.getClusterAlias(), node),
contextId.getSearchContextId(),
refs.acquireListener().map(r -> {
if (r.isFreed()) {
successes.incrementAndGet();
}
return null;
})
);
} catch (Exception e) {
// ignored
}
}
}
}
}));
}
}
@@ -21,7 +21,6 @@
import org.elasticsearch.common.util.Maps;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.search.SearchPhaseResult;
import org.elasticsearch.search.SearchShardTarget;
import org.elasticsearch.search.internal.AliasFilter;
import org.elasticsearch.search.internal.ShardSearchContextId;
import org.elasticsearch.transport.RemoteClusterAware;
@@ -30,6 +29,7 @@
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.TreeSet;
import java.util.stream.Collectors;
@@ -62,6 +62,26 @@ public static BytesReference encode(
Map<String, AliasFilter> aliasFilter,
TransportVersion version,
ShardSearchFailure[] shardFailures
) {
Map<ShardId, SearchContextIdForNode> shards = searchPhaseResults.stream()
.collect(
Collectors.toMap(
r -> r.getSearchShardTarget().getShardId(),
r -> new SearchContextIdForNode(
r.getSearchShardTarget().getClusterAlias(),
r.getSearchShardTarget().getNodeId(),
r.getContextId()
)
)
);
return encode(shards, aliasFilter, version, shardFailures);
}

static BytesReference encode(
Map<ShardId, SearchContextIdForNode> shards,
Map<String, AliasFilter> aliasFilter,
TransportVersion version,
ShardSearchFailure[] shardFailures
) {
assert shardFailures.length == 0 || version.onOrAfter(TransportVersions.V_8_16_0)
: "[allow_partial_search_results] cannot be enabled on a cluster that has not been fully upgraded to version ["
@@ -71,12 +91,12 @@ public static BytesReference encode(
out.setTransportVersion(version);
TransportVersion.writeVersion(version, out);
boolean allowNullContextId = out.getTransportVersion().onOrAfter(TransportVersions.V_8_16_0);
int shardSize = searchPhaseResults.size() + (allowNullContextId ? shardFailures.length : 0);
int shardSize = shards.size() + (allowNullContextId ? shardFailures.length : 0);
out.writeVInt(shardSize);
for (var searchResult : searchPhaseResults) {
final SearchShardTarget target = searchResult.getSearchShardTarget();
target.getShardId().writeTo(out);
new SearchContextIdForNode(target.getClusterAlias(), target.getNodeId(), searchResult.getContextId()).writeTo(out);
for (ShardId shardId : shards.keySet()) {
shardId.writeTo(out);
SearchContextIdForNode searchContextIdForNode = shards.get(shardId);
searchContextIdForNode.writeTo(out);
}
if (allowNullContextId) {
for (var failure : shardFailures) {
@@ -142,4 +162,23 @@ public String[] getActualIndices() {
}
return indices.toArray(String[]::new);
}

@Override
public boolean equals(Object o) {
if (o == null || getClass() != o.getClass()) return false;
SearchContextId that = (SearchContextId) o;
return Objects.equals(shards, that.shards)
&& Objects.equals(aliasFilter, that.aliasFilter)
&& Objects.equals(contextIds, that.contextIds);
}

@Override
public int hashCode() {
return Objects.hash(shards, aliasFilter, contextIds);
}

@Override
public String toString() {
return "SearchContextId{" + "shards=" + shards + ", aliasFilter=" + aliasFilter + '}';
}
}
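A plausible use of the new equals/hashCode is round-trip testing of encode and decode. The sketch below is an illustration, not part of the PR, and assumes a decode counterpart with the signature SearchContextId.decode(NamedWriteableRegistry, BytesReference):

// Assumed round-trip check: re-encoding a decoded id's shard map should yield
// an id that decodes back to an equal SearchContextId.
SearchContextId original = SearchContextId.decode(namedWriteableRegistry, encodedId);
BytesReference reEncoded = SearchContextId.encode(
    original.shards(),
    original.aliasFilter(),
    version,
    ShardSearchFailure.EMPTY_ARRAY
);
assert original.equals(SearchContextId.decode(namedWriteableRegistry, reEncoded));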