5 changes: 5 additions & 0 deletions docs/changelog/135231.yaml
@@ -0,0 +1,5 @@
pr: 135231
summary: Improve PIT context relocation
area: Search
type: enhancement
issues: []
@@ -11,11 +11,12 @@

import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.action.search.ClearScrollResponse;
import org.elasticsearch.action.search.ParsedScrollId;
import org.elasticsearch.action.search.SearchPhaseExecutionException;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchScrollRequestBuilder;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.action.search.ShardSearchFailure;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.common.Priority;
import org.elasticsearch.common.bytes.BytesReference;
@@ -28,6 +29,7 @@
import org.elasticsearch.index.query.RangeQueryBuilder;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.internal.ShardSearchContextId;
import org.elasticsearch.search.sort.FieldSortBuilder;
import org.elasticsearch.search.sort.SortOrder;
import org.elasticsearch.test.ESIntegTestCase;
@@ -703,13 +705,15 @@ public void testRestartDataNodesDuringScrollSearch() throws Exception {
} finally {
respFromProdIndex.decRef();
}
SearchPhaseExecutionException error = expectThrows(
SearchPhaseExecutionException.class,
client().prepareSearchScroll(respFromDemoIndexScrollId)
SearchScrollRequestBuilder searchScrollRequestBuilder = client().prepareSearchScroll(respFromDemoIndexScrollId);
SearchPhaseExecutionException error = expectThrows(SearchPhaseExecutionException.class, searchScrollRequestBuilder);
assertEquals(1, error.shardFailures().length);
ParsedScrollId parsedScrollId = searchScrollRequestBuilder.request().parseScrollId();
ShardSearchContextId shardSearchContextId = parsedScrollId.getContext()[0].getSearchContextId();
assertThat(
error.shardFailures()[0].getCause().getMessage(),
containsString("No search context found for id [" + shardSearchContextId + "]")
);
for (ShardSearchFailure shardSearchFailure : error.shardFailures()) {
assertThat(shardSearchFailure.getCause().getMessage(), containsString("No search context found for id [1]"));
}
client().prepareSearchScroll(respFromProdIndexScrollId).get().decRef();
}

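The updated assertion above derives the expected context id from the parsed scroll id instead of hard-coding "id [1]". A minimal, self-contained sketch of that pattern follows; ParsedId and Failure are hypothetical stand-ins for ParsedScrollId and ShardSearchFailure, not the real test types.

// Hypothetical stand-ins for ParsedScrollId and ShardSearchFailure from the test above.
record ParsedId(long contextId) {}
record Failure(String message) {}

public class ContextIdAssertionSketch {
    public static void main(String[] args) {
        ParsedId parsed = new ParsedId(7L); // in the real test: searchScrollRequestBuilder.request().parseScrollId()
        Failure failure = new Failure("No search context found for id [7]");
        // assert against the parsed id so the test no longer depends on how context ids happen to be allocated
        String expected = "No search context found for id [" + parsed.contextId() + "]";
        if (failure.message().contains(expected) == false) {
            throw new AssertionError("unexpected failure message: " + failure.message());
        }
        System.out.println("assertion pattern holds");
    }
}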
1 change: 1 addition & 0 deletions server/src/main/java/module-info.java
@@ -54,6 +54,7 @@
requires org.apache.lucene.queryparser;
requires org.apache.lucene.sandbox;
requires org.apache.lucene.suggest;
requires jdk.jdi;

exports org.elasticsearch;
exports org.elasticsearch.action;
server/src/main/java/org/elasticsearch/action/search/AbstractSearchAsyncAction.java
@@ -13,6 +13,7 @@
import org.apache.lucene.util.SetOnce;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.TransportVersion;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.NoShardAvailableActionException;
import org.elasticsearch.action.OriginalIndices;
@@ -21,6 +22,7 @@
import org.elasticsearch.action.support.SubscribableListener;
import org.elasticsearch.action.support.TransportActions;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.node.DiscoveryNodes;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.util.Maps;
@@ -29,8 +31,10 @@
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.search.SearchContextMissingException;
import org.elasticsearch.search.SearchPhaseResult;
import org.elasticsearch.search.SearchService;
import org.elasticsearch.search.SearchShardTarget;
import org.elasticsearch.search.builder.PointInTimeBuilder;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.internal.AliasFilter;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.internal.ShardSearchContextId;
@@ -39,6 +43,8 @@

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
@@ -93,9 +99,11 @@ abstract class AbstractSearchAsyncAction<Result extends SearchPhaseResult> exten
private final Map<String, PendingExecutions> pendingExecutionsPerNode;
private final AtomicBoolean requestCancelled = new AtomicBoolean();
private final int skippedCount;
private final TransportVersion minTransportVersion;

// protected for tests
protected final SubscribableListener<Void> doneFuture = new SubscribableListener<>();
private final DiscoveryNodes discoveryNodes;

AbstractSearchAsyncAction(
String name,
@@ -149,6 +157,8 @@ abstract class AbstractSearchAsyncAction<Result extends SearchPhaseResult> exten
this.nodeIdToConnection = nodeIdToConnection;
this.concreteIndexBoosts = concreteIndexBoosts;
this.clusterStateVersion = clusterState.version();
this.minTransportVersion = clusterState.getMinTransportVersion();
this.discoveryNodes = clusterState.nodes();
this.aliasFilter = aliasFilter;
this.results = resultConsumer;
// register the release of the query consumer to free up the circuit breaker memory
@@ -416,6 +426,7 @@ protected final void onShardFailure(final int shardIndex, SearchShardTarget shar
onShardGroupFailure(shardIndex, shard, e);
}
if (lastShard == false) {
logger.debug("Retrying shard [{}] with target [{}]", shard.getShardId(), nextShard);
performPhaseOnShard(shardIndex, shardIt, nextShard);
} else {
// count down outstanding shards, we're done with this shard as there's no more copies to try
@@ -607,10 +618,93 @@ public void sendSearchResponse(SearchResponseSections internalSearchResponse, At
}

protected BytesReference buildSearchContextId(ShardSearchFailure[] failures) {
var source = request.source();
return source != null && source.pointInTimeBuilder() != null && source.pointInTimeBuilder().singleSession() == false
? source.pointInTimeBuilder().getEncodedId()
: null;
SearchSourceBuilder source = request.source();
// only (re-)build a search context id if we have a point in time
if (source != null && source.pointInTimeBuilder() != null && source.pointInTimeBuilder().singleSession() == false) {
if (SearchService.PIT_RELOCATION_FEATURE_FLAG.isEnabled()) {
// we want to change node ids in the PIT id if any shard and its PIT context have moved
return maybeReEncodeNodeIds(
source.pointInTimeBuilder(),
results.getAtomicArray().asList(),
namedWriteableRegistry,
minTransportVersion,
searchTransportService,
discoveryNodes,
logger
);
} else {
return source.pointInTimeBuilder().getEncodedId();
}
} else {
return null;
}
}

Review comment (Member): Question: I haven't looked into how we handle partial results with re-encoding. Have you considered this?

Reply (Member, Author): I assume you are referring to the functionality added with #111516? If I understand that PR correctly, we will have SearchContextIdForNode entries in the PIT with a "null" node entry. In that case we won't add that shard to the shard iterators of any subsequent search, so we won't get a Result for that shard. That is one reason why entries that have no Result are copied from the old to the new encoded id without change. Is that what you mean?

Reply (Member, Author): I looked again at how we handle partial results. When opening the PIT we can tolerate partial results, i.e. failures from certain shards when they are not available. The result is an entry in the PIT id that has a ShardId but a SearchContextIdForNode with an empty node/contextId. We should not change any of these entries, and that is what already happens in this method. The re-encoding step also leaves entries untouched for shards that failed in the last search: they are not included in the results list, so the related part of the PIT id is not updated. Where such failures are temporary, subsequent searches with the updated id will try the old shard context locations, and if any of them can be retried we will update that part of the PIT id in a later call.

static <Result extends SearchPhaseResult> BytesReference maybeReEncodeNodeIds(
PointInTimeBuilder originalPit,
List<Result> results,
NamedWriteableRegistry namedWriteableRegistry,
TransportVersion minTransportVersion,
SearchTransportService searchTransportService,
DiscoveryNodes nodes,
Logger logger
) {
SearchContextId original = originalPit.getSearchContextId(namedWriteableRegistry);
boolean idChanged = false;
Map<ShardId, SearchContextIdForNode> updatedShardMap = null; // only create this if we detect a change
for (Result result : results) {
SearchShardTarget searchShardTarget = result.getSearchShardTarget();
ShardId shardId = searchShardTarget.getShardId();
SearchContextIdForNode originalShard = original.shards().get(shardId);
if (originalShard != null && originalShard.getSearchContextId() != null && originalShard.getSearchContextId().isRetryable()) {
// check if the node is different; if so we need to re-encode the PIT
String originalNode = originalShard.getNode();
if (originalNode != null && originalNode.equals(searchShardTarget.getNodeId()) == false) {
// the target node for this shard entry in the PIT has changed, we need to update it
idChanged = true;
if (updatedShardMap == null) {
updatedShardMap = new HashMap<>(original.shards().size());
}
updatedShardMap.put(
shardId,
new SearchContextIdForNode(
searchShardTarget.getClusterAlias(),
searchShardTarget.getNodeId(),
result.getContextId()
)
);
}
}
}
if (idChanged) {
// we free all old contexts that have moved, just in case we have re-tried them elsewhere but they still exist in the old
// location
Collection<SearchContextIdForNode> contextsToClose = updatedShardMap.keySet()
.stream()
.map(shardId -> original.shards().get(shardId))
.collect(Collectors.toCollection(ArrayList::new));
TransportClosePointInTimeAction.closeContexts(nodes, searchTransportService, contextsToClose, new ActionListener<Integer>() {
@Override
public void onResponse(Integer integer) {
// ignore
}

@Override
public void onFailure(Exception e) {
logger.trace("Failure while freeing old point in time contexts", e);
}
});
// we also need to add shards that are not in the results for some reason (e.g. the query rewrote to match none) but that
// were part of the original PIT
for (ShardId shardId : original.shards().keySet()) {
if (updatedShardMap.containsKey(shardId) == false) {
updatedShardMap.put(shardId, original.shards().get(shardId));
}
}
return SearchContextId.encode(updatedShardMap, original.aliasFilter(), minTransportVersion, ShardSearchFailure.EMPTY_ARRAY);
} else {
return originalPit.getEncodedId();
}
}
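To make the copy-unchanged rule from the review thread above concrete, here is a minimal, self-contained sketch of the merge that maybeReEncodeNodeIds performs, using plain maps. ContextEntry and the string shard keys are illustrative stand-ins for the internal SearchContextIdForNode and ShardId types, not the actual API.

import java.util.HashMap;
import java.util.Map;

// Illustrative stand-in for the internal per-shard PIT entry (node + context id).
record ContextEntry(String node, long contextId) {}

public class PitReEncodeSketch {

    // Entries whose shard produced a result on a different node are rewritten;
    // every other entry (no result, null node, unchanged node) is copied as-is.
    static Map<String, ContextEntry> mergePit(Map<String, ContextEntry> original, Map<String, ContextEntry> latestResults) {
        Map<String, ContextEntry> updated = new HashMap<>(original); // start from the old entries
        latestResults.forEach((shard, fresh) -> {
            ContextEntry old = original.get(shard);
            if (old != null && old.node() != null && old.node().equals(fresh.node()) == false) {
                updated.put(shard, fresh); // the context moved: point the PIT at its new home
            }
        });
        return updated;
    }

    public static void main(String[] args) {
        Map<String, ContextEntry> original = Map.of(
            "[idx][0]", new ContextEntry("node-1", 42L),
            "[idx][1]", new ContextEntry("node-2", 43L)
        );
        // shard 0 relocated to node-3; shard 1 returned no result this round (e.g. query rewrote to match none)
        Map<String, ContextEntry> latest = Map.of("[idx][0]", new ContextEntry("node-3", 99L));
        System.out.println(mergePit(original, latest)); // shard 0 -> node-3/99, shard 1 unchanged
    }
}

In the real method the rewrite additionally frees the superseded contexts at their old locations, as the closeContexts call above shows.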

/**
server/src/main/java/org/elasticsearch/action/search/ClearScrollController.java
@@ -13,20 +13,14 @@
import org.elasticsearch.action.support.RefCountingRunnable;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.node.DiscoveryNodes;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.util.concurrent.ListenableFuture;
import org.elasticsearch.transport.Transport;
import org.elasticsearch.transport.TransportResponse;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.BiFunction;
import java.util.stream.Collectors;

import static org.elasticsearch.action.search.TransportSearchHelper.parseScrollId;

@@ -143,53 +137,4 @@ private void onFailedFreedContext(Throwable e, DiscoveryNode node) {
private void finish() {
listener.onResponse(new ClearScrollResponse(hasFailed.get() == false, freedSearchContexts.get()));
}

/**
* Closes the given context ids and reports the number of freed contexts via the listener
*/
public static void closeContexts(
DiscoveryNodes nodes,
SearchTransportService searchTransportService,
Collection<SearchContextIdForNode> contextIds,
ActionListener<Integer> listener
) {
final Set<String> clusters = contextIds.stream()
.map(SearchContextIdForNode::getClusterAlias)
.filter(clusterAlias -> Strings.isEmpty(clusterAlias) == false)
.collect(Collectors.toSet());
final ListenableFuture<BiFunction<String, String, DiscoveryNode>> lookupListener = new ListenableFuture<>();
if (clusters.isEmpty()) {
lookupListener.onResponse((cluster, nodeId) -> nodes.get(nodeId));
} else {
searchTransportService.getRemoteClusterService().collectNodes(clusters, lookupListener);
}
lookupListener.addListener(listener.delegateFailure((l, nodeLookup) -> {
final var successes = new AtomicInteger();
try (RefCountingRunnable refs = new RefCountingRunnable(() -> l.onResponse(successes.get()))) {
for (SearchContextIdForNode contextId : contextIds) {
if (contextId.getNode() == null) {
// the shard was missing when creating the PIT, ignore.
continue;
}
final DiscoveryNode node = nodeLookup.apply(contextId.getClusterAlias(), contextId.getNode());
if (node != null) {
try {
searchTransportService.sendFreeContext(
searchTransportService.getConnection(contextId.getClusterAlias(), node),
contextId.getSearchContextId(),
refs.acquireListener().map(r -> {
if (r.isFreed()) {
successes.incrementAndGet();
}
return null;
})
);
} catch (Exception e) {
// ignored
}
}
}
}
}));
}
}
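The closeContexts helper removed here now lives in TransportClosePointInTimeAction (see the call site in AbstractSearchAsyncAction above). Its shape is a fan-out: fire one asynchronous free-context request per entry, ignore per-request failures, and report the number of freed contexts exactly once when all responses are in. Below is a minimal sketch of that pattern with plain JDK types; freeContext is a hypothetical stand-in for searchTransportService.sendFreeContext, not the real transport API.

import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Consumer;

public class FreeContextFanOutSketch {

    // Fires one async close per context id and reports how many were actually freed.
    static void closeAll(List<Long> contextIds, Consumer<Integer> onDone) {
        AtomicInteger successes = new AtomicInteger();
        CompletableFuture<?>[] futures = contextIds.stream()
            .map(id -> freeContext(id)
                .thenAccept(freed -> {
                    if (freed) {
                        successes.incrementAndGet(); // count only contexts the node confirmed as freed
                    }
                })
                .exceptionally(e -> null)) // per-request failures are ignored, as in the original
            .toArray(CompletableFuture[]::new);
        CompletableFuture.allOf(futures).whenComplete((v, e) -> onDone.accept(successes.get()));
    }

    // Hypothetical stand-in for the transport call; pretend even ids free successfully.
    static CompletableFuture<Boolean> freeContext(long id) {
        return CompletableFuture.supplyAsync(() -> id % 2 == 0);
    }

    public static void main(String[] args) throws InterruptedException {
        closeAll(List.of(1L, 2L, 3L, 4L), n -> System.out.println("freed " + n + " contexts"));
        Thread.sleep(200); // crude wait so the async demo can finish before the JVM exits
    }
}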
server/src/main/java/org/elasticsearch/action/search/SearchContextId.java
@@ -21,7 +21,6 @@
import org.elasticsearch.common.util.Maps;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.search.SearchPhaseResult;
import org.elasticsearch.search.SearchShardTarget;
import org.elasticsearch.search.internal.AliasFilter;
import org.elasticsearch.search.internal.ShardSearchContextId;
import org.elasticsearch.transport.RemoteClusterAware;
@@ -30,6 +29,7 @@
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.TreeSet;
import java.util.stream.Collectors;
@@ -62,6 +62,26 @@ public static BytesReference encode(
Map<String, AliasFilter> aliasFilter,
TransportVersion version,
ShardSearchFailure[] shardFailures
) {
Map<ShardId, SearchContextIdForNode> shards = searchPhaseResults.stream()
.collect(
Collectors.toMap(
r -> r.getSearchShardTarget().getShardId(),
r -> new SearchContextIdForNode(
r.getSearchShardTarget().getClusterAlias(),
r.getSearchShardTarget().getNodeId(),
r.getContextId()
)
)
);
return encode(shards, aliasFilter, version, shardFailures);
}

static BytesReference encode(
Map<ShardId, SearchContextIdForNode> shards,
Map<String, AliasFilter> aliasFilter,
TransportVersion version,
ShardSearchFailure[] shardFailures
) {
assert shardFailures.length == 0 || version.onOrAfter(TransportVersions.V_8_16_0)
: "[allow_partial_search_results] cannot be enabled on a cluster that has not been fully upgraded to version ["
@@ -71,12 +91,12 @@
out.setTransportVersion(version);
TransportVersion.writeVersion(version, out);
boolean allowNullContextId = out.getTransportVersion().onOrAfter(TransportVersions.V_8_16_0);
int shardSize = searchPhaseResults.size() + (allowNullContextId ? shardFailures.length : 0);
int shardSize = shards.size() + (allowNullContextId ? shardFailures.length : 0);
out.writeVInt(shardSize);
for (var searchResult : searchPhaseResults) {
final SearchShardTarget target = searchResult.getSearchShardTarget();
target.getShardId().writeTo(out);
new SearchContextIdForNode(target.getClusterAlias(), target.getNodeId(), searchResult.getContextId()).writeTo(out);
for (ShardId shardId : shards.keySet()) {
shardId.writeTo(out);
SearchContextIdForNode searchContextIdForNode = shards.get(shardId);
searchContextIdForNode.writeTo(out);
}
if (allowNullContextId) {
for (var failure : shardFailures) {
@@ -142,4 +162,23 @@ public String[] getActualIndices() {
}
return indices.toArray(String[]::new);
}

@Override
public boolean equals(Object o) {
if (o == null || getClass() != o.getClass()) return false;
SearchContextId that = (SearchContextId) o;
return Objects.equals(shards, that.shards)
&& Objects.equals(aliasFilter, that.aliasFilter)
&& Objects.equals(contextIds, that.contextIds);
}

@Override
public int hashCode() {
return Objects.hash(shards, aliasFilter, contextIds);
}

@Override
public String toString() {
return "SearchContextId{" + "shards=" + shards + ", aliasFilter=" + aliasFilter + '}';
}
}
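As a rough, stand-alone illustration of the framing that the encode overloads above write (a shard count followed by one shard id and context entry per slot), here is a sketch using DataOutputStream; the real code uses StreamOutput, variable-length ints, transport versions, and optional failure entries, none of which are modeled here.

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.Map;

public class EncodeFramingSketch {

    // Illustrative stand-in for SearchContextIdForNode.
    record Entry(String node, long contextId) {}

    static byte[] encode(Map<String, Entry> shards) throws IOException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        DataOutputStream out = new DataOutputStream(bytes);
        out.writeInt(shards.size()); // the real format uses a variable-length int here
        for (Map.Entry<String, Entry> e : shards.entrySet()) {
            out.writeUTF(e.getKey());            // shard id
            out.writeUTF(e.getValue().node());   // target node
            out.writeLong(e.getValue().contextId());
        }
        return bytes.toByteArray();
    }

    public static void main(String[] args) throws IOException {
        byte[] encoded = encode(Map.of("[idx][0]", new Entry("node-1", 42L)));
        System.out.println(encoded.length + " bytes");
    }
}

Note that the equals/hashCode added to SearchContextId above compare the decoded value (shards, alias filters, context ids) rather than any encoded bytes.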