Skip to content

Commit e1995a7

Browse files
authored
Increase concurrent request of opening point-in-time (#96959)
* Increase concurrent request of opening point-in-time (#96782) (#96957) Today, we mistakenly throttle the open point-in-time API to 1 request per node. As a result, opening a point-in-time across a large cluster can take a significant amount of time and can eventually fail because target shards have been relocated or target indices have been deleted by ILM in the meantime. Ideally, we should batch the requests per node and eliminate this throttle completely. However, this requires all clusters to be on the latest version. This PR increases the number of concurrent requests from 1 to 5, which is the default for search. * Fix tests * Fix tests
1 parent 1fe30f5 commit e1995a7

File tree

8 files changed

+163
-3
lines changed

8 files changed

+163
-3
lines changed

docs/changelog/96782.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 96782
2+
summary: Increase concurrent request of opening point-in-time
3+
area: Search
4+
type: bug
5+
issues: []

server/src/internalClusterTest/java/org/elasticsearch/action/search/PointInTimeIT.java

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,16 +12,20 @@
1212
import org.elasticsearch.ExceptionsHelper;
1313
import org.elasticsearch.action.admin.indices.stats.CommonStats;
1414
import org.elasticsearch.action.support.IndicesOptions;
15+
import org.elasticsearch.action.support.PlainActionFuture;
1516
import org.elasticsearch.cluster.metadata.IndexMetadata;
17+
import org.elasticsearch.cluster.node.DiscoveryNode;
1618
import org.elasticsearch.cluster.routing.ShardRouting;
1719
import org.elasticsearch.common.settings.Settings;
20+
import org.elasticsearch.common.util.CollectionUtils;
1821
import org.elasticsearch.core.TimeValue;
1922
import org.elasticsearch.index.IndexService;
2023
import org.elasticsearch.index.IndexSettings;
2124
import org.elasticsearch.index.query.MatchAllQueryBuilder;
2225
import org.elasticsearch.index.query.RangeQueryBuilder;
2326
import org.elasticsearch.index.shard.IndexShard;
2427
import org.elasticsearch.indices.IndicesService;
28+
import org.elasticsearch.plugins.Plugin;
2529
import org.elasticsearch.rest.RestStatus;
2630
import org.elasticsearch.search.SearchContextMissingException;
2731
import org.elasticsearch.search.SearchHit;
@@ -32,10 +36,14 @@
3236
import org.elasticsearch.search.sort.SortOrder;
3337
import org.elasticsearch.tasks.TaskInfo;
3438
import org.elasticsearch.test.ESIntegTestCase;
39+
import org.elasticsearch.test.transport.MockTransportService;
40+
import org.elasticsearch.transport.TransportService;
3541

42+
import java.util.Collection;
3643
import java.util.HashSet;
3744
import java.util.List;
3845
import java.util.Set;
46+
import java.util.concurrent.CountDownLatch;
3947
import java.util.concurrent.TimeUnit;
4048
import java.util.stream.Collectors;
4149

@@ -53,6 +61,11 @@
5361

5462
public class PointInTimeIT extends ESIntegTestCase {
5563

64+
@Override
65+
protected Collection<Class<? extends Plugin>> nodePlugins() {
66+
return CollectionUtils.appendToCopy(super.nodePlugins(), MockTransportService.TestPlugin.class);
67+
}
68+
5669
@Override
5770
protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) {
5871
return Settings.builder()
@@ -433,6 +446,55 @@ public void testCloseInvalidPointInTime() {
433446
assertThat(tasks, empty());
434447
}
435448

449+
public void testOpenPITConcurrentShardRequests() throws Exception {
450+
DiscoveryNode dataNode = randomFrom(clusterService().state().nodes().getDataNodes().values());
451+
int numShards = randomIntBetween(5, 10);
452+
int maxConcurrentRequests = randomIntBetween(2, 5);
453+
assertAcked(
454+
client().admin()
455+
.indices()
456+
.prepareCreate("test")
457+
.setSettings(
458+
Settings.builder()
459+
.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numShards)
460+
.put("index.routing.allocation.require._id", dataNode.getId())
461+
.build()
462+
)
463+
);
464+
MockTransportService transportService = (MockTransportService) internalCluster().getInstance(
465+
TransportService.class,
466+
dataNode.getName()
467+
);
468+
try {
469+
CountDownLatch sentLatch = new CountDownLatch(maxConcurrentRequests);
470+
CountDownLatch readyLatch = new CountDownLatch(1);
471+
transportService.addRequestHandlingBehavior(
472+
TransportOpenPointInTimeAction.OPEN_SHARD_READER_CONTEXT_NAME,
473+
(handler, request, channel, task) -> {
474+
sentLatch.countDown();
475+
Thread thread = new Thread(() -> {
476+
try {
477+
assertTrue(readyLatch.await(1, TimeUnit.MINUTES));
478+
handler.messageReceived(request, channel, task);
479+
} catch (Exception e) {
480+
throw new AssertionError(e);
481+
}
482+
});
483+
thread.start();
484+
}
485+
);
486+
OpenPointInTimeRequest request = new OpenPointInTimeRequest("test").keepAlive(TimeValue.timeValueMinutes(1));
487+
request.maxConcurrentShardRequests(maxConcurrentRequests);
488+
PlainActionFuture<OpenPointInTimeResponse> future = new PlainActionFuture<>();
489+
client().execute(OpenPointInTimeAction.INSTANCE, request, future);
490+
assertTrue(sentLatch.await(1, TimeUnit.MINUTES));
491+
readyLatch.countDown();
492+
closePointInTime(future.actionGet().getPointInTimeId());
493+
} finally {
494+
transportService.clearAllRules();
495+
}
496+
}
497+
436498
@SuppressWarnings({ "rawtypes", "unchecked" })
437499
private void assertPagination(PointInTimeBuilder pit, int expectedNumDocs, int size, SortBuilder<?>... sorts) throws Exception {
438500
Set<String> seen = new HashSet<>();

server/src/main/java/org/elasticsearch/action/search/OpenPointInTimeRequest.java

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,11 @@
2626
import static org.elasticsearch.action.ValidateActions.addValidationError;
2727

2828
public final class OpenPointInTimeRequest extends ActionRequest implements IndicesRequest.Replaceable {
29+
2930
private String[] indices;
3031
private IndicesOptions indicesOptions = DEFAULT_INDICES_OPTIONS;
3132
private TimeValue keepAlive;
32-
33+
private int maxConcurrentShardRequests = SearchRequest.DEFAULT_MAX_CONCURRENT_SHARD_REQUESTS;
3334
@Nullable
3435
private String routing;
3536
@Nullable
@@ -123,6 +124,27 @@ public OpenPointInTimeRequest preference(String preference) {
123124
return this;
124125
}
125126

127+
/**
128+
* Similar to {@link SearchRequest#getMaxConcurrentShardRequests()}, this returns the number of shard requests that should be
129+
* executed concurrently on a single node. This value should be used as a protection mechanism to reduce the number of shard
130+
* requests fired per open point-in-time request. The default is {@code 5}.
131+
*/
132+
public int maxConcurrentShardRequests() {
133+
return maxConcurrentShardRequests;
134+
}
135+
136+
/**
137+
* Similar to {@link SearchRequest#setMaxConcurrentShardRequests(int)}, this sets the number of shard requests that should be
138+
* executed concurrently on a single node. This value should be used as a protection mechanism to reduce the number of shard
139+
* requests fired per open point-in-time request.
140+
*/
141+
public void maxConcurrentShardRequests(int maxConcurrentShardRequests) {
142+
if (maxConcurrentShardRequests < 1) {
143+
throw new IllegalArgumentException("maxConcurrentShardRequests must be >= 1");
144+
}
145+
this.maxConcurrentShardRequests = maxConcurrentShardRequests;
146+
}
147+
126148
@Override
127149
public boolean allowsRemoteIndices() {
128150
return true;

server/src/main/java/org/elasticsearch/action/search/RestOpenPointInTimeAction.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,13 @@ public RestChannelConsumer prepareRequest(final RestRequest request, final NodeC
4242
openRequest.routing(request.param("routing"));
4343
openRequest.preference(request.param("preference"));
4444
openRequest.keepAlive(TimeValue.parseTimeValue(request.param("keep_alive"), null, "keep_alive"));
45+
if (request.hasParam("max_concurrent_shard_requests")) {
46+
final int maxConcurrentShardRequests = request.paramAsInt(
47+
"max_concurrent_shard_requests",
48+
openRequest.maxConcurrentShardRequests()
49+
);
50+
openRequest.maxConcurrentShardRequests(maxConcurrentShardRequests);
51+
}
4552
return channel -> client.execute(OpenPointInTimeAction.INSTANCE, openRequest, new RestToXContentListener<>(channel));
4653
}
4754
}

server/src/main/java/org/elasticsearch/action/search/SearchRequest.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ public class SearchRequest extends ActionRequest implements IndicesRequest.Repla
8686
private int batchedReduceSize = DEFAULT_BATCHED_REDUCE_SIZE;
8787

8888
private int maxConcurrentShardRequests = 0;
89+
public static final int DEFAULT_MAX_CONCURRENT_SHARD_REQUESTS = 5;
8990

9091
private Integer preFilterShardSize;
9192

@@ -717,7 +718,7 @@ public int getBatchedReduceSize() {
717718
* cluster can be throttled with this number to reduce the cluster load. The default is {@code 5}
718719
*/
719720
public int getMaxConcurrentShardRequests() {
720-
return maxConcurrentShardRequests == 0 ? 5 : maxConcurrentShardRequests;
721+
return maxConcurrentShardRequests == 0 ? DEFAULT_MAX_CONCURRENT_SHARD_REQUESTS : maxConcurrentShardRequests;
721722
}
722723

723724
/**

server/src/main/java/org/elasticsearch/action/search/TransportOpenPointInTimeAction.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ protected void doExecute(Task task, OpenPointInTimeRequest request, ActionListen
8888
.routing(request.routing())
8989
.allowPartialSearchResults(false);
9090
searchRequest.setCcsMinimizeRoundtrips(false);
91+
searchRequest.setMaxConcurrentShardRequests(request.maxConcurrentShardRequests());
9192
transportSearchAction.executeRequest(
9293
(SearchTask) task,
9394
searchRequest,

server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,7 @@ public AbstractSearchAsyncAction<? extends SearchPhaseResult> asyncSearchAction(
334334
clusterState,
335335
task,
336336
new ArraySearchPhaseResults<>(shardsIts.size()),
337-
1,
337+
searchRequest.getMaxConcurrentShardRequests(),
338338
clusters
339339
) {
340340
@Override
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0 and the Server Side Public License, v 1; you may not use this file except
5+
* in compliance with, at your election, the Elastic License 2.0 or the Server
6+
* Side Public License, v 1.
7+
*/
8+
9+
package org.elasticsearch.action.search;
10+
11+
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
12+
import org.elasticsearch.rest.RestRequest;
13+
import org.elasticsearch.test.rest.FakeRestRequest;
14+
import org.elasticsearch.test.rest.RestActionTestCase;
15+
16+
import java.util.HashMap;
17+
import java.util.Map;
18+
import java.util.Queue;
19+
20+
import static org.hamcrest.Matchers.equalTo;
21+
import static org.hamcrest.Matchers.hasSize;
22+
import static org.hamcrest.Matchers.instanceOf;
23+
24+
public class RestOpenPointInTimeActionTests extends RestActionTestCase {
25+
26+
public void testMaxConcurrentSearchRequests() {
27+
RestOpenPointInTimeAction action = new RestOpenPointInTimeAction();
28+
controller().registerHandler(action);
29+
Queue<OpenPointInTimeRequest> transportRequests = ConcurrentCollections.newQueue();
30+
verifyingClient.setExecuteVerifier(((actionType, transportRequest) -> {
31+
assertThat(transportRequest, instanceOf(OpenPointInTimeRequest.class));
32+
transportRequests.add((OpenPointInTimeRequest) transportRequest);
33+
return new OpenPointInTimeResponse("n/a");
34+
}));
35+
{
36+
Map<String, String> params = new HashMap<>();
37+
params.put("keep_alive", "1m");
38+
RestRequest request = new FakeRestRequest.Builder(xContentRegistry()).withMethod(RestRequest.Method.POST)
39+
.withPath("/_pit")
40+
.withParams(params)
41+
.build();
42+
dispatchRequest(request);
43+
assertThat(transportRequests, hasSize(1));
44+
OpenPointInTimeRequest transportRequest = transportRequests.remove();
45+
assertThat(transportRequest.maxConcurrentShardRequests(), equalTo(5));
46+
}
47+
{
48+
int maxConcurrentRequests = randomIntBetween(1, 100);
49+
Map<String, String> params = new HashMap<>();
50+
params.put("max_concurrent_shard_requests", Integer.toString(maxConcurrentRequests));
51+
params.put("keep_alive", "1m");
52+
RestRequest request = new FakeRestRequest.Builder(xContentRegistry()).withMethod(RestRequest.Method.POST)
53+
.withPath("/_pit")
54+
.withParams(params)
55+
.build();
56+
dispatchRequest(request);
57+
assertThat(transportRequests, hasSize(1));
58+
OpenPointInTimeRequest transportRequest = transportRequests.remove();
59+
assertThat(transportRequest.maxConcurrentShardRequests(), equalTo(maxConcurrentRequests));
60+
}
61+
}
62+
}

0 commit comments

Comments
 (0)