Skip to content

Commit eec2039

Browse files
committed
Limit concurrent node requests
1 parent 7f7967b commit eec2039

File tree

4 files changed

+108
-39
lines changed

4 files changed

+108
-39
lines changed

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/DataNodeComputeHandler.java

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -98,8 +98,13 @@ void startComputeOnDataNodes(
9898
Runnable runOnTaskFailure,
9999
ActionListener<ComputeResponse> outListener
100100
) {
101-
final boolean allowPartialResults = configuration.allowPartialResults();
102-
DataNodeRequestSender sender = new DataNodeRequestSender(transportService, esqlExecutor, parentTask, allowPartialResults) {
101+
new DataNodeRequestSender(
102+
transportService,
103+
esqlExecutor,
104+
parentTask,
105+
configuration.allowPartialResults(),
106+
configuration.pragmas().maxConcurrentNodePerCluster()
107+
) {
103108
@Override
104109
protected void sendRequest(
105110
DiscoveryNode node,
@@ -129,7 +134,7 @@ protected void sendRequest(
129134
listener.delegateFailureAndWrap((l, unused) -> {
130135
final Runnable onGroupFailure;
131136
final CancellableTask groupTask;
132-
if (allowPartialResults) {
137+
if (configuration.allowPartialResults()) {
133138
groupTask = RemoteListenerGroup.createGroupTask(
134139
transportService,
135140
parentTask,
@@ -148,7 +153,7 @@ protected void sendRequest(
148153
final var remoteSink = exchangeService.newRemoteSink(groupTask, childSessionId, transportService, connection);
149154
exchangeSource.addRemoteSink(
150155
remoteSink,
151-
allowPartialResults == false,
156+
configuration.allowPartialResults() == false,
152157
pagesFetched::incrementAndGet,
153158
queryPragmas.concurrentExchangeClients(),
154159
computeListener.acquireAvoid()
@@ -180,8 +185,7 @@ protected void sendRequest(
180185
})
181186
);
182187
}
183-
};
184-
sender.startComputeOnDataNodes(
188+
}.startComputeOnDataNodes(
185189
clusterAlias,
186190
concreteIndices,
187191
originalIndices,

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/DataNodeRequestSender.java

Lines changed: 34 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -56,18 +56,26 @@ abstract class DataNodeRequestSender {
5656
private final Executor esqlExecutor;
5757
private final CancellableTask rootTask;
5858
private final boolean allowPartialResults;
59+
private final Semaphore concurrentRequests;
5960
private final ReentrantLock sendingLock = new ReentrantLock();
6061
private final Queue<ShardId> pendingShardIds = ConcurrentCollections.newQueue();
6162
private final Map<DiscoveryNode, Semaphore> nodePermits = new HashMap<>();
6263
private final Map<ShardId, ShardFailure> shardFailures = ConcurrentCollections.newConcurrentMap();
6364
private final AtomicBoolean changed = new AtomicBoolean();
6465
private boolean reportedFailure = false; // guarded by sendingLock
6566

66-
DataNodeRequestSender(TransportService transportService, Executor esqlExecutor, CancellableTask rootTask, boolean allowPartialResults) {
67+
DataNodeRequestSender(
68+
TransportService transportService,
69+
Executor esqlExecutor,
70+
CancellableTask rootTask,
71+
boolean allowPartialResults,
72+
int concurrentRequests
73+
) {
6774
this.transportService = transportService;
6875
this.esqlExecutor = esqlExecutor;
6976
this.rootTask = rootTask;
7077
this.allowPartialResults = allowPartialResults;
78+
this.concurrentRequests = new Semaphore(concurrentRequests);
7179
}
7280

7381
final void startComputeOnDataNodes(
@@ -128,8 +136,7 @@ private void trySendingRequestsForPendingShards(TargetShards targetShards, Compu
128136
reportedFailure = true;
129137
reportFailures(computeListener);
130138
} else {
131-
var nodeRequests = selectNodeRequests(targetShards);
132-
for (NodeRequest request : nodeRequests) {
139+
for (NodeRequest request : selectNodeRequests(targetShards)) {
133140
sendOneNodeRequest(targetShards, computeListener, request);
134141
}
135142
}
@@ -161,6 +168,7 @@ private void sendOneNodeRequest(TargetShards targetShards, ComputeListener compu
161168
sendRequest(request.node, request.shardIds, request.aliasFilters, new NodeListener() {
162169
void onAfter(List<DriverProfile> profiles) {
163170
nodePermits.get(request.node).release();
171+
concurrentRequests.release();
164172
trySendingRequestsForPendingShards(targetShards, computeListener);
165173
listener.onResponse(profiles);
166174
}
@@ -243,17 +251,11 @@ TargetShard getShard(ShardId shardId) {
243251
/**
244252
* (Remaining) allocated nodes of a given shard id and its alias filter
245253
*/
246-
record TargetShard(ShardId shardId, List<DiscoveryNode> remainingNodes, AliasFilter aliasFilter) {
247-
248-
}
249-
250-
record NodeRequest(DiscoveryNode node, List<ShardId> shardIds, Map<Index, AliasFilter> aliasFilters) {
251-
252-
}
254+
record TargetShard(ShardId shardId, List<DiscoveryNode> remainingNodes, AliasFilter aliasFilter) {}
253255

254-
private record ShardFailure(boolean fatal, Exception failure) {
256+
record NodeRequest(DiscoveryNode node, List<ShardId> shardIds, Map<Index, AliasFilter> aliasFilters) {}
255257

256-
}
258+
private record ShardFailure(boolean fatal, Exception failure) {}
257259

258260
/**
259261
* Selects the next nodes to send requests to. Limits to at most one outstanding request per node.
@@ -287,17 +289,28 @@ private List<NodeRequest> selectNodeRequests(TargetShards targetShards) {
287289
nodeToShardIds.computeIfAbsent(selectedNode, unused -> new ArrayList<>()).add(shard.shardId);
288290
}
289291
}
290-
final List<NodeRequest> nodeRequests = new ArrayList<>(nodeToShardIds.size());
291-
for (var e : nodeToShardIds.entrySet()) {
292-
List<ShardId> shardIds = e.getValue();
293-
Map<Index, AliasFilter> aliasFilters = new HashMap<>();
294-
for (ShardId shardId : shardIds) {
295-
var aliasFilter = targetShards.getShard(shardId).aliasFilter;
296-
if (aliasFilter != null) {
297-
aliasFilters.put(shardId.getIndex(), aliasFilter);
292+
293+
var size = Math.min(concurrentRequests.availablePermits(), nodeToShardIds.size());
294+
final List<NodeRequest> nodeRequests = new ArrayList<>(size);
295+
for (var entry : nodeToShardIds.entrySet()) {
296+
var node = entry.getKey();
297+
var shardIds = entry.getValue();
298+
if (concurrentRequests.tryAcquire()) {
299+
Map<Index, AliasFilter> aliasFilters = new HashMap<>();
300+
for (ShardId shardId : shardIds) {
301+
var aliasFilter = targetShards.getShard(shardId).aliasFilter;
302+
if (aliasFilter != null) {
303+
aliasFilters.put(shardId.getIndex(), aliasFilter);
304+
}
305+
}
306+
nodeRequests.add(new NodeRequest(node, shardIds, aliasFilters));
307+
} else {
308+
pendingShardIds.addAll(shardIds);
309+
for (ShardId shardId : shardIds) {
310+
targetShards.getShard(shardId).remainingNodes.add(node);
298311
}
312+
nodePermits.get(node).release();
299313
}
300-
nodeRequests.add(new NodeRequest(e.getKey(), shardIds, aliasFilters));
301314
}
302315
return nodeRequests;
303316
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/QueryPragmas.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,10 @@ public final class QueryPragmas implements Writeable {
5555
*/
5656
public static final Setting<TimeValue> STATUS_INTERVAL = Setting.timeSetting("status_interval", Driver.DEFAULT_STATUS_INTERVAL);
5757

58-
public static final Setting<Integer> MAX_CONCURRENT_SHARDS_PER_NODE = Setting.intSetting("max_concurrent_shards_per_node", 10, 1, 100);
58+
public static final Setting<Integer> MAX_CONCURRENT_NODES_PER_CLUSTER = //
59+
Setting.intSetting("max_concurrent_nodes_per_cluster", 10, 1, 100);
60+
public static final Setting<Integer> MAX_CONCURRENT_SHARDS_PER_NODE = //
61+
Setting.intSetting("max_concurrent_shards_per_node", 10, 1, 100);
5962

6063
public static final Setting<Boolean> NODE_LEVEL_REDUCTION = Setting.boolSetting("node_level_reduction", true);
6164

@@ -122,6 +125,13 @@ public int enrichMaxWorkers() {
122125
return ENRICH_MAX_WORKERS.get(settings);
123126
}
124127

128+
/**
129+
 * The maximum number of nodes to be queried at once by this query. This is a safeguard to avoid overloading the cluster.
130+
*/
131+
public int maxConcurrentNodePerCluster() {
132+
return MAX_CONCURRENT_NODES_PER_CLUSTER.get(settings);
133+
}
134+
125135
/**
126136
 * The maximum number of shards that can be executed concurrently on a single node by this query. This is a safeguard to avoid
127137
* opening and holding many shards (equivalent to many file descriptors) or having too many field infos created by a single query.

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plugin/DataNodeRequestSenderTests.java

Lines changed: 53 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
import java.util.concurrent.Executor;
4646
import java.util.concurrent.TimeUnit;
4747
import java.util.concurrent.atomic.AtomicBoolean;
48+
import java.util.concurrent.atomic.AtomicInteger;
4849
import java.util.function.Function;
4950
import java.util.stream.Collectors;
5051

@@ -85,14 +86,15 @@ public void setThreadPool() {
8586
}
8687

8788
@After
88-
public void shutdownThreadPool() throws Exception {
89+
public void shutdownThreadPool() {
8990
terminate(threadPool);
9091
}
9192

9293
public void testEmpty() {
9394
var future = sendRequests(
9495
List.of(),
9596
randomBoolean(),
97+
10,
9698
(node, shardIds, aliasFilters, listener) -> fail("expect no data-node request is sent")
9799
);
98100
var resp = safeGet(future);
@@ -107,10 +109,9 @@ public void testOnePass() {
107109
targetShard(shard4, node2, node3)
108110
);
109111
Queue<NodeRequest> sent = ConcurrentCollections.newQueue();
110-
var future = sendRequests(targetShards, randomBoolean(), (node, shardIds, aliasFilters, listener) -> {
112+
var future = sendRequests(targetShards, randomBoolean(), 10, (node, shardIds, aliasFilters, listener) -> {
111113
sent.add(new NodeRequest(node, shardIds, aliasFilters));
112-
var resp = new DataNodeComputeResponse(List.of(), Map.of());
113-
runWithDelay(() -> listener.onResponse(resp));
114+
runWithDelay(() -> listener.onResponse(new DataNodeComputeResponse(List.of(), Map.of())));
114115
});
115116
safeGet(future);
116117
assertThat(sent.size(), equalTo(2));
@@ -120,15 +121,15 @@ public void testOnePass() {
120121
public void testMissingShards() {
121122
{
122123
var targetShards = List.of(targetShard(shard1, node1), targetShard(shard3), targetShard(shard4, node2, node3));
123-
var future = sendRequests(targetShards, false, (node, shardIds, aliasFilters, listener) -> {
124+
var future = sendRequests(targetShards, false, 10, (node, shardIds, aliasFilters, listener) -> {
124125
fail("expect no data-node request is sent when target shards are missing");
125126
});
126127
var error = expectThrows(NoShardAvailableActionException.class, future::actionGet);
127128
assertThat(error.getMessage(), containsString("no shard copies found"));
128129
}
129130
{
130131
var targetShards = List.of(targetShard(shard1, node1), targetShard(shard3), targetShard(shard4, node2, node3));
131-
var future = sendRequests(targetShards, true, (node, shardIds, aliasFilters, listener) -> {
132+
var future = sendRequests(targetShards, true, 10, (node, shardIds, aliasFilters, listener) -> {
132133
assertThat(shard3, not(in(shardIds)));
133134
runWithDelay(() -> listener.onResponse(new DataNodeComputeResponse(List.of(), Map.of())));
134135
});
@@ -148,7 +149,7 @@ public void testRetryThenSuccess() {
148149
targetShard(shard5, node1, node3, node2)
149150
);
150151
Queue<NodeRequest> sent = ConcurrentCollections.newQueue();
151-
var future = sendRequests(targetShards, randomBoolean(), (node, shardIds, aliasFilters, listener) -> {
152+
var future = sendRequests(targetShards, randomBoolean(), 10, (node, shardIds, aliasFilters, listener) -> {
152153
sent.add(new NodeRequest(node, shardIds, aliasFilters));
153154
Map<ShardId, Exception> failures = new HashMap<>();
154155
if (node.equals(node1) && shardIds.contains(shard5)) {
@@ -180,7 +181,7 @@ public void testRetryButFail() {
180181
targetShard(shard5, node1, node3, node2)
181182
);
182183
Queue<NodeRequest> sent = ConcurrentCollections.newQueue();
183-
var future = sendRequests(targetShards, false, (node, shardIds, aliasFilters, listener) -> {
184+
var future = sendRequests(targetShards, false, 10, (node, shardIds, aliasFilters, listener) -> {
184185
sent.add(new NodeRequest(node, shardIds, aliasFilters));
185186
Map<ShardId, Exception> failures = new HashMap<>();
186187
if (shardIds.contains(shard5)) {
@@ -206,7 +207,7 @@ public void testDoNotRetryOnRequestLevelFailure() {
206207
var targetShards = List.of(targetShard(shard1, node1), targetShard(shard2, node2), targetShard(shard3, node1));
207208
Queue<NodeRequest> sent = ConcurrentCollections.newQueue();
208209
AtomicBoolean failed = new AtomicBoolean();
209-
var future = sendRequests(targetShards, false, (node, shardIds, aliasFilters, listener) -> {
210+
var future = sendRequests(targetShards, false, 10, (node, shardIds, aliasFilters, listener) -> {
210211
sent.add(new NodeRequest(node, shardIds, aliasFilters));
211212
if (node1.equals(node) && failed.compareAndSet(false, true)) {
212213
runWithDelay(() -> listener.onFailure(new IOException("test request level failure"), true));
@@ -226,7 +227,7 @@ public void testAllowPartialResults() {
226227
var targetShards = List.of(targetShard(shard1, node1), targetShard(shard2, node2), targetShard(shard3, node1, node2));
227228
Queue<NodeRequest> sent = ConcurrentCollections.newQueue();
228229
AtomicBoolean failed = new AtomicBoolean();
229-
var future = sendRequests(targetShards, true, (node, shardIds, aliasFilters, listener) -> {
230+
var future = sendRequests(targetShards, true, 10, (node, shardIds, aliasFilters, listener) -> {
230231
sent.add(new NodeRequest(node, shardIds, aliasFilters));
231232
if (node1.equals(node) && failed.compareAndSet(false, true)) {
232233
runWithDelay(() -> listener.onFailure(new IOException("test request level failure"), true));
@@ -244,6 +245,40 @@ public void testAllowPartialResults() {
244245
assertThat(resp.successfulShards, equalTo(1));
245246
}
246247

248+
public void testLimitConcurrentNodes() {
249+
var targetShards = List.of(
250+
targetShard(shard1, node1),
251+
targetShard(shard2, node2),
252+
targetShard(shard3, node3),
253+
targetShard(shard4, node4),
254+
targetShard(shard5, node5)
255+
);
256+
257+
AtomicInteger maxConcurrentRequests = new AtomicInteger(0);
258+
AtomicInteger concurrentRequests = new AtomicInteger(0);
259+
Queue<NodeRequest> sent = ConcurrentCollections.newQueue();
260+
var future = sendRequests(targetShards, randomBoolean(), 2, (node, shardIds, aliasFilters, listener) -> {
261+
concurrentRequests.incrementAndGet();
262+
263+
while (true) {
264+
var priorMax = maxConcurrentRequests.get();
265+
var newMax = Math.max(priorMax, concurrentRequests.get());
266+
if (newMax <= priorMax || maxConcurrentRequests.compareAndSet(priorMax, newMax)) {
267+
break;
268+
}
269+
}
270+
271+
sent.add(new NodeRequest(node, shardIds, aliasFilters));
272+
runWithDelay(() -> {
273+
concurrentRequests.decrementAndGet();
274+
listener.onResponse(new DataNodeComputeResponse(List.of(), Map.of()));
275+
});
276+
});
277+
safeGet(future);
278+
assertThat(sent.size(), equalTo(5));
279+
assertThat(maxConcurrentRequests.get(), equalTo(2));
280+
}
281+
247282
static DataNodeRequestSender.TargetShard targetShard(ShardId shardId, DiscoveryNode... nodes) {
248283
return new DataNodeRequestSender.TargetShard(shardId, new ArrayList<>(Arrays.asList(nodes)), null);
249284
}
@@ -268,6 +303,7 @@ void runWithDelay(Runnable runnable) {
268303
PlainActionFuture<ComputeResponse> sendRequests(
269304
List<DataNodeRequestSender.TargetShard> shards,
270305
boolean allowPartialResults,
306+
int concurrentRequests,
271307
Sender sender
272308
) {
273309
PlainActionFuture<ComputeResponse> future = new PlainActionFuture<>();
@@ -281,7 +317,13 @@ PlainActionFuture<ComputeResponse> sendRequests(
281317
TaskId.EMPTY_TASK_ID,
282318
Collections.emptyMap()
283319
);
284-
DataNodeRequestSender requestSender = new DataNodeRequestSender(transportService, executor, task, allowPartialResults) {
320+
DataNodeRequestSender requestSender = new DataNodeRequestSender(
321+
transportService,
322+
executor,
323+
task,
324+
allowPartialResults,
325+
concurrentRequests
326+
) {
285327
@Override
286328
void searchShards(
287329
Task parentTask,

0 commit comments

Comments
 (0)