3030import org .elasticsearch .index .Index ;
3131import org .elasticsearch .index .query .QueryBuilder ;
3232import org .elasticsearch .index .shard .ShardId ;
33+ import org .elasticsearch .logging .LogManager ;
34+ import org .elasticsearch .logging .Logger ;
3335import org .elasticsearch .search .SearchShardTarget ;
3436import org .elasticsearch .search .internal .AliasFilter ;
3537import org .elasticsearch .tasks .CancellableTask ;
6769 */
6870abstract class DataNodeRequestSender {
6971
72+ private static final Logger LOGGER = LogManager .getLogger (DataNodeRequestSender .class );
73+
7074 /**
7175 * Query order according to the
7276 * <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/node-roles-overview.html">node roles</a>.
@@ -283,38 +287,53 @@ private void sendOneNodeRequest(TargetShards targetShards, ComputeListener compu
283287 final ActionListener <DriverCompletionInfo > listener = computeListener .acquireCompute ();
284288 sendRequest (request .node , request .shards , request .aliasFilters , new NodeListener () {
285289
// Common bookkeeping run once a node request has finished: releases the permit held for
// request.node, releases concurrentRequests when it is non-null, and then tries to send
// requests for any shards that are pending.
// Diff note: the removed signature (onAfter) took a DriverCompletionInfo and completed the
// listener itself; the added signature (onAfterRequest) takes no argument and no longer
// touches the listener.
286- void onAfter ( DriverCompletionInfo info ) {
290+ void onAfterRequest ( ) {
287291 nodePermits .get (request .node ).release ();
288292 if (concurrentRequests != null ) {
289293 concurrentRequests .release ();
290294 }
291295 trySendingRequestsForPendingShards (targetShards , computeListener );
292- listener .onResponse (info );
293296 }
294297
// Handles a response from the data node.
// For every shard of the request that is absent from response.shardLevelFailures(), the
// entry in shardFailures is removed. Every shard-level failure in the response is passed to
// trackShardLevelFailure (with receivedData == false) and its shard id is added to
// pendingShardIds. After onAfterRequest() the listener is completed with
// response.completionInfo().
// Diff note: the added lines wrap the bookkeeping in try/catch; an exception there is
// passed to expectNoFailure and then to listener.onFailure instead of propagating.
295298 @ Override
296299 public void onResponse (DataNodeComputeResponse response ) {
297- // remove failures of successful shards
298- for (DataNodeRequest .Shard shard : request .shards ()) {
299- if (response .shardLevelFailures ().containsKey (shard .shardId ()) == false ) {
300- shardFailures .remove (shard .shardId ());
300+ try {
301+ // remove failures of successful shards
302+ for (var shard : request .shards ) {
303+ ShardId shardId = shard .shardId ();
304+ if (response .shardLevelFailures ().containsKey (shardId ) == false ) {
305+ shardFailures .remove (shardId );
306+ }
301307 }
308+ for (var entry : response .shardLevelFailures ().entrySet ()) {
309+ final ShardId shardId = entry .getKey ();
310+ trackShardLevelFailure (shardId , false , entry .getValue ());
311+ pendingShardIds .add (shardId );
312+ }
313+ onAfterRequest ();
// NOTE(review): if anything above throws before onAfterRequest() runs, the permits released
// there are not released on this path, because the catch block does not call it.
// Confirm that this is acceptable.
314+ } catch (Exception ex ) {
315+ expectNoFailure ("expect no failure while handling data node response" , ex );
316+ listener .onFailure (ex );
317+ return ;
302318 }
303- for (var entry : response .shardLevelFailures ().entrySet ()) {
304- final ShardId shardId = entry .getKey ();
305- trackShardLevelFailure (shardId , false , entry .getValue ());
306- pendingShardIds .add (shardId );
307- }
308- onAfter (response .completionInfo ());
// Completed outside the try block, so an exception thrown by the listener itself is not
// caught by the handler above.
319+ listener .onResponse (response .completionInfo ());
309320 }
310321
// Handles a failed node request.
// Every shard of the request is passed to trackShardLevelFailure together with the
// receivedData flag and the failure e, and its shard id is added to pendingShardIds.
// After onAfterRequest() the listener is completed through onResponse with
// DriverCompletionInfo.EMPTY; listener.onFailure is only used when the bookkeeping
// itself throws.
311322 @ Override
312323 public void onFailure (Exception e , boolean receivedData ) {
313- for (DataNodeRequest .Shard shard : request .shards ) {
314- trackShardLevelFailure (shard .shardId (), receivedData , e );
315- pendingShardIds .add (shard .shardId ());
324+ try {
325+ for (var shard : request .shards ) {
326+ ShardId shardId = shard .shardId ();
327+ trackShardLevelFailure (shardId , receivedData , e );
328+ pendingShardIds .add (shardId );
329+ }
330+ onAfterRequest ();
// NOTE(review): the listener receives only the bookkeeping exception ex; the original
// request failure e is not attached to it on this path. As in onResponse, the permits
// released by onAfterRequest() stay held if the exception was thrown before it ran.
331+ } catch (Exception ex ) {
332+ expectNoFailure ("expect no failure while handling failure of data node request" , ex );
333+ listener .onFailure (ex );
334+ return ;
316335 }
317- onAfter (DriverCompletionInfo .EMPTY );
336+ listener . onResponse (DriverCompletionInfo .EMPTY );
318337 }
319338
320339 @ Override
@@ -326,6 +345,11 @@ public void onSkip() {
326345 onResponse (new DataNodeComputeResponse (DriverCompletionInfo .EMPTY , Map .of ()));
327346 }
328347 }
348+
// Reports an exception that the response/failure handlers do not expect to see.
// The exception is always logged at error level. The assert statement then fails
// unconditionally when assertions are enabled (e.g. java -ea), using an AssertionError that
// wraps message and e as its detail; in that case this method throws and the statements
// following the call do not run. With assertions disabled the method returns normally after
// logging.
349+ private void expectNoFailure (String message , Exception e ) {
350+ LOGGER .error (message , e );
351+ assert false : new AssertionError (message , e );
352+ }
329353 });
330354 }
331355
@@ -527,15 +551,20 @@ Map<ShardId, List<DiscoveryNode>> resolveShards(Set<ShardId> shardIds) {
527551 var project = projectResolver .getProjectState (clusterService .state ());
528552 var nodes = Maps .<ShardId , List <DiscoveryNode >>newMapWithExpectedSize (shardIds .size ());
529553 for (var shardId : shardIds ) {
530- nodes . put (
531- shardId ,
532- project .routingTable ()
554+ List < DiscoveryNode > allocatedNodes ;
555+ try {
556+ allocatedNodes = project .routingTable ()
533557 .shardRoutingTable (shardId )
534558 .allShards ()
535559 .filter (shard -> shard .active () && shard .isSearchable ())
536560 .map (shard -> project .cluster ().nodes ().get (shard .currentNodeId ()))
537- .toList ()
538- );
561+ .toList ();
562+ } catch (Exception ignored ) {
563+ // If the target index is deleted or the target shard is not found after the query has started,
564+ // we skip resolving its new shard routing, and that shard will not be retried.
565+ continue ;
566+ }
567+ nodes .put (shardId , allocatedNodes );
539568 }
540569 return nodes ;
541570 }
0 commit comments