Skip to content

Commit 0c16c45

Browse files
committed
Merge branch '09162025/ReshardSplitRequestOnSourceTwoPass' of github.com:ankikuma/elasticsearch into ankikuma-09162025/ReshardSplitRequestOnSourceTwoPass
2 parents 2b91dd5 + f1c4b2d commit 0c16c45

File tree

7 files changed

+275
-41
lines changed

7 files changed

+275
-41
lines changed

server/src/main/java/org/elasticsearch/action/DocWriteRequest.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,8 @@ default void postRoutingProcess(IndexRouting indexRouting) {}
176176
*/
177177
int route(IndexRouting indexRouting);
178178

179+
int rerouteAtSourceDuringResharding(IndexRouting indexRouting);
180+
179181
/**
180182
* Resolves the write index that should receive this request
181183
* based on the provided index abstraction.

server/src/main/java/org/elasticsearch/action/bulk/TransportShardBulkAction.java

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,9 @@
3333
import org.elasticsearch.cluster.ClusterStateObserver;
3434
import org.elasticsearch.cluster.action.index.MappingUpdatedAction;
3535
import org.elasticsearch.cluster.action.shard.ShardStateAction;
36+
import org.elasticsearch.cluster.metadata.ProjectMetadata;
3637
import org.elasticsearch.cluster.project.ProjectResolver;
38+
import org.elasticsearch.cluster.routing.IndexRouting;
3739
import org.elasticsearch.cluster.service.ClusterService;
3840
import org.elasticsearch.common.bytes.BytesReference;
3941
import org.elasticsearch.common.compress.CompressedXContent;
@@ -44,6 +46,7 @@
4446
import org.elasticsearch.core.Strings;
4547
import org.elasticsearch.core.TimeValue;
4648
import org.elasticsearch.core.Tuple;
49+
import org.elasticsearch.index.Index;
4750
import org.elasticsearch.index.IndexingPressure;
4851
import org.elasticsearch.index.engine.Engine;
4952
import org.elasticsearch.index.engine.VersionConflictEngineException;
@@ -55,6 +58,7 @@
5558
import org.elasticsearch.index.mapper.SourceToParse;
5659
import org.elasticsearch.index.seqno.SequenceNumbers;
5760
import org.elasticsearch.index.shard.IndexShard;
61+
import org.elasticsearch.index.shard.ShardId;
5862
import org.elasticsearch.index.translog.Translog;
5963
import org.elasticsearch.indices.ExecutorSelector;
6064
import org.elasticsearch.indices.IndicesService;
@@ -70,6 +74,9 @@
7074
import org.elasticsearch.xcontent.XContentType;
7175

7276
import java.io.IOException;
77+
import java.util.ArrayList;
78+
import java.util.HashMap;
79+
import java.util.List;
7380
import java.util.Map;
7481
import java.util.concurrent.Executor;
7582
import java.util.function.Consumer;
@@ -163,6 +170,71 @@ protected void shardOperationOnPrimary(
163170
primary.ensureMutable(listener.delegateFailure((l, ignored) -> super.shardOperationOnPrimary(request, primary, l)), true);
164171
}
165172

173+
@Override
174+
protected Map<ShardId, BulkShardRequest> splitRequestOnPrimary(BulkShardRequest request) {
175+
// System.out.println("I am splitting");
176+
ClusterState clusterState = clusterService.state();
177+
ProjectMetadata project = projectResolver.getProjectMetadata(clusterState);
178+
Index index = request.shardId().getIndex();
179+
// IndexMetadata indexMetadata = clusterState.getMetadata().indexMetadata(index);
180+
IndexRouting routing = IndexRouting.fromIndexMetadata(project.getIndexSafe(index));
181+
Map<ShardId, List<BulkItemRequest>> requestsByShard = new HashMap<>();
182+
Map<ShardId, BulkShardRequest> bulkRequestsPerShard = new HashMap<>();
183+
184+
// Iterate through the items in the input request and split them based on the
185+
// current resharding-split state.
186+
BulkItemRequest[] items = request.items();
187+
if (items.length == 0) { // Nothing to split
188+
return Map.of(request.shardId(), request);
189+
}
190+
191+
for (int i = 0; i < items.length; i++) {
192+
BulkItemRequest bulkItemRequest = items[i];
193+
DocWriteRequest<?> docWriteRequest = bulkItemRequest.request();
194+
int shardId = docWriteRequest.rerouteAtSourceDuringResharding(routing);
195+
// int shardId = docWriteRequest.route(routing);
196+
// System.out.println("shardId = " + shardId);
197+
List<BulkItemRequest> shardRequests = requestsByShard.computeIfAbsent(
198+
new ShardId(index, shardId),
199+
shardNum -> new ArrayList<>()
200+
);
201+
shardRequests.add(bulkItemRequest);
202+
}
203+
204+
// System.out.println("requestsByShard = " + requestsByShard.size());
205+
// All items belong to either the source shard or target shard.
206+
if (requestsByShard.size() == 1) {
207+
ShardId targetShard = requestsByShard.entrySet().iterator().next().getKey();
208+
// Return original request if no items were split to target.
209+
if (targetShard.equals(request.shardId())) {
210+
return Map.of(request.shardId(), request);
211+
} else {
212+
// Create new bulk request that is identical to the original request except the shardId.
213+
// TODO: Verify that this is alright because each BulkItemRequest also contains shardId
214+
BulkShardRequest bulkShardRequest = new BulkShardRequest(
215+
targetShard,
216+
request.getRefreshPolicy(),
217+
request.items(),
218+
request.isSimulated()
219+
);
220+
return Map.of(targetShard, bulkShardRequest);
221+
}
222+
}
223+
224+
for (Map.Entry<ShardId, List<BulkItemRequest>> entry : requestsByShard.entrySet()) {
225+
final ShardId shardId = entry.getKey();
226+
final List<BulkItemRequest> requests = entry.getValue();
227+
BulkShardRequest bulkShardRequest = new BulkShardRequest(
228+
shardId,
229+
request.getRefreshPolicy(),
230+
requests.toArray(new BulkItemRequest[0]),
231+
request.isSimulated()
232+
);
233+
bulkRequestsPerShard.put(shardId, bulkShardRequest);
234+
}
235+
return (bulkRequestsPerShard);
236+
}
237+
166238
@Override
167239
protected void dispatchedShardOperationOnPrimary(
168240
BulkShardRequest request,

server/src/main/java/org/elasticsearch/action/delete/DeleteRequest.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,11 @@ public int route(IndexRouting indexRouting) {
236236
return indexRouting.deleteShard(id, routing);
237237
}
238238

239+
@Override
240+
public int rerouteAtSourceDuringResharding(IndexRouting indexRouting) {
241+
return indexRouting.rerouteDeleteRequestIfResharding(id, routing);
242+
}
243+
239244
@Override
240245
public void writeTo(StreamOutput out) throws IOException {
241246
super.writeTo(out);

server/src/main/java/org/elasticsearch/action/index/IndexRequest.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -928,6 +928,11 @@ public int route(IndexRouting indexRouting) {
928928
return indexRouting.indexShard(this);
929929
}
930930

931+
@Override
932+
public int rerouteAtSourceDuringResharding(IndexRouting indexRouting) {
933+
return indexRouting.rerouteIndexingRequestIfResharding(this);
934+
}
935+
931936
public IndexRequest setRequireAlias(boolean requireAlias) {
932937
this.requireAlias = requireAlias;
933938
return this;

server/src/main/java/org/elasticsearch/action/support/replication/TransportReplicationAction.java

Lines changed: 121 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,13 @@ protected abstract void shardOperationOnReplica(
317317
ActionListener<ReplicaResult> listener
318318
);
319319

320+
/**
321+
* During Resharding, we might need to split the primary request.
322+
*/
323+
protected Map<ShardId, Request> splitRequestOnPrimary(Request request) {
324+
return Map.of(request.shardId(), request);
325+
}
326+
320327
/**
321328
* Cluster level block to check before request execution. Returning null means that no blocks need to be checked.
322329
*/
@@ -508,54 +515,127 @@ public void handleException(TransportException exp) {
508515
}
509516
}
510517
);
511-
} else {
512-
setPhase(replicationTask, "primary");
513-
514-
final ActionListener<Response> responseListener = ActionListener.wrap(response -> {
515-
adaptResponse(response, primaryShardReference.indexShard);
516-
517-
if (syncGlobalCheckpointAfterOperation) {
518-
try {
519-
primaryShardReference.indexShard.maybeSyncGlobalCheckpoint("post-operation");
520-
} catch (final Exception e) {
521-
// only log non-closed exceptions
522-
if (ExceptionsHelper.unwrap(e, AlreadyClosedException.class, IndexShardClosedException.class) == null) {
523-
// intentionally swallow, a missed global checkpoint sync should not fail this operation
524-
logger.info(
525-
() -> format(
526-
"%s failed to execute post-operation global checkpoint sync",
527-
primaryShardReference.indexShard.shardId()
528-
),
529-
e
530-
);
531-
}
518+
} else if (reshardSplitShardCountSummary.isUnset()
519+
|| reshardSplitShardCountSummary.equals(
520+
SplitShardCountSummary.forIndexing(indexMetadata, primaryRequest.getRequest().shardId().getId())
521+
) == false) {
522+
// Split Request
523+
Map<ShardId, Request> splitRequests = splitRequestOnPrimary(primaryRequest.getRequest());
524+
int numSplitRequests = splitRequests.size();
525+
526+
// splitRequestOnPrimary must handle the case when the request has no items
527+
assert numSplitRequests > 0 : "expected atleast 1 split request";
528+
assert numSplitRequests <= 2 : "number of split requests too many";
529+
530+
// System.out.println("numSplitRequests = " + numSplitRequests);
531+
// System.out.println("source shardId = " + primaryRequest.getRequest().shardId().toString());
532+
if (numSplitRequests == 1) {
533+
// System.out.println("shardId = " + splitRequests.entrySet().iterator().next().getKey().toString());
534+
// If the request is for source, same behaviour as before
535+
if (splitRequests.containsKey(primaryRequest.getRequest().shardId())) {
536+
// System.out.println("Execute request on source");
537+
executePrimaryRequest(primaryShardReference, "primary");
538+
// executePrimaryRequest(primaryShardReference, "primary_reshardSplit");
539+
} else {
540+
// System.out.println("Execute request on target");
541+
// If the request is for target, forward request to target.
542+
// TODO: Note that the request still contains the original shardId. We need to test if this will be a
543+
// problem.
544+
setPhase(replicationTask, "primary_reshardSplit_delegation");
545+
// If the request is for target, send request to target node
546+
ShardId targetShardId = splitRequests.entrySet().iterator().next().getKey();
547+
final IndexShard targetShard = getIndexShard(targetShardId);
548+
final ShardRouting target = targetShard.routingEntry();
549+
final Writeable.Reader<Response> reader = TransportReplicationAction.this::newResponseInstance;
550+
DiscoveryNode targetNode = clusterState.nodes().get(target.currentNodeId());
551+
transportService.sendRequest(
552+
targetNode,
553+
transportPrimaryAction,
554+
new ConcreteShardRequest<>(
555+
primaryRequest.getRequest(),
556+
target.allocationId().getRelocationId(),
557+
primaryRequest.getPrimaryTerm()
558+
),
559+
transportOptions,
560+
new ActionListenerResponseHandler<>(
561+
onCompletionListener,
562+
reader,
563+
TransportResponseHandler.TRANSPORT_WORKER
564+
) {
565+
566+
@Override
567+
public void handleResponse(Response response) {
568+
setPhase(replicationTask, "finished");
569+
super.handleResponse(response);
570+
}
571+
572+
@Override
573+
public void handleException(TransportException exp) {
574+
setPhase(replicationTask, "finished");
575+
super.handleException(exp);
576+
}
577+
}
578+
);
532579
}
533-
}
580+
} else {
581+
// TODO:
582+
// We have requests for both source and target shards.
583+
// Use a refcounted listener to run both requests async in parallel and collect the responses from both requests
534584

535-
assert primaryShardReference.indexShard.isPrimaryMode();
536-
primaryShardReference.close(); // release shard operation lock before responding to caller
537-
setPhase(replicationTask, "finished");
538-
onCompletionListener.onResponse(response);
539-
}, e -> handleException(primaryShardReference, e));
540-
541-
new ReplicationOperation<>(
542-
primaryRequest.getRequest(),
543-
primaryShardReference,
544-
responseListener.map(result -> result.replicationResponse),
545-
newReplicasProxy(),
546-
logger,
547-
threadPool,
548-
actionName,
549-
primaryRequest.getPrimaryTerm(),
550-
initialRetryBackoffBound,
551-
retryTimeout
552-
).execute();
553-
}
585+
// Merge responses from source and target before calling onCompletionListener
586+
}
587+
} else {
588+
executePrimaryRequest(primaryShardReference, "primary");
589+
}
554590
} catch (Exception e) {
555591
handleException(primaryShardReference, e);
556592
}
557593
}
558594

595+
private void executePrimaryRequest(final PrimaryShardReference primaryShardReference, String phase) throws Exception {
596+
setPhase(replicationTask, phase);
597+
598+
final ActionListener<Response> responseListener = ActionListener.wrap(response -> {
599+
adaptResponse(response, primaryShardReference.indexShard);
600+
601+
if (syncGlobalCheckpointAfterOperation) {
602+
try {
603+
primaryShardReference.indexShard.maybeSyncGlobalCheckpoint("post-operation");
604+
} catch (final Exception e) {
605+
// only log non-closed exceptions
606+
if (ExceptionsHelper.unwrap(e, AlreadyClosedException.class, IndexShardClosedException.class) == null) {
607+
// intentionally swallow, a missed global checkpoint sync should not fail this operation
608+
logger.info(
609+
() -> format(
610+
"%s failed to execute post-operation global checkpoint sync",
611+
primaryShardReference.indexShard.shardId()
612+
),
613+
e
614+
);
615+
}
616+
}
617+
}
618+
619+
assert primaryShardReference.indexShard.isPrimaryMode();
620+
primaryShardReference.close(); // release shard operation lock before responding to caller
621+
setPhase(replicationTask, "finished");
622+
onCompletionListener.onResponse(response);
623+
}, e -> handleException(primaryShardReference, e));
624+
625+
new ReplicationOperation<>(
626+
primaryRequest.getRequest(),
627+
primaryShardReference,
628+
responseListener.map(result -> result.replicationResponse),
629+
newReplicasProxy(),
630+
logger,
631+
threadPool,
632+
actionName,
633+
primaryRequest.getPrimaryTerm(),
634+
initialRetryBackoffBound,
635+
retryTimeout
636+
).execute();
637+
}
638+
559639
private void handleException(PrimaryShardReference primaryShardReference, Exception e) {
560640
Releasables.closeWhileHandlingException(primaryShardReference); // release shard operation lock before responding to caller
561641
onFailure(e);

server/src/main/java/org/elasticsearch/action/update/UpdateRequest.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -688,6 +688,11 @@ public int route(IndexRouting indexRouting) {
688688
return indexRouting.updateShard(id, routing);
689689
}
690690

691+
@Override
692+
public int rerouteAtSourceDuringResharding(IndexRouting indexRouting) {
693+
return indexRouting.rerouteUpdateRequestIfResharding(id, routing);
694+
}
695+
691696
public UpdateRequest setRequireAlias(boolean requireAlias) {
692697
this.requireAlias = requireAlias;
693698
return this;

0 commit comments

Comments
 (0)