Skip to content

Commit 9af414b

Browse files
Add retry for inactivePeer of region migration (#15294) (#15303)
* done * fix (cherry picked from commit b1a0dee)
1 parent 743e2e7 commit 9af414b

File tree

2 files changed

+30
-18
lines changed

2 files changed

+30
-18
lines changed

iotdb-core/consensus/src/main/java/org/apache/iotdb/consensus/iot/IoTConsensus.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,7 @@ public void addRemotePeer(ConsensusGroupId groupId, Peer peer) throws ConsensusE
331331
try {
332332
// step 1: inactive new Peer to prepare for following steps
333333
logger.info("[IoTConsensus] inactivate new peer: {}", peer);
334-
impl.inactivePeer(peer, false);
334+
impl.inactivatePeer(peer, false);
335335

336336
// step 2: notify all the other Peers to build the sync connection to newPeer
337337
logger.info("[IoTConsensus] notify current peers to build sync log...");
@@ -397,7 +397,7 @@ public void removeRemotePeer(ConsensusGroupId groupId, Peer peer) throws Consens
397397

398398
try {
399399
// let target peer reject new write
400-
impl.inactivePeer(peer, true);
400+
impl.inactivatePeer(peer, true);
401401
KillPoint.setKillPoint(IoTConsensusRemovePeerCoordinatorKillPoints.AFTER_INACTIVE_PEER);
402402
// wait its SyncLog to complete
403403
impl.waitTargetPeerUntilSyncLogCompleted(peer);

iotdb-core/consensus/src/main/java/org/apache/iotdb/consensus/iot/IoTConsensusServerImpl.java

Lines changed: 28 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -413,26 +413,38 @@ public interface ThrowableFunction<T, R> {
413413
R apply(T t) throws Exception;
414414
}
415415

416-
public void inactivePeer(Peer peer, boolean forDeletionPurpose)
416+
public void inactivatePeer(Peer peer, boolean forDeletionPurpose)
417417
throws ConsensusGroupModifyPeerException {
418-
try (SyncIoTConsensusServiceClient client =
419-
syncClientManager.borrowClient(peer.getEndpoint())) {
420-
try {
421-
TInactivatePeerRes res =
422-
client.inactivatePeer(
423-
new TInactivatePeerReq(peer.getGroupId().convertToTConsensusGroupId())
424-
.setForDeletionPurpose(forDeletionPurpose));
425-
if (!isSuccess(res.status)) {
426-
throw new ConsensusGroupModifyPeerException(
427-
String.format("error when inactivating %s. %s", peer, res.getStatus()));
418+
ConsensusGroupModifyPeerException lastException = null;
419+
// In region migration, if the target node restarts before the “addRegionPeer” phase within 1
420+
// minutes,
421+
// the client in the ClientManager will become invalid.
422+
// This PR adds 1 retry at this point to ensure that region migration can still proceed
423+
// correctly in such cases.
424+
for (int i = 0; i < 2; i++) {
425+
try (SyncIoTConsensusServiceClient client =
426+
syncClientManager.borrowClient(peer.getEndpoint())) {
427+
try {
428+
TInactivatePeerRes res =
429+
client.inactivatePeer(
430+
new TInactivatePeerReq(peer.getGroupId().convertToTConsensusGroupId())
431+
.setForDeletionPurpose(forDeletionPurpose));
432+
if (isSuccess(res.status)) {
433+
return;
434+
}
435+
lastException =
436+
new ConsensusGroupModifyPeerException(
437+
String.format("error when inactivating %s. %s", peer, res.getStatus()));
438+
} catch (Exception e) {
439+
lastException =
440+
new ConsensusGroupModifyPeerException(
441+
String.format("error when inactivating %s", peer), e);
428442
}
429-
} catch (Exception e) {
430-
throw new ConsensusGroupModifyPeerException(
431-
String.format("error when inactivating %s", peer), e);
443+
} catch (ClientManagerException e) {
444+
lastException = new ConsensusGroupModifyPeerException(e);
432445
}
433-
} catch (ClientManagerException e) {
434-
throw new ConsensusGroupModifyPeerException(e);
435446
}
447+
throw lastException;
436448
}
437449

438450
public void triggerSnapshotLoad(Peer peer) throws ConsensusGroupModifyPeerException {

0 commit comments

Comments
 (0)