Skip to content

Commit 07e146f

Browse files
[region migration] Handle InterruptedException during waitTaskFinish() more properly apache#14305
1 parent a1a972c commit 07e146f

File tree

2 files changed

+16
-16
lines changed

2 files changed

+16
-16
lines changed

iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/procedure/env/RegionMaintainHandler.java

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -336,27 +336,26 @@ public TRegionMigrateResult waitTaskFinish(long taskId, TDataNodeLocation dataNo
336336
MAX_DISCONNECTION_TOLERATE_MS);
337337
long disconnectionTime = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - lastReportTime);
338338
if (disconnectionTime > waitTime) {
339-
break;
339+
LOGGER.warn(
340+
"{} task {} cannot get task report from DataNode {}, last report time is {} ago",
341+
REGION_MIGRATE_PROCESS,
342+
taskId,
343+
dataNodeLocation,
344+
CommonDateTimeUtils.convertMillisecondToDurationStr(
345+
TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - lastReportTime)));
346+
TRegionMigrateResult report = new TRegionMigrateResult();
347+
report.setTaskStatus(TRegionMaintainTaskStatus.FAIL);
348+
report.setFailedNodeAndReason(new HashMap<>());
349+
report.getFailedNodeAndReason().put(dataNodeLocation, TRegionMigrateFailedType.Disconnect);
350+
return report;
340351
}
341352
try {
342353
TimeUnit.SECONDS.sleep(1);
343354
} catch (InterruptedException ignore) {
344355
Thread.currentThread().interrupt();
345-
break;
356+
return new TRegionMigrateResult(TRegionMaintainTaskStatus.PROCESSING);
346357
}
347358
}
348-
LOGGER.warn(
349-
"{} task {} cannot get task report from DataNode {}, last report time is {} ago",
350-
REGION_MIGRATE_PROCESS,
351-
taskId,
352-
dataNodeLocation,
353-
CommonDateTimeUtils.convertMillisecondToDurationStr(
354-
TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - lastReportTime)));
355-
TRegionMigrateResult report = new TRegionMigrateResult();
356-
report.setTaskStatus(TRegionMaintainTaskStatus.FAIL);
357-
report.setFailedNodeAndReason(new HashMap<>());
358-
report.getFailedNodeAndReason().put(dataNodeLocation, TRegionMigrateFailedType.Disconnect);
359-
return report;
360359
}
361360

362361
public void addRegionLocation(TConsensusGroupId regionId, TDataNodeLocation newLocation) {

iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/procedure/impl/region/AddRegionPeerProcedure.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,8 +123,9 @@ protected Flow executeFromState(ConfigNodeProcedureEnv env, AddRegionPeerState s
123123
return warnAndRollBackAndNoMoreState(
124124
env, handler, String.format("%s result is %s", state, result.getTaskStatus()));
125125
case PROCESSING:
126-
// should never happen
127-
return warnAndRollBackAndNoMoreState(env, handler, "should never return PROCESSING");
126+
LOGGER.info(
127+
"waitTaskFinish() returns PROCESSING, which means the waiting has been interrupted, this procedure will end without rollback");
128+
return Flow.NO_MORE_STATE;
128129
case SUCCESS:
129130
setNextState(UPDATE_REGION_LOCATION_CACHE);
130131
break outerSwitch;

0 commit comments

Comments
 (0)