Skip to content

Commit 299c3df

Browse files
Improve ConfigNode removal (apache#15189)
1 parent 5f635bf commit 299c3df

File tree

6 files changed

+46
-19
lines changed

6 files changed

+46
-19
lines changed

iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/conf/ConfigNodeConfig.java

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ public class ConfigNodeConfig {
114114
private int dataRegionPerDataNode = 0;
115115

116116
/** Each DataNode automatically has (number of CPU cores / 2) Regions. */
117-
private double dataRegionPerDataNodeProportion = 0.5;
117+
private final double dataRegionPerDataNodeProportion = 0.5;
118118

119119
/** RegionGroup allocate policy. */
120120
private RegionBalancer.RegionGroupAllocatePolicy regionGroupAllocatePolicy =
@@ -193,9 +193,6 @@ public class ConfigNodeConfig {
193193
/** Acceptable pause duration for Phi accrual failure detector */
194194
private long failureDetectorPhiAcceptablePauseInMs = 10000;
195195

196-
/** The unknown DataNode detect interval in milliseconds. */
197-
private long unknownDataNodeDetectInterval = heartbeatIntervalInMs;
198-
199196
/** The policy of cluster RegionGroups' leader distribution. */
200197
private String leaderDistributionPolicy = AbstractLeaderBalancer.CFD_POLICY;
201198

@@ -660,14 +657,6 @@ public void setHeartbeatIntervalInMs(long heartbeatIntervalInMs) {
660657
this.heartbeatIntervalInMs = heartbeatIntervalInMs;
661658
}
662659

663-
public long getUnknownDataNodeDetectInterval() {
664-
return unknownDataNodeDetectInterval;
665-
}
666-
667-
public void setUnknownDataNodeDetectInterval(long unknownDataNodeDetectInterval) {
668-
this.unknownDataNodeDetectInterval = unknownDataNodeDetectInterval;
669-
}
670-
671660
public String getLeaderDistributionPolicy() {
672661
return leaderDistributionPolicy;
673662
}

iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/manager/ConfigManager.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -547,7 +547,7 @@ public TSStatus reportDataNodeShutdown(TDataNodeLocation dataNodeLocation) {
547547
dataNodeLocation.getDataNodeId(),
548548
new NodeHeartbeatSample(NodeStatus.Unknown));
549549
LOGGER.info(
550-
"[ShutdownHook] The DataNode-{} will be shutdown soon, mark it as Unknown",
550+
"The DataNode-{} will be shutdown soon, mark it as Unknown",
551551
dataNodeLocation.getDataNodeId());
552552
}
553553
return status;
@@ -1194,6 +1194,11 @@ private void printNewCreatedDataPartition(
11941194
}
11951195

11961196
protected TSStatus confirmLeader() {
1197+
if (NodeStatus.Removing == CommonDescriptor.getInstance().getConfig().getNodeStatus()) {
1198+
TSStatus status = new TSStatus(TSStatusCode.REDIRECTION_RECOMMEND.getStatusCode());
1199+
status.setMessage("ConfigNode is Removing");
1200+
return status;
1201+
}
11971202
// Make sure the consensus layer has been initialized
11981203
if (getConsensusManager() == null) {
11991204
return new TSStatus(TSStatusCode.CONSENSUS_NOT_INITIALIZED.getStatusCode())
@@ -1511,7 +1516,7 @@ public TSStatus reportConfigNodeShutdown(TConfigNodeLocation configNodeLocation)
15111516
configNodeLocation.getConfigNodeId(),
15121517
new NodeHeartbeatSample(NodeStatus.Unknown));
15131518
LOGGER.info(
1514-
"[ShutdownHook] The ConfigNode-{} will be shutdown soon, mark it as Unknown",
1519+
"The ConfigNode-{} will be shutdown soon, mark it as Unknown",
15151520
configNodeLocation.getConfigNodeId());
15161521
}
15171522
return status;

iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/manager/load/service/StatisticsService.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ public class StatisticsService {
3737

3838
private static final Logger LOGGER = LoggerFactory.getLogger(StatisticsService.class);
3939

40-
private static final long HEARTBEAT_INTERVAL =
40+
public static final long STATISTICS_UPDATE_INTERVAL =
4141
ConfigNodeDescriptor.getInstance().getConf().getHeartbeatIntervalInMs();
4242

4343
private final LoadCache loadCache;
@@ -63,7 +63,7 @@ public void startLoadStatisticsService() {
6363
loadStatisticsExecutor,
6464
this::updateLoadStatistics,
6565
0,
66-
HEARTBEAT_INTERVAL,
66+
STATISTICS_UPDATE_INTERVAL,
6767
TimeUnit.MILLISECONDS);
6868
LOGGER.info("LoadStatistics service is started successfully.");
6969
}

iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/manager/node/NodeManager.java

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969
import org.apache.iotdb.confignode.manager.consensus.ConsensusManager;
7070
import org.apache.iotdb.confignode.manager.load.LoadManager;
7171
import org.apache.iotdb.confignode.manager.load.cache.node.ConfigNodeHeartbeatCache;
72+
import org.apache.iotdb.confignode.manager.load.service.StatisticsService;
7273
import org.apache.iotdb.confignode.manager.partition.PartitionManager;
7374
import org.apache.iotdb.confignode.manager.partition.PartitionMetrics;
7475
import org.apache.iotdb.confignode.manager.pipe.coordinator.PipeManager;
@@ -110,6 +111,7 @@
110111
import java.util.Optional;
111112
import java.util.Set;
112113
import java.util.concurrent.ConcurrentHashMap;
114+
import java.util.concurrent.TimeUnit;
113115
import java.util.concurrent.atomic.AtomicInteger;
114116
import java.util.concurrent.locks.ReentrantLock;
115117
import java.util.function.Function;
@@ -801,13 +803,34 @@ public void applyConfigNode(
801803
public TSStatus checkConfigNodeBeforeRemove(RemoveConfigNodePlan removeConfigNodePlan) {
802804
removeConfigNodeLock.lock();
803805
try {
804-
// Check OnlineConfigNodes number
805-
if (filterConfigNodeThroughStatus(NodeStatus.Running).size() <= 1) {
806+
// Check ConfigNodes number
807+
if (getRegisteredConfigNodes().size() <= 1) {
806808
return new TSStatus(TSStatusCode.REMOVE_CONFIGNODE_ERROR.getStatusCode())
807809
.setMessage(
808810
"Remove ConfigNode failed because there is only one ConfigNode in current Cluster.");
809811
}
810812

813+
// Check OnlineConfigNodes number
814+
final long deadline =
815+
System.nanoTime()
816+
+ TimeUnit.MILLISECONDS.toNanos(
817+
(CONF.getHeartbeatIntervalInMs() + StatisticsService.STATISTICS_UPDATE_INTERVAL)
818+
* 3);
819+
while (filterConfigNodeThroughStatus(NodeStatus.Running).size() <= 1) {
820+
if (System.nanoTime() > deadline) {
821+
return new TSStatus(TSStatusCode.REMOVE_CONFIGNODE_ERROR.getStatusCode())
822+
.setMessage(
823+
"Remove ConfigNode failed because there is no other ConfigNode in Running status in current Cluster.");
824+
}
825+
try {
826+
Thread.sleep(1000);
827+
} catch (InterruptedException e) {
828+
Thread.currentThread().interrupt();
829+
return new TSStatus(TSStatusCode.REMOVE_CONFIGNODE_ERROR.getStatusCode())
830+
.setMessage("Remove ConfigNode failed due to thread interruption.");
831+
}
832+
}
833+
811834
// Check whether the registeredConfigNodes contain the ConfigNode to be removed.
812835
if (!getRegisteredConfigNodes().contains(removeConfigNodePlan.getConfigNodeLocation())) {
813836
return new TSStatus(TSStatusCode.REMOVE_CONFIGNODE_ERROR.getStatusCode())

iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/persistence/node/NodeInfo.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424
import org.apache.iotdb.common.rpc.thrift.TConfigNodeLocation;
2525
import org.apache.iotdb.common.rpc.thrift.TDataNodeConfiguration;
2626
import org.apache.iotdb.common.rpc.thrift.TSStatus;
27+
import org.apache.iotdb.commons.cluster.NodeStatus;
28+
import org.apache.iotdb.commons.conf.CommonDescriptor;
2729
import org.apache.iotdb.commons.snapshot.SnapshotProcessor;
2830
import org.apache.iotdb.confignode.conf.ConfigNodeDescriptor;
2931
import org.apache.iotdb.confignode.conf.SystemPropertiesUtils;
@@ -446,6 +448,11 @@ public TSStatus applyConfigNode(ApplyConfigNodePlan applyConfigNodePlan) {
446448
* @return {@link TSStatusCode#REMOVE_CONFIGNODE_ERROR} if remove online ConfigNode failed.
447449
*/
448450
public TSStatus removeConfigNode(RemoveConfigNodePlan removeConfigNodePlan) {
451+
if (removeConfigNodePlan.getConfigNodeLocation().getConfigNodeId()
452+
== ConfigNodeDescriptor.getInstance().getConf().getConfigNodeId()) {
453+
// set myself to Removing status
454+
CommonDescriptor.getInstance().getConfig().setNodeStatus(NodeStatus.Removing);
455+
}
449456
TSStatus status = new TSStatus();
450457
configNodeInfoReadWriteLock.writeLock().lock();
451458
versionInfoReadWriteLock.writeLock().lock();

iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/service/thrift/ConfigNodeRPCServiceProcessor.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -859,7 +859,10 @@ public TSStatus removeConfigNode(TConfigNodeLocation configNodeLocation) throws
859859
RemoveConfigNodePlan removeConfigNodePlan = new RemoveConfigNodePlan(configNodeLocation);
860860
TSStatus status = configManager.removeConfigNode(removeConfigNodePlan);
861861
// Print log to record the ConfigNode that performs the RemoveConfigNodeRequest
862-
LOGGER.info("Execute RemoveConfigNodeRequest {} with result {}", configNodeLocation, status);
862+
LOGGER.info(
863+
"The result of submitting RemoveConfigNode job is {}. RemoveConfigNodeRequest: {}",
864+
status,
865+
configNodeLocation);
863866

864867
return status;
865868
}

0 commit comments

Comments
 (0)