Skip to content

Commit 684e873

Browse files
Merge pull request ClickHouse#88656 from ClickHouse/backport/25.8/88154
Backport ClickHouse#88154 to 25.8: Cleanup stale replicas from DDL Worker replicas set
2 parents 7a4076f + a39fb56 commit 684e873

File tree

2 files changed

+28
-0
lines changed

2 files changed

+28
-0
lines changed

src/Interpreters/DDLWorker.cpp

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1396,6 +1396,32 @@ void DDLWorker::markReplicasActive(bool /*reinitialized*/)
13961396
}
13971397
}
13981398

1399+
/// Removes entries under `replicas_dir` that belong to replicas which have been inactive for too long.
///
/// A replica is treated as stale when its node has no "active" child and the node's mtime is older
/// than REPLICA_MAX_INACTIVE_SECONDS relative to `current_time_seconds`.
///
/// @param current_time_seconds  Current time in seconds; compared against the node mtime (which is divided
///                              by 1000 below, i.e. treated as milliseconds).
/// @param zookeeper             Keeper session used to list, inspect and remove the replica nodes.
///
/// Removal is best-effort: a failed removal is only logged as a warning.
void DDLWorker::cleanupStaleReplicas(Int64 current_time_seconds, const ZooKeeperPtr & zookeeper)
{
    /// 24 hours
    static constexpr Int64 REPLICA_MAX_INACTIVE_SECONDS = 86400;

    auto replicas = zookeeper->getChildren(replicas_dir);
    for (const auto & replica : replicas)
    {
        auto replica_path = fs::path(replicas_dir) / replica;
        auto responses = zookeeper->tryGet({replica_path, fs::path(replica_path) / "active"});

        /// The replica node itself could not be read (e.g. it was removed concurrently after getChildren).
        /// There is no valid stat to inspect in that case, so do not try to judge or remove it.
        if (responses[0].error != Coordination::Error::ZOK)
            continue;

        /// Only replicas without the "active" node are candidates for removal
        if (responses[1].error != Coordination::Error::ZNONODE)
            continue;

        const auto & stat = responses[0].stat;

        /// Replica was not active for too long, let's cleanup to avoid polluting Keeper with
        /// removed replicas
        if (stat.mtime / 1000 + REPLICA_MAX_INACTIVE_SECONDS >= current_time_seconds)
            continue;

        LOG_INFO(log, "Replica {} is stale, removing it", replica);
        auto code = zookeeper->tryRemove(replica_path, -1);
        if (code != Coordination::Error::ZOK)
            LOG_WARNING(log, "Cannot remove stale replica {}, code {}", replica, Coordination::errorMessage(code));
    }
}
1424+
13991425
void DDLWorker::runCleanupThread()
14001426
{
14011427
setThreadName("DDLWorkerClnr");
@@ -1423,6 +1449,7 @@ void DDLWorker::runCleanupThread()
14231449
continue;
14241450

14251451
cleanupQueue(current_time_seconds, zookeeper);
1452+
cleanupStaleReplicas(current_time_seconds, zookeeper);
14261453
last_cleanup_time_seconds = current_time_seconds;
14271454
}
14281455
catch (...)

src/Interpreters/DDLWorker.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -157,6 +157,7 @@ class DDLWorker
157157

158158
/// Checks and cleanups queue's nodes
159159
void cleanupQueue(Int64 current_time_seconds, const ZooKeeperPtr & zookeeper);
160+
void cleanupStaleReplicas(Int64 current_time_seconds, const ZooKeeperPtr & zookeeper);
160161
virtual bool canRemoveQueueEntry(const String & entry_name, const Coordination::Stat & stat);
161162

162163
/// Init task node

0 commit comments

Comments
 (0)