Skip to content

Commit 9642c42

Browse files
committed
SYSTEM PRESHUTDOWN to allow graceful shutdown node
1 parent a5892c1 commit 9642c42

31 files changed

+227
-18
lines changed

docs/en/sql-reference/statements/system.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,12 @@ SYSTEM RELOAD USERS [ON CLUSTER cluster_name]
205205
206206
Normally shuts down ClickHouse (like `service clickhouse-server stop` / `kill {$pid_clickhouse-server}`)
207207
208+
## PRESHUTDOWN {#preshutdown}
209+
210+
<CloudNotSupportedBadge/>
211+
212+
Prepare node for graceful shutdown. Unregister in autodiscovered clusters, stop accepting distributed requests to object storages (s3Cluster, icebergCluster, etc.).
213+
208214
## KILL {#kill}
209215
210216
Aborts ClickHouse process (like `kill -9 {$ pid_clickhouse-server}`)

src/Access/Common/AccessType.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ enum class AccessType : uint8_t
162162
\
163163
M(TABLE_ENGINE, "TABLE ENGINE", TABLE_ENGINE, ALL) \
164164
\
165-
M(SYSTEM_SHUTDOWN, "SYSTEM KILL, SHUTDOWN", GLOBAL, SYSTEM) \
165+
M(SYSTEM_SHUTDOWN, "SYSTEM KILL, SHUTDOWN, PRESHUTDOWN", GLOBAL, SYSTEM) \
166166
M(SYSTEM_DROP_DNS_CACHE, "SYSTEM DROP DNS, DROP DNS CACHE, DROP DNS", GLOBAL, SYSTEM_DROP_CACHE) \
167167
M(SYSTEM_DROP_CONNECTIONS_CACHE, "SYSTEM DROP CONNECTIONS CACHE, DROP CONNECTIONS CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
168168
M(SYSTEM_PREWARM_MARK_CACHE, "SYSTEM PREWARM MARK, PREWARM MARK CACHE, PREWARM MARKS", GLOBAL, SYSTEM_DROP_CACHE) \

src/Interpreters/ClusterDiscovery.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -454,12 +454,30 @@ void ClusterDiscovery::registerInZk(zkutil::ZooKeeperPtr & zk, ClusterInfo & inf
454454
return;
455455
}
456456

457+
if (context->isPreShutdownCalled())
458+
{
459+
LOG_DEBUG(log, "PreShutdown called, skip self-registering current node {} in cluster {}", current_node_name, info.name);
460+
return;
461+
}
462+
457463
LOG_DEBUG(log, "Registering current node {} in cluster {}", current_node_name, info.name);
458464

459465
zk->createOrUpdate(node_path, info.current_node.serialize(), zkutil::CreateMode::Ephemeral);
460466
LOG_DEBUG(log, "Current node {} registered in cluster {}", current_node_name, info.name);
461467
}
462468

469+
void ClusterDiscovery::unregisterFromZk(zkutil::ZooKeeperPtr & zk, ClusterInfo & info)
470+
{
471+
if (info.current_node_is_observer)
472+
return;
473+
474+
String node_path = getShardsListPath(info.zk_root) / current_node_name;
475+
LOG_DEBUG(log, "Removing current node {} from cluster {}", current_node_name, info.name);
476+
477+
zk->remove(node_path);
478+
LOG_DEBUG(log, "Current node {} removed from cluster {}", current_node_name, info.name);
479+
}
480+
463481
void ClusterDiscovery::initialUpdate()
464482
{
465483
LOG_DEBUG(log, "Initializing");
@@ -505,6 +523,15 @@ void ClusterDiscovery::initialUpdate()
505523
is_initialized = true;
506524
}
507525

526+
void ClusterDiscovery::unregisterAll()
527+
{
528+
for (auto & [_, info] : clusters_info)
529+
{
530+
auto zk = context->getDefaultOrAuxiliaryZooKeeper(info.zk_name);
531+
unregisterFromZk(zk, info);
532+
}
533+
}
534+
508535
void ClusterDiscovery::findDynamicClusters(
509536
std::unordered_map<String, ClusterDiscovery::ClusterInfo> & info,
510537
std::unordered_set<size_t> * unchanged_roots)

src/Interpreters/ClusterDiscovery.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ class ClusterDiscovery
3838

3939
~ClusterDiscovery();
4040

41+
void unregisterAll();
42+
4143
private:
4244
struct NodeInfo
4345
{
@@ -125,6 +127,7 @@ class ClusterDiscovery
125127
void initialUpdate();
126128

127129
void registerInZk(zkutil::ZooKeeperPtr & zk, ClusterInfo & info);
130+
void unregisterFromZk(zkutil::ZooKeeperPtr & zk, ClusterInfo & info);
128131

129132
Strings getNodeNames(zkutil::ZooKeeperPtr & zk,
130133
const String & zk_root,

src/Interpreters/Context.cpp

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -566,6 +566,7 @@ struct ContextSharedPart : boost::noncopyable
566566
std::map<String, UInt16> server_ports;
567567

568568
std::atomic<bool> shutdown_called = false;
569+
std::atomic<bool> preshutdown_called = false;
569570

570571
Stopwatch uptime_watch TSA_GUARDED_BY(mutex);
571572

@@ -913,6 +914,11 @@ struct ContextSharedPart : boost::noncopyable
913914
total_memory_tracker.resetPageCache();
914915
}
915916

917+
void preShutdown()
918+
{
919+
preshutdown_called = true;
920+
}
921+
916922
bool hasTraceCollector() const
917923
{
918924
return trace_collector.has_value();
@@ -4481,7 +4487,6 @@ std::shared_ptr<Cluster> Context::getCluster(const std::string & cluster_name) c
44814487
throw Exception(ErrorCodes::CLUSTER_DOESNT_EXIST, "Requested cluster '{}' not found", cluster_name);
44824488
}
44834489

4484-
44854490
std::shared_ptr<Cluster> Context::tryGetCluster(const std::string & cluster_name) const
44864491
{
44874492
std::shared_ptr<Cluster> res = nullptr;
@@ -4500,6 +4505,13 @@ std::shared_ptr<Cluster> Context::tryGetCluster(const std::string & cluster_name
45004505
return res;
45014506
}
45024507

4508+
void Context::unregisterInDynamicClusters()
4509+
{
4510+
std::lock_guard lock(shared->clusters_mutex);
4511+
if (!shared->cluster_discovery)
4512+
return;
4513+
shared->cluster_discovery->unregisterAll();
4514+
}
45034515

45044516
void Context::reloadClusterConfig() const
45054517
{
@@ -5350,12 +5362,20 @@ void Context::stopServers(const ServerType & server_type) const
53505362
shared->stop_servers_callback(server_type);
53515363
}
53525364

5353-
53545365
void Context::shutdown() TSA_NO_THREAD_SAFETY_ANALYSIS
53555366
{
53565367
shared->shutdown();
53575368
}
53585369

5370+
void Context::preShutdown()
5371+
{
5372+
shared->preshutdown_called = true;
5373+
}
5374+
5375+
bool Context::isPreShutdownCalled() const
5376+
{
5377+
return shared->preshutdown_called;
5378+
}
53595379

53605380
Context::ApplicationType Context::getApplicationType() const
53615381
{

src/Interpreters/Context.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1225,6 +1225,7 @@ class Context: public ContextData, public std::enable_shared_from_this<Context>
12251225
size_t getClustersVersion() const;
12261226

12271227
void startClusterDiscovery();
1228+
void unregisterInDynamicClusters();
12281229

12291230
/// Sets custom cluster, but doesn't update configuration
12301231
void setCluster(const String & cluster_name, const std::shared_ptr<Cluster> & cluster);
@@ -1335,6 +1336,10 @@ class Context: public ContextData, public std::enable_shared_from_this<Context>
13351336

13361337
void shutdown();
13371338

1339+
/// Stop some works to allow graceful shutdown later
1340+
void preShutdown();
1341+
bool isPreShutdownCalled() const;
1342+
13381343
bool isInternalQuery() const { return is_internal_query; }
13391344
void setInternalQuery(bool internal) { is_internal_query = internal; }
13401345

src/Interpreters/InterpreterSystemQuery.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,13 @@ BlockIO InterpreterSystemQuery::execute()
330330
throw ErrnoException(ErrorCodes::CANNOT_KILL, "System call kill(0, SIGTERM) failed");
331331
break;
332332
}
333+
case Type::PRESHUTDOWN:
334+
{
335+
getContext()->checkAccess(AccessType::SYSTEM_SHUTDOWN);
336+
getContext()->preShutdown();
337+
getContext()->unregisterInDynamicClusters();
338+
break;
339+
}
333340
case Type::KILL:
334341
{
335342
getContext()->checkAccess(AccessType::SYSTEM_SHUTDOWN);
@@ -1468,6 +1475,7 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster()
14681475
switch (query.type)
14691476
{
14701477
case Type::SHUTDOWN:
1478+
case Type::PRESHUTDOWN:
14711479
case Type::KILL:
14721480
case Type::SUSPEND:
14731481
{

src/Parsers/ASTSystemQuery.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -443,6 +443,7 @@ void ASTSystemQuery::formatImpl(WriteBuffer & ostr, const FormatSettings & setti
443443
}
444444
case Type::KILL:
445445
case Type::SHUTDOWN:
446+
case Type::PRESHUTDOWN:
446447
case Type::DROP_DNS_CACHE:
447448
case Type::DROP_CONNECTIONS_CACHE:
448449
case Type::DROP_MMAP_CACHE:

src/Parsers/ASTSystemQuery.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ class ASTSystemQuery : public IAST, public ASTQueryWithOnCluster
1717
{
1818
UNKNOWN,
1919
SHUTDOWN,
20+
PRESHUTDOWN,
2021
KILL,
2122
SUSPEND,
2223
DROP_DNS_CACHE,

src/QueryPipeline/RemoteQueryExecutor.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -984,6 +984,11 @@ void RemoteQueryExecutor::setProfileInfoCallback(ProfileInfoCallback callback)
984984
profile_info_callback = std::move(callback);
985985
}
986986

987+
bool RemoteQueryExecutor::skipUnavailableShards() const
988+
{
989+
return context->getSettingsRef()[Setting::skip_unavailable_shards];
990+
}
991+
987992
bool RemoteQueryExecutor::needToSkipUnavailableShard() const
988993
{
989994
return context->getSettingsRef()[Setting::skip_unavailable_shards] && (0 == connections->size());

0 commit comments

Comments
 (0)