MaterializeInc · aljoscha · Mar 18, 2025 · Mar 17, 2025
@@ -35,12 +35,14 @@ contact your Materialize support.
 {{</ note >}}
 
 
-### Graceful cluster resizing
+### Zero-downtime cluster resizing
 
-For clusters that do not contain sources or sinks, Materialize supports altering
-the cluster size with no downtime (i.e., graceful cluster resizing).
+For clusters that do not contain sources or sinks, Materialize supports
+altering the cluster size with no downtime (i.e., zero-downtime cluster
+resizing).
 
-For more information, see [Graceful cluster resizing](/sql/alter-cluster/#graceful-cluster-resizing).
+For more information, see [Zero-downtime cluster
+resizing](/sql/alter-cluster/#zero-downtime-cluster-resizing).
 
 ### Real-time recency
 

@@ -140,17 +140,18 @@ Depending on the type of objects in a cluster, a resizing operation might incur
   all objects in the cluster to hydrate.
 
 * For clusters that **do not contain sources or sinks**, it's possible to avoid
-  downtime by performing a [graceful cluster resizing](#graceful-cluster-resizing).
+  downtime by performing [zero-downtime cluster
+  resizing](#zero-downtime-cluster-resizing).
 
-#### Graceful cluster resizing
+#### Zero-downtime cluster resizing
 
 {{< private-preview />}}
 
 For clusters that do not contain sources or sinks, you can use the `WAIT UNTIL
-READY` option to perform a graceful resizing, which incurs **no downtime**.
-Instead of restarting the cluster, this approach spins up an additional cluster
-replica under the covers with the desired new size, waits for the replica to be
-hydrated, and then replaces the original replica.
+READY` option to perform a zero-downtime resizing, which incurs **no
+downtime**. Instead of restarting the cluster, this approach spins up an
+additional cluster replica under the covers with the desired new size, waits
+for the replica to be hydrated, and then replaces the original replica.
 
 ```sql
 ALTER CLUSTER c1
@@ -255,19 +256,19 @@ or `1`.
 ### Resizing
 
 - For clusters **without any sources or sinks**, you can alter the cluster size
-  with **no downtime** (i.e., [graceful cluster
-  resizing](#graceful-cluster-resizing)) by running the `ALTER CLUSTER` command
-  with the `WAIT UNTIL READY` [option](#with-options):
+  with **no downtime** (i.e., [zero-downtime cluster
+  resizing](#zero-downtime-cluster-resizing)) by running the `ALTER CLUSTER`
+  command with the `WAIT UNTIL READY` [option](#with-options):
 
   ```mzsql
   ALTER CLUSTER c1
   SET (SIZE '100CC') WITH (WAIT UNTIL READY (TIMEOUT = '10m', ON TIMEOUT = 'COMMIT'));
   ````
 
 - For clusters **with sources or sinks**, it's not yet possible to perform
-  graceful cluster resizing. This means that resizing clusters with sources or
-  sinks requires a cluster **restart**, which incurs **downtime**. You can
-  alter the cluster size by running the `ALTER CLUSTER` command:
+  zero-downtime cluster resizing. This means that resizing clusters with
+  sources or sinks requires a cluster **restart**, which incurs **downtime**.
+  You can alter the cluster size by running the `ALTER CLUSTER` command:
 
   ```mzsql
   ALTER CLUSTER c1 SET (SIZE '100cc');

@@ -109,8 +109,9 @@ You can change the size of a cluster to respond to changes in your workload
 using [`ALTER CLUSTER`](/sql/alter-cluster). Depending on the type of objects
 the cluster is hosting, this operation **might incur downtime**.
 
-See the reference documentation for [`ALTER CLUSTER`](/sql/alter-cluster#graceful-cluster-resizing)
-for more details on cluster resizing.
+See the reference documentation for [`ALTER
+CLUSTER`](/sql/alter-cluster#zero-downtime-cluster-resizing) for more details
+on cluster resizing.
 
 #### Legacy sizes
 

@@ -282,8 +282,8 @@ impl Coordinator {
                     )
                     .await?;
                 if alter_followup == NeedsFinalization::Yes {
-                    // For non backgrounded graceful alters, store the cluster_id in the ConnMeta
-                    // to allow for cancellation.
+                    // For non-backgrounded zero-downtime alters, store the
+                    // cluster_id in the ConnMeta to allow for cancellation.
                     self.active_conns
                         .get_mut(session.conn_id())
                         .expect("There must be an active connection")
@@ -1185,10 +1185,9 @@ impl Coordinator {
             || new_disk != disk
         {
             self.ensure_valid_azs(new_availability_zones.iter())?;
-            // If we're not doing a graceful reconfig
-            // tear down all replicas, create new ones
-            // else create the pending replicas and return
-            // early asking for finalization
+            // If we're not doing a zero-downtime reconfig, tear down all
+            // replicas and create new ones; otherwise, create the pending
+            // replicas and return early, asking for finalization.
             match strategy {
                 AlterClusterPlanStrategy::None => {
                     let replica_ids_and_reasons = (0..*replication_factor)

@@ -5895,7 +5895,7 @@ pub fn plan_alter_cluster(
                         AlterClusterPlanStrategy::None => {}
                         _ => {
                             scx.require_feature_flag(
-                                &crate::session::vars::ENABLE_GRACEFUL_CLUSTER_RECONFIGURATION,
+                                &crate::session::vars::ENABLE_ZERO_DOWNTIME_CLUSTER_RECONFIGURATION,
                             )?;
                         }
                     }

@@ -2126,8 +2126,8 @@ feature_flags!(
         enable_for_item_parsing: false,
     },
     {
-        name: enable_graceful_cluster_reconfiguration,
-        desc: "Enable graceful reconfiguration for alter cluster",
+        name: enable_zero_downtime_cluster_reconfiguration,
+        desc: "Enable zero-downtime reconfiguration for alter cluster",
         default: false,
         enable_for_item_parsing: false,
     },

diff --git a/test/cloudtest/test_managed_cluster.py b/test/cloudtest/test_managed_cluster.py
@@ -115,10 +115,10 @@ def test_managed_cluster_sizing(mz: MaterializeApplication) -> None:
         )
 
 
-def test_graceful_reconfiguration(mz: MaterializeApplication) -> None:
+def test_zero_downtime_reconfiguration(mz: MaterializeApplication) -> None:
     mz.environmentd.sql(
         """
-        ALTER SYSTEM SET enable_graceful_cluster_reconfiguration = true;
+        ALTER SYSTEM SET enable_zero_downtime_cluster_reconfiguration = true;
         """,
         port="internal",
         user="mz_system",
@@ -130,7 +130,7 @@ def assert_replica_names(names, allow_pending=False):
             SELECT mz_cluster_replicas.name
             FROM mz_cluster_replicas, mz_clusters
             WHERE mz_cluster_replicas.cluster_id = mz_clusters.id
-            AND mz_clusters.name = 'gracefulatlertest';
+            AND mz_clusters.name = 'zdtaltertest';
             """
         )
         assert [replica[0] for replica in replicas] == names
@@ -143,14 +143,14 @@ def assert_replica_names(names, allow_pending=False):
                         FROM mz_internal.mz_pending_cluster_replicas  ur
                         INNER join mz_cluster_replicas cr ON cr.id=ur.id
                         INNER join mz_clusters c ON c.id=cr.cluster_id
-                        WHERE c.name = 'gracefulatlertest';
+                        WHERE c.name = 'zdtaltertest';
                         """
                     )
                 )
                 == 0
             ), "There should be no pending replicas"
 
-    # Basic Graceful reocnfig test cases matrix
+    # Basic zero-downtime reconfig test cases matrix
     # - size change, no replica change
     # - replica size up, no other change
     # - replica size down, with size change
@@ -161,16 +161,16 @@ def assert_replica_names(names, allow_pending=False):
     # - names should match r# pattern, not end with `-pending`
     # - cancelled statements correctly roll back
     # - timed-out wait-until-ready queries take the appropriate action
-    # - Fails to gracefully alter cluster with source
+    # - Fails to perform a zero-downtime alter on a cluster with a source
     mz.environmentd.sql(
-        'CREATE CLUSTER gracefulatlertest ( SIZE = "1" )',
+        'CREATE CLUSTER zdtaltertest ( SIZE = "1" )',
         port="internal",
         user="mz_system",
     )
 
     mz.environmentd.sql(
         """
-        ALTER CLUSTER gracefulatlertest SET ( SIZE = '2' ) WITH ( WAIT FOR '1ms' )
+        ALTER CLUSTER zdtaltertest SET ( SIZE = '2' ) WITH ( WAIT FOR '1ms' )
         """,
         port="internal",
         user="mz_system",
@@ -179,7 +179,7 @@ def assert_replica_names(names, allow_pending=False):
 
     mz.environmentd.sql(
         """
-        ALTER CLUSTER gracefulatlertest SET ( SIZE = '1', REPLICATION FACTOR 2 ) WITH ( WAIT FOR '1ms' )
+        ALTER CLUSTER zdtaltertest SET ( SIZE = '1', REPLICATION FACTOR 2 ) WITH ( WAIT FOR '1ms' )
         """,
         port="internal",
         user="mz_system",
@@ -188,7 +188,7 @@ def assert_replica_names(names, allow_pending=False):
 
     mz.environmentd.sql(
         """
-        ALTER CLUSTER gracefulatlertest SET ( SIZE = '1', REPLICATION FACTOR 1 ) WITH ( WAIT FOR '1ms' )
+        ALTER CLUSTER zdtaltertest SET ( SIZE = '1', REPLICATION FACTOR 1 ) WITH ( WAIT FOR '1ms' )
         """,
         port="internal",
         user="mz_system",
@@ -197,7 +197,7 @@ def assert_replica_names(names, allow_pending=False):
 
     mz.environmentd.sql(
         """
-        ALTER CLUSTER gracefulatlertest SET ( SIZE = '2', REPLICATION FACTOR 2 ) WITH ( WAIT FOR '1ms' )
+        ALTER CLUSTER zdtaltertest SET ( SIZE = '2', REPLICATION FACTOR 2 ) WITH ( WAIT FOR '1ms' )
         """,
         port="internal",
         user="mz_system",
@@ -206,7 +206,7 @@ def assert_replica_names(names, allow_pending=False):
 
     mz.environmentd.sql(
         """
-        ALTER CLUSTER gracefulatlertest SET ( SIZE = '1', REPLICATION FACTOR 1 ) WITH ( WAIT FOR '1ms' )
+        ALTER CLUSTER zdtaltertest SET ( SIZE = '1', REPLICATION FACTOR 1 ) WITH ( WAIT FOR '1ms' )
         """,
         port="internal",
         user="mz_system",
@@ -217,42 +217,42 @@ def assert_replica_names(names, allow_pending=False):
     # replica checks during alter
     mz.environmentd.sql(
         """
-        DROP CLUSTER IF EXISTS gracefulatlertest CASCADE;
+        DROP CLUSTER IF EXISTS zdtaltertest CASCADE;
         DROP TABLE IF EXISTS t CASCADE;
 
-        CREATE CLUSTER gracefulatlertest ( SIZE = '1');
+        CREATE CLUSTER zdtaltertest ( SIZE = '1');
 
-        SET CLUSTER = gracefulatlertest;
+        SET CLUSTER = zdtaltertest;
 
         -- now let's give it another go with user-defined objects
         CREATE TABLE t (a int);
         CREATE DEFAULT INDEX ON t;
         INSERT INTO t VALUES (42);
-        GRANT ALL ON CLUSTER gracefulatlertest TO materialize;
+        GRANT ALL ON CLUSTER zdtaltertest TO materialize;
         """,
         port="internal",
         user="mz_system",
     )
 
     # Validate replicas are correct during an ongoing alter
-    def gracefully_alter():
+    def zero_downtime_alter():
         mz.environmentd.sql(
             """
-            ALTER CLUSTER gracefulatlertest SET (SIZE = '2') WITH ( WAIT FOR '5s')
+            ALTER CLUSTER zdtaltertest SET (SIZE = '2') WITH ( WAIT FOR '5s')
             """,
             port="internal",
             user="mz_system",
         )
 
-    thread = Thread(target=gracefully_alter)
+    thread = Thread(target=zero_downtime_alter)
     thread.start()
     time.sleep(1)
 
     assert_replica_names(["r1", "r1-pending"], allow_pending=True)
     assert (
         mz.environmentd.sql_query(
             """
-        SELECT size FROM mz_clusters WHERE name='gracefulatlertest';
+        SELECT size FROM mz_clusters WHERE name='zdtaltertest';
         """
         )
         == (["1"],)
@@ -264,7 +264,7 @@ def gracefully_alter():
     assert (
         mz.environmentd.sql_query(
             """
-        SELECT size FROM mz_clusters WHERE name='gracefulatlertest';
+        SELECT size FROM mz_clusters WHERE name='zdtaltertest';
         """
         )
         == (["2"],)
@@ -332,11 +332,11 @@ def query_with_conn(
         == (["1"],)
     ), "Cluster should not have updated if canceled during alter"
 
-    # Test graceful reconfig wait until ready
+    # Test zero-downtime reconfig wait until ready
     mz.environmentd.sql(
         """
         DROP CLUSTER IF EXISTS cluster1 CASCADE;
-        DROP CLUSTER IF EXISTS gracefulaltertest CASCADE;
+        DROP CLUSTER IF EXISTS zdtaltertest CASCADE;
         """,
         port="internal",
         user="mz_system",

diff --git a/test/cluster/mzcompose.py b/test/cluster/mzcompose.py
@@ -4934,11 +4934,11 @@ def workflow_test_unified_introspection_during_replica_disconnect(c: Composition
         )
 
 
-def workflow_test_graceful_reconfigure(
+def workflow_test_zero_downtime_reconfigure(
     c: Composition, parser: WorkflowArgumentParser
 ) -> None:
     """
-    Tests gracefully reconfiguring a managed cluster
+    Tests reconfiguring a managed cluster with zero downtime
     """
     c.down(destroy_volumes=True)
     with c.override(
@@ -4949,7 +4949,7 @@ def workflow_test_graceful_reconfigure(
         c.up("clusterd1")
         c.sql(
             """
-            ALTER SYSTEM SET enable_graceful_cluster_reconfiguration = true;
+            ALTER SYSTEM SET enable_zero_downtime_cluster_reconfiguration = true;
 
             DROP CLUSTER IF EXISTS cluster1 CASCADE;
             DROP TABLE IF EXISTS t CASCADE;
@@ -4991,7 +4991,7 @@ def workflow_test_graceful_reconfigure(
             len(replicas) == 0
         ), f"Cluster should only have no pending replica prior to alter, found {replicas}"
 
-        def gracefully_alter():
+        def zero_downtime_alter():
             try:
                 c.sql(
                     """
@@ -5005,7 +5005,7 @@ def gracefully_alter():
                 pass
 
         # Run a reconfigure
-        thread = Thread(target=gracefully_alter)
+        thread = Thread(target=zero_downtime_alter)
         thread.start()
         time.sleep(3)
 
@@ -5059,7 +5059,7 @@ def gracefully_alter():
         )
         c.sql(
             """
-            ALTER SYSTEM RESET enable_graceful_cluster_reconfiguration;
+            ALTER SYSTEM RESET enable_zero_downtime_cluster_reconfiguration;
             """,
             port=6877,
             user="mz_system",

diff --git a/test/sqllogictest/managed_cluster.slt b/test/sqllogictest/managed_cluster.slt
@@ -446,7 +446,7 @@ DROP CLUSTER foo
 
 
 simple conn=mz_system,user=mz_system
-ALTER SYSTEM SET enable_graceful_cluster_reconfiguration = true;
+ALTER SYSTEM SET enable_zero_downtime_cluster_reconfiguration = true;
 ----
 COMPLETE 0