Skip to content

Commit 05cb032

Browse files
SangJunBak authored and def- committed
conditionally create mz_system and mz_probe cluster replicas (MaterializeInc#31452)
See commit messages for details <!-- Describe the contents of the PR briefly but completely. If you write detailed commit messages, it is acceptable to copy/paste them here, or write "see commit messages for details." If there is only one commit in the PR, GitHub will have already added its commit message above. --> * This PR adds a known-desirable feature. MaterializeInc/database-issues#8954 <!-- Which of the following best describes the motivation behind this PR? * This PR fixes a recognized bug. [Ensure issue is linked somewhere.] * This PR fixes a previously unreported bug. [Describe the bug in detail, as if you were filing a bug report.] * This PR adds a feature that has not yet been specified. [Write a brief specification for the feature, including justification for its inclusion in Materialize, as if you were writing the original feature specification.] * This PR refactors existing code. [Describe what was wrong with the existing code, if it is not obvious.] --> <!-- Leave some tips for your reviewer, like: * The diff is much smaller if viewed with whitespace hidden. * [Some function/module/file] deserves extra attention. * [Some function/module/file] is pure code movement and only needs a skim. Delete this section if no tips. --> - [ ] This PR has adequate test coverage / QA involvement has been duly considered. ([trigger-ci for additional test/nightly runs](https://trigger-ci.dev.materialize.com/)) - [ ] This PR has an associated up-to-date [design doc](https://github.com/MaterializeInc/materialize/blob/main/doc/developer/design/README.md), is a design doc ([template](https://github.com/MaterializeInc/materialize/blob/main/doc/developer/design/00000000_template.md)), or is sufficiently small to not require a design. <!-- Reference the design in the description. 
--> - [ ] If this PR evolves [an existing `$T ⇔ Proto$T` mapping](https://github.com/MaterializeInc/materialize/blob/main/doc/developer/command-and-response-binary-encoding.md) (possibly in a backwards-incompatible way), then it is tagged with a `T-proto` label. - [ ] If this PR will require changes to cloud orchestration or tests, there is a companion cloud PR to account for those changes that is tagged with the release-blocker label ([example](MaterializeInc/cloud#5021)). <!-- Ask in #team-cloud on Slack if you need help preparing the cloud PR. --> - [ ] If this PR includes major [user-facing behavior changes](https://github.com/MaterializeInc/materialize/blob/main/doc/developer/guide-changes.md#what-changes-require-a-release-note), I have pinged the relevant PM to schedule a changelog post.
1 parent ce24938 commit 05cb032

File tree

24 files changed

+475
-139
lines changed

24 files changed

+475
-139
lines changed

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

misc/helm-charts/operator/README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,10 @@ The following table lists the configurable parameters of the Materialize operato
125125
| `operator.cloudProvider.providers.gcp` | GCP Configuration (placeholder for future use) | ``{"enabled":false}`` |
126126
| `operator.cloudProvider.region` | Common cloud provider settings | ``"kind"`` |
127127
| `operator.cloudProvider.type` | Specifies cloud provider. Valid values are 'aws', 'gcp', 'azure' , 'generic', or 'local' | ``"local"`` |
128+
| `operator.clusters.defaultReplicationFactor.analytics` | | ``0`` |
129+
| `operator.clusters.defaultReplicationFactor.probe` | | ``0`` |
130+
| `operator.clusters.defaultReplicationFactor.support` | | ``0`` |
131+
| `operator.clusters.defaultReplicationFactor.system` | | ``0`` |
128132
| `operator.clusters.defaultSizes.analytics` | | ``"25cc"`` |
129133
| `operator.clusters.defaultSizes.catalogServer` | | ``"50cc"`` |
130134
| `operator.clusters.defaultSizes.default` | | ``"25cc"`` |

misc/helm-charts/operator/templates/deployment.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,18 @@ spec:
103103
{{ if .Values.operator.clusters.defaultSizes.analytics }}
104104
- "--bootstrap-builtin-analytics-cluster-replica-size={{ .Values.operator.clusters.defaultSizes.analytics }}"
105105
{{- end }}
106+
{{ if ne .Values.operator.clusters.defaultReplicationFactor.system nil }}
107+
- "--bootstrap-builtin-system-cluster-replication-factor={{ .Values.operator.clusters.defaultReplicationFactor.system }}"
108+
{{- end }}
109+
{{ if ne .Values.operator.clusters.defaultReplicationFactor.probe nil }}
110+
- "--bootstrap-builtin-probe-cluster-replication-factor={{ .Values.operator.clusters.defaultReplicationFactor.probe }}"
111+
{{- end }}
112+
{{ if ne .Values.operator.clusters.defaultReplicationFactor.support nil }}
113+
- "--bootstrap-builtin-support-cluster-replication-factor={{ .Values.operator.clusters.defaultReplicationFactor.support }}"
114+
{{- end }}
115+
{{ if ne .Values.operator.clusters.defaultReplicationFactor.analytics nil }}
116+
- "--bootstrap-builtin-analytics-cluster-replication-factor={{ .Values.operator.clusters.defaultReplicationFactor.analytics }}"
117+
{{- end }}
106118
{{- end }}
107119
- "--image-pull-policy={{ kebabcase .Values.operator.image.pullPolicy }}"
108120
{{- range $key, $value := .Values.environmentd.nodeSelector }}

misc/helm-charts/operator/values.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,11 @@ operator:
199199
support: 25cc
200200
catalogServer: 50cc
201201
analytics: 25cc
202+
defaultReplicationFactor:
203+
system: 0
204+
probe: 0
205+
support: 0
206+
analytics: 0
202207

203208
# Node selector to use for the operator pod
204209
nodeSelector: {}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// Copyright Materialize, Inc. and contributors. All rights reserved.
2+
//
3+
// Use of this software is governed by the Business Source License
4+
// included in the LICENSE file.
5+
//
6+
// As of the Change Date specified in that file, in accordance with
7+
// the Business Source License, use of this software will be governed
8+
// by the Apache License, Version 2.0.
9+
10+
//! Types for bootstrap builtin cluster configuration.
11+
12+
#[derive(Debug, Clone)]
13+
pub struct BootstrapBuiltinClusterConfig {
14+
pub size: String,
15+
pub replication_factor: u32,
16+
}
17+
18+
pub const SYSTEM_CLUSTER_DEFAULT_REPLICATION_FACTOR: u32 = 1;
19+
pub const CATALOG_SERVER_CLUSTER_DEFAULT_REPLICATION_FACTOR: u32 = 1;
20+
pub const PROBE_CLUSTER_DEFAULT_REPLICATION_FACTOR: u32 = 1;
21+
// Support and analytics clusters are ephemeral - they are only spun up temporarily when needed.
22+
// Since they are short-lived, they don't need replication by default.
23+
pub const SUPPORT_CLUSTER_DEFAULT_REPLICATION_FACTOR: u32 = 0;
24+
pub const ANALYTICS_CLUSTER_DEFAULT_REPLICATION_FACTOR: u32 = 0;

src/adapter-types/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
//! Types for the adapter.
1111
12+
pub mod bootstrap_builtin_cluster_config;
1213
pub mod compaction;
1314
pub mod connection;
1415
pub mod dyncfgs;

src/adapter/src/catalog.rs

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,11 @@ use std::sync::Arc;
1919
use futures::future::BoxFuture;
2020
use futures::{Future, FutureExt};
2121
use itertools::Itertools;
22+
use mz_adapter_types::bootstrap_builtin_cluster_config::{
23+
BootstrapBuiltinClusterConfig, ANALYTICS_CLUSTER_DEFAULT_REPLICATION_FACTOR,
24+
CATALOG_SERVER_CLUSTER_DEFAULT_REPLICATION_FACTOR, PROBE_CLUSTER_DEFAULT_REPLICATION_FACTOR,
25+
SUPPORT_CLUSTER_DEFAULT_REPLICATION_FACTOR, SYSTEM_CLUSTER_DEFAULT_REPLICATION_FACTOR,
26+
};
2227
use mz_adapter_types::connection::ConnectionId;
2328
use mz_audit_log::{EventType, FullNameV1, ObjectType, VersionedStorageUsage};
2429
use mz_build_info::DUMMY_BUILD_INFO;
@@ -691,11 +696,26 @@ impl Catalog {
691696
boot_ts: previous_ts,
692697
skip_migrations: true,
693698
cluster_replica_sizes: bootstrap_args.cluster_replica_size_map.clone(),
694-
builtin_system_cluster_replica_size: replica_size.clone(),
695-
builtin_catalog_server_cluster_replica_size: replica_size.clone(),
696-
builtin_probe_cluster_replica_size: replica_size.clone(),
697-
builtin_support_cluster_replica_size: replica_size.clone(),
698-
builtin_analytics_cluster_replica_size: replica_size.clone(),
699+
builtin_system_cluster_config: BootstrapBuiltinClusterConfig {
700+
size: replica_size.clone(),
701+
replication_factor: SYSTEM_CLUSTER_DEFAULT_REPLICATION_FACTOR,
702+
},
703+
builtin_catalog_server_cluster_config: BootstrapBuiltinClusterConfig {
704+
size: replica_size.clone(),
705+
replication_factor: CATALOG_SERVER_CLUSTER_DEFAULT_REPLICATION_FACTOR,
706+
},
707+
builtin_probe_cluster_config: BootstrapBuiltinClusterConfig {
708+
size: replica_size.clone(),
709+
replication_factor: PROBE_CLUSTER_DEFAULT_REPLICATION_FACTOR,
710+
},
711+
builtin_support_cluster_config: BootstrapBuiltinClusterConfig {
712+
size: replica_size.clone(),
713+
replication_factor: SUPPORT_CLUSTER_DEFAULT_REPLICATION_FACTOR,
714+
},
715+
builtin_analytics_cluster_config: BootstrapBuiltinClusterConfig {
716+
size: replica_size.clone(),
717+
replication_factor: ANALYTICS_CLUSTER_DEFAULT_REPLICATION_FACTOR,
718+
},
699719
system_parameter_defaults,
700720
remote_system_parameters: None,
701721
availability_zones: vec![],

src/adapter/src/catalog/open.rs

Lines changed: 53 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ use std::time::{Duration, Instant};
1717

1818
use futures::future::{BoxFuture, FutureExt};
1919
use itertools::{Either, Itertools};
20+
use mz_adapter_types::bootstrap_builtin_cluster_config::BootstrapBuiltinClusterConfig;
2021
use mz_adapter_types::compaction::CompactionWindow;
2122
use mz_adapter_types::dyncfgs::{ENABLE_CONTINUAL_TASK_BUILTINS, ENABLE_EXPRESSION_CACHE};
2223
use mz_catalog::builtin::{
@@ -317,22 +318,22 @@ impl Catalog {
317318
// Add any new builtin objects and remove old ones.
318319
let (migrated_builtins, new_builtin_collections) =
319320
add_new_remove_old_builtin_items_migration(&state.config().builtins_cfg, &mut txn)?;
320-
let cluster_sizes = BuiltinBootstrapClusterSizes {
321-
system_cluster: config.builtin_system_cluster_replica_size,
322-
catalog_server_cluster: config.builtin_catalog_server_cluster_replica_size,
323-
probe_cluster: config.builtin_probe_cluster_replica_size,
324-
support_cluster: config.builtin_support_cluster_replica_size,
325-
analytics_cluster: config.builtin_analytics_cluster_replica_size,
321+
let builtin_bootstrap_cluster_config_map = BuiltinBootstrapClusterConfigMap {
322+
system_cluster: config.builtin_system_cluster_config,
323+
catalog_server_cluster: config.builtin_catalog_server_cluster_config,
324+
probe_cluster: config.builtin_probe_cluster_config,
325+
support_cluster: config.builtin_support_cluster_config,
326+
analytics_cluster: config.builtin_analytics_cluster_config,
326327
};
327328
add_new_remove_old_builtin_clusters_migration(
328329
&mut txn,
329-
&cluster_sizes,
330+
&builtin_bootstrap_cluster_config_map,
330331
&state.cluster_replica_sizes,
331332
)?;
332333
add_new_remove_old_builtin_introspection_source_migration(&mut txn)?;
333334
add_new_remove_old_builtin_cluster_replicas_migration(
334335
&mut txn,
335-
&cluster_sizes,
336+
&builtin_bootstrap_cluster_config_map,
336337
&state.cluster_replica_sizes,
337338
)?;
338339
add_new_remove_old_builtin_roles_migration(&mut txn)?;
@@ -1222,7 +1223,7 @@ fn add_new_remove_old_builtin_items_migration(
12221223

12231224
fn add_new_remove_old_builtin_clusters_migration(
12241225
txn: &mut mz_catalog::durable::Transaction<'_>,
1225-
builtin_cluster_sizes: &BuiltinBootstrapClusterSizes,
1226+
builtin_cluster_config_map: &BuiltinBootstrapClusterConfigMap,
12261227
cluster_sizes: &ClusterReplicaSizeMap,
12271228
) -> Result<(), mz_catalog::durable::CatalogError> {
12281229
let mut durable_clusters: BTreeMap<_, _> = txn
@@ -1234,18 +1235,19 @@ fn add_new_remove_old_builtin_clusters_migration(
12341235
// Add new clusters.
12351236
for builtin_cluster in BUILTIN_CLUSTERS {
12361237
if durable_clusters.remove(builtin_cluster.name).is_none() {
1237-
let cluster_size = builtin_cluster_sizes.get_size(builtin_cluster.name)?;
1238-
let cluster_allocation = cluster_sizes.get_allocation_by_name(&cluster_size)?;
1238+
let cluster_config = builtin_cluster_config_map.get_config(builtin_cluster.name)?;
1239+
let cluster_allocation = cluster_sizes.get_allocation_by_name(&cluster_config.size)?;
1240+
12391241
txn.insert_system_cluster(
12401242
builtin_cluster.name,
12411243
vec![],
12421244
builtin_cluster.privileges.to_vec(),
12431245
builtin_cluster.owner_id.to_owned(),
12441246
mz_catalog::durable::ClusterConfig {
12451247
variant: mz_catalog::durable::ClusterVariant::Managed(ClusterVariantManaged {
1246-
size: cluster_size,
1248+
size: cluster_config.size,
12471249
availability_zones: vec![],
1248-
replication_factor: builtin_cluster.replication_factor,
1250+
replication_factor: cluster_config.replication_factor,
12491251
disk: cluster_allocation.is_cc,
12501252
logging: default_logging_config(),
12511253
optimizer_feature_overrides: Default::default(),
@@ -1335,7 +1337,7 @@ fn add_new_remove_old_builtin_roles_migration(
13351337

13361338
fn add_new_remove_old_builtin_cluster_replicas_migration(
13371339
txn: &mut Transaction<'_>,
1338-
builtin_cluster_sizes: &BuiltinBootstrapClusterSizes,
1340+
builtin_cluster_config_map: &BuiltinBootstrapClusterConfigMap,
13391341
cluster_sizes: &ClusterReplicaSizeMap,
13401342
) -> Result<(), AdapterError> {
13411343
let cluster_lookup: BTreeMap<_, _> = txn
@@ -1363,12 +1365,18 @@ fn add_new_remove_old_builtin_cluster_replicas_migration(
13631365
let replica_names = durable_replicas
13641366
.get_mut(&cluster.id)
13651367
.unwrap_or(&mut empty_map);
1366-
if replica_names.remove(builtin_replica.name).is_none() {
1368+
1369+
let builtin_cluster_boostrap_config =
1370+
builtin_cluster_config_map.get_config(builtin_replica.cluster_name)?;
1371+
if replica_names.remove(builtin_replica.name).is_none()
1372+
// NOTE(SangJunBak): We need to explicitly check the replication factor because
1373+
// BUILTIN_CLUSTER_REPLICAS is constant throughout all deployments but the replication
1374+
// factor is configurable on bootstrap.
1375+
&& builtin_cluster_boostrap_config.replication_factor > 0
1376+
{
13671377
let replica_size = match cluster.config.variant {
13681378
ClusterVariant::Managed(ClusterVariantManaged { ref size, .. }) => size.clone(),
1369-
ClusterVariant::Unmanaged => {
1370-
builtin_cluster_sizes.get_size(builtin_replica.cluster_name)?
1371-
}
1379+
ClusterVariant::Unmanaged => builtin_cluster_boostrap_config.size,
13721380
};
13731381
let replica_allocation = cluster_sizes.get_allocation_by_name(&replica_size)?;
13741382

@@ -1482,37 +1490,43 @@ fn default_logging_config() -> ReplicaLogging {
14821490
interval: Some(Duration::from_secs(1)),
14831491
}
14841492
}
1485-
pub struct BuiltinBootstrapClusterSizes {
1486-
/// Size to default system_cluster on bootstrap
1487-
pub system_cluster: String,
1488-
/// Size to default catalog_server_cluster on bootstrap
1489-
pub catalog_server_cluster: String,
1490-
/// Size to default probe_cluster on bootstrap
1491-
pub probe_cluster: String,
1492-
/// Size to default support_cluster on bootstrap
1493-
pub support_cluster: String,
1493+
1494+
#[derive(Debug)]
1495+
pub struct BuiltinBootstrapClusterConfigMap {
1496+
/// Size and replication factor to default system_cluster on bootstrap
1497+
pub system_cluster: BootstrapBuiltinClusterConfig,
1498+
/// Size and replication factor to default catalog_server_cluster on bootstrap
1499+
pub catalog_server_cluster: BootstrapBuiltinClusterConfig,
1500+
/// Size and replication factor to default probe_cluster on bootstrap
1501+
pub probe_cluster: BootstrapBuiltinClusterConfig,
1502+
/// Size and replication factor to default support_cluster on bootstrap
1503+
pub support_cluster: BootstrapBuiltinClusterConfig,
14941504
/// Size and replication factor to default analytics_cluster on bootstrap
1495-
pub analytics_cluster: String,
1505+
pub analytics_cluster: BootstrapBuiltinClusterConfig,
14961506
}
14971507

1498-
impl BuiltinBootstrapClusterSizes {
1508+
impl BuiltinBootstrapClusterConfigMap {
14991509
/// Gets the bootstrap config (size and replication factor) of the builtin cluster based on the provided name
1500-
fn get_size(&self, cluster_name: &str) -> Result<String, mz_catalog::durable::CatalogError> {
1501-
if cluster_name == mz_catalog::builtin::MZ_SYSTEM_CLUSTER.name {
1502-
Ok(self.system_cluster.clone())
1510+
fn get_config(
1511+
&self,
1512+
cluster_name: &str,
1513+
) -> Result<BootstrapBuiltinClusterConfig, mz_catalog::durable::CatalogError> {
1514+
let cluster_config = if cluster_name == mz_catalog::builtin::MZ_SYSTEM_CLUSTER.name {
1515+
&self.system_cluster
15031516
} else if cluster_name == mz_catalog::builtin::MZ_CATALOG_SERVER_CLUSTER.name {
1504-
Ok(self.catalog_server_cluster.clone())
1517+
&self.catalog_server_cluster
15051518
} else if cluster_name == mz_catalog::builtin::MZ_PROBE_CLUSTER.name {
1506-
Ok(self.probe_cluster.clone())
1519+
&self.probe_cluster
15071520
} else if cluster_name == mz_catalog::builtin::MZ_SUPPORT_CLUSTER.name {
1508-
Ok(self.support_cluster.clone())
1521+
&self.support_cluster
15091522
} else if cluster_name == mz_catalog::builtin::MZ_ANALYTICS_CLUSTER.name {
1510-
Ok(self.analytics_cluster.clone())
1523+
&self.analytics_cluster
15111524
} else {
1512-
Err(mz_catalog::durable::CatalogError::Catalog(
1525+
return Err(mz_catalog::durable::CatalogError::Catalog(
15131526
SqlCatalogError::UnexpectedBuiltinCluster(cluster_name.to_owned()),
1514-
))
1515-
}
1527+
));
1528+
};
1529+
Ok(cluster_config.clone())
15161530
}
15171531
}
15181532

src/adapter/src/coord.rs

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ use futures::StreamExt;
8888
use http::Uri;
8989
use ipnet::IpNet;
9090
use itertools::{Either, Itertools};
91+
use mz_adapter_types::bootstrap_builtin_cluster_config::BootstrapBuiltinClusterConfig;
9192
use mz_adapter_types::compaction::CompactionWindow;
9293
use mz_adapter_types::connection::ConnectionId;
9394
use mz_adapter_types::dyncfgs::WITH_0DT_DEPLOYMENT_CAUGHT_UP_CHECK_INTERVAL;
@@ -991,11 +992,11 @@ pub struct Config {
991992
pub cloud_resource_controller: Option<Arc<dyn CloudResourceController>>,
992993
pub availability_zones: Vec<String>,
993994
pub cluster_replica_sizes: ClusterReplicaSizeMap,
994-
pub builtin_system_cluster_replica_size: String,
995-
pub builtin_catalog_server_cluster_replica_size: String,
996-
pub builtin_probe_cluster_replica_size: String,
997-
pub builtin_support_cluster_replica_size: String,
998-
pub builtin_analytics_cluster_replica_size: String,
995+
pub builtin_system_cluster_config: BootstrapBuiltinClusterConfig,
996+
pub builtin_catalog_server_cluster_config: BootstrapBuiltinClusterConfig,
997+
pub builtin_probe_cluster_config: BootstrapBuiltinClusterConfig,
998+
pub builtin_support_cluster_config: BootstrapBuiltinClusterConfig,
999+
pub builtin_analytics_cluster_config: BootstrapBuiltinClusterConfig,
9991000
pub system_parameter_defaults: BTreeMap<String, String>,
10001001
pub storage_usage_client: StorageUsageClient,
10011002
pub storage_usage_collection_interval: Duration,
@@ -3816,11 +3817,11 @@ pub fn serve(
38163817
secrets_controller,
38173818
cloud_resource_controller,
38183819
cluster_replica_sizes,
3819-
builtin_system_cluster_replica_size,
3820-
builtin_catalog_server_cluster_replica_size,
3821-
builtin_probe_cluster_replica_size,
3822-
builtin_support_cluster_replica_size,
3823-
builtin_analytics_cluster_replica_size,
3820+
builtin_system_cluster_config,
3821+
builtin_catalog_server_cluster_config,
3822+
builtin_probe_cluster_config,
3823+
builtin_support_cluster_config,
3824+
builtin_analytics_cluster_config,
38243825
system_parameter_defaults,
38253826
availability_zones,
38263827
storage_usage_client,
@@ -3974,11 +3975,11 @@ pub fn serve(
39743975
boot_ts: boot_ts.clone(),
39753976
skip_migrations: false,
39763977
cluster_replica_sizes,
3977-
builtin_system_cluster_replica_size,
3978-
builtin_catalog_server_cluster_replica_size,
3979-
builtin_probe_cluster_replica_size,
3980-
builtin_support_cluster_replica_size,
3981-
builtin_analytics_cluster_replica_size,
3978+
builtin_system_cluster_config,
3979+
builtin_catalog_server_cluster_config,
3980+
builtin_probe_cluster_config,
3981+
builtin_support_cluster_config,
3982+
builtin_analytics_cluster_config,
39823983
system_parameter_defaults,
39833984
remote_system_parameters,
39843985
availability_zones,

src/catalog-debug/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ rust_binary(
3232
version = "0.130.3",
3333
deps = [
3434
"//src/adapter:mz_adapter",
35+
"//src/adapter-types:mz_adapter_types",
3536
"//src/build-info:mz_build_info",
3637
"//src/catalog:mz_catalog",
3738
"//src/cloud-resources:mz_cloud_resources",

0 commit comments

Comments
 (0)