Skip to content

Commit b2128e7

Browse files
authored
conditionally create mz_system and mz_probe cluster replicas (#31452)
See commit messages for details <!-- Describe the contents of the PR briefly but completely. If you write detailed commit messages, it is acceptable to copy/paste them here, or write "see commit messages for details." If there is only one commit in the PR, GitHub will have already added its commit message above. --> ### Motivation * This PR adds a known-desirable feature. MaterializeInc/database-issues#8954 <!-- Which of the following best describes the motivation behind this PR? * This PR fixes a recognized bug. [Ensure issue is linked somewhere.] [Ensure issue is linked somewhere.] * This PR fixes a previously unreported bug. [Describe the bug in detail, as if you were filing a bug report.] * This PR adds a feature that has not yet been specified. [Write a brief specification for the feature, including justification for its inclusion in Materialize, as if you were writing the original feature specification.] * This PR refactors existing code. [Describe what was wrong with the existing code, if it is not obvious.] --> ### Tips for reviewer <!-- Leave some tips for your reviewer, like: * The diff is much smaller if viewed with whitespace hidden. * [Some function/module/file] deserves extra attention. * [Some function/module/file] is pure code movement and only needs a skim. Delete this section if no tips. --> ### Checklist - [ ] This PR has adequate test coverage / QA involvement has been duly considered. ([trigger-ci for additional test/nightly runs](https://trigger-ci.dev.materialize.com/)) - [ ] This PR has an associated up-to-date [design doc](https://github.com/MaterializeInc/materialize/blob/main/doc/developer/design/README.md), is a design doc ([template](https://github.com/MaterializeInc/materialize/blob/main/doc/developer/design/00000000_template.md)), or is sufficiently small to not require a design. <!-- Reference the design in the description. 
--> - [ ] If this PR evolves [an existing `$T ⇔ Proto$T` mapping](https://github.com/MaterializeInc/materialize/blob/main/doc/developer/command-and-response-binary-encoding.md) (possibly in a backwards-incompatible way), then it is tagged with a `T-proto` label. - [ ] If this PR will require changes to cloud orchestration or tests, there is a companion cloud PR to account for those changes that is tagged with the release-blocker label ([example](MaterializeInc/cloud#5021)). <!-- Ask in #team-cloud on Slack if you need help preparing the cloud PR. --> - [ ] If this PR includes major [user-facing behavior changes](https://github.com/MaterializeInc/materialize/blob/main/doc/developer/guide-changes.md#what-changes-require-a-release-note), I have pinged the relevant PM to schedule a changelog post.
1 parent a101e58 commit b2128e7

File tree

24 files changed

+475
-139
lines changed

24 files changed

+475
-139
lines changed

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

misc/helm-charts/operator/README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,10 @@ The following table lists the configurable parameters of the Materialize operato
125125
| `operator.cloudProvider.providers.gcp` | GCP Configuration (placeholder for future use) | ``{"enabled":false}`` |
126126
| `operator.cloudProvider.region` | Common cloud provider settings | ``"kind"`` |
127127
| `operator.cloudProvider.type` | Specifies cloud provider. Valid values are 'aws', 'gcp', 'azure' , 'generic', or 'local' | ``"local"`` |
128+
| `operator.clusters.defaultReplicationFactor.analytics` | | ``0`` |
129+
| `operator.clusters.defaultReplicationFactor.probe` | | ``0`` |
130+
| `operator.clusters.defaultReplicationFactor.support` | | ``0`` |
131+
| `operator.clusters.defaultReplicationFactor.system` | | ``0`` |
128132
| `operator.clusters.defaultSizes.analytics` | | ``"25cc"`` |
129133
| `operator.clusters.defaultSizes.catalogServer` | | ``"50cc"`` |
130134
| `operator.clusters.defaultSizes.default` | | ``"25cc"`` |

misc/helm-charts/operator/templates/deployment.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,18 @@ spec:
103103
{{ if .Values.operator.clusters.defaultSizes.analytics }}
104104
- "--bootstrap-builtin-analytics-cluster-replica-size={{ .Values.operator.clusters.defaultSizes.analytics }}"
105105
{{- end }}
106+
{{ if ne .Values.operator.clusters.defaultReplicationFactor.system nil }}
107+
- "--bootstrap-builtin-system-cluster-replication-factor={{ .Values.operator.clusters.defaultReplicationFactor.system }}"
108+
{{- end }}
109+
{{ if ne .Values.operator.clusters.defaultReplicationFactor.probe nil }}
110+
- "--bootstrap-builtin-probe-cluster-replication-factor={{ .Values.operator.clusters.defaultReplicationFactor.probe }}"
111+
{{- end }}
112+
{{ if ne .Values.operator.clusters.defaultReplicationFactor.support nil }}
113+
- "--bootstrap-builtin-support-cluster-replication-factor={{ .Values.operator.clusters.defaultReplicationFactor.support }}"
114+
{{- end }}
115+
{{ if ne .Values.operator.clusters.defaultReplicationFactor.analytics nil }}
116+
- "--bootstrap-builtin-analytics-cluster-replication-factor={{ .Values.operator.clusters.defaultReplicationFactor.analytics }}"
117+
{{- end }}
106118
{{- end }}
107119
- "--image-pull-policy={{ kebabcase .Values.operator.image.pullPolicy }}"
108120
{{- range $key, $value := .Values.environmentd.nodeSelector }}

misc/helm-charts/operator/values.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,11 @@ operator:
175175
support: 25cc
176176
catalogServer: 50cc
177177
analytics: 25cc
178+
defaultReplicationFactor:
179+
system: 0
180+
probe: 0
181+
support: 0
182+
analytics: 0
178183

179184
# Node selector to use for the operator pod
180185
nodeSelector: {}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// Copyright Materialize, Inc. and contributors. All rights reserved.
2+
//
3+
// Use of this software is governed by the Business Source License
4+
// included in the LICENSE file.
5+
//
6+
// As of the Change Date specified in that file, in accordance with
7+
// the Business Source License, use of this software will be governed
8+
// by the Apache License, Version 2.0.
9+
10+
//! Types for bootstrap builtin cluster configuration.
11+
12+
#[derive(Debug, Clone)]
13+
pub struct BootstrapBuiltinClusterConfig {
14+
pub size: String,
15+
pub replication_factor: u32,
16+
}
17+
18+
pub const SYSTEM_CLUSTER_DEFAULT_REPLICATION_FACTOR: u32 = 1;
19+
pub const CATALOG_SERVER_CLUSTER_DEFAULT_REPLICATION_FACTOR: u32 = 1;
20+
pub const PROBE_CLUSTER_DEFAULT_REPLICATION_FACTOR: u32 = 1;
21+
// Support and analytics clusters are ephemeral - they are only spun up temporarily when needed.
22+
// Since they are short-lived, they don't need replication by default.
23+
pub const SUPPORT_CLUSTER_DEFAULT_REPLICATION_FACTOR: u32 = 0;
24+
pub const ANALYTICS_CLUSTER_DEFAULT_REPLICATION_FACTOR: u32 = 0;

src/adapter-types/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
//! Types for the adapter.
1111
12+
pub mod bootstrap_builtin_cluster_config;
1213
pub mod compaction;
1314
pub mod connection;
1415
pub mod dyncfgs;

src/adapter/src/catalog.rs

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,11 @@ use std::sync::Arc;
1919
use futures::future::BoxFuture;
2020
use futures::{Future, FutureExt};
2121
use itertools::Itertools;
22+
use mz_adapter_types::bootstrap_builtin_cluster_config::{
23+
BootstrapBuiltinClusterConfig, ANALYTICS_CLUSTER_DEFAULT_REPLICATION_FACTOR,
24+
CATALOG_SERVER_CLUSTER_DEFAULT_REPLICATION_FACTOR, PROBE_CLUSTER_DEFAULT_REPLICATION_FACTOR,
25+
SUPPORT_CLUSTER_DEFAULT_REPLICATION_FACTOR, SYSTEM_CLUSTER_DEFAULT_REPLICATION_FACTOR,
26+
};
2227
use mz_adapter_types::connection::ConnectionId;
2328
use mz_audit_log::{EventType, FullNameV1, ObjectType, VersionedStorageUsage};
2429
use mz_build_info::DUMMY_BUILD_INFO;
@@ -690,11 +695,26 @@ impl Catalog {
690695
boot_ts: previous_ts,
691696
skip_migrations: true,
692697
cluster_replica_sizes: bootstrap_args.cluster_replica_size_map.clone(),
693-
builtin_system_cluster_replica_size: replica_size.clone(),
694-
builtin_catalog_server_cluster_replica_size: replica_size.clone(),
695-
builtin_probe_cluster_replica_size: replica_size.clone(),
696-
builtin_support_cluster_replica_size: replica_size.clone(),
697-
builtin_analytics_cluster_replica_size: replica_size.clone(),
698+
builtin_system_cluster_config: BootstrapBuiltinClusterConfig {
699+
size: replica_size.clone(),
700+
replication_factor: SYSTEM_CLUSTER_DEFAULT_REPLICATION_FACTOR,
701+
},
702+
builtin_catalog_server_cluster_config: BootstrapBuiltinClusterConfig {
703+
size: replica_size.clone(),
704+
replication_factor: CATALOG_SERVER_CLUSTER_DEFAULT_REPLICATION_FACTOR,
705+
},
706+
builtin_probe_cluster_config: BootstrapBuiltinClusterConfig {
707+
size: replica_size.clone(),
708+
replication_factor: PROBE_CLUSTER_DEFAULT_REPLICATION_FACTOR,
709+
},
710+
builtin_support_cluster_config: BootstrapBuiltinClusterConfig {
711+
size: replica_size.clone(),
712+
replication_factor: SUPPORT_CLUSTER_DEFAULT_REPLICATION_FACTOR,
713+
},
714+
builtin_analytics_cluster_config: BootstrapBuiltinClusterConfig {
715+
size: replica_size.clone(),
716+
replication_factor: ANALYTICS_CLUSTER_DEFAULT_REPLICATION_FACTOR,
717+
},
698718
system_parameter_defaults,
699719
remote_system_parameters: None,
700720
availability_zones: vec![],

src/adapter/src/catalog/open.rs

Lines changed: 53 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ use std::time::{Duration, Instant};
1717

1818
use futures::future::{BoxFuture, FutureExt};
1919
use itertools::{Either, Itertools};
20+
use mz_adapter_types::bootstrap_builtin_cluster_config::BootstrapBuiltinClusterConfig;
2021
use mz_adapter_types::dyncfgs::{ENABLE_CONTINUAL_TASK_BUILTINS, ENABLE_EXPRESSION_CACHE};
2122
use mz_catalog::builtin::{
2223
Builtin, Fingerprint, BUILTINS, BUILTIN_CLUSTERS, BUILTIN_CLUSTER_REPLICAS, BUILTIN_PREFIXES,
@@ -200,22 +201,22 @@ impl Catalog {
200201
// Add any new builtin objects and remove old ones.
201202
let (migrated_builtins, new_builtin_collections) =
202203
add_new_remove_old_builtin_items_migration(&state.config().builtins_cfg, &mut txn)?;
203-
let cluster_sizes = BuiltinBootstrapClusterSizes {
204-
system_cluster: config.builtin_system_cluster_replica_size,
205-
catalog_server_cluster: config.builtin_catalog_server_cluster_replica_size,
206-
probe_cluster: config.builtin_probe_cluster_replica_size,
207-
support_cluster: config.builtin_support_cluster_replica_size,
208-
analytics_cluster: config.builtin_analytics_cluster_replica_size,
204+
let builtin_bootstrap_cluster_config_map = BuiltinBootstrapClusterConfigMap {
205+
system_cluster: config.builtin_system_cluster_config,
206+
catalog_server_cluster: config.builtin_catalog_server_cluster_config,
207+
probe_cluster: config.builtin_probe_cluster_config,
208+
support_cluster: config.builtin_support_cluster_config,
209+
analytics_cluster: config.builtin_analytics_cluster_config,
209210
};
210211
add_new_remove_old_builtin_clusters_migration(
211212
&mut txn,
212-
&cluster_sizes,
213+
&builtin_bootstrap_cluster_config_map,
213214
&state.cluster_replica_sizes,
214215
)?;
215216
add_new_remove_old_builtin_introspection_source_migration(&mut txn)?;
216217
add_new_remove_old_builtin_cluster_replicas_migration(
217218
&mut txn,
218-
&cluster_sizes,
219+
&builtin_bootstrap_cluster_config_map,
219220
&state.cluster_replica_sizes,
220221
)?;
221222
add_new_remove_old_builtin_roles_migration(&mut txn)?;
@@ -855,7 +856,7 @@ fn add_new_remove_old_builtin_items_migration(
855856

856857
fn add_new_remove_old_builtin_clusters_migration(
857858
txn: &mut mz_catalog::durable::Transaction<'_>,
858-
builtin_cluster_sizes: &BuiltinBootstrapClusterSizes,
859+
builtin_cluster_config_map: &BuiltinBootstrapClusterConfigMap,
859860
cluster_sizes: &ClusterReplicaSizeMap,
860861
) -> Result<(), mz_catalog::durable::CatalogError> {
861862
let mut durable_clusters: BTreeMap<_, _> = txn
@@ -867,18 +868,19 @@ fn add_new_remove_old_builtin_clusters_migration(
867868
// Add new clusters.
868869
for builtin_cluster in BUILTIN_CLUSTERS {
869870
if durable_clusters.remove(builtin_cluster.name).is_none() {
870-
let cluster_size = builtin_cluster_sizes.get_size(builtin_cluster.name)?;
871-
let cluster_allocation = cluster_sizes.get_allocation_by_name(&cluster_size)?;
871+
let cluster_config = builtin_cluster_config_map.get_config(builtin_cluster.name)?;
872+
let cluster_allocation = cluster_sizes.get_allocation_by_name(&cluster_config.size)?;
873+
872874
txn.insert_system_cluster(
873875
builtin_cluster.name,
874876
vec![],
875877
builtin_cluster.privileges.to_vec(),
876878
builtin_cluster.owner_id.to_owned(),
877879
mz_catalog::durable::ClusterConfig {
878880
variant: mz_catalog::durable::ClusterVariant::Managed(ClusterVariantManaged {
879-
size: cluster_size,
881+
size: cluster_config.size,
880882
availability_zones: vec![],
881-
replication_factor: builtin_cluster.replication_factor,
883+
replication_factor: cluster_config.replication_factor,
882884
disk: cluster_allocation.is_cc,
883885
logging: default_logging_config(),
884886
optimizer_feature_overrides: Default::default(),
@@ -968,7 +970,7 @@ fn add_new_remove_old_builtin_roles_migration(
968970

969971
fn add_new_remove_old_builtin_cluster_replicas_migration(
970972
txn: &mut Transaction<'_>,
971-
builtin_cluster_sizes: &BuiltinBootstrapClusterSizes,
973+
builtin_cluster_config_map: &BuiltinBootstrapClusterConfigMap,
972974
cluster_sizes: &ClusterReplicaSizeMap,
973975
) -> Result<(), AdapterError> {
974976
let cluster_lookup: BTreeMap<_, _> = txn
@@ -996,12 +998,18 @@ fn add_new_remove_old_builtin_cluster_replicas_migration(
996998
let replica_names = durable_replicas
997999
.get_mut(&cluster.id)
9981000
.unwrap_or(&mut empty_map);
999-
if replica_names.remove(builtin_replica.name).is_none() {
1001+
1002+
let builtin_cluster_boostrap_config =
1003+
builtin_cluster_config_map.get_config(builtin_replica.cluster_name)?;
1004+
if replica_names.remove(builtin_replica.name).is_none()
1005+
// NOTE(SangJunBak): We need to explicitly check the replication factor because
1006+
// BUILTIN_CLUSTER_REPLICAS is constant throughout all deployments but the replication
1007+
// factor is configurable on bootstrap.
1008+
&& builtin_cluster_boostrap_config.replication_factor > 0
1009+
{
10001010
let replica_size = match cluster.config.variant {
10011011
ClusterVariant::Managed(ClusterVariantManaged { ref size, .. }) => size.clone(),
1002-
ClusterVariant::Unmanaged => {
1003-
builtin_cluster_sizes.get_size(builtin_replica.cluster_name)?
1004-
}
1012+
ClusterVariant::Unmanaged => builtin_cluster_boostrap_config.size,
10051013
};
10061014
let replica_allocation = cluster_sizes.get_allocation_by_name(&replica_size)?;
10071015

@@ -1115,37 +1123,43 @@ fn default_logging_config() -> ReplicaLogging {
11151123
interval: Some(Duration::from_secs(1)),
11161124
}
11171125
}
1118-
pub struct BuiltinBootstrapClusterSizes {
1119-
/// Size to default system_cluster on bootstrap
1120-
pub system_cluster: String,
1121-
/// Size to default catalog_server_cluster on bootstrap
1122-
pub catalog_server_cluster: String,
1123-
/// Size to default probe_cluster on bootstrap
1124-
pub probe_cluster: String,
1125-
/// Size to default support_cluster on bootstrap
1126-
pub support_cluster: String,
1126+
1127+
#[derive(Debug)]
1128+
pub struct BuiltinBootstrapClusterConfigMap {
1129+
/// Size and replication factor to default system_cluster on bootstrap
1130+
pub system_cluster: BootstrapBuiltinClusterConfig,
1131+
/// Size and replication factor to default catalog_server_cluster on bootstrap
1132+
pub catalog_server_cluster: BootstrapBuiltinClusterConfig,
1133+
/// Size and replication factor to default probe_cluster on bootstrap
1134+
pub probe_cluster: BootstrapBuiltinClusterConfig,
1135+
/// Size and replication factor to default support_cluster on bootstrap
1136+
pub support_cluster: BootstrapBuiltinClusterConfig,
11271137
/// Size and replication factor to default analytics_cluster on bootstrap
1128-
pub analytics_cluster: String,
1138+
pub analytics_cluster: BootstrapBuiltinClusterConfig,
11291139
}
11301140

1131-
impl BuiltinBootstrapClusterSizes {
1141+
impl BuiltinBootstrapClusterConfigMap {
11321142
/// Gets the bootstrap config (size and replication factor) of the builtin cluster based on the provided name
1133-
fn get_size(&self, cluster_name: &str) -> Result<String, mz_catalog::durable::CatalogError> {
1134-
if cluster_name == mz_catalog::builtin::MZ_SYSTEM_CLUSTER.name {
1135-
Ok(self.system_cluster.clone())
1143+
fn get_config(
1144+
&self,
1145+
cluster_name: &str,
1146+
) -> Result<BootstrapBuiltinClusterConfig, mz_catalog::durable::CatalogError> {
1147+
let cluster_config = if cluster_name == mz_catalog::builtin::MZ_SYSTEM_CLUSTER.name {
1148+
&self.system_cluster
11361149
} else if cluster_name == mz_catalog::builtin::MZ_CATALOG_SERVER_CLUSTER.name {
1137-
Ok(self.catalog_server_cluster.clone())
1150+
&self.catalog_server_cluster
11381151
} else if cluster_name == mz_catalog::builtin::MZ_PROBE_CLUSTER.name {
1139-
Ok(self.probe_cluster.clone())
1152+
&self.probe_cluster
11401153
} else if cluster_name == mz_catalog::builtin::MZ_SUPPORT_CLUSTER.name {
1141-
Ok(self.support_cluster.clone())
1154+
&self.support_cluster
11421155
} else if cluster_name == mz_catalog::builtin::MZ_ANALYTICS_CLUSTER.name {
1143-
Ok(self.analytics_cluster.clone())
1156+
&self.analytics_cluster
11441157
} else {
1145-
Err(mz_catalog::durable::CatalogError::Catalog(
1158+
return Err(mz_catalog::durable::CatalogError::Catalog(
11461159
SqlCatalogError::UnexpectedBuiltinCluster(cluster_name.to_owned()),
1147-
))
1148-
}
1160+
));
1161+
};
1162+
Ok(cluster_config.clone())
11491163
}
11501164
}
11511165

src/adapter/src/coord.rs

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ use futures::StreamExt;
8888
use http::Uri;
8989
use ipnet::IpNet;
9090
use itertools::{Either, Itertools};
91+
use mz_adapter_types::bootstrap_builtin_cluster_config::BootstrapBuiltinClusterConfig;
9192
use mz_adapter_types::compaction::CompactionWindow;
9293
use mz_adapter_types::connection::ConnectionId;
9394
use mz_adapter_types::dyncfgs::WITH_0DT_DEPLOYMENT_CAUGHT_UP_CHECK_INTERVAL;
@@ -999,11 +1000,11 @@ pub struct Config {
9991000
pub cloud_resource_controller: Option<Arc<dyn CloudResourceController>>,
10001001
pub availability_zones: Vec<String>,
10011002
pub cluster_replica_sizes: ClusterReplicaSizeMap,
1002-
pub builtin_system_cluster_replica_size: String,
1003-
pub builtin_catalog_server_cluster_replica_size: String,
1004-
pub builtin_probe_cluster_replica_size: String,
1005-
pub builtin_support_cluster_replica_size: String,
1006-
pub builtin_analytics_cluster_replica_size: String,
1003+
pub builtin_system_cluster_config: BootstrapBuiltinClusterConfig,
1004+
pub builtin_catalog_server_cluster_config: BootstrapBuiltinClusterConfig,
1005+
pub builtin_probe_cluster_config: BootstrapBuiltinClusterConfig,
1006+
pub builtin_support_cluster_config: BootstrapBuiltinClusterConfig,
1007+
pub builtin_analytics_cluster_config: BootstrapBuiltinClusterConfig,
10071008
pub system_parameter_defaults: BTreeMap<String, String>,
10081009
pub storage_usage_client: StorageUsageClient,
10091010
pub storage_usage_collection_interval: Duration,
@@ -3888,11 +3889,11 @@ pub fn serve(
38883889
secrets_controller,
38893890
cloud_resource_controller,
38903891
cluster_replica_sizes,
3891-
builtin_system_cluster_replica_size,
3892-
builtin_catalog_server_cluster_replica_size,
3893-
builtin_probe_cluster_replica_size,
3894-
builtin_support_cluster_replica_size,
3895-
builtin_analytics_cluster_replica_size,
3892+
builtin_system_cluster_config,
3893+
builtin_catalog_server_cluster_config,
3894+
builtin_probe_cluster_config,
3895+
builtin_support_cluster_config,
3896+
builtin_analytics_cluster_config,
38963897
system_parameter_defaults,
38973898
availability_zones,
38983899
storage_usage_client,
@@ -4041,11 +4042,11 @@ pub fn serve(
40414042
boot_ts: boot_ts.clone(),
40424043
skip_migrations: false,
40434044
cluster_replica_sizes,
4044-
builtin_system_cluster_replica_size,
4045-
builtin_catalog_server_cluster_replica_size,
4046-
builtin_probe_cluster_replica_size,
4047-
builtin_support_cluster_replica_size,
4048-
builtin_analytics_cluster_replica_size,
4045+
builtin_system_cluster_config,
4046+
builtin_catalog_server_cluster_config,
4047+
builtin_probe_cluster_config,
4048+
builtin_support_cluster_config,
4049+
builtin_analytics_cluster_config,
40494050
system_parameter_defaults,
40504051
remote_system_parameters,
40514052
availability_zones,

src/catalog-debug/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ rust_binary(
3232
version = "0.133.0-dev.0",
3333
deps = [
3434
"//src/adapter:mz_adapter",
35+
"//src/adapter-types:mz_adapter_types",
3536
"//src/build-info:mz_build_info",
3637
"//src/catalog:mz_catalog",
3738
"//src/cloud-resources:mz_cloud_resources",

0 commit comments

Comments
 (0)