From 13a87101104b958a3988b73cb0936512a818b97b Mon Sep 17 00:00:00 2001 From: Sang Jun Bak Date: Mon, 1 Dec 2025 18:34:37 -0500 Subject: [PATCH 1/2] feat: add upgrade window check for environmentd image ref This commit introduces a new method to check if the current environmentd image reference is within the upgrade window of the last successful rollout. We default the image ref to the current version if status is not set. --- src/cloud-resources/src/crd/materialize.rs | 90 +++++++++++++++++++ .../src/controller/materialize.rs | 47 +++++++++- 2 files changed, 136 insertions(+), 1 deletion(-) diff --git a/src/cloud-resources/src/crd/materialize.rs b/src/cloud-resources/src/crd/materialize.rs index 8f617a5736553..eafb5ba6a8c34 100644 --- a/src/cloud-resources/src/crd/materialize.rs +++ b/src/cloud-resources/src/crd/materialize.rs @@ -30,6 +30,8 @@ use crate::crd::generated::cert_manager::certificates::{ CertificateIssuerRef, CertificateSecretTemplate, }; +const V26_0_0: Version = Version::new(26, 0, 0); + pub const LAST_KNOWN_ACTIVE_GENERATION_ANNOTATION: &str = "materialize.cloud/last-known-active-generation"; @@ -479,6 +481,38 @@ pub mod v1alpha1 { } } + /// Checks if the current environmentd image ref is within the upgrade window of the last + /// successful rollout. + /// + /// This check isn't strictly required since environmentd will still be able to determine + /// if the upgrade is allowed or not. However, doing this check allows us to provide + /// the error as soon as possible and in a more user friendly way. 
+ pub fn within_upgrade_window(&self) -> bool { + let current_environmentd_version = self + .status + .as_ref() + .and_then(|status| { + status + .last_completed_rollout_environmentd_image_ref + .as_ref() + }) + .and_then(|image_ref| parse_image_ref(image_ref)); + + if let (Some(new_environmentd_version), Some(current_environmentd_version)) = ( + parse_image_ref(&self.spec.environmentd_image_ref), + current_environmentd_version, + ) { + if current_environmentd_version >= V26_0_0 { + // We deny upgrades past 1 major version of the last successful rollout + return new_environmentd_version.major + <= current_environmentd_version.major + 1; + } + } + // If we fail any of the preconditions for the check (e.g. we couldn't parse either version), + // we still allow the upgrade since environmentd will still error if the upgrade is not allowed. + true + } + pub fn managed_resource_meta(&self, name: String) -> ObjectMeta { ObjectMeta { namespace: Some(self.namespace()), @@ -516,6 +550,11 @@ pub mod v1alpha1 { .expect("valid int generation"); } + // Initialize the last completed rollout environmentd image ref to + // the current image ref if not already set. + status.last_completed_rollout_environmentd_image_ref = + Some(self.spec.environmentd_image_ref.clone()); + status }) } @@ -530,6 +569,10 @@ pub mod v1alpha1 { pub active_generation: u64, /// The UUID of the last successfully completed rollout. pub last_completed_rollout_request: Uuid, + /// The image ref of the environmentd image that was last successfully rolled out. + /// Used to deny upgrades past 1 major version from the last successful rollout. + /// When None, we upgrade anyway. + pub last_completed_rollout_environmentd_image_ref: Option<String>, /// A hash calculated from the spec of resources to be created based on this Materialize /// spec. This is used for detecting when the existing resources are up to date. 
/// If you want to trigger a rollout without making other changes that would cause this @@ -630,4 +673,51 @@ mod tests { mz.spec.environmentd_image_ref = "my.private.registry:5000:v0.33.3".to_owned(); assert!(!mz.meets_minimum_version(&Version::parse("0.34.0").unwrap())); } + + #[mz_ore::test] + fn within_upgrade_window() { + use super::v1alpha1::MaterializeStatus; + + let mut mz = Materialize { + spec: MaterializeSpec { + environmentd_image_ref: "materialize/environmentd:v26.0.0".to_owned(), + ..Default::default() + }, + metadata: ObjectMeta { + ..Default::default() + }, + status: Some(MaterializeStatus { + last_completed_rollout_environmentd_image_ref: Some( + "materialize/environmentd:v26.0.0".to_owned(), + ), + ..Default::default() + }), + }; + + // Pass: upgrading from 26.0.0 to 27.7.3 (within 1 major version) + mz.spec.environmentd_image_ref = "materialize/environmentd:v27.7.3".to_owned(); + assert!(mz.within_upgrade_window()); + + // Pass: upgrading from 26.0.0 to 27.7.8-dev.0 (within 1 major version, pre-release) + mz.spec.environmentd_image_ref = "materialize/environmentd:v27.7.8-dev.0".to_owned(); + assert!(mz.within_upgrade_window()); + + // Fail: upgrading from 26.0.0 to 28.0.1 (more than 1 major version) + mz.spec.environmentd_image_ref = "materialize/environmentd:v28.0.1".to_owned(); + assert!(!mz.within_upgrade_window()); + + // Pass: upgrading from 26.0.0 to 28.0.1.not_a_valid_version (invalid version, defaults to true) + mz.spec.environmentd_image_ref = + "materialize/environmentd:v28.0.1.not_a_valid_version".to_owned(); + assert!(mz.within_upgrade_window()); + + // Pass: upgrading from 0.147.5 to 26.1.0 (any version before 26.0.0 passes) + mz.status + .as_mut() + .unwrap() + .last_completed_rollout_environmentd_image_ref = + Some("materialize/environmentd:v0.147.5".to_owned()); + mz.spec.environmentd_image_ref = "materialize/environmentd:v26.1.0".to_owned(); + assert!(mz.within_upgrade_window()); + } } diff --git 
a/src/orchestratord/src/controller/materialize.rs b/src/orchestratord/src/controller/materialize.rs index 3cab752159c51..2163e3b159536 100644 --- a/src/orchestratord/src/controller/materialize.rs +++ b/src/orchestratord/src/controller/materialize.rs @@ -360,6 +360,9 @@ impl Context { MaterializeStatus { active_generation: desired_generation, last_completed_rollout_request: mz.requested_reconciliation_id(), + last_completed_rollout_environmentd_image_ref: Some( + mz.spec.environmentd_image_ref.clone(), + ), resource_id: mz.status().resource_id, resources_hash, conditions: vec![Condition { @@ -591,7 +594,7 @@ impl k8s_controller::Context for Context { ) .await } - // There are changes pending, and we want to appy them. + // There are changes pending, and we want to apply them. (false, true, true) => { // we remove the environment resources hash annotation here // because if we fail halfway through applying the resources, @@ -615,6 +618,8 @@ impl k8s_controller::Context for Context { // we fail later on, we want to ensure that the // rollout gets retried. 
last_completed_rollout_request: status.last_completed_rollout_request, + last_completed_rollout_environmentd_image_ref: status + .last_completed_rollout_environmentd_image_ref, resource_id: status.resource_id, resources_hash: String::new(), conditions: vec![Condition { @@ -634,6 +639,38 @@ impl k8s_controller::Context for Context { let mz = &mz; let status = mz.status(); + if !mz.within_upgrade_window() { + let last_completed_rollout_environmentd_image_ref = + status.last_completed_rollout_environmentd_image_ref; + + self.update_status( + &mz_api, + mz, + MaterializeStatus { + active_generation, + last_completed_rollout_request: status.last_completed_rollout_request, + last_completed_rollout_environmentd_image_ref: last_completed_rollout_environmentd_image_ref.clone(), + resource_id: status.resource_id, + resources_hash: status.resources_hash, + conditions: vec![Condition { + type_: "UpToDate".into(), + status: "False".into(), + last_transition_time: Time(chrono::offset::Utc::now()), + message: format!( + "Refusing to upgrade from {} to {}. More than one major version from last successful rollout.", + last_completed_rollout_environmentd_image_ref.expect("should be set if upgrade window check fails"), + &mz.spec.environmentd_image_ref, + ), + observed_generation: mz.meta().generation, + reason: "FailedDeploy".into(), + }], + }, + active_generation != desired_generation, + ) + .await?; + return Ok(None); + } + if mz.spec.rollout_strategy == MaterializeRolloutStrategy::ImmediatelyPromoteCausingDowntime { @@ -673,6 +710,8 @@ impl k8s_controller::Context for Context { // rollout gets retried. 
last_completed_rollout_request: status .last_completed_rollout_request, + last_completed_rollout_environmentd_image_ref: status + .last_completed_rollout_environmentd_image_ref, resource_id: status.resource_id, resources_hash: resources_hash.clone(), conditions: vec![Condition { @@ -710,6 +749,8 @@ impl k8s_controller::Context for Context { // the rollout and we want to ensure it gets // retried. last_completed_rollout_request: status.last_completed_rollout_request, + last_completed_rollout_environmentd_image_ref: status + .last_completed_rollout_environmentd_image_ref, resource_id: status.resource_id, resources_hash: status.resources_hash, conditions: vec![Condition { @@ -746,6 +787,8 @@ impl k8s_controller::Context for Context { MaterializeStatus { active_generation, last_completed_rollout_request: mz.requested_reconciliation_id(), + last_completed_rollout_environmentd_image_ref: status + .last_completed_rollout_environmentd_image_ref, resource_id: status.resource_id, resources_hash: status.resources_hash, conditions: vec![Condition { @@ -786,6 +829,8 @@ impl k8s_controller::Context for Context { MaterializeStatus { active_generation, last_completed_rollout_request: mz.requested_reconciliation_id(), + last_completed_rollout_environmentd_image_ref: status + .last_completed_rollout_environmentd_image_ref, resource_id: status.resource_id, resources_hash: status.resources_hash, conditions: vec![Condition { From 96da51f4e4b4659770b07adb203951f3c1d43785 Mon Sep 17 00:00:00 2001 From: Sang Jun Bak Date: Wed, 3 Dec 2025 12:38:41 -0500 Subject: [PATCH 2/2] Unify `is_valid_upgrade_version` from Cloud The plan is to use this public static function in the Cloud repo. Borrows most of the same logic, but allows upgrades from/to dev versions. 
--- src/cloud-resources/src/crd/materialize.rs | 105 +++++++++++++++++---- 1 file changed, 87 insertions(+), 18 deletions(-) diff --git a/src/cloud-resources/src/crd/materialize.rs b/src/cloud-resources/src/crd/materialize.rs index eafb5ba6a8c34..f0447109d72f4 100644 --- a/src/cloud-resources/src/crd/materialize.rs +++ b/src/cloud-resources/src/crd/materialize.rs @@ -30,8 +30,6 @@ use crate::crd::generated::cert_manager::certificates::{ CertificateIssuerRef, CertificateSecretTemplate, }; -const V26_0_0: Version = Version::new(26, 0, 0); - pub const LAST_KNOWN_ACTIVE_GENERATION_ANNOTATION: &str = "materialize.cloud/last-known-active-generation"; @@ -481,14 +479,47 @@ pub mod v1alpha1 { } } - /// Checks if the current environmentd image ref is within the upgrade window of the last - /// successful rollout. - /// /// This check isn't strictly required since environmentd will still be able to determine /// if the upgrade is allowed or not. However, doing this check allows us to provide /// the error as soon as possible and in a more user friendly way. 
+ pub fn is_valid_upgrade_version(active_version: &Version, next_version: &Version) -> bool { + // Don't allow rolling back + // Note: semver comparison handles RC versions correctly: + // v26.0.0-rc.1 < v26.0.0-rc.2 < v26.0.0 + if next_version < active_version { + return false; + } + + if active_version.major == 0 { + // Self managed 25.2 to 26.0 + if next_version.major != active_version.major { + if next_version.major == 26 + && active_version.major == 0 + && active_version.minor == 164 + { + return true; + } else { + return false; + } + } + // Self managed 25.1 to 25.2 + if next_version.minor == 147 && active_version.minor == 130 { + return true; + } + // only allow upgrading a single minor version at a time + return next_version.minor <= active_version.minor + 1; + } else if active_version.major >= 26 { + // For versions 26.X.X and onwards, we deny upgrades past 1 major version of the active version + return next_version.major <= active_version.major + 1; + } + + true + } + + /// Checks if the current environmentd image ref is within the upgrade window of the last + /// successful rollout. 
pub fn within_upgrade_window(&self) -> bool { - let current_environmentd_version = self + let active_environmentd_version = self .status .as_ref() .and_then(|status| { @@ -498,19 +529,19 @@ pub mod v1alpha1 { }) .and_then(|image_ref| parse_image_ref(image_ref)); - if let (Some(new_environmentd_version), Some(current_environmentd_version)) = ( + if let (Some(next_environmentd_version), Some(active_environmentd_version)) = ( parse_image_ref(&self.spec.environmentd_image_ref), - current_environmentd_version, + active_environmentd_version, ) { - if current_environmentd_version >= V26_0_0 { - // We deny upgrades past 1 major version of the last successful rollout - return new_environmentd_version.major - <= current_environmentd_version.major + 1; - } + Self::is_valid_upgrade_version( + &active_environmentd_version, + &next_environmentd_version, + ) + } else { + // If we fail to parse either version, + // we still allow the upgrade since environmentd will still error if the upgrade is not allowed. + true } - // If we fail any of the preconditions for the check (e.g. we couldn't parse either version), - // we still allow the upgrade since environmentd will still error if the upgrade is not allowed. 
- true } pub fn managed_resource_meta(&self, name: String) -> ObjectMeta { @@ -711,13 +742,51 @@ mod tests { "materialize/environmentd:v28.0.1.not_a_valid_version".to_owned(); assert!(mz.within_upgrade_window()); - // Pass: upgrading from 0.147.5 to 26.1.0 (any version before 26.0.0 passes) + // Pass: upgrading from 0.164.0 to 26.1.0 (self managed 25.2 to 26.0) mz.status .as_mut() .unwrap() .last_completed_rollout_environmentd_image_ref = - Some("materialize/environmentd:v0.147.5".to_owned()); + Some("materialize/environmentd:v0.164.0".to_owned()); mz.spec.environmentd_image_ref = "materialize/environmentd:v26.1.0".to_owned(); assert!(mz.within_upgrade_window()); } + + #[mz_ore::test] + fn is_valid_upgrade_version() { + let success_tests = [ + (Version::new(0, 83, 0), Version::new(0, 83, 0)), + (Version::new(0, 83, 0), Version::new(0, 84, 0)), + (Version::new(0, 9, 0), Version::new(0, 10, 0)), + (Version::new(0, 99, 0), Version::new(0, 100, 0)), + (Version::new(0, 83, 0), Version::new(0, 83, 1)), + (Version::new(0, 83, 0), Version::new(0, 83, 2)), + (Version::new(0, 83, 2), Version::new(0, 83, 10)), + (Version::new(0, 164, 0), Version::new(26, 0, 0)), + (Version::new(26, 0, 0), Version::new(26, 1, 0)), + (Version::new(26, 5, 3), Version::new(26, 10, 0)), + (Version::new(0, 130, 0), Version::new(0, 147, 0)), + ]; + for (active_version, next_version) in success_tests { + assert!( + Materialize::is_valid_upgrade_version(&active_version, &next_version), + "v{active_version} can upgrade to v{next_version}" + ); + } + + let failure_tests = [ + (Version::new(0, 83, 0), Version::new(0, 82, 0)), + (Version::new(0, 83, 3), Version::new(0, 83, 2)), + (Version::new(0, 83, 3), Version::new(1, 83, 3)), + (Version::new(0, 83, 0), Version::new(0, 85, 0)), + (Version::new(26, 0, 0), Version::new(28, 0, 0)), + (Version::new(0, 130, 0), Version::new(26, 1, 0)), + ]; + for (active_version, next_version) in failure_tests { + assert!( + 
!Materialize::is_valid_upgrade_version(&active_version, &next_version), + "v{active_version} can't upgrade to v{next_version}" + ); + } + } }