diff --git a/CHANGELOG.md b/CHANGELOG.md index b90aaca8..e4a76893 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,13 @@ All notable changes to this project will be documented in this file. ## [Unreleased] +### Added + +- The lifetime of auto-generated TLS certificates is now configurable with the role and roleGroup + config property `requestedSecretLifetime`. This helps reduce frequent Pod restarts ([#501]). + +[#501]: https://github.com/stackabletech/spark-k8s-operator/pull/501 + ## [24.11.0] - 2024-11-18 ### Added diff --git a/Cargo.lock b/Cargo.lock index 6089d012..f7cadfed 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -478,17 +478,6 @@ dependencies = [ "powerfmt", ] -[[package]] -name = "derivative" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcc3dd5e9e9c0b295d6e1e4d811fb6f157d5ffd784b8d202fc62eac8035a770b" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "digest" version = "0.10.7" @@ -1830,9 +1819,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.15" +version = "0.23.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fbb44d7acc4e873d613422379f69f237a1b141928c02f6bc6ccfddddc2d7993" +checksum = "934b404430bb06b3fae2cba809eb45a1ab1aecd64491213d7c3301b88393f8d1" dependencies = [ "log", "once_cell", @@ -2172,15 +2161,15 @@ checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" [[package]] name = "stackable-operator" -version = "0.82.0" -source = "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.82.0#415bbd031bd52e9c0c5392060235030e9930b46b" +version = "0.83.0" +source = "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.83.0#3ce7bcbdb58097cde0c0f19488a104c96f69dbc3" dependencies = [ "chrono", "clap", "const_format", "delegate", - "derivative", "dockerfile-parser", + "educe", "either", "futures 0.3.31", "indexmap", @@ -2211,7 +2200,7 @@ 
dependencies = [ [[package]] name = "stackable-operator-derive" version = "0.3.1" -source = "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.82.0#415bbd031bd52e9c0c5392060235030e9930b46b" +source = "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.83.0#3ce7bcbdb58097cde0c0f19488a104c96f69dbc3" dependencies = [ "darling", "proc-macro2", @@ -2222,7 +2211,7 @@ dependencies = [ [[package]] name = "stackable-shared" version = "0.0.1" -source = "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.82.0#415bbd031bd52e9c0c5392060235030e9930b46b" +source = "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.83.0#3ce7bcbdb58097cde0c0f19488a104c96f69dbc3" dependencies = [ "kube", "semver", diff --git a/Cargo.toml b/Cargo.toml index bef82dfe..f281fd7c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,7 @@ serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" serde_yaml = "0.9" snafu = "0.8" -stackable-operator = { git = "https://github.com/stackabletech/operator-rs.git", tag = "stackable-operator-0.82.0" } +stackable-operator = { git = "https://github.com/stackabletech/operator-rs.git", tag = "stackable-operator-0.83.0" } strum = { version = "0.26", features = ["derive"] } tokio = { version = "1.39", features = ["full"] } tracing = "0.1" diff --git a/deploy/helm/spark-k8s-operator/crds/crds.yaml b/deploy/helm/spark-k8s-operator/crds/crds.yaml index 949f0431..5f81b170 100644 --- a/deploy/helm/spark-k8s-operator/crds/crds.yaml +++ b/deploy/helm/spark-k8s-operator/crds/crds.yaml @@ -193,6 +193,10 @@ spec: nullable: true type: boolean type: object + requestedSecretLifetime: + description: Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. This can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. 
+ nullable: true + type: string resources: default: cpu: @@ -463,6 +467,10 @@ spec: nullable: true type: boolean type: object + requestedSecretLifetime: + description: Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. This can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. + nullable: true + type: string resources: default: cpu: @@ -549,6 +557,10 @@ spec: config: default: {} properties: + requestedSecretLifetime: + description: Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. This can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. + nullable: true + type: string resources: default: cpu: @@ -1347,6 +1359,10 @@ spec: nullable: true type: boolean type: object + requestedSecretLifetime: + description: Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. This can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. + nullable: true + type: string resources: default: cpu: @@ -1563,6 +1579,10 @@ spec: nullable: true type: boolean type: object + requestedSecretLifetime: + description: Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. This can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. 
+ nullable: true + type: string resources: default: cpu: diff --git a/rust/crd/src/history.rs b/rust/crd/src/history.rs index 371cc502..0651ece5 100644 --- a/rust/crd/src/history.rs +++ b/rust/crd/src/history.rs @@ -27,6 +27,7 @@ use stackable_operator::{ product_logging::{self, spec::Logging}, role_utils::{Role, RoleGroup, RoleGroupRef}, schemars::{self, JsonSchema}, + time::Duration, }; use std::collections::{BTreeMap, HashMap}; use strum::{Display, EnumIter}; @@ -400,9 +401,17 @@ pub struct HistoryConfig { pub logging: Logging, #[fragment_attrs(serde(default))] pub affinity: StackableAffinity, + + /// Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. + /// This can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. + #[fragment_attrs(serde(default))] + pub requested_secret_lifetime: Option, } impl HistoryConfig { + // Auto TLS certificate lifetime + const DEFAULT_HISTORY_SECRET_LIFETIME: Duration = Duration::from_days_unchecked(7); + fn default_config(cluster_name: &str) -> HistoryConfigFragment { HistoryConfigFragment { cleaner: None, @@ -419,6 +428,7 @@ impl HistoryConfig { }, logging: product_logging::spec::default_logging(), affinity: history_affinity(cluster_name), + requested_secret_lifetime: Some(Self::DEFAULT_HISTORY_SECRET_LIFETIME), } } } diff --git a/rust/crd/src/lib.rs b/rust/crd/src/lib.rs index 76c94e40..bd7e6e2e 100644 --- a/rust/crd/src/lib.rs +++ b/rust/crd/src/lib.rs @@ -14,6 +14,7 @@ use logdir::ResolvedLogDir; use product_config::{types::PropertyNameKind, ProductConfigManager}; use serde::{Deserialize, Serialize}; use snafu::{OptionExt, ResultExt, Snafu}; +use stackable_operator::time::Duration; use stackable_operator::{ builder::pod::volume::{ SecretFormat, SecretOperatorVolumeSourceBuilder, SecretOperatorVolumeSourceBuilderError, @@ -281,6 +282,7 @@ impl SparkApplication { s3conn: &Option, logdir: &Option, log_config_map: 
Option<&str>, + requested_secret_lifetime: &Duration, ) -> Result, Error> { let mut result: Vec = self.spec.volumes.clone(); @@ -356,6 +358,7 @@ impl SparkApplication { .ephemeral( SecretOperatorVolumeSourceBuilder::new(cert_secret) .with_format(SecretFormat::TlsPkcs12) + .with_auto_tls_cert_lifetime(*requested_secret_lifetime) .build() .context(TlsCertSecretClassVolumeBuildSnafu)?, ) @@ -1068,6 +1071,7 @@ mod tests { use indoc::indoc; use rstest::rstest; + use stackable_operator::time::Duration; use std::collections::{BTreeMap, HashMap}; #[test] @@ -1206,6 +1210,7 @@ mod tests { }, volume_mounts: Default::default(), affinity: StackableAffinity::default(), + requested_secret_lifetime: Some(Duration::from_days_unchecked(1)), }; let mut props = BTreeMap::new(); @@ -1250,6 +1255,7 @@ mod tests { }, volume_mounts: Default::default(), affinity: StackableAffinity::default(), + requested_secret_lifetime: Some(Duration::from_days_unchecked(1)), }; let mut props = BTreeMap::new(); diff --git a/rust/crd/src/logdir.rs b/rust/crd/src/logdir.rs index 6aea7deb..502466ec 100644 --- a/rust/crd/src/logdir.rs +++ b/rust/crd/src/logdir.rs @@ -16,6 +16,7 @@ use stackable_operator::{ secret_class::SecretClassVolume, }, k8s_openapi::api::core::v1::{Volume, VolumeMount}, + time::Duration, }; use std::collections::BTreeMap; @@ -103,9 +104,9 @@ impl ResolvedLogDir { } } - pub fn volumes(&self) -> Result, Error> { + pub fn volumes(&self, requested_secret_lifetime: &Duration) -> Result, Error> { match self { - ResolvedLogDir::S3(s3_log_dir) => s3_log_dir.volumes(), + ResolvedLogDir::S3(s3_log_dir) => s3_log_dir.volumes(requested_secret_lifetime), ResolvedLogDir::Custom(_) => Ok(vec![]), } } @@ -248,7 +249,7 @@ impl S3LogDir { ) } - pub fn volumes(&self) -> Result, Error> { + pub fn volumes(&self, requested_secret_lifetime: &Duration) -> Result, Error> { let mut volumes: Vec = self.credentials_volume()?.into_iter().collect(); if let Some(secret_name) = 
tlscerts::tls_secret_name(&self.bucket.connection) { @@ -257,6 +258,7 @@ impl S3LogDir { .ephemeral( SecretOperatorVolumeSourceBuilder::new(secret_name) .with_format(SecretFormat::TlsPkcs12) + .with_auto_tls_cert_lifetime(*requested_secret_lifetime) .build() .context(TlsCertSecretClassVolumeBuildSnafu)?, ) diff --git a/rust/crd/src/roles.rs b/rust/crd/src/roles.rs index ef4fd052..77c3d57b 100644 --- a/rust/crd/src/roles.rs +++ b/rust/crd/src/roles.rs @@ -36,6 +36,7 @@ use stackable_operator::{ product_config_utils::Configuration, product_logging::{self, spec::Logging}, schemars::{self, JsonSchema}, + time::Duration, utils::crds::raw_object_list_schema, }; use strum::{Display, EnumIter}; @@ -123,9 +124,17 @@ pub struct RoleConfig { #[fragment_attrs(serde(default))] pub affinity: StackableAffinity, + + /// Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. + /// This can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. + #[fragment_attrs(serde(default))] + pub requested_secret_lifetime: Option, } impl RoleConfig { + // Auto TLS certificate lifetime + const DEFAULT_SECRET_LIFETIME: Duration = Duration::from_days_unchecked(7); + pub fn default_config() -> RoleConfigFragment { RoleConfigFragment { resources: ResourcesFragment { @@ -142,6 +151,7 @@ impl RoleConfig { logging: product_logging::spec::default_logging(), volume_mounts: Some(VolumeMounts::default()), affinity: Default::default(), + requested_secret_lifetime: Some(Self::DEFAULT_SECRET_LIFETIME), } } pub fn volume_mounts( @@ -206,9 +216,17 @@ pub struct SubmitConfig { pub resources: Resources, #[fragment_attrs(serde(default, flatten))] pub volume_mounts: Option, + + /// Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. + /// This can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate. 
+ #[fragment_attrs(serde(default))] + pub requested_secret_lifetime: Option, } impl SubmitConfig { + // Auto TLS certificate lifetime + const DEFAULT_SECRET_LIFETIME: Duration = Duration::from_days_unchecked(7); + pub fn default_config() -> SubmitConfigFragment { SubmitConfigFragment { resources: ResourcesFragment { @@ -223,6 +241,7 @@ impl SubmitConfig { storage: SparkStorageConfigFragment {}, }, volume_mounts: Some(VolumeMounts::default()), + requested_secret_lifetime: Some(Self::DEFAULT_SECRET_LIFETIME), } } } diff --git a/rust/operator-binary/src/history/history_controller.rs b/rust/operator-binary/src/history/history_controller.rs index 04dce3b8..5149bf4d 100644 --- a/rust/operator-binary/src/history/history_controller.rs +++ b/rust/operator-binary/src/history/history_controller.rs @@ -65,6 +65,9 @@ use strum::{EnumDiscriminants, IntoStaticStr}; #[strum_discriminants(derive(IntoStaticStr))] #[allow(clippy::enum_variant_names)] pub enum Error { + #[snafu(display("missing secret lifetime"))] + MissingSecretLifetime, + #[snafu(display("object has no namespace"))] ObjectHasNoNamespace, @@ -444,7 +447,7 @@ fn build_stateful_set( resolved_product_image: &ResolvedProductImage, rolegroupref: &RoleGroupRef, log_dir: &ResolvedLogDir, - config: &HistoryConfig, + merged_config: &HistoryConfig, serviceaccount: &ServiceAccount, ) -> Result { let log_config_map = if let Some(ContainerLogConfig { @@ -452,7 +455,7 @@ fn build_stateful_set( Some(ContainerLogConfigChoice::Custom(CustomContainerLogConfig { custom: ConfigMapLogConfig { config_map }, })), - }) = config + }) = merged_config .logging .containers .get(&SparkHistoryServerContainer::SparkHistory) @@ -473,6 +476,9 @@ fn build_stateful_set( let mut pb = PodBuilder::new(); + let requested_secret_lifetime = merged_config + .requested_secret_lifetime + .context(MissingSecretLifetimeSnafu)?; pb.service_account_name(serviceaccount.name_unchecked()) .metadata(metadata) 
.image_pull_secrets_from_product_image(resolved_product_image) @@ -497,7 +503,11 @@ fn build_stateful_set( .build(), ) .context(AddVolumeSnafu)? - .add_volumes(log_dir.volumes().context(CreateLogDirVolumesSpecSnafu)?) + .add_volumes( + log_dir + .volumes(&requested_secret_lifetime) + .context(CreateLogDirVolumesSpecSnafu)?, + ) .context(AddVolumeSnafu)? .security_context(PodSecurityContext { run_as_user: Some(SPARK_UID), @@ -516,7 +526,7 @@ fn build_stateful_set( let container = ContainerBuilder::new(container_name) .context(InvalidContainerNameSnafu)? .image_from_product_image(resolved_product_image) - .resources(config.resources.clone().into()) + .resources(merged_config.resources.clone().into()) .command(vec!["/bin/bash".to_string()]) .args(command_args(log_dir)) .add_container_port("http", 18080) @@ -533,13 +543,13 @@ fn build_stateful_set( .build(); pb.add_container(container); - if config.logging.enable_vector_agent { + if merged_config.logging.enable_vector_agent { pb.add_container( vector_container( resolved_product_image, VOLUME_MOUNT_NAME_CONFIG, VOLUME_MOUNT_NAME_LOG, - config + merged_config .logging .containers .get(&SparkHistoryServerContainer::Vector), diff --git a/rust/operator-binary/src/spark_k8s_controller.rs b/rust/operator-binary/src/spark_k8s_controller.rs index 72dff776..ccb9eb84 100644 --- a/rust/operator-binary/src/spark_k8s_controller.rs +++ b/rust/operator-binary/src/spark_k8s_controller.rs @@ -65,6 +65,9 @@ use strum::{EnumDiscriminants, IntoStaticStr}; #[strum_discriminants(derive(IntoStaticStr))] #[allow(clippy::enum_variant_names)] pub enum Error { + #[snafu(display("missing secret lifetime"))] + MissingSecretLifetime, + #[snafu(display("object has no namespace"))] ObjectHasNoNamespace, @@ -667,7 +670,7 @@ fn pod_template( fn pod_template_config_map( spark_application: &SparkApplication, role: SparkApplicationRole, - config: &RoleConfig, + merged_config: &RoleConfig, product_config: Option<&HashMap>>, env: &[EnvVar], s3conn: &Option, 
@@ -682,15 +685,23 @@ fn pod_template_config_map( Some(ContainerLogConfigChoice::Custom(CustomContainerLogConfig { custom: ConfigMapLogConfig { config_map }, })), - }) = config.logging.containers.get(&SparkContainer::Spark) + }) = merged_config.logging.containers.get(&SparkContainer::Spark) { config_map.into() } else { cm_name.clone() }; + let requested_secret_lifetime = merged_config + .requested_secret_lifetime + .context(MissingSecretLifetimeSnafu)?; let mut volumes = spark_application - .volumes(s3conn, logdir, Some(&log_config_map)) + .volumes( + s3conn, + logdir, + Some(&log_config_map), + &requested_secret_lifetime, + ) .context(CreateVolumesSnafu)?; volumes.push( VolumeBuilder::new(VOLUME_MOUNT_NAME_CONFIG) @@ -701,7 +712,7 @@ fn pod_template_config_map( let template = pod_template( spark_application, role.clone(), - config, + merged_config, volumes.as_ref(), env, s3conn, @@ -737,7 +748,7 @@ fn pod_template_config_map( role_group: String::new(), }, vector_aggregator_address, - &config.logging, + &merged_config.logging, SparkContainer::Spark, SparkContainer::Vector, &mut cm_builder, @@ -881,9 +892,12 @@ fn spark_job( ) .build(), ]; + let requested_secret_lifetime = job_config + .requested_secret_lifetime + .context(MissingSecretLifetimeSnafu)?; volumes.extend( spark_application - .volumes(s3conn, logdir, None) + .volumes(s3conn, logdir, None, &requested_secret_lifetime) .context(CreateVolumesSnafu)?, );