Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@ All notable changes to this project will be documented in this file.

## [Unreleased]

### Added

- The lifetime of auto-generated TLS certificates is now configurable with the role and roleGroup
config property `requestedSecretLifetime`. This helps reduce frequent Pod restarts ([#501]).

[#501]: https://github.com/stackabletech/spark-k8s-operator/pull/501

## [24.11.0] - 2024-11-18

### Added
Expand Down
21 changes: 5 additions & 16 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
serde_yaml = "0.9"
snafu = "0.8"
stackable-operator = { git = "https://github.com/stackabletech/operator-rs.git", tag = "stackable-operator-0.82.0" }
stackable-operator = { git = "https://github.com/stackabletech/operator-rs.git", tag = "stackable-operator-0.83.0" }
strum = { version = "0.26", features = ["derive"] }
tokio = { version = "1.39", features = ["full"] }
tracing = "0.1"
Expand Down
20 changes: 20 additions & 0 deletions deploy/helm/spark-k8s-operator/crds/crds.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,10 @@ spec:
nullable: true
type: boolean
type: object
requestedSecretLifetime:
description: Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. This can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate.
nullable: true
type: string
resources:
default:
cpu:
Expand Down Expand Up @@ -463,6 +467,10 @@ spec:
nullable: true
type: boolean
type: object
requestedSecretLifetime:
description: Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. This can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate.
nullable: true
type: string
resources:
default:
cpu:
Expand Down Expand Up @@ -549,6 +557,10 @@ spec:
config:
default: {}
properties:
requestedSecretLifetime:
description: Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. This can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate.
nullable: true
type: string
resources:
default:
cpu:
Expand Down Expand Up @@ -1347,6 +1359,10 @@ spec:
nullable: true
type: boolean
type: object
requestedSecretLifetime:
description: Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. This can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate.
nullable: true
type: string
resources:
default:
cpu:
Expand Down Expand Up @@ -1563,6 +1579,10 @@ spec:
nullable: true
type: boolean
type: object
requestedSecretLifetime:
description: Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. This can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate.
nullable: true
type: string
resources:
default:
cpu:
Expand Down
9 changes: 9 additions & 0 deletions rust/crd/src/history.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ use stackable_operator::{
product_logging::{self, spec::Logging},
role_utils::{Role, RoleGroup, RoleGroupRef},
schemars::{self, JsonSchema},
time::Duration,
};
use std::collections::{BTreeMap, HashMap};
use strum::{Display, EnumIter};
Expand Down Expand Up @@ -400,9 +401,16 @@ pub struct HistoryConfig {
pub logging: Logging<SparkHistoryServerContainer>,
#[fragment_attrs(serde(default))]
pub affinity: StackableAffinity,
/// Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`.
/// This can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate.
#[fragment_attrs(serde(default))]
pub requested_secret_lifetime: Option<Duration>,
}

impl HistoryConfig {
// Auto TLS certificate lifetime
const DEFAULT_HISTORY_SECRET_LIFETIME: Duration = Duration::from_days_unchecked(7);

fn default_config(cluster_name: &str) -> HistoryConfigFragment {
HistoryConfigFragment {
cleaner: None,
Expand All @@ -419,6 +427,7 @@ impl HistoryConfig {
},
logging: product_logging::spec::default_logging(),
affinity: history_affinity(cluster_name),
requested_secret_lifetime: Some(Self::DEFAULT_HISTORY_SECRET_LIFETIME),
}
}
}
Expand Down
6 changes: 6 additions & 0 deletions rust/crd/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use logdir::ResolvedLogDir;
use product_config::{types::PropertyNameKind, ProductConfigManager};
use serde::{Deserialize, Serialize};
use snafu::{OptionExt, ResultExt, Snafu};
use stackable_operator::time::Duration;
use stackable_operator::{
builder::pod::volume::{
SecretFormat, SecretOperatorVolumeSourceBuilder, SecretOperatorVolumeSourceBuilderError,
Expand Down Expand Up @@ -281,6 +282,7 @@ impl SparkApplication {
s3conn: &Option<S3ConnectionSpec>,
logdir: &Option<ResolvedLogDir>,
log_config_map: Option<&str>,
requested_secret_lifetime: &Duration,
) -> Result<Vec<Volume>, Error> {
let mut result: Vec<Volume> = self.spec.volumes.clone();

Expand Down Expand Up @@ -356,6 +358,7 @@ impl SparkApplication {
.ephemeral(
SecretOperatorVolumeSourceBuilder::new(cert_secret)
.with_format(SecretFormat::TlsPkcs12)
.with_auto_tls_cert_lifetime(*requested_secret_lifetime)
.build()
.context(TlsCertSecretClassVolumeBuildSnafu)?,
)
Expand Down Expand Up @@ -1068,6 +1071,7 @@ mod tests {

use indoc::indoc;
use rstest::rstest;
use stackable_operator::time::Duration;
use std::collections::{BTreeMap, HashMap};

#[test]
Expand Down Expand Up @@ -1206,6 +1210,7 @@ mod tests {
},
volume_mounts: Default::default(),
affinity: StackableAffinity::default(),
requested_secret_lifetime: Some(Duration::from_days_unchecked(1)),
};

let mut props = BTreeMap::new();
Expand Down Expand Up @@ -1250,6 +1255,7 @@ mod tests {
},
volume_mounts: Default::default(),
affinity: StackableAffinity::default(),
requested_secret_lifetime: Some(Duration::from_days_unchecked(1)),
};

let mut props = BTreeMap::new();
Expand Down
8 changes: 5 additions & 3 deletions rust/crd/src/logdir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use stackable_operator::{
secret_class::SecretClassVolume,
},
k8s_openapi::api::core::v1::{Volume, VolumeMount},
time::Duration,
};
use std::collections::BTreeMap;

Expand Down Expand Up @@ -103,9 +104,9 @@ impl ResolvedLogDir {
}
}

pub fn volumes(&self) -> Result<Vec<Volume>, Error> {
pub fn volumes(&self, requested_secret_lifetime: &Duration) -> Result<Vec<Volume>, Error> {
match self {
ResolvedLogDir::S3(s3_log_dir) => s3_log_dir.volumes(),
ResolvedLogDir::S3(s3_log_dir) => s3_log_dir.volumes(requested_secret_lifetime),
ResolvedLogDir::Custom(_) => Ok(vec![]),
}
}
Expand Down Expand Up @@ -248,7 +249,7 @@ impl S3LogDir {
)
}

pub fn volumes(&self) -> Result<Vec<Volume>, Error> {
pub fn volumes(&self, requested_secret_lifetime: &Duration) -> Result<Vec<Volume>, Error> {
let mut volumes: Vec<Volume> = self.credentials_volume()?.into_iter().collect();

if let Some(secret_name) = tlscerts::tls_secret_name(&self.bucket.connection) {
Expand All @@ -257,6 +258,7 @@ impl S3LogDir {
.ephemeral(
SecretOperatorVolumeSourceBuilder::new(secret_name)
.with_format(SecretFormat::TlsPkcs12)
.with_auto_tls_cert_lifetime(*requested_secret_lifetime)
.build()
.context(TlsCertSecretClassVolumeBuildSnafu)?,
)
Expand Down
18 changes: 18 additions & 0 deletions rust/crd/src/roles.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ use stackable_operator::{
product_config_utils::Configuration,
product_logging::{self, spec::Logging},
schemars::{self, JsonSchema},
time::Duration,
utils::crds::raw_object_list_schema,
};
use strum::{Display, EnumIter};
Expand Down Expand Up @@ -123,9 +124,17 @@ pub struct RoleConfig {

#[fragment_attrs(serde(default))]
pub affinity: StackableAffinity,

/// Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`.
/// This can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate.
#[fragment_attrs(serde(default))]
pub requested_secret_lifetime: Option<Duration>,
}

impl RoleConfig {
// Auto TLS certificate lifetime
const DEFAULT_SECRET_LIFETIME: Duration = Duration::from_days_unchecked(7);

pub fn default_config() -> RoleConfigFragment {
RoleConfigFragment {
resources: ResourcesFragment {
Expand All @@ -142,6 +151,7 @@ impl RoleConfig {
logging: product_logging::spec::default_logging(),
volume_mounts: Some(VolumeMounts::default()),
affinity: Default::default(),
requested_secret_lifetime: Some(Self::DEFAULT_SECRET_LIFETIME),
}
}
pub fn volume_mounts(
Expand Down Expand Up @@ -206,9 +216,16 @@ pub struct SubmitConfig {
pub resources: Resources<SparkStorageConfig, NoRuntimeLimits>,
#[fragment_attrs(serde(default, flatten))]
pub volume_mounts: Option<VolumeMounts>,
/// Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`.
/// This can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate.
#[fragment_attrs(serde(default))]
pub requested_secret_lifetime: Option<Duration>,
}

impl SubmitConfig {
// Auto TLS certificate lifetime
const DEFAULT_SECRET_LIFETIME: Duration = Duration::from_days_unchecked(7);

pub fn default_config() -> SubmitConfigFragment {
SubmitConfigFragment {
resources: ResourcesFragment {
Expand All @@ -223,6 +240,7 @@ impl SubmitConfig {
storage: SparkStorageConfigFragment {},
},
volume_mounts: Some(VolumeMounts::default()),
requested_secret_lifetime: Some(Self::DEFAULT_SECRET_LIFETIME),
}
}
}
Expand Down
22 changes: 16 additions & 6 deletions rust/operator-binary/src/history/history_controller.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,9 @@ use strum::{EnumDiscriminants, IntoStaticStr};
#[strum_discriminants(derive(IntoStaticStr))]
#[allow(clippy::enum_variant_names)]
pub enum Error {
#[snafu(display("missing secret lifetime"))]
MissingSecretLifetime,

#[snafu(display("object has no namespace"))]
ObjectHasNoNamespace,

Expand Down Expand Up @@ -444,15 +447,15 @@ fn build_stateful_set(
resolved_product_image: &ResolvedProductImage,
rolegroupref: &RoleGroupRef<SparkHistoryServer>,
log_dir: &ResolvedLogDir,
config: &HistoryConfig,
merged_config: &HistoryConfig,
serviceaccount: &ServiceAccount,
) -> Result<StatefulSet, Error> {
let log_config_map = if let Some(ContainerLogConfig {
choice:
Some(ContainerLogConfigChoice::Custom(CustomContainerLogConfig {
custom: ConfigMapLogConfig { config_map },
})),
}) = config
}) = merged_config
.logging
.containers
.get(&SparkHistoryServerContainer::SparkHistory)
Expand All @@ -473,6 +476,9 @@ fn build_stateful_set(

let mut pb = PodBuilder::new();

let requested_secret_lifetime = merged_config
.requested_secret_lifetime
.context(MissingSecretLifetimeSnafu)?;
pb.service_account_name(serviceaccount.name_unchecked())
.metadata(metadata)
.image_pull_secrets_from_product_image(resolved_product_image)
Expand All @@ -497,7 +503,11 @@ fn build_stateful_set(
.build(),
)
.context(AddVolumeSnafu)?
.add_volumes(log_dir.volumes().context(CreateLogDirVolumesSpecSnafu)?)
.add_volumes(
log_dir
.volumes(&requested_secret_lifetime)
.context(CreateLogDirVolumesSpecSnafu)?,
)
.context(AddVolumeSnafu)?
.security_context(PodSecurityContext {
run_as_user: Some(SPARK_UID),
Expand All @@ -516,7 +526,7 @@ fn build_stateful_set(
let container = ContainerBuilder::new(container_name)
.context(InvalidContainerNameSnafu)?
.image_from_product_image(resolved_product_image)
.resources(config.resources.clone().into())
.resources(merged_config.resources.clone().into())
.command(vec!["/bin/bash".to_string()])
.args(command_args(log_dir))
.add_container_port("http", 18080)
Expand All @@ -533,13 +543,13 @@ fn build_stateful_set(
.build();
pb.add_container(container);

if config.logging.enable_vector_agent {
if merged_config.logging.enable_vector_agent {
pb.add_container(
vector_container(
resolved_product_image,
VOLUME_MOUNT_NAME_CONFIG,
VOLUME_MOUNT_NAME_LOG,
config
merged_config
.logging
.containers
.get(&SparkHistoryServerContainer::Vector),
Expand Down
Loading
Loading