diff --git a/CHANGELOG.md b/CHANGELOG.md index 7ee49bf4..e7220756 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,12 +10,16 @@ All notable changes to this project will be documented in this file. - BREAKING: The file log directory was set by `HIVE_OPERATOR_LOG_DIRECTORY`, and is now set by `ROLLING_LOGS` (or via `--rolling-logs `). - Replace stackable-operator `print_startup_string` with `tracing::info!` with fields. +- BREAKING: Inject the vector aggregator address into the vector config using the env var `VECTOR_AGGREGATOR_ADDRESS` instead + of having the operator write it to the vector config ([#589]). ### Fixed - Use `json` file extension for log files ([#591]). +- Fix a bug where changes to ConfigMaps that are referenced in the HiveCluster spec didn't trigger a reconciliation ([#589]). [#585]: https://github.com/stackabletech/hive-operator/pull/585 +[#589]: https://github.com/stackabletech/hive-operator/pull/589 [#591]: https://github.com/stackabletech/hive-operator/pull/591 ## [25.3.0] - 2025-03-21 diff --git a/Cargo.lock b/Cargo.lock index 6f40d3df..cbcd0b2a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2670,8 +2670,8 @@ dependencies = [ [[package]] name = "stackable-operator" -version = "0.89.1" -source = "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.89.1#cd73728af410c52972b9a9a3ba1302bcdb574d04" +version = "0.90.0" +source = "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.90.0#ea063b4595caa20c82d37c595487c76476c9ab10" dependencies = [ "chrono", "clap", @@ -2706,7 +2706,7 @@ dependencies = [ [[package]] name = "stackable-operator-derive" version = "0.3.1" -source = "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.89.1#cd73728af410c52972b9a9a3ba1302bcdb574d04" +source = "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.90.0#ea063b4595caa20c82d37c595487c76476c9ab10" dependencies = [ "darling", "proc-macro2", @@ -2717,7 +2717,7 @@ 
dependencies = [ [[package]] name = "stackable-shared" version = "0.0.1" -source = "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.89.1#cd73728af410c52972b9a9a3ba1302bcdb574d04" +source = "git+https://github.com/stackabletech/operator-rs.git?tag=stackable-operator-0.90.0#ea063b4595caa20c82d37c595487c76476c9ab10" dependencies = [ "kube", "semver", diff --git a/Cargo.toml b/Cargo.toml index 4a9d90ab..7d5e23d2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,7 +11,7 @@ repository = "https://github.com/stackabletech/hive-operator" [workspace.dependencies] product-config = { git = "https://github.com/stackabletech/product-config.git", tag = "0.7.0" } -stackable-operator = { git = "https://github.com/stackabletech/operator-rs.git", tag = "stackable-operator-0.89.1" } +stackable-operator = { git = "https://github.com/stackabletech/operator-rs.git", tag = "stackable-operator-0.90.0" } stackable-telemetry = { git = "https://github.com/stackabletech/operator-rs.git", tag = "stackable-telemetry-0.4.0" } stackable-versioned = { git = "https://github.com/stackabletech/operator-rs.git", features = ["k8s"], tag = "stackable-versioned-0.7.1" } @@ -34,4 +34,5 @@ tokio = { version = "1.40", features = ["full"] } tracing = "0.1" # [patch."https://github.com/stackabletech/operator-rs.git"] +# stackable-operator = { path = "../operator-rs/crates/stackable-operator" } # stackable-operator = { git = "https://github.com/stackabletech//operator-rs.git", branch = "main" } diff --git a/rust/operator-binary/src/controller.rs b/rust/operator-binary/src/controller.rs index d87cef34..d6f9160c 100644 --- a/rust/operator-binary/src/controller.rs +++ b/rust/operator-binary/src/controller.rs @@ -94,7 +94,7 @@ use crate::{ kerberos_container_start_commands, }, operations::{graceful_shutdown::add_graceful_shutdown_config, pdb::add_pdbs}, - product_logging::{extend_role_group_config_map, resolve_vector_aggregator_address}, + product_logging::extend_role_group_config_map, 
}; pub const HIVE_CONTROLLER_NAME: &str = "hivecluster"; @@ -228,10 +228,8 @@ pub enum Error { source: stackable_operator::cluster_resources::Error, }, - #[snafu(display("failed to resolve the Vector aggregator address"))] - ResolveVectorAggregatorAddress { - source: crate::product_logging::Error, - }, + #[snafu(display("vector agent is enabled but vector aggregator ConfigMap is missing"))] + VectorAggregatorConfigMapMissing, #[snafu(display("failed to add the logging configuration to the ConfigMap [{cm_name}]"))] InvalidLoggingConfig { @@ -439,10 +437,6 @@ pub async fn reconcile_hive( .await .context(ApplyRoleServiceSnafu)?; - let vector_aggregator_address = resolve_vector_aggregator_address(hive, client) - .await - .context(ResolveVectorAggregatorAddressSnafu)?; - let mut ss_cond_builder = StatefulSetConditionBuilder::default(); for (rolegroup_name, rolegroup_config) in metastore_config.iter() { @@ -461,7 +455,6 @@ pub async fn reconcile_hive( rolegroup_config, s3_connection_spec.as_ref(), &config, - vector_aggregator_address.as_deref(), &client.kubernetes_cluster_info, )?; let rg_statefulset = build_metastore_rolegroup_statefulset( @@ -604,7 +597,6 @@ fn build_metastore_rolegroup_config_map( role_group_config: &HashMap>, s3_connection_spec: Option<&S3ConnectionSpec>, merged_config: &MetaStoreConfig, - vector_aggregator_address: Option<&str>, cluster_info: &KubernetesClusterInfo, ) -> Result { let mut hive_site_data = String::new(); @@ -718,15 +710,11 @@ fn build_metastore_rolegroup_config_map( cm_builder.add_data(CORE_SITE_XML, to_hadoop_xml(data.iter())); } - extend_role_group_config_map( - rolegroup, - vector_aggregator_address, - &merged_config.logging, - &mut cm_builder, - ) - .context(InvalidLoggingConfigSnafu { - cm_name: rolegroup.object_name(), - })?; + extend_role_group_config_map(rolegroup, &merged_config.logging, &mut cm_builder).context( + InvalidLoggingConfigSnafu { + cm_name: rolegroup.object_name(), + }, + )?; cm_builder .build() @@ -1049,21 
+1037,29 @@ fn build_metastore_rolegroup_statefulset( // N.B. the vector container should *follow* the hive container so that the hive one is the // default, is started first and can provide any dependencies that vector expects if merged_config.logging.enable_vector_agent { - pod_builder.add_container( - product_logging::framework::vector_container( - resolved_product_image, - STACKABLE_CONFIG_MOUNT_DIR_NAME, - STACKABLE_LOG_DIR_NAME, - merged_config.logging.containers.get(&Container::Vector), - ResourceRequirementsBuilder::new() - .with_cpu_request("250m") - .with_cpu_limit("500m") - .with_memory_request("128Mi") - .with_memory_limit("128Mi") - .build(), - ) - .context(BuildVectorContainerSnafu)?, - ); + match &hive.spec.cluster_config.vector_aggregator_config_map_name { + Some(vector_aggregator_config_map_name) => { + pod_builder.add_container( + product_logging::framework::vector_container( + resolved_product_image, + STACKABLE_CONFIG_MOUNT_DIR_NAME, + STACKABLE_LOG_DIR_NAME, + merged_config.logging.containers.get(&Container::Vector), + ResourceRequirementsBuilder::new() + .with_cpu_request("250m") + .with_cpu_limit("500m") + .with_memory_request("128Mi") + .with_memory_limit("128Mi") + .build(), + vector_aggregator_config_map_name, + ) + .context(BuildVectorContainerSnafu)?, + ); + } + None => { + VectorAggregatorConfigMapMissingSnafu.fail()?; + } + } } let mut pod_template = pod_builder.build_template(); diff --git a/rust/operator-binary/src/main.rs b/rust/operator-binary/src/main.rs index 31eaaf51..c45342db 100644 --- a/rust/operator-binary/src/main.rs +++ b/rust/operator-binary/src/main.rs @@ -19,10 +19,12 @@ use stackable_operator::{ core::v1::{ConfigMap, Service}, }, kube::{ + ResourceExt, core::DeserializeGuard, runtime::{ Controller, events::{Recorder, Reporter}, + reflector::ObjectRef, watcher, }, }, @@ -128,51 +130,78 @@ async fn main() -> anyhow::Result<()> { instance: None, })); - Controller::new( + let hive_controller = Controller::new( 
watch_namespace.get_api::>(&client), watcher::Config::default(), - ) - .owns( - watch_namespace.get_api::(&client), - watcher::Config::default(), - ) - .owns( - watch_namespace.get_api::(&client), - watcher::Config::default(), - ) - .owns( - watch_namespace.get_api::(&client), - watcher::Config::default(), - ) - .shutdown_on_signal() - .run( - controller::reconcile_hive, - controller::error_policy, - Arc::new(controller::Ctx { - client: client.clone(), - product_config, - }), - ) - // We can let the reporting happen in the background - .for_each_concurrent( - 16, // concurrency limit - |result| { - // The event_recorder needs to be shared across all invocations, so that - // events are correctly aggregated - let event_recorder = event_recorder.clone(); - async move { - report_controller_reconciled( - &event_recorder, - HIVE_FULL_CONTROLLER_NAME, - &result, - ) - .await; - } - }, - ) - .await; + ); + let config_map_store = hive_controller.store(); + hive_controller + .owns( + watch_namespace.get_api::(&client), + watcher::Config::default(), + ) + .owns( + watch_namespace.get_api::(&client), + watcher::Config::default(), + ) + .owns( + watch_namespace.get_api::(&client), + watcher::Config::default(), + ) + .shutdown_on_signal() + .watches( + watch_namespace.get_api::>(&client), + watcher::Config::default(), + move |config_map| { + config_map_store + .state() + .into_iter() + .filter(move |hive| references_config_map(hive, &config_map)) + .map(|hive| ObjectRef::from_obj(&*hive)) + }, + ) + .run( + controller::reconcile_hive, + controller::error_policy, + Arc::new(controller::Ctx { + client: client.clone(), + product_config, + }), + ) + // We can let the reporting happen in the background + .for_each_concurrent( + 16, // concurrency limit + |result| { + // The event_recorder needs to be shared across all invocations, so that + // events are correctly aggregated + let event_recorder = event_recorder.clone(); + async move { + report_controller_reconciled( + 
&event_recorder, + HIVE_FULL_CONTROLLER_NAME, + &result, + ) + .await; + } + }, + ) + .await; } } Ok(()) } + +fn references_config_map( + hive: &DeserializeGuard, + config_map: &DeserializeGuard, +) -> bool { + let Ok(hive) = &hive.0 else { + return false; + }; + + match &hive.spec.cluster_config.hdfs { + Some(hdfs_connection) => hdfs_connection.config_map == config_map.name_any(), + None => false, + } +} diff --git a/rust/operator-binary/src/product_logging.rs b/rust/operator-binary/src/product_logging.rs index 9e6da94f..30e9318b 100644 --- a/rust/operator-binary/src/product_logging.rs +++ b/rust/operator-binary/src/product_logging.rs @@ -1,9 +1,6 @@ -use snafu::{OptionExt, ResultExt, Snafu}; +use snafu::Snafu; use stackable_operator::{ builder::configmap::ConfigMapBuilder, - client::Client, - k8s_openapi::api::core::v1::ConfigMap, - kube::ResourceExt, memory::BinaryMultiple, product_logging::{ self, @@ -33,54 +30,16 @@ pub enum Error { }, #[snafu(display("crd validation failure"))] CrdValidationFailure { source: crate::crd::Error }, - #[snafu(display("vectorAggregatorConfigMapName must be set"))] - MissingVectorAggregatorAddress, } type Result = std::result::Result; -const VECTOR_AGGREGATOR_CM_ENTRY: &str = "ADDRESS"; const CONSOLE_CONVERSION_PATTERN: &str = "%d{ISO8601} %5p [%t] %c{2}: %m%n"; const HIVE_LOG_FILE: &str = "hive.log4j2.xml"; -/// Return the address of the Vector aggregator if the corresponding ConfigMap name is given in the -/// cluster spec -pub async fn resolve_vector_aggregator_address( - hive: &v1alpha1::HiveCluster, - client: &Client, -) -> Result> { - let vector_aggregator_address = if let Some(vector_aggregator_config_map_name) = - &hive.spec.cluster_config.vector_aggregator_config_map_name - { - let vector_aggregator_address = client - .get::( - vector_aggregator_config_map_name, - hive.namespace() - .as_deref() - .context(ObjectHasNoNamespaceSnafu)?, - ) - .await - .context(ConfigMapNotFoundSnafu { - cm_name: 
vector_aggregator_config_map_name.to_string(), - })? - .data - .and_then(|mut data| data.remove(VECTOR_AGGREGATOR_CM_ENTRY)) - .context(MissingConfigMapEntrySnafu { - entry: VECTOR_AGGREGATOR_CM_ENTRY, - cm_name: vector_aggregator_config_map_name.to_string(), - })?; - Some(vector_aggregator_address) - } else { - None - }; - - Ok(vector_aggregator_address) -} - /// Extend the role group ConfigMap with logging and Vector configurations pub fn extend_role_group_config_map( rolegroup: &RoleGroupRef, - vector_aggregator_address: Option<&str>, logging: &Logging, cm_builder: &mut ConfigMapBuilder, ) -> Result<()> { @@ -118,11 +77,7 @@ pub fn extend_role_group_config_map( if logging.enable_vector_agent { cm_builder.add_data( product_logging::framework::VECTOR_CONFIG_FILE, - product_logging::framework::create_vector_config( - rolegroup, - vector_aggregator_address.context(MissingVectorAggregatorAddressSnafu)?, - vector_log_config, - ), + product_logging::framework::create_vector_config(rolegroup, vector_log_config), ); }