diff --git a/rust/operator-binary/src/config/command.rs b/rust/operator-binary/src/config/command.rs index 8887c904..3c25a8be 100644 --- a/rust/operator-binary/src/config/command.rs +++ b/rust/operator-binary/src/config/command.rs @@ -137,6 +137,48 @@ fn broker_start_command( } } +// During a namespace or stacklet deletion, the Kafka controllers shut down too fast, leaving the +// brokers in a bad state. +// Brokers try to connect to the controllers before shutting down gracefully, but by that time all +// controllers are already gone. +// The broker pods are then kept alive until the value of `gracefulShutdownTimeout` is reached. +// The environment variable `PRE_STOP_CONTROLLER_SLEEP_SECONDS` delays the termination of the +// controller processes to give the brokers more time to offload data and shut down gracefully. +// Kubernetes has a built-in `pre-stop` hook feature, but it is not yet generally available on all +// platforms supported by the operator, so the delay is implemented in the TERM trap handler below. +const BASH_TRAP_FUNCTIONS: &str = r#" +prepare_signal_handlers() +{ + unset term_child_pid + unset term_kill_needed + trap 'handle_term_signal' TERM +} + +handle_term_signal() +{ + if [ "${term_child_pid}" ]; then + [ -n "$PRE_STOP_CONTROLLER_SLEEP_SECONDS" ] && sleep "$PRE_STOP_CONTROLLER_SLEEP_SECONDS" + kill -TERM "${term_child_pid}" 2>/dev/null + else + term_kill_needed="yes" + fi +} + +wait_for_termination() +{ + set +e + term_child_pid=$1 + if [[ -v term_kill_needed ]]; then + [ -n "$PRE_STOP_CONTROLLER_SLEEP_SECONDS" ] && sleep "$PRE_STOP_CONTROLLER_SLEEP_SECONDS" + kill -TERM "${term_child_pid}" 2>/dev/null + fi + wait ${term_child_pid} 2>/dev/null + trap - TERM + wait ${term_child_pid} 2>/dev/null + set -e +} +"#; + pub fn controller_kafka_container_command( cluster_id: &str, controller_descriptors: Vec, @@ -152,7 +194,7 @@ pub fn controller_kafka_container_command( // - use config-utils for proper replacements? // - should we print the adapted properties file at startup? formatdoc! 
{" - {COMMON_BASH_TRAP_FUNCTIONS} + {BASH_TRAP_FUNCTIONS} {remove_vector_shutdown_file_command} prepare_signal_handlers containerdebug --output={STACKABLE_LOG_DIR}/containerdebug-state.json --loop & diff --git a/rust/operator-binary/src/resource/statefulset.rs b/rust/operator-binary/src/resource/statefulset.rs index 18bcc04c..b5b758ec 100644 --- a/rust/operator-binary/src/resource/statefulset.rs +++ b/rust/operator-binary/src/resource/statefulset.rs @@ -23,8 +23,8 @@ use stackable_operator::{ apps::v1::{StatefulSet, StatefulSetSpec, StatefulSetUpdateStrategy}, core::v1::{ ConfigMapKeySelector, ConfigMapVolumeSource, ContainerPort, EnvVar, EnvVarSource, - ExecAction, Lifecycle, LifecycleHandler, ObjectFieldSelector, PodSpec, Probe, - ServiceAccount, SleepAction, TCPSocketAction, Volume, + ExecAction, ObjectFieldSelector, PodSpec, Probe, ServiceAccount, TCPSocketAction, + Volume, }, }, apimachinery::pkg::{apis::meta::v1::LabelSelector, util::intstr::IntOrString}, @@ -658,6 +658,7 @@ pub fn build_controller_rolegroup_statefulset( kafka_security, &resolved_product_image.product_version, )]) + .add_env_var("PRE_STOP_CONTROLLER_SLEEP_SECONDS", "5") .add_env_var( "EXTRA_ARGS", kafka_role @@ -756,19 +757,7 @@ pub fn build_controller_rolegroup_statefulset( ) .context(AddVolumesAndVolumeMountsSnafu)?; - // Currently, Controllers shutdown very fast, too fast in most times (flakyness) for the Brokers - // to off load properly. The Brokers then try to connect to any controllers until the - // `gracefulShutdownTimeout` is reached and the pod is finally killed. - // The `pre-stop` hook will delay the kill signal to the Controllers to provide the Brokers more - // time to offload data. 
- let mut kafka_container = cb_kafka.build(); - kafka_container.lifecycle = Some(Lifecycle { - pre_stop: Some(LifecycleHandler { - sleep: Some(SleepAction { seconds: 10 }), - ..Default::default() - }), - ..Default::default() - }); + let kafka_container = cb_kafka.build(); pod_builder .metadata(metadata)