@@ -2,8 +2,8 @@ use std::collections::{BTreeMap, HashMap};
22
33use snafu:: { OptionExt , ResultExt , Snafu } ;
44use stackable_operator:: {
5- builder,
65 builder:: {
6+ self ,
77 configmap:: ConfigMapBuilder ,
88 meta:: ObjectMetaBuilder ,
99 pod:: {
@@ -17,11 +17,11 @@ use stackable_operator::{
1717 api:: {
1818 apps:: v1:: { Deployment , DeploymentSpec } ,
1919 core:: v1:: {
20- ConfigMap , EnvVar , PodSecurityContext , Service , ServiceAccount , ServicePort ,
21- ServiceSpec ,
20+ ConfigMap , EnvVar , HTTPGetAction , PodSecurityContext , Probe , Service ,
21+ ServiceAccount , ServicePort , ServiceSpec ,
2222 } ,
2323 } ,
24- apimachinery:: pkg:: apis:: meta:: v1:: LabelSelector ,
24+ apimachinery:: pkg:: { apis:: meta:: v1:: LabelSelector , util :: intstr :: IntOrString } ,
2525 } ,
2626 kube:: { ResourceExt , runtime:: reflector:: ObjectRef } ,
2727 kvp:: { Label , Labels } ,
@@ -253,7 +253,9 @@ pub fn build_deployment(
253253 . add_volume_mount ( VOLUME_MOUNT_NAME_CONFIG , VOLUME_MOUNT_PATH_CONFIG )
254254 . context ( AddVolumeMountSnafu ) ?
255255 . add_volume_mount ( VOLUME_MOUNT_NAME_LOG , VOLUME_MOUNT_PATH_LOG )
256- . context ( AddVolumeMountSnafu ) ?;
256+ . context ( AddVolumeMountSnafu ) ?
257+ . readiness_probe ( probe ( ) )
258+ . liveness_probe ( probe ( ) ) ;
257259
258260 // Add custom log4j config map volumes if configured
259261 if let Some ( cm_name) = config. log_config_map ( ) {
@@ -339,10 +341,18 @@ pub fn build_service(
339341 app_version_label : & str ,
340342 service_cluster_ip : Option < String > ,
341343) -> Result < Service , Error > {
342- let ( service_name, service_type) = match service_cluster_ip. clone ( ) {
344+ let ( service_name, service_type, publish_not_ready_addresses) = match service_cluster_ip. clone ( )
345+ {
343346 Some ( _) => (
347+ // These are the properties of the headless driver service used for the internal
348+ // communication with the executors as recommended by the Spark docs.
349+ //
350+ // The flag `publish_not_ready_addresses` *must* be `true` to allow for readiness
351+ // probes. Without it, the driver runs into a deadlock because the Pod cannot become
352+ // "ready" until the Service is "ready" and vice versa.
344353 object_name ( & scs. name_any ( ) , SparkConnectRole :: Server ) ,
345354 "ClusterIP" . to_string ( ) ,
355+ Some ( true ) ,
346356 ) ,
347357 None => (
348358 format ! (
@@ -351,6 +361,7 @@ pub fn build_service(
351361 SparkConnectRole :: Server
352362 ) ,
353363 scs. spec . cluster_config . listener_class . k8s_service_type ( ) ,
364+ Some ( false ) ,
354365 ) ,
355366 } ;
356367
@@ -393,14 +404,15 @@ pub fn build_service(
393404 } ,
394405 ] ) ,
395406 selector : Some ( selector) ,
407+ publish_not_ready_addresses,
396408 ..ServiceSpec :: default ( )
397409 } ) ,
398410 status : None ,
399411 } )
400412}
401413
402414#[ allow( clippy:: result_large_err) ]
403- pub fn command_args ( user_args : & [ String ] , spark_version : & str ) -> Vec < String > {
415+ pub fn command_args ( user_args : & [ String ] ) -> Vec < String > {
404416 let mut command = vec ! [
405417 // ---------- start containerdebug
406418 format!(
@@ -411,7 +423,6 @@ pub fn command_args(user_args: &[String], spark_version: &str) -> Vec<String> {
411423 "--deploy-mode client" . to_string( ) , // 'cluster' mode not supported
412424 "--master k8s://https://${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT_HTTPS}"
413425 . to_string( ) ,
414- format!( "--jars /stackable/spark/connect/spark-connect_2.12-{spark_version}.jar" ) ,
415426 format!( "--properties-file {VOLUME_MOUNT_PATH_CONFIG}/{SPARK_DEFAULTS_FILE_NAME}" ) ,
416427 ] ;
417428
@@ -461,6 +472,7 @@ pub fn server_properties(
461472 pi : & ResolvedProductImage ,
462473) -> Result < BTreeMap < String , Option < String > > , Error > {
463474 let spark_image = pi. image . clone ( ) ;
475+ let spark_version = pi. product_version . clone ( ) ;
464476 let service_account_name = service_account. name_unchecked ( ) ;
465477 let namespace = driver_service
466478 . namespace ( )
@@ -498,7 +510,7 @@ pub fn server_properties(
498510 ) ,
499511 (
500512 "spark.driver.extraClassPath" . to_string ( ) ,
501- Some ( "/stackable/spark/extra-jars/*" . to_string ( ) ) ,
513+ Some ( format ! ( "/stackable/spark/extra-jars/*:/stackable/spark/connect/spark-connect_2.12-{spark_version}.jar" ) ) ,
502514 ) ,
503515 ]
504516 . into ( ) ;
@@ -538,3 +550,16 @@ fn server_jvm_args(
538550 name : scs. name_any ( ) ,
539551 } )
540552}
553+
554+ fn probe ( ) -> Probe {
555+ Probe {
556+ http_get : Some ( HTTPGetAction {
557+ port : IntOrString :: Int ( CONNECT_UI_PORT ) ,
558+ scheme : Some ( "HTTP" . to_string ( ) ) ,
559+ path : Some ( "/metrics" . to_string ( ) ) ,
560+ ..Default :: default ( )
561+ } ) ,
562+ failure_threshold : Some ( 10 ) ,
563+ ..Probe :: default ( )
564+ }
565+ }
0 commit comments