@@ -39,7 +39,7 @@ use crate::{
3939 } ,
4040 crd:: constants:: {
4141 APP_NAME , JVM_SECURITY_PROPERTIES_FILE , LOG4J2_CONFIG_FILE , MAX_SPARK_LOG_FILES_SIZE ,
42- METRICS_PORT , POD_TEMPLATE_FILE , SPARK_DEFAULTS_FILE_NAME , SPARK_UID ,
42+ METRICS_PROPERTIES_FILE , POD_TEMPLATE_FILE , SPARK_DEFAULTS_FILE_NAME , SPARK_UID ,
4343 VOLUME_MOUNT_NAME_CONFIG , VOLUME_MOUNT_NAME_LOG , VOLUME_MOUNT_NAME_LOG_CONFIG ,
4444 VOLUME_MOUNT_PATH_CONFIG , VOLUME_MOUNT_PATH_LOG , VOLUME_MOUNT_PATH_LOG_CONFIG ,
4545 } ,
@@ -83,6 +83,9 @@ pub enum Error {
8383 #[ snafu( display( "server jvm security properties for spark connect {name}" , ) ) ]
8484 ServerJvmSecurityProperties { source : common:: Error , name : String } ,
8585
86+ #[ snafu( display( "server metrics properties for spark connect {name}" , ) ) ]
87+ MetricsProperties { source : common:: Error , name : String } ,
88+
8689 #[ snafu( display( "failed to serialize [{SPARK_DEFAULTS_FILE_NAME}] for the connect server" , ) ) ]
8790 SparkDefaultsProperties {
8891 source : product_config:: writer:: PropertiesWriterError ,
@@ -138,6 +141,16 @@ pub fn server_config_map(
138141 name : scs. name_unchecked ( ) ,
139142 } ) ?;
140143
144+ let metrics_props = common:: metrics_properties (
145+ scs. spec
146+ . server
147+ . as_ref ( )
148+ . and_then ( |s| s. config_overrides . get ( METRICS_PROPERTIES_FILE ) ) ,
149+ )
150+ . context ( MetricsPropertiesSnafu {
151+ name : scs. name_unchecked ( ) ,
152+ } ) ?;
153+
141154 let mut cm_builder = ConfigMapBuilder :: new ( ) ;
142155
143156 cm_builder
@@ -157,7 +170,8 @@ pub fn server_config_map(
157170 )
158171 . add_data ( SPARK_DEFAULTS_FILE_NAME , spark_properties)
159172 . add_data ( POD_TEMPLATE_FILE , executor_pod_template_spec)
160- . add_data ( JVM_SECURITY_PROPERTIES_FILE , jvm_sec_props) ;
173+ . add_data ( JVM_SECURITY_PROPERTIES_FILE , jvm_sec_props)
174+ . add_data ( METRICS_PROPERTIES_FILE , metrics_props) ;
161175
162176 let role_group_ref = RoleGroupRef {
163177 cluster : ObjectRef :: from_obj ( scs) ,
@@ -197,6 +211,7 @@ pub fn build_deployment(
197211 & SparkConnectRole :: Server . to_string ( ) ,
198212 ) )
199213 . context ( MetadataBuildSnafu ) ?
214+ . with_label ( Label :: try_from ( ( "prometheus.io/scrape" , "true" ) ) . context ( LabelBuildSnafu ) ?)
200215 . build ( ) ;
201216
202217 let mut pb = PodBuilder :: new ( ) ;
@@ -248,7 +263,6 @@ pub fn build_deployment(
248263 . args ( args)
249264 . add_container_port ( "grpc" , CONNECT_GRPC_PORT )
250265 . add_container_port ( "http" , CONNECT_UI_PORT )
251- . add_container_port ( "metrics" , METRICS_PORT . into ( ) )
252266 . add_env_vars ( container_env)
253267 . add_volume_mount ( VOLUME_MOUNT_NAME_CONFIG , VOLUME_MOUNT_PATH_CONFIG )
254268 . context ( AddVolumeMountSnafu ) ?
@@ -397,11 +411,6 @@ pub fn build_service(
397411 port: CONNECT_UI_PORT ,
398412 ..ServicePort :: default ( )
399413 } ,
400- ServicePort {
401- name: Some ( String :: from( "metrics" ) ) ,
402- port: METRICS_PORT . into( ) ,
403- ..ServicePort :: default ( )
404- } ,
405414 ] ) ,
406415 selector : Some ( selector) ,
407416 publish_not_ready_addresses,
@@ -512,6 +521,18 @@ pub fn server_properties(
512521 "spark.driver.extraClassPath" . to_string ( ) ,
513522 Some ( format ! ( "/stackable/spark/extra-jars/*:/stackable/spark/connect/spark-connect_2.12-{spark_version}.jar" ) ) ,
514523 ) ,
524+ (
525+ "spark.metrics.conf" . to_string ( ) ,
526+ Some ( format ! ( "{VOLUME_MOUNT_PATH_CONFIG}/{METRICS_PROPERTIES_FILE}" ) ) ,
527+ ) ,
528+ // This enables the "/metrics/executors/prometheus" endpoint on the server pod.
529+ // The driver collects metrics from the executors and makes them available here.
530+ // The "/metrics/prometheus" endpoint delivers the driver metrics.
531+ (
532+ "spark.ui.prometheus.enabled" . to_string ( ) ,
533+ Some ( "true" . to_string ( ) ) ,
534+ ) ,
535+
515536 ]
516537 . into ( ) ;
517538
@@ -556,7 +577,7 @@ fn probe() -> Probe {
556577 http_get : Some ( HTTPGetAction {
557578 port : IntOrString :: Int ( CONNECT_UI_PORT ) ,
558579 scheme : Some ( "HTTP" . to_string ( ) ) ,
559- path : Some ( "/metrics" . to_string ( ) ) ,
580+ path : Some ( "/metrics/prometheus " . to_string ( ) ) ,
560581 ..Default :: default ( )
561582 } ) ,
562583 failure_threshold : Some ( 10 ) ,
0 commit comments