Skip to content

Commit 6cfd7aa

Browse files
maltesander and razvan authored
chore: ensure metrics are correctly exposed (#897)
* add headless and metrics services * add headless & metrics service * fix smoke-kraft tests * fix smoke tests * adapted changelog * fix docs and unit tests * add missing pr reference * pre commit * Update rust/operator-binary/src/resource/service.rs Co-authored-by: Razvan-Daniel Mihai <[email protected]> --------- Co-authored-by: Razvan-Daniel Mihai <[email protected]>
1 parent 9166590 commit 6cfd7aa

File tree

11 files changed

+298
-59
lines changed

11 files changed

+298
-59
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,18 @@ All notable changes to this project will be documented in this file.
99
- Helm: Allow Pod `priorityClassName` to be configured ([#890]).
1010
- Add experimental support for Kafka KRaft mode ([#889]).
1111
- Add experimental support for Kafka `4.1.0` ([#889]).
12+
- Add `prometheus.io/path|port|scheme` annotations to metrics service ([#897]).
1213

1314
### Changed
1415

1516
- Deprecate support for Kafka `3.7.2` ([#892]).
17+
- BREAKING: The `<cluster>-<role>-<rolegroup>` rolegroup service was replaced with a `<cluster>-<role>-<rolegroup>-headless`
18+
and `<cluster>-<role>-<rolegroup>-metrics` rolegroup service ([#897]).
1619

1720
[#889]: https://github.com/stackabletech/kafka-operator/pull/889
1821
[#890]: https://github.com/stackabletech/kafka-operator/pull/890
1922
[#892]: https://github.com/stackabletech/kafka-operator/pull/892
23+
[#897]: https://github.com/stackabletech/kafka-operator/pull/897
2024

2125
## [25.7.0] - 2025-07-23
2226

rust/operator-binary/src/config/command.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@ pub fn controller_kafka_container_command(
229229
fn to_listeners(port: u16) -> String {
230230
// The environment variables are set in the statefulset of the controller
231231
format!(
232-
"{listener_name}://$POD_NAME.$ROLEGROUP_REF.$NAMESPACE.svc.$CLUSTER_DOMAIN:{port}",
232+
"{listener_name}://$POD_NAME.$ROLEGROUP_HEADLESS_SERVICE_NAME.$NAMESPACE.svc.$CLUSTER_DOMAIN:{port}",
233233
listener_name = KafkaListenerName::Controller
234234
)
235235
}

rust/operator-binary/src/crd/listener.rs

Lines changed: 50 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@ use std::{
44
};
55

66
use snafu::{OptionExt, Snafu};
7-
use stackable_operator::{kube::ResourceExt, utils::cluster_info::KubernetesClusterInfo};
7+
use stackable_operator::{
8+
kube::ResourceExt, role_utils::RoleGroupRef, utils::cluster_info::KubernetesClusterInfo,
9+
};
810
use strum::{EnumDiscriminants, EnumString};
911

1012
use crate::crd::{STACKABLE_LISTENER_BROKER_DIR, security::KafkaTlsSecurity, v1alpha1};
@@ -170,10 +172,14 @@ impl Display for KafkaListener {
170172
pub fn get_kafka_listener_config(
171173
kafka: &v1alpha1::KafkaCluster,
172174
kafka_security: &KafkaTlsSecurity,
173-
object_name: &str,
175+
rolegroup_ref: &RoleGroupRef<v1alpha1::KafkaCluster>,
174176
cluster_info: &KubernetesClusterInfo,
175177
) -> Result<KafkaListenerConfig, KafkaListenerError> {
176-
let pod_fqdn = pod_fqdn(kafka, object_name, cluster_info)?;
178+
let pod_fqdn = pod_fqdn(
179+
kafka,
180+
&rolegroup_ref.rolegroup_headless_service_name(),
181+
cluster_info,
182+
)?;
177183
let mut listeners = vec![];
178184
let mut advertised_listeners = vec![];
179185
let mut listener_security_protocol_map: BTreeMap<KafkaListenerName, KafkaListenerProtocol> =
@@ -334,12 +340,11 @@ pub fn node_port_cmd(directory: &str, port_name: &str) -> String {
334340

335341
pub fn pod_fqdn(
336342
kafka: &v1alpha1::KafkaCluster,
337-
object_name: &str,
343+
sts_service_name: &str,
338344
cluster_info: &KubernetesClusterInfo,
339345
) -> Result<String, KafkaListenerError> {
340346
Ok(format!(
341-
"$POD_NAME.{object_name}.{namespace}.svc.{cluster_domain}",
342-
object_name = object_name,
347+
"$POD_NAME.{sts_service_name}.{namespace}.svc.{cluster_domain}",
343348
namespace = kafka.namespace().context(ObjectHasNoNamespaceSnafu)?,
344349
cluster_domain = cluster_info.cluster_domain
345350
))
@@ -354,7 +359,7 @@ mod tests {
354359
};
355360

356361
use super::*;
357-
use crate::crd::authentication::ResolvedAuthenticationClasses;
362+
use crate::crd::{authentication::ResolvedAuthenticationClasses, role::KafkaRole};
358363

359364
fn default_cluster_info() -> KubernetesClusterInfo {
360365
KubernetesClusterInfo {
@@ -364,9 +369,6 @@ mod tests {
364369

365370
#[test]
366371
fn test_get_kafka_listeners_config() {
367-
let object_name = "simple-kafka-broker-default";
368-
let cluster_info = default_cluster_info();
369-
370372
let kafka_cluster = r#"
371373
apiVersion: kafka.stackable.tech/v1alpha1
372374
kind: KafkaCluster
@@ -400,9 +402,12 @@ mod tests {
400402
"internalTls".to_string(),
401403
Some("tls".to_string()),
402404
);
403-
405+
let cluster_info = default_cluster_info();
406+
// "simple-kafka-broker-default"
407+
let rolegroup_ref = kafka.rolegroup_ref(&KafkaRole::Broker, "default");
404408
let config =
405-
get_kafka_listener_config(&kafka, &kafka_security, object_name, &cluster_info).unwrap();
409+
get_kafka_listener_config(&kafka, &kafka_security, &rolegroup_ref, &cluster_info)
410+
.unwrap();
406411

407412
assert_eq!(
408413
config.listeners(),
@@ -428,7 +433,12 @@ mod tests {
428433
kafka_security.client_port_name()
429434
),
430435
internal_name = KafkaListenerName::Internal,
431-
internal_host = pod_fqdn(&kafka, object_name, &cluster_info).unwrap(),
436+
internal_host = pod_fqdn(
437+
&kafka,
438+
&rolegroup_ref.rolegroup_headless_service_name(),
439+
&cluster_info
440+
)
441+
.unwrap(),
432442
internal_port = kafka_security.internal_port(),
433443
)
434444
);
@@ -454,7 +464,8 @@ mod tests {
454464
Some("tls".to_string()),
455465
);
456466
let config =
457-
get_kafka_listener_config(&kafka, &kafka_security, object_name, &cluster_info).unwrap();
467+
get_kafka_listener_config(&kafka, &kafka_security, &rolegroup_ref, &cluster_info)
468+
.unwrap();
458469

459470
assert_eq!(
460471
config.listeners(),
@@ -480,7 +491,12 @@ mod tests {
480491
kafka_security.client_port_name()
481492
),
482493
internal_name = KafkaListenerName::Internal,
483-
internal_host = pod_fqdn(&kafka, object_name, &cluster_info).unwrap(),
494+
internal_host = pod_fqdn(
495+
&kafka,
496+
&rolegroup_ref.rolegroup_headless_service_name(),
497+
&cluster_info
498+
)
499+
.unwrap(),
484500
internal_port = kafka_security.internal_port(),
485501
)
486502
);
@@ -505,7 +521,8 @@ mod tests {
505521
);
506522

507523
let config =
508-
get_kafka_listener_config(&kafka, &kafka_security, object_name, &cluster_info).unwrap();
524+
get_kafka_listener_config(&kafka, &kafka_security, &rolegroup_ref, &cluster_info)
525+
.unwrap();
509526

510527
assert_eq!(
511528
config.listeners(),
@@ -531,7 +548,12 @@ mod tests {
531548
kafka_security.client_port_name()
532549
),
533550
internal_name = KafkaListenerName::Internal,
534-
internal_host = pod_fqdn(&kafka, object_name, &cluster_info).unwrap(),
551+
internal_host = pod_fqdn(
552+
&kafka,
553+
&rolegroup_ref.rolegroup_headless_service_name(),
554+
&cluster_info
555+
)
556+
.unwrap(),
535557
internal_port = kafka_security.internal_port(),
536558
)
537559
);
@@ -552,9 +574,6 @@ mod tests {
552574

553575
#[test]
554576
fn test_get_kafka_kerberos_listeners_config() {
555-
let object_name = "simple-kafka-broker-default";
556-
let cluster_info = default_cluster_info();
557-
558577
let kafka_cluster = r#"
559578
apiVersion: kafka.stackable.tech/v1alpha1
560579
kind: KafkaCluster
@@ -587,9 +606,12 @@ mod tests {
587606
"tls".to_string(),
588607
Some("tls".to_string()),
589608
);
590-
609+
let cluster_info = default_cluster_info();
610+
// "simple-kafka-broker-default"
611+
let rolegroup_ref = kafka.rolegroup_ref(&KafkaRole::Broker, "default");
591612
let config =
592-
get_kafka_listener_config(&kafka, &kafka_security, object_name, &cluster_info).unwrap();
613+
get_kafka_listener_config(&kafka, &kafka_security, &rolegroup_ref, &cluster_info)
614+
.unwrap();
593615

594616
assert_eq!(
595617
config.listeners(),
@@ -618,7 +640,12 @@ mod tests {
618640
kafka_security.client_port_name()
619641
),
620642
internal_name = KafkaListenerName::Internal,
621-
internal_host = pod_fqdn(&kafka, object_name, &cluster_info).unwrap(),
643+
internal_host = pod_fqdn(
644+
&kafka,
645+
&rolegroup_ref.rolegroup_headless_service_name(),
646+
&cluster_info
647+
)
648+
.unwrap(),
622649
internal_port = kafka_security.internal_port(),
623650
bootstrap_name = KafkaListenerName::Bootstrap,
624651
bootstrap_host = node_address_cmd(STACKABLE_LISTENER_BROKER_DIR),

rust/operator-binary/src/crd/mod.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,9 @@ impl v1alpha1::KafkaCluster {
294294
for replica in 0..replicas {
295295
pod_descriptors.push(KafkaPodDescriptor {
296296
namespace: namespace.clone(),
297-
role_group_service_name: rolegroup_ref.object_name(),
297+
role_group_service_name: rolegroup_ref
298+
.rolegroup_headless_service_name(),
299+
role_group_statefulset_name: rolegroup_ref.object_name(),
298300
replica,
299301
cluster_domain: cluster_info.cluster_domain.clone(),
300302
node_id: node_id_hash_offset + u32::from(replica),
@@ -341,6 +343,7 @@ impl v1alpha1::KafkaCluster {
341343
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
342344
pub struct KafkaPodDescriptor {
343345
namespace: String,
346+
role_group_statefulset_name: String,
344347
role_group_service_name: String,
345348
replica: u16,
346349
cluster_domain: DomainName,
@@ -361,7 +364,7 @@ impl KafkaPodDescriptor {
361364
}
362365

363366
pub fn pod_name(&self) -> String {
364-
format!("{}-{}", self.role_group_service_name, self.replica)
367+
format!("{}-{}", self.role_group_statefulset_name, self.replica)
365368
}
366369

367370
/// Build the Kraft voter String

rust/operator-binary/src/kafka_controller.rs

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ use crate::{
4949
resource::{
5050
configmap::build_rolegroup_config_map,
5151
listener::build_broker_rolegroup_bootstrap_listener,
52-
service::build_rolegroup_service,
52+
service::{build_rolegroup_headless_service, build_rolegroup_metrics_service},
5353
statefulset::{build_broker_rolegroup_statefulset, build_controller_rolegroup_statefulset},
5454
},
5555
};
@@ -347,8 +347,16 @@ pub async fn reconcile_kafka(
347347
.merged_config(kafka, &rolegroup_ref.role_group)
348348
.context(FailedToResolveConfigSnafu)?;
349349

350-
let rg_service =
351-
build_rolegroup_service(kafka, &resolved_product_image, &rolegroup_ref)
350+
let rg_headless_service = build_rolegroup_headless_service(
351+
kafka,
352+
&resolved_product_image,
353+
&rolegroup_ref,
354+
&kafka_security,
355+
)
356+
.context(BuildServiceSnafu)?;
357+
358+
let rg_metrics_service =
359+
build_rolegroup_metrics_service(kafka, &resolved_product_image, &rolegroup_ref)
352360
.context(BuildServiceSnafu)?;
353361

354362
let rg_configmap = build_rolegroup_config_map(
@@ -407,7 +415,13 @@ pub async fn reconcile_kafka(
407415
}
408416

409417
cluster_resources
410-
.add(client, rg_service)
418+
.add(client, rg_headless_service)
419+
.await
420+
.with_context(|_| ApplyRoleGroupServiceSnafu {
421+
rolegroup: rolegroup_ref.clone(),
422+
})?;
423+
cluster_resources
424+
.add(client, rg_metrics_service)
411425
.await
412426
.with_context(|_| ApplyRoleGroupServiceSnafu {
413427
rolegroup: rolegroup_ref.clone(),

0 commit comments

Comments
 (0)