Skip to content

Commit 238dbd8

Browse files
committed
feat: Move ZK ports to headless service
1 parent 69bda89 commit 238dbd8

File tree

9 files changed

+236
-62
lines changed

9 files changed

+236
-62
lines changed

rust/operator-binary/src/crd/mod.rs

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,11 @@ use stackable_operator::{
3434
};
3535
use strum::{Display, EnumIter, EnumString, IntoEnumIterator};
3636

37-
use crate::crd::{affinity::get_affinity, v1alpha1::ZookeeperServerRoleConfig};
37+
use crate::{
38+
crd::{affinity::get_affinity, v1alpha1::ZookeeperServerRoleConfig},
39+
discovery::build_role_group_headless_service_name,
40+
listener::role_listener_name,
41+
};
3842

3943
pub mod affinity;
4044
pub mod authentication;
@@ -47,6 +51,9 @@ pub const OPERATOR_NAME: &str = "zookeeper.stackable.tech";
4751
pub const ZOOKEEPER_PROPERTIES_FILE: &str = "zoo.cfg";
4852
pub const JVM_SECURITY_PROPERTIES_FILE: &str = "security.properties";
4953

54+
pub const ZOOKEEPER_LEADER_PORT: u16 = 2888;
55+
pub const ZOOKEEPER_ELECTION_PORT: u16 = 3888;
56+
5057
pub const METRICS_PORT_NAME: &str = "metrics";
5158
pub const METRICS_PORT: u16 = 9505;
5259

@@ -335,7 +342,7 @@ pub enum ZookeeperRole {
335342
/// Used for service discovery.
336343
pub struct ZookeeperPodRef {
337344
pub namespace: String,
338-
pub role_group_service_name: String,
345+
pub role_group_headless_service_name: String,
339346
pub pod_name: String,
340347
pub zookeeper_myid: u16,
341348
}
@@ -488,12 +495,11 @@ impl HasStatusCondition for v1alpha1::ZookeeperCluster {
488495
}
489496

490497
impl ZookeeperPodRef {
491-
// TODO (@NickLarsenNZ): What to do here?
492-
pub fn fqdn(&self, cluster_info: &KubernetesClusterInfo) -> String {
498+
pub fn internal_fqdn(&self, cluster_info: &KubernetesClusterInfo) -> String {
493499
format!(
494500
"{pod_name}.{service_name}.{namespace}.svc.{cluster_domain}",
495501
pod_name = self.pod_name,
496-
service_name = self.role_group_service_name,
502+
service_name = self.role_group_headless_service_name,
497503
namespace = self.namespace,
498504
cluster_domain = cluster_info.cluster_domain
499505
)
@@ -524,13 +530,6 @@ impl v1alpha1::ZookeeperCluster {
524530
}
525531
}
526532

527-
/// The name of the role-level [Listener]
528-
///
529-
/// [Listener]: stackable_operator::crd::listener::v1alpha1::Listener
530-
pub fn server_role_listener_name(&self) -> Option<String> {
531-
self.metadata.name.clone()
532-
}
533-
534533
/// The fully-qualified domain name of the role-level [Listener]
535534
///
536535
/// [Listener]: stackable_operator::crd::listener::v1alpha1::Listener
@@ -540,7 +539,7 @@ impl v1alpha1::ZookeeperCluster {
540539
) -> Option<String> {
541540
Some(format!(
542541
"{role_listener_name}.{namespace}.svc.{cluster_domain}",
543-
role_listener_name = self.server_role_listener_name()?,
542+
role_listener_name = role_listener_name(self, &ZookeeperRole::Server),
544543
namespace = self.metadata.namespace.as_ref()?,
545544
cluster_domain = cluster_info.cluster_domain
546545
))
@@ -626,8 +625,10 @@ impl v1alpha1::ZookeeperCluster {
626625
for i in 0..rolegroup.replicas.unwrap_or(1) {
627626
pod_refs.push(ZookeeperPodRef {
628627
namespace: ns.clone(),
629-
role_group_service_name: rolegroup_ref.object_name(),
630-
pod_name: format!("{}-{}", rolegroup_ref.object_name(), i),
628+
role_group_headless_service_name: build_role_group_headless_service_name(
629+
rolegroup_ref.object_name(),
630+
),
631+
pod_name: format!("{role_group}-{i}", role_group = rolegroup_ref.object_name()),
631632
zookeeper_myid: i + myid_offset,
632633
});
633634
}

rust/operator-binary/src/crd/security.rs

Lines changed: 37 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,13 @@ use stackable_operator::{
2525
time::Duration,
2626
};
2727

28-
use crate::crd::{authentication, authentication::ResolvedAuthenticationClasses, tls, v1alpha1};
28+
use crate::{
29+
crd::{
30+
authentication::{self, ResolvedAuthenticationClasses},
31+
tls, v1alpha1,
32+
},
33+
zk_controller::LISTENER_VOLUME_NAME,
34+
};
2935

3036
type Result<T, E = Error> = std::result::Result<T, E>;
3137

@@ -158,7 +164,7 @@ impl ZookeeperSecurity {
158164
.add_volume_mount(tls_volume_name, Self::SERVER_TLS_DIR)
159165
.context(AddVolumeMountSnafu)?;
160166
pod_builder
161-
.add_volume(Self::create_tls_volume(
167+
.add_volume(Self::create_server_tls_volume(
162168
tls_volume_name,
163169
secret_class,
164170
requested_secret_lifetime,
@@ -172,7 +178,7 @@ impl ZookeeperSecurity {
172178
.add_volume_mount(tls_volume_name, Self::QUORUM_TLS_DIR)
173179
.context(AddVolumeMountSnafu)?;
174180
pod_builder
175-
.add_volume(Self::create_tls_volume(
181+
.add_volume(Self::create_quorum_tls_volume(
176182
tls_volume_name,
177183
&self.quorum_secret_class,
178184
requested_secret_lifetime,
@@ -298,8 +304,34 @@ impl ZookeeperSecurity {
298304
.or(self.server_secret_class.as_ref())
299305
}
300306

301-
/// Creates ephemeral volumes to mount the `SecretClass` into the Pods
302-
fn create_tls_volume(
307+
/// Creates ephemeral volumes to mount the `SecretClass` with the listener-volume scope into the Pods.
308+
///
309+
/// The resulting volume will contain TLS certificates with the FQDN reported in the applicable [ListenerStatus].
310+
///
311+
/// [ListenerStatus]: ::stackable_operator::crd::listener::v1alpha1::ListenerStatus
312+
fn create_server_tls_volume(
313+
volume_name: &str,
314+
secret_class_name: &str,
315+
requested_secret_lifetime: &Duration,
316+
) -> Result<Volume> {
317+
let volume = VolumeBuilder::new(volume_name)
318+
.ephemeral(
319+
SecretOperatorVolumeSourceBuilder::new(secret_class_name)
320+
.with_listener_volume_scope(LISTENER_VOLUME_NAME)
321+
.with_format(SecretFormat::TlsPkcs12)
322+
.with_auto_tls_cert_lifetime(*requested_secret_lifetime)
323+
.build()
324+
.context(BuildTlsVolumeSnafu { volume_name })?,
325+
)
326+
.build();
327+
328+
Ok(volume)
329+
}
330+
331+
/// Creates ephemeral volumes to mount the `SecretClass` with the pod scope into the Pods.
332+
///
333+
/// The resulting volume will contain TLS certificates with the FQDN of the Pod in relation to the StatefulSet's headless service.
334+
fn create_quorum_tls_volume(
303335
volume_name: &str,
304336
secret_class_name: &str,
305337
requested_secret_lifetime: &Duration,
@@ -308,7 +340,6 @@ impl ZookeeperSecurity {
308340
.ephemeral(
309341
SecretOperatorVolumeSourceBuilder::new(secret_class_name)
310342
.with_pod_scope()
311-
.with_node_scope()
312343
.with_format(SecretFormat::TlsPkcs12)
313344
.with_auto_tls_cert_lifetime(*requested_secret_lifetime)
314345
.build()

rust/operator-binary/src/discovery.rs

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ pub enum Error {
4646
},
4747

4848
#[snafu(display("{listener} has no ingress addresses"))]
49-
NoListnerAddresses {
49+
NoListenerIngressAddresses {
5050
listener: ObjectRef<listener::v1alpha1::Listener>,
5151
},
5252

@@ -157,8 +157,7 @@ fn listener_addresses(
157157
.status
158158
.as_ref()
159159
.and_then(|listener_status| listener_status.ingress_addresses.as_ref())
160-
// TODO (@NickLarsenNZ): Rename error variant
161-
.context(NoListnerAddressesSnafu { listener })?
160+
.context(NoListenerIngressAddressesSnafu { listener })?
162161
.iter()
163162
// Filter the addresses that have the port we are interested in (they likely all have it though)
164163
.filter_map(|listener_ingress| {
@@ -185,7 +184,13 @@ fn listener_addresses(
185184
}
186185

187186
// TODO (@NickLarsenNZ): Implement this directly on RoleGroupRef, ie:
188-
// RoleGroupRef<K: Resource>::metrics_service_name(&self)
189-
pub fn build_headless_role_group_metrics_service_name(name: String) -> String {
187+
// RoleGroupRef<K: Resource>::headless_service_name(&self) to restrict what _name_ can be.
188+
pub fn build_role_group_headless_service_name(name: String) -> String {
189+
format!("{name}-headless")
190+
}
191+
192+
// TODO (@NickLarsenNZ): Implement this directly on RoleGroupRef, ie:
193+
// RoleGroupRef<K: Resource>::metrics_service_name(&self) to restrict what _name_ can be.
194+
pub fn build_role_group_metrics_service_name(name: String) -> String {
190195
format!("{name}-metrics")
191196
}

rust/operator-binary/src/listener.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ pub fn build_role_listener(
7373
}
7474

7575
// TODO (@NickLarsenNZ): This could be a method we can put on a Resource that takes a role_name
76-
fn role_listener_name(zk: &v1alpha1::ZookeeperCluster, zk_role: &ZookeeperRole) -> String {
76+
pub fn role_listener_name(zk: &v1alpha1::ZookeeperCluster, zk_role: &ZookeeperRole) -> String {
7777
// TODO (@NickLarsenNZ): Decide on a convention: do we use name_any() and allow an empty string, use metadata.name.expect(), or handle the error?
7878
format!("{zk}-{zk_role}", zk = zk.name_any())
7979
}

rust/operator-binary/src/zk_controller.rs

Lines changed: 87 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -79,12 +79,15 @@ use crate::{
7979
DOCKER_IMAGE_BASE_NAME, JVM_SECURITY_PROPERTIES_FILE, MAX_PREPARE_LOG_FILE_SIZE,
8080
MAX_ZK_LOG_FILES_SIZE, METRICS_PORT, METRICS_PORT_NAME, STACKABLE_CONFIG_DIR,
8181
STACKABLE_DATA_DIR, STACKABLE_LOG_CONFIG_DIR, STACKABLE_LOG_DIR, STACKABLE_RW_CONFIG_DIR,
82-
ZOOKEEPER_PROPERTIES_FILE, ZookeeperRole,
82+
ZOOKEEPER_ELECTION_PORT, ZOOKEEPER_LEADER_PORT, ZOOKEEPER_PROPERTIES_FILE, ZookeeperRole,
8383
security::{self, ZookeeperSecurity},
8484
v1alpha1::{self, ZookeeperServerRoleConfig},
8585
},
86-
discovery::{self, build_discovery_configmap, build_headless_role_group_metrics_service_name},
87-
listener::build_role_listener,
86+
discovery::{
87+
self, build_discovery_configmap, build_role_group_headless_service_name,
88+
build_role_group_metrics_service_name,
89+
},
90+
listener::{build_role_listener, role_listener_name},
8891
operations::{graceful_shutdown::add_graceful_shutdown_config, pdb::add_pdbs},
8992
product_logging::extend_role_group_config_map,
9093
utils::build_recommended_labels,
@@ -430,7 +433,10 @@ pub async fn reconcile_zk(
430433
.merged_config(&ZookeeperRole::Server, &rolegroup)
431434
.context(FailedToResolveConfigSnafu)?;
432435

433-
let rg_service = build_server_rolegroup_service(zk, &rolegroup, &resolved_product_image)?;
436+
let rg_headless_service =
437+
build_server_rolegroup_headless_service(zk, &rolegroup, &resolved_product_image)?;
438+
let rg_metrics_service =
439+
build_server_rolegroup_metrics_service(zk, &rolegroup, &resolved_product_image)?;
434440
let rg_configmap = build_server_rolegroup_config_map(
435441
zk,
436442
&rolegroup,
@@ -451,7 +457,13 @@ pub async fn reconcile_zk(
451457
)?;
452458

453459
cluster_resources
454-
.add(client, rg_service)
460+
.add(client, rg_headless_service)
461+
.await
462+
.with_context(|_| ApplyRoleGroupServiceSnafu {
463+
rolegroup: rolegroup.clone(),
464+
})?;
465+
cluster_resources
466+
.add(client, rg_metrics_service)
455467
.await
456468
.with_context(|_| ApplyRoleGroupServiceSnafu {
457469
rolegroup: rolegroup.clone(),
@@ -556,11 +568,11 @@ fn build_server_rolegroup_config_map(
556568
.flatten()
557569
.map(|pod| {
558570
(
559-
format!("server.{}", pod.zookeeper_myid),
571+
format!("server.{id}", id = pod.zookeeper_myid),
560572
format!(
561-
"{}:2888:3888;{}",
562-
pod.fqdn(cluster_info),
563-
zookeeper_security.client_port()
573+
"{internal_fqdn}:{ZOOKEEPER_LEADER_PORT}:{ZOOKEEPER_ELECTION_PORT};{client_port}",
574+
internal_fqdn = pod.internal_fqdn(cluster_info),
575+
client_port = zookeeper_security.client_port()
564576
),
565577
)
566578
})
@@ -642,10 +654,69 @@ fn build_server_rolegroup_config_map(
642654
})
643655
}
644656

645-
/// The rolegroup [`Service`] is a headless service that allows direct access to the instances of a certain rolegroup
657+
/// The rolegroup [`Service`] is a headless service that allows internal access to the instances of a certain rolegroup
646658
///
647659
/// This is used for internal communication between peers: it only exposes the ZooKeeper leader and election ports.
648-
fn build_server_rolegroup_service(
660+
fn build_server_rolegroup_headless_service(
661+
zk: &v1alpha1::ZookeeperCluster,
662+
rolegroup: &RoleGroupRef<v1alpha1::ZookeeperCluster>,
663+
resolved_product_image: &ResolvedProductImage,
664+
) -> Result<Service> {
665+
let metadata = ObjectMetaBuilder::new()
666+
.name_and_namespace(zk)
667+
.name(build_role_group_headless_service_name(
668+
rolegroup.object_name(),
669+
))
670+
.ownerreference_from_resource(zk, None, Some(true))
671+
.context(ObjectMissingMetadataForOwnerRefSnafu)?
672+
.with_recommended_labels(build_recommended_labels(
673+
zk,
674+
ZK_CONTROLLER_NAME,
675+
&resolved_product_image.app_version_label,
676+
&rolegroup.role,
677+
&rolegroup.role_group,
678+
))
679+
.context(ObjectMetaSnafu)?
680+
.build();
681+
682+
let service_selector_labels =
683+
Labels::role_group_selector(zk, APP_NAME, &rolegroup.role, &rolegroup.role_group)
684+
.context(BuildLabelSnafu)?;
685+
686+
let service_spec = ServiceSpec {
687+
// Internal communication does not need to be exposed
688+
type_: Some("ClusterIP".to_string()),
689+
cluster_ip: Some("None".to_string()),
690+
ports: Some(vec![
691+
ServicePort {
692+
// TODO (@NickLarsenNZ): Use a const
693+
name: Some("zk-leader".to_string()),
694+
port: ZOOKEEPER_LEADER_PORT as i32,
695+
protocol: Some("TCP".to_string()),
696+
..ServicePort::default()
697+
},
698+
ServicePort {
699+
// TODO (@NickLarsenNZ): Use a const
700+
name: Some("zk-election".to_string()),
701+
port: ZOOKEEPER_ELECTION_PORT as i32,
702+
protocol: Some("TCP".to_string()),
703+
..ServicePort::default()
704+
},
705+
]),
706+
selector: Some(service_selector_labels.into()),
707+
publish_not_ready_addresses: Some(true),
708+
..ServiceSpec::default()
709+
};
710+
711+
Ok(Service {
712+
metadata,
713+
spec: Some(service_spec),
714+
status: None,
715+
})
716+
}
717+
718+
/// The rolegroup [`Service`] for exposing metrics
719+
fn build_server_rolegroup_metrics_service(
649720
zk: &v1alpha1::ZookeeperCluster,
650721
rolegroup: &RoleGroupRef<v1alpha1::ZookeeperCluster>,
651722
resolved_product_image: &ResolvedProductImage,
@@ -655,7 +726,7 @@ fn build_server_rolegroup_service(
655726

656727
let metadata = ObjectMetaBuilder::new()
657728
.name_and_namespace(zk)
658-
.name(build_headless_role_group_metrics_service_name(
729+
.name(build_role_group_metrics_service_name(
659730
rolegroup.object_name(),
660731
))
661732
.ownerreference_from_resource(zk, None, Some(true))
@@ -767,8 +838,7 @@ fn build_server_rolegroup_statefulset(
767838
// .context(LabelBuildSnafu)?;
768839

769840
let listener_pvc = build_role_listener_pvc(
770-
&zk.server_role_listener_name()
771-
.expect("todo: get role from zk_role"),
841+
&role_listener_name(zk, &ZookeeperRole::Server),
772842
&unversioned_recommended_labels,
773843
)?;
774844

@@ -901,8 +971,8 @@ fn build_server_rolegroup_statefulset(
901971
..Probe::default()
902972
})
903973
.add_container_port("zk", zookeeper_security.client_port().into())
904-
.add_container_port("zk-leader", 2888)
905-
.add_container_port("zk-election", 3888)
974+
.add_container_port("zk-leader", ZOOKEEPER_LEADER_PORT as i32)
975+
.add_container_port("zk-election", ZOOKEEPER_ELECTION_PORT as i32)
906976
.add_container_port("metrics", 9505)
907977
.add_volume_mount("data", STACKABLE_DATA_DIR)
908978
.context(AddVolumeMountSnafu)?
@@ -1058,7 +1128,7 @@ fn build_server_rolegroup_statefulset(
10581128
match_labels: Some(statefulset_match_labels.into()),
10591129
..LabelSelector::default()
10601130
},
1061-
service_name: Some(build_headless_role_group_metrics_service_name(
1131+
service_name: Some(build_role_group_headless_service_name(
10621132
rolegroup_ref.object_name(),
10631133
)),
10641134
template: pod_template,

0 commit comments

Comments
 (0)