-
-
Notifications
You must be signed in to change notification settings - Fork 4
chore: ensure metrics are correctly exposed #701
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -69,16 +69,20 @@ pub const HBASE_UI_PORT_NAME_HTTP: &str = "ui-http"; | |
pub const HBASE_UI_PORT_NAME_HTTPS: &str = "ui-https"; | ||
pub const HBASE_REST_PORT_NAME_HTTP: &str = "rest-http"; | ||
pub const HBASE_REST_PORT_NAME_HTTPS: &str = "rest-https"; | ||
pub const HBASE_METRICS_PORT_NAME: &str = "metrics"; | ||
|
||
pub const HBASE_MASTER_PORT: u16 = 16000; | ||
// HBase always uses 16010, regardless of http or https. On 2024-01-17 we decided in Arch-meeting that we want to stick | ||
// the port numbers to what the product is doing, so we get the least surprise for users - even when this means we have | ||
// inconsistency between Stackable products. | ||
pub const HBASE_MASTER_UI_PORT: u16 = 16010; | ||
pub const HBASE_MASTER_METRICS_PORT: u16 = 16010; | ||
pub const HBASE_REGIONSERVER_PORT: u16 = 16020; | ||
pub const HBASE_REGIONSERVER_UI_PORT: u16 = 16030; | ||
pub const HBASE_REGIONSERVER_METRICS_PORT: u16 = 16030; | ||
pub const HBASE_REST_PORT: u16 = 8080; | ||
pub const HBASE_REST_UI_PORT: u16 = 8085; | ||
pub const HBASE_REST_METRICS_PORT: u16 = 8085; | ||
pub const LISTENER_VOLUME_NAME: &str = "listener"; | ||
pub const LISTENER_VOLUME_DIR: &str = "/stackable/listener"; | ||
|
||
|
@@ -542,6 +546,24 @@ impl v1alpha1::HbaseCluster { | |
} | ||
} | ||
|
||
/// Returns required metrics port name and metrics port number tuples depending on the role. | ||
/// The metrics are available over the UI port. | ||
pub fn metrics_ports(&self, role: &HbaseRole) -> Vec<(String, u16)> { | ||
match role { | ||
HbaseRole::Master => vec![( | ||
HBASE_METRICS_PORT_NAME.to_string(), | ||
HBASE_MASTER_METRICS_PORT, | ||
)], | ||
HbaseRole::RegionServer => vec![( | ||
HBASE_METRICS_PORT_NAME.to_string(), | ||
HBASE_REGIONSERVER_METRICS_PORT, | ||
)], | ||
HbaseRole::RestServer => { | ||
vec![(HBASE_METRICS_PORT_NAME.to_string(), HBASE_REST_METRICS_PORT)] | ||
} | ||
} | ||
} | ||
Comment on lines
+551
to
+565
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. note: The mapping of role to metrics port exists in two places: Here and below at line 575 which inevitably contains the risk to drift apart. I think it makes sense to contain this mapping in a single place instead. One possible solution is to drop the associated |
||
|
||
pub fn service_port(&self, role: &HbaseRole) -> u16 { | ||
match role { | ||
HbaseRole::Master => HBASE_MASTER_PORT, | ||
|
@@ -550,6 +572,14 @@ impl v1alpha1::HbaseCluster { | |
} | ||
} | ||
|
||
pub fn metrics_port(&self, role: &HbaseRole) -> u16 { | ||
match role { | ||
HbaseRole::Master => HBASE_MASTER_METRICS_PORT, | ||
HbaseRole::RegionServer => HBASE_REGIONSERVER_METRICS_PORT, | ||
HbaseRole::RestServer => HBASE_REST_METRICS_PORT, | ||
} | ||
} | ||
|
||
/// Name of the port used by the Web UI, which depends on HTTPS usage | ||
pub fn ui_port_name(&self) -> String { | ||
if self.has_https_enabled() { | ||
|
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
|
@@ -45,7 +45,7 @@ use stackable_operator::{ | |||||||||
core::{DeserializeGuard, error_boundary}, | ||||||||||
runtime::controller::Action, | ||||||||||
}, | ||||||||||
kvp::{Label, LabelError, Labels, ObjectLabels}, | ||||||||||
kvp::{Annotations, Label, LabelError, Labels, ObjectLabels}, | ||||||||||
logging::controller::ReconcilerError, | ||||||||||
memory::{BinaryMultiple, MemoryQuantity}, | ||||||||||
product_config_utils::{transform_all_roles_to_config, validate_all_roles_and_groups_config}, | ||||||||||
|
@@ -427,6 +427,14 @@ pub async fn reconcile_hbase( | |||||||||
|
||||||||||
let rg_service = | ||||||||||
build_rolegroup_service(hbase, &hbase_role, &rolegroup, &resolved_product_image)?; | ||||||||||
|
||||||||||
let rg_metrics_service = build_rolegroup_metrics_service( | ||||||||||
hbase, | ||||||||||
&hbase_role, | ||||||||||
&rolegroup, | ||||||||||
&resolved_product_image, | ||||||||||
)?; | ||||||||||
|
||||||||||
let rg_configmap = build_rolegroup_config_map( | ||||||||||
hbase, | ||||||||||
&client.kubernetes_cluster_info, | ||||||||||
|
@@ -452,6 +460,12 @@ pub async fn reconcile_hbase( | |||||||||
.with_context(|_| ApplyRoleGroupServiceSnafu { | ||||||||||
rolegroup: rolegroup.clone(), | ||||||||||
})?; | ||||||||||
cluster_resources | ||||||||||
.add(client, rg_metrics_service) | ||||||||||
.await | ||||||||||
.with_context(|_| ApplyRoleGroupServiceSnafu { | ||||||||||
rolegroup: rolegroup.clone(), | ||||||||||
})?; | ||||||||||
cluster_resources | ||||||||||
.add(client, rg_configmap) | ||||||||||
.await | ||||||||||
|
@@ -739,12 +753,9 @@ fn build_rolegroup_service( | |||||||||
}) | ||||||||||
.collect(); | ||||||||||
|
||||||||||
let prometheus_label = | ||||||||||
Label::try_from(("prometheus.io/scrape", "true")).context(BuildLabelSnafu)?; | ||||||||||
|
||||||||||
let metadata = ObjectMetaBuilder::new() | ||||||||||
.name_and_namespace(hbase) | ||||||||||
.name(headless_service_name(&rolegroup.object_name())) | ||||||||||
.name(rolegroup.rolegroup_headless_service_name()) | ||||||||||
.ownerreference_from_resource(hbase, None, Some(true)) | ||||||||||
.context(ObjectMissingMetadataForOwnerRefSnafu)? | ||||||||||
.with_recommended_labels(build_recommended_labels( | ||||||||||
|
@@ -754,7 +765,6 @@ fn build_rolegroup_service( | |||||||||
&rolegroup.role_group, | ||||||||||
)) | ||||||||||
.context(ObjectMetaSnafu)? | ||||||||||
.with_label(prometheus_label) | ||||||||||
.build(); | ||||||||||
|
||||||||||
let service_selector = | ||||||||||
|
@@ -778,6 +788,82 @@ fn build_rolegroup_service( | |||||||||
}) | ||||||||||
} | ||||||||||
|
||||||||||
/// The rolegroup metrics [`Service`] is a service that exposes metrics and a prometheus scraping label. | ||||||||||
pub fn build_rolegroup_metrics_service( | ||||||||||
hbase: &v1alpha1::HbaseCluster, | ||||||||||
hbase_role: &HbaseRole, | ||||||||||
rolegroup: &RoleGroupRef<v1alpha1::HbaseCluster>, | ||||||||||
resolved_product_image: &ResolvedProductImage, | ||||||||||
) -> Result<Service, Error> { | ||||||||||
let ports = hbase | ||||||||||
.metrics_ports(hbase_role) | ||||||||||
.into_iter() | ||||||||||
.map(|(name, value)| ServicePort { | ||||||||||
name: Some(name), | ||||||||||
port: i32::from(value), | ||||||||||
protocol: Some("TCP".to_string()), | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. suggestion: Use
Suggested change
|
||||||||||
..ServicePort::default() | ||||||||||
}) | ||||||||||
.collect(); | ||||||||||
|
||||||||||
let service_selector = | ||||||||||
Labels::role_group_selector(hbase, APP_NAME, &rolegroup.role, &rolegroup.role_group) | ||||||||||
.context(BuildLabelSnafu)?; | ||||||||||
|
||||||||||
Ok(Service { | ||||||||||
metadata: ObjectMetaBuilder::new() | ||||||||||
.name_and_namespace(hbase) | ||||||||||
.name(rolegroup.rolegroup_metrics_service_name()) | ||||||||||
.ownerreference_from_resource(hbase, None, Some(true)) | ||||||||||
.context(ObjectMissingMetadataForOwnerRefSnafu)? | ||||||||||
.with_recommended_labels(build_recommended_labels( | ||||||||||
hbase, | ||||||||||
&resolved_product_image.app_version_label_value, | ||||||||||
&rolegroup.role, | ||||||||||
&rolegroup.role_group, | ||||||||||
)) | ||||||||||
.context(ObjectMetaSnafu)? | ||||||||||
.with_label(Label::try_from(("prometheus.io/scrape", "true")).context(LabelBuildSnafu)?) | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. question: Does it make sense to pull this key out into a constant? Or make it an associated function on |
||||||||||
.with_annotations(prometheus_annotations(hbase, hbase_role)) | ||||||||||
.build(), | ||||||||||
spec: Some(ServiceSpec { | ||||||||||
// Internal communication does not need to be exposed | ||||||||||
type_: Some("ClusterIP".to_string()), | ||||||||||
cluster_ip: Some("None".to_string()), | ||||||||||
Comment on lines
+831
to
+832
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. suggestion: Use
Suggested change
|
||||||||||
ports: Some(ports), | ||||||||||
selector: Some(service_selector.into()), | ||||||||||
publish_not_ready_addresses: Some(true), | ||||||||||
..ServiceSpec::default() | ||||||||||
}), | ||||||||||
status: None, | ||||||||||
}) | ||||||||||
} | ||||||||||
|
||||||||||
/// Common annotations for Prometheus | ||||||||||
/// | ||||||||||
/// These annotations can be used in a ServiceMonitor. | ||||||||||
/// | ||||||||||
/// see also <https://github.com/prometheus-community/helm-charts/blob/prometheus-27.32.0/charts/prometheus/values.yaml#L983-L1036> | ||||||||||
fn prometheus_annotations(hbase: &v1alpha1::HbaseCluster, hbase_role: &HbaseRole) -> Annotations { | ||||||||||
Annotations::try_from([ | ||||||||||
("prometheus.io/path".to_owned(), "/prometheus".to_owned()), | ||||||||||
( | ||||||||||
"prometheus.io/port".to_owned(), | ||||||||||
hbase.metrics_port(hbase_role).to_string(), | ||||||||||
), | ||||||||||
( | ||||||||||
"prometheus.io/scheme".to_owned(), | ||||||||||
if hbase.has_https_enabled() { | ||||||||||
"https".to_owned() | ||||||||||
} else { | ||||||||||
"http".to_owned() | ||||||||||
}, | ||||||||||
), | ||||||||||
("prometheus.io/scrape".to_owned(), "true".to_owned()), | ||||||||||
]) | ||||||||||
.expect("should be valid annotations") | ||||||||||
} | ||||||||||
Comment on lines
+847
to
+865
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. question: Similar question to above - does it make sense to pull these out into contants or associated function on |
||||||||||
|
||||||||||
/// The rolegroup [`StatefulSet`] runs the rolegroup, as configured by the administrator. | ||||||||||
/// | ||||||||||
/// The [`Pod`](`stackable_operator::k8s_openapi::api::core::v1::Pod`)s are accessible through the corresponding [`Service`] (from [`build_rolegroup_service`]). | ||||||||||
|
@@ -1088,7 +1174,7 @@ fn build_rolegroup_statefulset( | |||||||||
match_labels: Some(statefulset_match_labels.into()), | ||||||||||
..LabelSelector::default() | ||||||||||
}, | ||||||||||
service_name: Some(headless_service_name(&rolegroup_ref.object_name())), | ||||||||||
service_name: Some(rolegroup_ref.rolegroup_headless_service_name()), | ||||||||||
template: pod_template, | ||||||||||
volume_claim_templates: listener_pvc, | ||||||||||
..StatefulSetSpec::default() | ||||||||||
|
@@ -1198,10 +1284,6 @@ fn build_hbase_env_sh( | |||||||||
Ok(result) | ||||||||||
} | ||||||||||
|
||||||||||
fn headless_service_name(role_group_name: &str) -> String { | ||||||||||
format!("{name}-headless", name = role_group_name) | ||||||||||
} | ||||||||||
|
||||||||||
#[cfg(test)] | ||||||||||
mod test { | ||||||||||
use rstest::rstest; | ||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
suggestion: Use
to_owned
instead.