99
1010from mgr_module import HandleCommandResult
1111from .service_registry import register_cephadm_service
12+ from cephadm .services .service_registry import service_registry
1213
1314from orchestrator import DaemonDescription
1415from ceph .deployment .service_spec import AlertManagerSpec , GrafanaSpec , ServiceSpec , \
2425logger = logging .getLogger (__name__ )
2526
2627
def get_field_from_spec(spec: ServiceSpec, attr: str, default: Any) -> Any:
    """Return ``spec.<attr>``, falling back to ``default``.

    ``default`` is returned both when ``spec`` has no attribute named
    ``attr`` and when the attribute exists but holds a falsy value
    (``None``, ``''``, ``0``, an empty container), so an unset field is
    treated the same way as a missing one.

    Args:
        spec: the service spec to read from
        attr: name of the attribute to look up on ``spec``
        default: value to use when the attribute is missing or falsy

    Returns:
        The attribute value if it is truthy, otherwise ``default``.
    """
    # getattr's third argument covers the missing-attribute case; the
    # trailing `or` covers the present-but-falsy case.
    return getattr(spec, attr, default) or default
34+
35+
2736@register_cephadm_service
2837class GrafanaService (CephadmService ):
2938 TYPE = 'grafana'
@@ -484,6 +493,14 @@ class PrometheusService(CephadmService):
484493 USER_CFG_KEY = 'prometheus/web_user'
485494 PASS_CFG_KEY = 'prometheus/web_password'
486495
def prepare_create(
    self,
    daemon_spec: CephadmDaemonDeploySpec,
) -> CephadmDaemonDeploySpec:
    """Populate ``daemon_spec`` with its generated config and dependencies.

    The spec is updated in place (``final_config`` and ``deps`` are set from
    the result of ``generate_config``) and the same object is returned.
    """
    # Guard against being handed a spec that belongs to another daemon type.
    assert self.TYPE == daemon_spec.daemon_type
    daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
    return daemon_spec
503+
487504 def config (self , spec : ServiceSpec ) -> None :
488505 # make sure module is enabled
489506 mgr_map = self .mgr .get ('mgr_map' )
@@ -501,70 +518,71 @@ def get_prometheus_certificates(self, daemon_spec: CephadmDaemonDeploySpec) -> T
501518 cert , key = self .mgr .cert_mgr .generate_cert ([host_fqdn , 'prometheus_servers' ], node_ip )
502519 return cert , key
503520
504- def prepare_create (
505- self ,
506- daemon_spec : CephadmDaemonDeploySpec ,
507- ) -> CephadmDaemonDeploySpec :
508- assert self .TYPE == daemon_spec .daemon_type
509- daemon_spec .final_config , daemon_spec .deps = self .generate_config (daemon_spec )
510- return daemon_spec
def get_service_discovery_cfg(self, security_enabled: bool, mgmt_gw_enabled: bool) -> Dict[str, List[str]]:
    """
    Retrieves the service discovery URLs for the services that require monitoring

    Args:
        security_enabled: when True the per-mgr URLs use 'https', otherwise 'http'.
            Only consulted when mgmt_gw_enabled is False.
        mgmt_gw_enabled: when True the mgmt-gateway internal endpoint is used as the
            single URL prefix instead of one prefix per mgr IP.

    Returns:
        Dict[str, List[str]]: A dictionary where the keys represent service categories (e.g., "nfs", "node-exporter") and
        the values are a list of service-discovery URLs used to get the corresponding service targets.
    """
    if mgmt_gw_enabled:
        service_discovery_url_prefixes = [f'{self.mgr.get_mgmt_gw_internal_endpoint()}']
    else:
        port = self.mgr.service_discovery_port
        protocol = 'https' if security_enabled else 'http'
        service_discovery_url_prefixes = [f'{protocol}://{wrap_ipv6(ip)}:{port}'
                                          for ip in self.mgr._get_mgr_ips()]
    # 'ceph' is always emitted; any other service is emitted only when the
    # cache reports at least one daemon for it, looked up by service name
    # or by daemon type.
    return {
        service: [f'{prefix}/sd/prometheus/sd-config?service={service}' for prefix in service_discovery_url_prefixes]
        for service in service_registry.get_services_requiring_monitoring()
        if service == 'ceph'
        or self.mgr.cache.get_daemons_by_service(service)
        or self.mgr.cache.get_daemons_by_type(service)
    }
543+
def configure_alerts(self, r: Dict[str, Any]) -> None:
    """Add the Prometheus alerting rule files to ``r['files']``.

    ``r`` is modified in place and must already contain a ``'files'``
    mapping. The bundled alerts file is added only when it exists at
    ``self.mgr.prometheus_alerts_path``; the custom alerts file is always
    written, with an empty string when the store holds no value for it.
    """
    # include alerts, if present in the container
    if os.path.exists(self.mgr.prometheus_alerts_path):
        with open(self.mgr.prometheus_alerts_path, 'r', encoding='utf-8') as f:
            alerts = f.read()
        r['files']['/etc/prometheus/alerting/ceph_alerts.yml'] = alerts

    # Include custom alerts if present in key value store. This enables the
    # users to add custom alerts. Write the file in any case, so that if the
    # content of the key value store changed, that file is overwritten
    # (emptied in case the value has been removed from the key value
    # store). This prevents the necessity to adapt `cephadm` binary to
    # remove the file.
    #
    # Don't use the template engine for it as
    #
    # 1. the alerts are always static and
    # 2. they are a template themselves for the Go template engine, which
    #    use curly braces and escaping that is cumbersome and unnecessary
    #    for the user.
    #
    r['files']['/etc/prometheus/alerting/custom_alerts.yml'] = \
        self.mgr.get_store('services/prometheus/alerting/custom_alerts.yml', '')
511567
512568 def generate_config (
513569 self ,
514570 daemon_spec : CephadmDaemonDeploySpec ,
515571 ) -> Tuple [Dict [str , Any ], List [str ]]:
516572
517573 assert self .TYPE == daemon_spec .daemon_type
518- spec = cast (PrometheusSpec , self .mgr .spec_store [daemon_spec .service_name ].spec )
519- try :
520- retention_time = spec .retention_time if spec .retention_time else '15d'
521- except AttributeError :
522- retention_time = '15d'
523574
524- try :
525- targets = spec .targets
526- except AttributeError :
527- logger .warning ('Prometheus targets not found in the spec. Using empty list.' )
528- targets = []
529-
530- try :
531- retention_size = spec .retention_size if spec .retention_size else '0'
532- except AttributeError :
533- # default to disabled
534- retention_size = '0'
575+ spec = cast (PrometheusSpec , self .mgr .spec_store [daemon_spec .service_name ].spec )
576+ retention_time = get_field_from_spec (spec , 'retention_time' , '15d' )
577+ retention_size = get_field_from_spec (spec , 'retention_size' , '0' )
578+ targets = get_field_from_spec (spec , 'targets' , [])
535579
536580 # build service discovery end-point
537581 security_enabled , mgmt_gw_enabled , oauth2_enabled = self .mgr ._get_security_config ()
538- port = self .mgr .service_discovery_port
539- mgr_addr = wrap_ipv6 (self .mgr .get_mgr_ip ())
540-
541- protocol = 'https' if security_enabled else 'http'
542- self .mgr .get_mgmt_gw_internal_endpoint ()
543- if mgmt_gw_enabled :
544- service_discovery_url_prefix = f'{ self .mgr .get_mgmt_gw_internal_endpoint ()} '
545- else :
546- service_discovery_url_prefix = f'{ protocol } ://{ mgr_addr } :{ port } '
547- srv_end_point = f'{ service_discovery_url_prefix } /sd/prometheus/sd-config?'
548-
549- node_exporter_cnt = len (self .mgr .cache .get_daemons_by_service ('node-exporter' ))
550- alertmgr_cnt = len (self .mgr .cache .get_daemons_by_service ('alertmanager' ))
551- haproxy_cnt = len (self .mgr .cache .get_daemons_by_type ('ingress' ))
552- node_exporter_sd_url = f'{ srv_end_point } service=node-exporter' if node_exporter_cnt > 0 else None
553- alertmanager_sd_url = f'{ srv_end_point } service=alertmanager' if alertmgr_cnt > 0 else None
554- haproxy_sd_url = f'{ srv_end_point } service=haproxy' if haproxy_cnt > 0 else None
555- mgr_prometheus_sd_url = f'{ srv_end_point } service=mgr-prometheus' # always included
556- ceph_exporter_sd_url = f'{ srv_end_point } service=ceph-exporter' # always included
557- nvmeof_sd_url = f'{ srv_end_point } service=nvmeof' # always included
558- mgmt_gw_enabled = len (self .mgr .cache .get_daemons_by_service ('mgmt-gateway' )) > 0
559- nfs_sd_url = f'{ srv_end_point } service=nfs' # always included
560- smb_sd_url = f'{ srv_end_point } service=smb' # always included
561-
562582 alertmanager_user , alertmanager_password = self .mgr ._get_alertmanager_credentials ()
563- prometheus_user , prometheus_password = self .mgr ._get_prometheus_credentials ()
564583 federate_path = self .get_target_cluster_federate_path (targets )
565584 cluster_credentials : Dict [str , Any ] = {}
566585 cluster_credentials_files : Dict [str , Any ] = {'files' : {}}
567- FSID = self .mgr ._cluster_fsid
568586 if targets :
569587 if 'dashboard' in self .mgr .get ('mgr_map' )['modules' ]:
570588 cluster_credentials_files , cluster_credentials = self .mgr .remote (
@@ -576,21 +594,14 @@ def generate_config(
576594 # generate the prometheus configuration
577595 context = {
578596 'alertmanager_url_prefix' : '/alertmanager' if mgmt_gw_enabled else '/' ,
597+ 'security_enabled' : security_enabled ,
579598 'alertmanager_web_user' : alertmanager_user ,
580599 'alertmanager_web_password' : alertmanager_password ,
581- 'security_enabled' : security_enabled ,
582600 'service_discovery_username' : self .mgr .http_server .service_discovery .username ,
583601 'service_discovery_password' : self .mgr .http_server .service_discovery .password ,
584- 'mgr_prometheus_sd_url' : mgr_prometheus_sd_url ,
585- 'node_exporter_sd_url' : node_exporter_sd_url ,
586- 'alertmanager_sd_url' : alertmanager_sd_url ,
587- 'haproxy_sd_url' : haproxy_sd_url ,
588- 'ceph_exporter_sd_url' : ceph_exporter_sd_url ,
589- 'nvmeof_sd_url' : nvmeof_sd_url ,
602+ 'service_discovery_cfg' : self .get_service_discovery_cfg (security_enabled , mgmt_gw_enabled ),
590603 'external_prometheus_targets' : targets ,
591- 'cluster_fsid' : FSID ,
592- 'nfs_sd_url' : nfs_sd_url ,
593- 'smb_sd_url' : smb_sd_url ,
604+ 'cluster_fsid' : self .mgr ._cluster_fsid ,
594605 'clusters_credentials' : cluster_credentials ,
595606 'federate_path' : federate_path
596607 }
@@ -600,69 +611,41 @@ def generate_config(
600611 assert daemon_spec .host is not None
601612 ip_to_bind_to = self .mgr .get_first_matching_network_ip (daemon_spec .host , spec ) or ''
602613 if ip_to_bind_to :
603- daemon_spec .port_ips = {str (port ): ip_to_bind_to }
614+ daemon_spec .port_ips = {str (self . mgr . service_discovery_port ): ip_to_bind_to }
604615
605- web_context = {
606- 'enable_mtls' : mgmt_gw_enabled ,
607- 'enable_basic_auth' : not oauth2_enabled ,
608- 'prometheus_web_user' : prometheus_user ,
609- 'prometheus_web_password' : password_hash (prometheus_password ),
616+ files = {
617+ 'prometheus.yml' : self .mgr .template .render ('services/prometheus/prometheus.yml.j2' , context )
618+ }
619+ r : Dict [str , Any ] = {
620+ 'files' : files ,
621+ 'retention_time' : retention_time ,
622+ 'retention_size' : retention_size ,
623+ 'ip_to_bind_to' : ip_to_bind_to ,
624+ 'use_url_prefix' : mgmt_gw_enabled
610625 }
611-
612626 if security_enabled :
613627 # Following key/cert are needed for:
614628 # 1- run the prometheus server (web.yml config)
615629 # 2- use mTLS to scrape node-exporter (prometheus acts as client)
616630 # 3- use mTLS to send alerts to alertmanager (prometheus acts as client)
617- cert , key = self .get_prometheus_certificates (daemon_spec )
618- r : Dict [str , Any ] = {
619- 'files' : {
620- 'prometheus.yml' : self .mgr .template .render ('services/prometheus/prometheus.yml.j2' , context ),
621- 'root_cert.pem' : self .mgr .cert_mgr .get_root_ca (),
622- 'web.yml' : self .mgr .template .render ('services/prometheus/web.yml.j2' , web_context ),
623- 'prometheus.crt' : cert ,
624- 'prometheus.key' : key ,
625- },
626- 'retention_time' : retention_time ,
627- 'retention_size' : retention_size ,
628- 'ip_to_bind_to' : ip_to_bind_to ,
629- 'web_config' : '/etc/prometheus/web.yml' ,
630- 'use_url_prefix' : mgmt_gw_enabled
631- }
632- r ['files' ].update (cluster_credentials_files ['files' ])
633- else :
634- r = {
635- 'files' : {
636- 'prometheus.yml' : self .mgr .template .render ('services/prometheus/prometheus.yml.j2' , context )
637- },
638- 'retention_time' : retention_time ,
639- 'retention_size' : retention_size ,
640- 'ip_to_bind_to' : ip_to_bind_to ,
641- 'use_url_prefix' : mgmt_gw_enabled
631+ prometheus_user , prometheus_password = self .mgr ._get_prometheus_credentials ()
632+ web_context = {
633+ 'enable_mtls' : mgmt_gw_enabled ,
634+ 'enable_basic_auth' : not oauth2_enabled ,
635+ 'prometheus_web_user' : prometheus_user ,
636+ 'prometheus_web_password' : password_hash (prometheus_password ),
642637 }
638+ cert , key = self .get_prometheus_certificates (daemon_spec )
639+ files .update ({
640+ 'root_cert.pem' : self .mgr .cert_mgr .get_root_ca (),
641+ 'web.yml' : self .mgr .template .render ('services/prometheus/web.yml.j2' , web_context ),
642+ 'prometheus.crt' : cert ,
643+ 'prometheus.key' : key ,
644+ ** cluster_credentials_files ['files' ]
645+ })
646+ r .update ({'web_config' : '/etc/prometheus/web.yml' })
643647
644- # include alerts, if present in the container
645- if os .path .exists (self .mgr .prometheus_alerts_path ):
646- with open (self .mgr .prometheus_alerts_path , 'r' , encoding = 'utf-8' ) as f :
647- alerts = f .read ()
648- r ['files' ]['/etc/prometheus/alerting/ceph_alerts.yml' ] = alerts
649-
650- # Include custom alerts if present in key value store. This enables the
651- # users to add custom alerts. Write the file in any case, so that if the
652- # content of the key value store changed, that file is overwritten
653- # (emptied in case they value has been removed from the key value
654- # store). This prevents the necessity to adapt `cephadm` binary to
655- # remove the file.
656- #
657- # Don't use the template engine for it as
658- #
659- # 1. the alerts are always static and
660- # 2. they are a template themselves for the Go template engine, which
661- # use curly braces and escaping that is cumbersome and unnecessary
662- # for the user.
663- #
664- r ['files' ]['/etc/prometheus/alerting/custom_alerts.yml' ] = \
665- self .mgr .get_store ('services/prometheus/alerting/custom_alerts.yml' , '' )
648+ self .configure_alerts (r )
666649
667650 return r , self .get_dependencies (self .mgr )
668651
0 commit comments