77 setup_prometheus_instrumentation as setup_rest_instrumentation ,
88)
99
10+ from .._meta import APP_NAME
1011from ..core .errors import ConfigurationError
1112from ..core .settings import get_application_settings
1213from ..models import AssociatedInstance , Cluster , NonAssociatedInstance
1314
# Namespace passed as `namespace=` to every Gauge/Counter created in this module.
# It is APP_NAME with "-" replaced by "_" (Prometheus metric names may not contain dashes).
15+ METRICS_NAMESPACE : Final [str ] = APP_NAME .replace ("-" , "_" )
# Label names passed as `labelnames=` to the per-EC2-instance metrics in this module.
1416EC2_INSTANCE_LABELS : Final [tuple [str ]] = ("instance_type" ,)
1517
1618
@@ -25,8 +27,9 @@ def _update_gauge(
2527
2628
2729@dataclass (slots = True , kw_only = True )
28- class AutoscalingInstrumentation :
30+ class AutoscalingInstrumentation : # pylint: disable=too-many-instance-attributes
2931 registry : CollectorRegistry
32+ subsystem : str
3033 _active_nodes : Gauge = field (init = False )
3134 _pending_nodes : Gauge = field (init = False )
3235 _drained_nodes : Gauge = field (init = False )
@@ -41,40 +44,56 @@ def __post_init__(self) -> None:
4144 "active_nodes" ,
4245 "Number of EC2-backed docker nodes which are active and ready to run tasks" ,
4346 labelnames = EC2_INSTANCE_LABELS ,
47+ namespace = METRICS_NAMESPACE ,
48+ subsystem = self .subsystem ,
4449 )
4550 self ._pending_nodes = Gauge (
4651 "pending_nodes" ,
4752 "Number of EC2-backed docker nodes which are active and NOT ready to run tasks" ,
4853 labelnames = EC2_INSTANCE_LABELS ,
54+ namespace = METRICS_NAMESPACE ,
55+ subsystem = self .subsystem ,
4956 )
5057 self ._drained_nodes = Gauge (
5158 "drained_nodes" ,
5259 "Number of EC2-backed docker nodes which are drained" ,
5360 labelnames = EC2_INSTANCE_LABELS ,
61+ namespace = METRICS_NAMESPACE ,
62+ subsystem = self .subsystem ,
5463 )
5564 self ._buffer_drained_nodes = Gauge (
5665 "buffer_drained_nodes" ,
5766 "Number of EC2-backed docker nodes which are drained and in buffer/reserve" ,
5867 labelnames = EC2_INSTANCE_LABELS ,
68+ namespace = METRICS_NAMESPACE ,
69+ subsystem = self .subsystem ,
5970 )
6071 self ._pending_ec2s = Gauge (
6172 "pending_ec2s" ,
6273 "Number of EC2 instance not yet part of the cluster" ,
6374 labelnames = EC2_INSTANCE_LABELS ,
75+ namespace = METRICS_NAMESPACE ,
76+ subsystem = self .subsystem ,
6477 )
6578 self ._disconnected_nodes = Gauge (
6679 "disconnected_nodes" ,
6780 "Number of docker node not backed by a running EC2 instance" ,
81+ namespace = METRICS_NAMESPACE ,
82+ subsystem = self .subsystem ,
6883 )
6984 self ._started_instances = Counter (
7085 "started_instances_total" ,
7186 "Number of EC2 instances that were started" ,
7287 labelnames = EC2_INSTANCE_LABELS ,
88+ namespace = METRICS_NAMESPACE ,
89+ subsystem = self .subsystem ,
7390 )
7491 self ._terminated_instances = Counter (
7592 "terminated_ec2_instances_total" ,
7693 "Number of EC2 instances that were terminated" ,
7794 labelnames = EC2_INSTANCE_LABELS ,
95+ namespace = METRICS_NAMESPACE ,
96+ subsystem = self .subsystem ,
7897 )
7998
8099 def update_from_cluster (self , cluster : Cluster ) -> None :
@@ -101,8 +120,11 @@ def setup(app: FastAPI) -> None:
101120 instrumentator = setup_rest_instrumentation (app )
102121
# Startup hook: builds the AutoscalingInstrumentation and stores it on
# `app.state.instrumentation`.
103122 async def on_startup () -> None :
# The metrics subsystem is "dynamic" when app_settings.AUTOSCALING_NODES_MONITORING
# is truthy and "computational" otherwise.
# NOTE(review): `app_settings` is defined outside the visible lines; presumably it comes
# from get_application_settings(app) — confirm.
123+ metrics_subsystem = (
124+ "dynamic" if app_settings .AUTOSCALING_NODES_MONITORING else "computational"
125+ )
104126 app .state .instrumentation = AutoscalingInstrumentation (
105- registry = instrumentator .registry
127+ registry = instrumentator .registry , subsystem = metrics_subsystem
106128 )
107129
108130 async def on_shutdown () -> None :
0 commit comments