Skip to content

Commit 5930d5d

Browse files
author
Andrei Neagu
committed
Added instrumentation for Prometheus metrics
1 parent e6cc226 commit 5930d5d

File tree

5 files changed

+101
-5
lines changed

5 files changed

+101
-5
lines changed

services/agent/src/simcore_service_agent/core/application.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,6 @@
55
get_common_oas_options,
66
override_fastapi_openapi_method,
77
)
8-
from servicelib.fastapi.prometheus_instrumentation import (
9-
setup_prometheus_instrumentation,
10-
)
118
from servicelib.logging_utils import config_all_loggers
129

1310
from .._meta import (
@@ -18,6 +15,7 @@
1815
SUMMARY,
1916
VERSION,
2017
)
18+
from ..services.instrumentation import setup_instrumentation
2119
from ..services.rabbitmq import setup_rabbitmq
2220
from ..services.volumes_manager import setup_volume_manager
2321
from .api.rest.routes import setup_rest_api
@@ -53,8 +51,7 @@ def create_app() -> FastAPI:
5351
override_fastapi_openapi_method(app)
5452
app.state.settings = settings
5553

56-
if app.state.settings.AGENT_PROMETHEUS_INSTRUMENTATION_ENABLED:
57-
setup_prometheus_instrumentation(app)
54+
setup_instrumentation(app)
5855

5956
setup_rabbitmq(app)
6057
setup_volume_manager(app)

services/agent/src/simcore_service_agent/services/docker_utils.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,11 @@
1212
)
1313
from servicelib.docker_constants import PREFIX_DYNAMIC_SIDECAR_VOLUMES
1414
from servicelib.logging_utils import log_catch, log_context
15+
from simcore_service_agent.core.settings import ApplicationSettings
1516
from starlette import status
1617

1718
from .backup import backup_volume
19+
from .instrumentation import get_instrumentation
1820
from .models import VolumeDetails
1921

2022
_logger = logging.getLogger(__name__)
@@ -79,6 +81,10 @@ async def _backup_volume(app: FastAPI, docker: Docker, *, volume_name: str) -> N
7981
_logger, logging.INFO, f"backup '{volume_name}'", log_duration=True
8082
):
8183
volume_details = await get_volume_details(docker, volume_name=volume_name)
84+
settings: ApplicationSettings = app.state.settings
85+
get_instrumentation(app).agent_metrics.backedup_volumes(
86+
settings.AGENT_DOCKER_NODE_ID
87+
)
8288
await backup_volume(app, volume_details, volume_name)
8389
else:
8490
_logger.debug("No backup is required for '%s'", volume_name)
@@ -95,3 +101,8 @@ async def remove_volume(
95101
await _backup_volume(app, docker, volume_name=volume_name)
96102

97103
await DockerVolume(docker, volume_name).delete()
104+
105+
settings: ApplicationSettings = app.state.settings
106+
get_instrumentation(app).agent_metrics.remove_volumes(
107+
settings.AGENT_DOCKER_NODE_ID
108+
)
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
from ._setup import get_instrumentation, setup_instrumentation
2+
3+
# Names re-exported from ``._setup`` as the public interface of this package.
__all__: tuple[str, ...] = ("get_instrumentation", "setup_instrumentation")
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
from dataclasses import dataclass, field
2+
from typing import Final
3+
4+
from prometheus_client import CollectorRegistry, Counter
5+
from servicelib.instrumentation import MetricsBase, get_metrics_namespace
6+
7+
from ..._meta import APP_NAME
8+
9+
# Namespace passed to every counter declared in this module, derived from APP_NAME.
_METRICS_NAMESPACE: Final[str] = get_metrics_namespace(APP_NAME)
10+
# Label names declared on every counter created by AgentMetrics.
_LABELS_COUNTERS: Final[tuple[str, ...]] = ("docker_node_id",)
11+
12+
13+
@dataclass(slots=True, kw_only=True)
class AgentMetrics(MetricsBase):
    """Prometheus counters for the volume operations performed by the agent.

    Both counters are labelled with ``_LABELS_COUNTERS`` and are created with
    ``_METRICS_NAMESPACE``, ``self.subsystem`` and ``self.registry``
    (``subsystem`` and ``registry`` are presumably fields of ``MetricsBase``;
    they are supplied as keyword arguments by ``AgentInstrumentation``).
    """

    # Incremented by ``remove_volumes``.
    volumes_removed: Counter = field(init=False)
    # Incremented by ``backedup_volumes``.
    volumes_backedup: Counter = field(init=False)

    def __post_init__(self) -> None:
        """Create the counters, which are excluded from ``__init__``."""
        self.volumes_removed = Counter(
            "volumes_removed_total",
            "Number of removed volumes by the agent",
            labelnames=_LABELS_COUNTERS,
            namespace=_METRICS_NAMESPACE,
            subsystem=self.subsystem,
            registry=self.registry,
        )

        self.volumes_backedup = Counter(
            "volumes_backedup_total",
            # Help text is exposed on the metrics endpoint; spelling fixed
            # ("who's" -> "whose", "uplaoded" -> "uploaded").
            "Number of removed volumes whose content was uploaded by the agent",
            labelnames=_LABELS_COUNTERS,
            namespace=_METRICS_NAMESPACE,
            subsystem=self.subsystem,
            registry=self.registry,
        )

    def remove_volumes(self, docker_node_id: str) -> None:
        """Increment ``volumes_removed`` by one for ``docker_node_id``."""
        self.volumes_removed.labels(docker_node_id=docker_node_id).inc()

    def backedup_volumes(self, docker_node_id: str) -> None:
        """Increment ``volumes_backedup`` by one for ``docker_node_id``."""
        self.volumes_backedup.labels(docker_node_id=docker_node_id).inc()
42+
43+
44+
@dataclass(slots=True, kw_only=True)
class AgentInstrumentation:
    """Holds the agent's metrics, all registered on a single collector registry."""

    # Registry handed to the metrics created in ``__post_init__``.
    registry: CollectorRegistry
    # Not accepted by ``__init__`` (``init=False``); built in ``__post_init__``.
    agent_metrics: AgentMetrics = field(init=False)

    def __post_init__(self) -> None:
        """Instantiate ``AgentMetrics`` for the "agent" subsystem on ``registry``."""
        metrics = AgentMetrics(  # pylint: disable=unexpected-keyword-arg
            subsystem="agent",
            registry=self.registry,
        )
        self.agent_metrics = metrics
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
from fastapi import FastAPI
2+
from servicelib.fastapi.prometheus_instrumentation import (
3+
setup_prometheus_instrumentation,
4+
)
5+
from simcore_service_agent.core.settings import ApplicationSettings
6+
7+
from ._models import AgentInstrumentation
8+
9+
10+
def setup_instrumentation(app: FastAPI) -> None:
    """Wire Prometheus instrumentation into ``app`` when enabled in settings.

    Does nothing unless ``AGENT_PROMETHEUS_INSTRUMENTATION_ENABLED`` is truthy
    on ``app.state.settings``. Otherwise it calls
    ``setup_prometheus_instrumentation`` and registers a startup handler that
    stores an ``AgentInstrumentation`` on ``app.state.instrumentation``.
    """
    app_settings: ApplicationSettings = app.state.settings

    if app_settings.AGENT_PROMETHEUS_INSTRUMENTATION_ENABLED:
        prometheus = setup_prometheus_instrumentation(app)

        async def _store_instrumentation() -> None:
            # Built at startup, reusing the instrumentator's registry.
            agent_instrumentation = AgentInstrumentation(registry=prometheus.registry)
            app.state.instrumentation = agent_instrumentation

        app.add_event_handler("startup", _store_instrumentation)
23+
24+
25+
def get_instrumentation(app: FastAPI) -> AgentInstrumentation:
    """Return the ``AgentInstrumentation`` stored on ``app.state.instrumentation``.

    Raises:
        RuntimeError: if ``app.state.instrumentation`` is missing or falsy,
            e.g. because ``setup_instrumentation`` returned early or the
            startup handler has not run yet.
    """
    # Use getattr with a default: a bare ``app.state.instrumentation`` on an
    # unset attribute raises AttributeError before any diagnostic can be shown,
    # and an ``assert`` would be stripped when Python runs with ``-O``.
    instrumentation: AgentInstrumentation | None = getattr(
        app.state, "instrumentation", None
    )
    if not instrumentation:
        msg = "Instrumentation not setup. Please check the configuration"
        raise RuntimeError(msg)
    return instrumentation

0 commit comments

Comments
 (0)