Skip to content

Commit 03dd66d

Browse files
committed
refactor: configure logger at module level and introduce normalize_name in utils.metrics module
1 parent 1a0ce0c commit 03dd66d

File tree

3 files changed

+49
-26
lines changed

3 files changed

+49
-26
lines changed

container_exporter.py

Lines changed: 39 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
from fastapi import FastAPI
77
from fastapi.responses import PlainTextResponse
88
from contextlib import asynccontextmanager
9-
from utils.metrics import PromMetric, prune_stale_metrics, flush_metric_labels
10-
from logging import basicConfig, error, ERROR
9+
from utils.metrics import PromMetric, prune_stale_metrics, normalize_name
10+
import logging
1111
from settings.settings import settings
1212

1313
docker_client: Docker
@@ -23,7 +23,16 @@ async def lifespan(app: FastAPI):
2323

2424
app = FastAPI(lifespan=lifespan)
2525

26-
gauge_container_status = Gauge('cxp_container_status', 'Docker container status (0 = not running, 1 = running, 2 = restarting/unhealthy)', ['container_name'])
26+
logging.basicConfig(
27+
level=logging.ERROR,
28+
format='%(asctime)s ERROR %(message)s',
29+
datefmt='%Y-%m-%d %H:%M:%S',
30+
)
31+
32+
logger = logging.getLogger(__name__)
33+
34+
35+
gauge_container_status = Gauge('cxp_container_status', 'Docker container status (0 = not running, 1 = running, 2 = restarting/unhealthy/paused)', ['container_name'])
2736
gauge_cpu_percentage = Gauge('cxp_cpu_percentage', 'Docker container CPU usage', ['container_name'])
2837
gauge_memory_percentage = Gauge('cxp_memory_percentage', 'Docker container memory usage in percent', ['container_name'])
2938
gauge_memory_bytes = Gauge('cxp_memory_bytes_total', 'Docker container memory usage in bytes', ['container_name'])
@@ -33,24 +42,26 @@ async def lifespan(app: FastAPI):
3342
counter_net_rx = Gauge("cxp_network_rx_bytes_total", "Total bytes received over network", ['container_name'])
3443
counter_net_tx = Gauge("cxp_network_tx_bytes_total", "Total bytes sent over network", ['container_name'])
3544

36-
37-
metrics_to_clear: list[PromMetric] = [gauge_cpu_percentage, gauge_memory_percentage, gauge_memory_bytes, counter_disk_read, counter_disk_write, counter_net_rx, counter_net_tx]
38-
39-
40-
4145
async def get_containers(all=False) -> list[DockerContainer]:
4246
return await docker_client.containers.list(all=all)
4347

4448
def update_container_status(running_containers:list[DockerContainer]):
4549
for c in running_containers:
46-
gauge_container_status.labels(container_name=c._container.get("Names")[0][1:]).set(1 if c._container.get('State') == 'running' else 2)
50+
info = c._container
51+
name = normalize_name(info.get("Names", []), info.get("Id", ""))
52+
state = info.get("State", "").lower()
53+
if state == "running":
54+
gauge_container_status.labels(container_name=name).set(1)
55+
else:
56+
gauge_container_status.labels(container_name=name).set(2)
4757

4858
# Async metrics gathering
4959
async def container_stats( running_containers: list[DockerContainer]):
5060
all_stats = await stat.get_containers_stats(running_containers)
5161

5262
for stats in all_stats:
53-
name = stats[0]['name'][1:]
63+
name = stats[0].get('name', stats[0].get('id', 'Unkown').lstrip("/")).lstrip("/")
64+
5465
gauge_cpu_percentage.labels(container_name=name).set(stat.calculate_cpu_percentage(stats[0]))
5566
gauge_memory_percentage.labels(container_name=name).set(stat.calculate_memory_percentage(stats[0]))
5667
gauge_memory_bytes.labels(container_name=name).set(stat.calculate_memory_bytes(stats[0]))
@@ -69,29 +80,39 @@ async def container_stats( running_containers: list[DockerContainer]):
6980
]
7081

7182
# Metrics we want to always keep, and set to 0 instead
72-
persistent_metrics: list[PromMetric] = [gauge_container_status]
83+
persistent_metrics: list[Gauge] = [gauge_container_status]
7384

7485

7586
@app.get("/")
7687
def root():
7788
return {"message": "Welcome to CXP, Container Exporter for Prometheus."}
7889

90+
@app.get("/healthz")
async def healthz():
    """
    Health probe: report whether the Docker daemon can be queried.

    Returns a plain-text "OK" with status 200 when a minimal container
    listing succeeds, otherwise "NOT OK" with status 500.
    """
    try:
        # A simple, cheap call to Docker, e.g. list one container
        await docker_client.containers.list(limit=1)
    except Exception as e:
        # Catch Exception rather than using a bare `except:` so that task
        # cancellation and interpreter-exit signals (BaseException subclasses)
        # still propagate instead of being turned into a 500 response.
        logger.error("Health check failed: %s", e)
        return PlainTextResponse("NOT OK", status_code=500)
    return PlainTextResponse("OK", status_code=200)
98+
7999
@app.get("/metrics")
80100
async def metrics():
81101
try:
82102
running_containers = await get_containers()
83103
update_container_status(running_containers)
84-
prune_stale_metrics([c._container.get("Names")[0][1:] for c in running_containers], prunable_metrics, persistent_metrics)
104+
105+
c_names = [
106+
normalize_name(c._container.get("Names", []), c._container.get("Id", ""))
107+
for c in running_containers
108+
]
109+
prune_stale_metrics(c_names, prunable_metrics, persistent_metrics)
110+
85111
await container_stats(running_containers)
86112
return PlainTextResponse(
87113
content=generate_latest(),
88114
media_type=CONTENT_TYPE_LATEST
89115
)
90116
except Exception as e:
91-
basicConfig(
92-
level=ERROR,
93-
format='%(asctime)s ERROR %(message)s',
94-
datefmt='%Y-%m-%d %H:%M:%S'
95-
)
96-
error(str(e))
117+
logger.error("Error running metrics collection: %s", e, exc_info=settings.CONTAINER_EXPORTER_DEBUG)
97118
return PlainTextResponse(f"Error running metrics collection: {str(e)}", status_code=500)

stats/get_docker_stats.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ def calculate_memory_percentage(stats: dict) -> float:
3333
return (usage / limit) * 100.0
3434

3535

36-
def calculate_memory_bytes(stats) -> bytes:
36+
def calculate_memory_bytes(stats) -> float:
3737
mem_stats = stats.get('memory_stats', {}) or {}
3838
memory_usage_bytes = mem_stats.get('usage')
3939

utils/metrics.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from settings.settings import settings
55
PromMetric = Union[Gauge, Counter]
66

7-
def prune_stale_metrics(active_names: Iterable[str], prunable_metrics: list[PromMetric], persistent_metrics : list[PromMetric]):
7+
def prune_stale_metrics(active_names: Iterable[str], prunable_metrics: list[PromMetric], persistent_metrics : list[Gauge]):
88
"""
99
Removes time series for inactive containers from selected metrics
1010
while preserving container status metrics by setting them to 0.
@@ -30,9 +30,11 @@ def prune_stale_metrics(active_names: Iterable[str], prunable_metrics: list[Prom
3030
if name not in active_set:
3131
metric.labels(container_name=name).set(0)
3232

33-
34-
def flush_metric_labels(containers:list[DockerContainer], metrics_to_clear: list[PromMetric]):
35-
for container in containers:
36-
if container._container.get("State") != "running":
37-
for metric in metrics_to_clear:
38-
metric.labels(container_name=container._container.get("Names")[0][1:]).set(0)
33+
def normalize_name(raw_names: list[str], fallback_id: str) -> str:
    """
    Return a label-friendly container name.

    Given Docker's 'Names' array (e.g. ['/my-container']), pick the first
    entry and strip its leading '/'. If the array is missing or empty, or the
    first entry is not a usable string (not a str, blank, or nothing left
    after stripping), return the first 12 characters of the container ID.

    Args:
        raw_names: The 'Names' value from the container info; may be None,
            empty, a list or a tuple.
        fallback_id: The container ID used when no usable name exists; a
            None or empty ID yields an empty string.

    Returns:
        The first name without leading slashes, or the shortened ID.
    """
    if isinstance(raw_names, (list, tuple)) and raw_names:
        first = raw_names[0]
        if isinstance(first, str):
            name = first.lstrip("/")
            # A name such as "/" strips down to "", which would produce an
            # empty label value; fall through to the ID in that case.
            if name:
                return name
    return (fallback_id or "")[:12]

0 commit comments

Comments
 (0)