Skip to content

Commit b068efe

Browse files
authored
761 healthoverview (#762)
* Add InfluxDB metrics routes and schema definitions
* Add metrics dashboard and related components for performance monitoring
* Refactor Healthcheck and Metrics routes to use nested structure with redirects
* Bump version to 0.1.49
* Pre-commit fixes
1 parent 66d5ead commit b068efe

File tree

16 files changed

+1486
-17
lines changed

16 files changed

+1486
-17
lines changed
Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
from fastapi import APIRouter
2+
from fastapi import Depends
3+
from fastapi import Query
4+
from fastapi import Security
5+
from sqlalchemy.ext.asyncio import AsyncSession
6+
7+
from app.auth.utils import AuthHandler
8+
from app.connectors.influxdb.schema.metrics import HostsResponse
9+
from app.connectors.influxdb.schema.metrics import MetricsResponse
10+
from app.connectors.influxdb.services.metrics import get_cpu_metrics
11+
from app.connectors.influxdb.services.metrics import get_disk_metrics
12+
from app.connectors.influxdb.services.metrics import get_hosts
13+
from app.connectors.influxdb.services.metrics import get_kernel_metrics
14+
from app.connectors.influxdb.services.metrics import get_memory_metrics
15+
from app.connectors.influxdb.services.metrics import get_network_metrics
16+
from app.connectors.influxdb.services.metrics import get_process_metrics
17+
from app.connectors.influxdb.services.metrics import get_summary
18+
from app.db.db_session import get_db
19+
20+
# Router on which the InfluxDB metrics endpoints in this module are registered.
# It is created without a prefix or tags; any mounting path is decided by
# whoever includes this router.
influxdb_metrics_router = APIRouter()
21+
22+
23+
@influxdb_metrics_router.get(
    "/hosts",
    description="Get available hosts from InfluxDB metrics",
    dependencies=[Security(AuthHandler().require_any_scope("admin", "analyst"))],
    response_model=HostsResponse,
)
async def get_hosts_route(
    session: AsyncSession = Depends(get_db),
) -> HostsResponse:
    """
    List every host that reports metrics to InfluxDB.

    Access is gated by ``require_any_scope("admin", "analyst")``.

    Args:
        session: Async database session injected through ``get_db``.

    Returns:
        The ``HostsResponse`` produced by the ``get_hosts`` service call.
    """
    hosts_response = await get_hosts(session)
    return hosts_response
36+
37+
38+
@influxdb_metrics_router.get(
    "/summary",
    description="Get system summary metrics for a host",
    dependencies=[Security(AuthHandler().require_any_scope("admin", "analyst"))],
    response_model=MetricsResponse,
)
async def get_summary_route(
    host: str = Query(..., description="Hostname to retrieve metrics for"),
    range_h: str = Query("1", description="Number of hours to look back"),
    session: AsyncSession = Depends(get_db),
) -> MetricsResponse:
    """
    Return the system summary for one host: uptime, CPU count, memory,
    processes, swap, and the system load time-series.

    Access is gated by ``require_any_scope("admin", "analyst")``.

    Args:
        host: Required hostname whose metrics are requested.
        range_h: Look-back window in hours, received as a string
            (default ``"1"``) and forwarded unchanged.
        session: Async database session injected through ``get_db``.

    Returns:
        The ``MetricsResponse`` produced by the ``get_summary`` service call.
    """
    # NOTE(review): range_h is not validated here; confirm get_summary
    # rejects non-numeric values before using it in a query.
    summary_response = await get_summary(host, range_h, session)
    return summary_response
54+
55+
56+
@influxdb_metrics_router.get(
    "/cpu",
    description="Get CPU metrics for a host",
    dependencies=[Security(AuthHandler().require_any_scope("admin", "analyst"))],
    response_model=MetricsResponse,
)
async def get_cpu_route(
    host: str = Query(..., description="Hostname to retrieve metrics for"),
    range_h: str = Query("1", description="Number of hours to look back"),
    session: AsyncSession = Depends(get_db),
) -> MetricsResponse:
    """
    Return the CPU usage time-series (system, user, iowait, softirq) for a host.

    Access is gated by ``require_any_scope("admin", "analyst")``.

    Args:
        host: Required hostname whose metrics are requested.
        range_h: Look-back window in hours, received as a string
            (default ``"1"``) and forwarded unchanged.
        session: Async database session injected through ``get_db``.

    Returns:
        The ``MetricsResponse`` produced by the ``get_cpu_metrics`` service call.
    """
    # NOTE(review): range_h is not validated here; confirm get_cpu_metrics
    # rejects non-numeric values before using it in a query.
    cpu_response = await get_cpu_metrics(host, range_h, session)
    return cpu_response
71+
72+
73+
@influxdb_metrics_router.get(
    "/memory",
    description="Get memory metrics for a host",
    dependencies=[Security(AuthHandler().require_any_scope("admin", "analyst"))],
    response_model=MetricsResponse,
)
async def get_memory_route(
    host: str = Query(..., description="Hostname to retrieve metrics for"),
    range_h: str = Query("1", description="Number of hours to look back"),
    session: AsyncSession = Depends(get_db),
) -> MetricsResponse:
    """
    Return the memory usage time-series and swap information for a host.

    Access is gated by ``require_any_scope("admin", "analyst")``.

    Args:
        host: Required hostname whose metrics are requested.
        range_h: Look-back window in hours, received as a string
            (default ``"1"``) and forwarded unchanged.
        session: Async database session injected through ``get_db``.

    Returns:
        The ``MetricsResponse`` produced by the ``get_memory_metrics`` service call.
    """
    # NOTE(review): range_h is not validated here; confirm get_memory_metrics
    # rejects non-numeric values before using it in a query.
    memory_response = await get_memory_metrics(host, range_h, session)
    return memory_response
88+
89+
90+
@influxdb_metrics_router.get(
    "/kernel",
    description="Get kernel metrics for a host",
    dependencies=[Security(AuthHandler().require_any_scope("admin", "analyst"))],
    response_model=MetricsResponse,
)
async def get_kernel_route(
    host: str = Query(..., description="Hostname to retrieve metrics for"),
    range_h: str = Query("1", description="Number of hours to look back"),
    session: AsyncSession = Depends(get_db),
) -> MetricsResponse:
    """
    Return kernel metrics for a host: interrupts and processes forked,
    expressed as a rate per second.

    Access is gated by ``require_any_scope("admin", "analyst")``.

    Args:
        host: Required hostname whose metrics are requested.
        range_h: Look-back window in hours, received as a string
            (default ``"1"``) and forwarded unchanged.
        session: Async database session injected through ``get_db``.

    Returns:
        The ``MetricsResponse`` produced by the ``get_kernel_metrics`` service call.
    """
    # NOTE(review): range_h is not validated here; confirm get_kernel_metrics
    # rejects non-numeric values before using it in a query.
    kernel_response = await get_kernel_metrics(host, range_h, session)
    return kernel_response
105+
106+
107+
@influxdb_metrics_router.get(
    "/disks",
    description="Get disk metrics for a host",
    dependencies=[Security(AuthHandler().require_any_scope("admin", "analyst"))],
    response_model=MetricsResponse,
)
async def get_disks_route(
    host: str = Query(..., description="Hostname to retrieve metrics for"),
    range_h: str = Query("1", description="Number of hours to look back"),
    session: AsyncSession = Depends(get_db),
) -> MetricsResponse:
    """
    Return disk metrics for a host: total size, usage percent by path,
    I/O throughput by device, and inode usage.

    Access is gated by ``require_any_scope("admin", "analyst")``.

    Args:
        host: Required hostname whose metrics are requested.
        range_h: Look-back window in hours, received as a string
            (default ``"1"``) and forwarded unchanged.
        session: Async database session injected through ``get_db``.

    Returns:
        The ``MetricsResponse`` produced by the ``get_disk_metrics`` service call.
    """
    # NOTE(review): range_h is not validated here; confirm get_disk_metrics
    # rejects non-numeric values before using it in a query.
    disk_response = await get_disk_metrics(host, range_h, session)
    return disk_response
123+
124+
125+
@influxdb_metrics_router.get(
    "/processes",
    description="Get process metrics for a host",
    dependencies=[Security(AuthHandler().require_any_scope("admin", "analyst"))],
    response_model=MetricsResponse,
)
async def get_processes_route(
    host: str = Query(..., description="Hostname to retrieve metrics for"),
    range_h: str = Query("1", description="Number of hours to look back"),
    session: AsyncSession = Depends(get_db),
) -> MetricsResponse:
    """
    Return process metrics for a host: the status time-series (running,
    sleeping, zombies, stopped, blocked) and the current count per state.

    Access is gated by ``require_any_scope("admin", "analyst")``.

    Args:
        host: Required hostname whose metrics are requested.
        range_h: Look-back window in hours, received as a string
            (default ``"1"``) and forwarded unchanged.
        session: Async database session injected through ``get_db``.

    Returns:
        The ``MetricsResponse`` produced by the ``get_process_metrics`` service call.
    """
    # NOTE(review): range_h is not validated here; confirm get_process_metrics
    # rejects non-numeric values before using it in a query.
    process_response = await get_process_metrics(host, range_h, session)
    return process_response
141+
142+
143+
@influxdb_metrics_router.get(
    "/network",
    description="Get network metrics for a host",
    dependencies=[Security(AuthHandler().require_any_scope("admin", "analyst"))],
    response_model=MetricsResponse,
)
async def get_network_route(
    host: str = Query(..., description="Hostname to retrieve metrics for"),
    range_h: str = Query("1", description="Number of hours to look back"),
    session: AsyncSession = Depends(get_db),
) -> MetricsResponse:
    """
    Return network metrics for a host: traffic in bytes/sec by interface,
    TCP established connections, and interface errors.

    Access is gated by ``require_any_scope("admin", "analyst")``.

    Args:
        host: Required hostname whose metrics are requested.
        range_h: Look-back window in hours, received as a string
            (default ``"1"``) and forwarded unchanged.
        session: Async database session injected through ``get_db``.

    Returns:
        The ``MetricsResponse`` produced by the ``get_network_metrics`` service call.
    """
    # NOTE(review): range_h is not validated here; confirm get_network_metrics
    # rejects non-numeric values before using it in a query.
    network_response = await get_network_metrics(host, range_h, session)
    return network_response
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
from datetime import datetime
2+
from typing import Any
3+
from typing import Optional
4+
5+
from pydantic import BaseModel
6+
from pydantic import Field
7+
8+
9+
class TimeSeriesPoint(BaseModel):
    """One sample of a time series: when it was taken, which field, and its value."""

    # Timestamp of the sample.
    time: datetime
    # Name of the field the value belongs to.
    field: str
    # Numeric reading for this sample.
    value: float
    # Optional label for the sample; None when not supplied.
    label: Optional[str] = None
16+
17+
18+
class TimeSeriesData(BaseModel):
    """Container for several time series, keyed by field or label name."""

    # Each series is a list of plain dicts rather than TimeSeriesPoint
    # instances; a fresh empty dict is created per model when omitted.
    series: dict[str, list[dict[str, Any]]] = Field(
        description="Mapping of series name to list of {time, value} points",
        default_factory=dict,
    )
25+
26+
27+
class MetricsResponse(BaseModel):
    """Shared response envelope returned by the per-host metrics endpoints."""

    # Whether the request succeeded.
    success: bool
    # Human-readable status text accompanying the result.
    message: str
    # Free-form metrics payload; a fresh empty dict per instance when omitted.
    data: dict[str, Any] = Field(default_factory=dict)
33+
34+
35+
class HostsResponse(BaseModel):
    """Response envelope for the endpoint that lists available hosts."""

    # Whether the request succeeded.
    success: bool
    # Human-readable status text accompanying the result.
    message: str
    # Hostnames found; a fresh empty list per instance when omitted.
    hosts: list[str] = Field(default_factory=list)

0 commit comments

Comments
 (0)