Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions frigate/stats/prometheus.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,16 +355,37 @@ def collect(self):
gpu_mem_usages = GaugeMetricFamily(
"frigate_gpu_mem_usage_percent", "GPU memory usage %", labels=["gpu_name"]
)
gpu_enc_usages = GaugeMetricFamily(
"frigate_gpu_encoder_usage_percent",
"GPU encoder utilisation %",
labels=["gpu_name"],
)
gpu_compute_usages = GaugeMetricFamily(
"frigate_gpu_compute_usage_percent",
"GPU compute / encode utilisation %",
labels=["gpu_name"],
)
gpu_dec_usages = GaugeMetricFamily(
"frigate_gpu_decoder_usage_percent",
"GPU decoder utilisation %",
labels=["gpu_name"],
)

try:
for gpu_name, gpu_stats in stats["gpu_usages"].items():
self.add_metric(gpu_usages, [gpu_name], gpu_stats, "gpu")
self.add_metric(gpu_mem_usages, [gpu_name], gpu_stats, "mem")
self.add_metric(gpu_enc_usages, [gpu_name], gpu_stats, "enc")
self.add_metric(gpu_compute_usages, [gpu_name], gpu_stats, "compute")
self.add_metric(gpu_dec_usages, [gpu_name], gpu_stats, "dec")
except KeyError:
pass

yield gpu_usages
yield gpu_mem_usages
yield gpu_enc_usages
yield gpu_compute_usages
yield gpu_dec_usages

# service stats
uptime_seconds = GaugeMetricFamily(
Expand Down
46 changes: 17 additions & 29 deletions frigate/stats/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,45 +261,33 @@ async def set_gpu_stats(
else:
stats["jetson-gpu"] = {"gpu": "", "mem": ""}
hwaccel_errors.append(args)
elif "qsv" in args:
elif "qsv" in args or ("vaapi" in args and not is_vaapi_amd_driver()):
if not config.telemetry.stats.intel_gpu_stats:
continue

# intel QSV GPU
intel_usage = get_intel_gpu_stats(config.telemetry.stats.intel_gpu_device)

if intel_usage is not None:
stats["intel-qsv"] = intel_usage or {"gpu": "", "mem": ""}
else:
stats["intel-qsv"] = {"gpu": "", "mem": ""}
hwaccel_errors.append(args)
elif "vaapi" in args:
if is_vaapi_amd_driver():
if not config.telemetry.stats.amd_gpu_stats:
continue

# AMD VAAPI GPU
amd_usage = get_amd_gpu_stats()

if amd_usage:
stats["amd-vaapi"] = amd_usage
else:
stats["amd-vaapi"] = {"gpu": "", "mem": ""}
hwaccel_errors.append(args)
else:
if not config.telemetry.stats.intel_gpu_stats:
continue

# intel VAAPI GPU
if "intel-gpu" not in stats:
# intel GPU (QSV or VAAPI both use the same physical GPU)
intel_usage = get_intel_gpu_stats(
config.telemetry.stats.intel_gpu_device
)

if intel_usage is not None:
stats["intel-vaapi"] = intel_usage or {"gpu": "", "mem": ""}
stats["intel-gpu"] = intel_usage or {"gpu": "", "mem": ""}
else:
stats["intel-vaapi"] = {"gpu": "", "mem": ""}
stats["intel-gpu"] = {"gpu": "", "mem": ""}
hwaccel_errors.append(args)
elif "vaapi" in args:
if not config.telemetry.stats.amd_gpu_stats:
continue

# AMD VAAPI GPU
amd_usage = get_amd_gpu_stats()

if amd_usage:
stats["amd-vaapi"] = amd_usage
else:
stats["amd-vaapi"] = {"gpu": "", "mem": ""}
hwaccel_errors.append(args)
elif "preset-rk" in args:
rga_usage = get_rockchip_gpu_stats()

Expand Down
8 changes: 6 additions & 2 deletions frigate/test/test_gpu_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,12 @@ def test_intel_gpu_stats(self, sp):
process.stdout = self.intel_results
sp.return_value = process
intel_stats = get_intel_gpu_stats(False)
print(f"the intel stats are {intel_stats}")
# rc6 values: 47.844741 and 100.0 → avg 73.92 → gpu = 100 - 73.92 = 26.08%
# Render/3D/0: 0.0 and 0.0 → compute = 0.0%
# Video/0: 4.533124 and 0.0 → dec = 2.27%
assert intel_stats == {
"gpu": "1.13%",
"gpu": "26.08%",
"mem": "-%",
"compute": "0.0%",
"dec": "2.27%",
}
103 changes: 63 additions & 40 deletions frigate/util/services.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,39 +265,49 @@ def get_amd_gpu_stats() -> Optional[dict[str, str]]:


def get_intel_gpu_stats(intel_gpu_device: Optional[str]) -> Optional[dict[str, str]]:
"""Get stats using intel_gpu_top."""
"""Get stats using intel_gpu_top.

Returns overall GPU usage derived from rc6 residency (idle time),
plus individual engine breakdowns:
- compute: Render/3D engine (compute/shader encoder, used by QSV)
- dec: Video engines (fixed-function codec, used by VAAPI)
"""

def get_stats_manually(output: str) -> dict[str, str]:
    """Find global stats via regex when json fails to parse.

    Args:
        output: Raw text to scan. The patterns below contain no whitespace,
            so the text is presumably already whitespace-stripped by the
            caller (NOTE(review): confirm against the call site).

    Returns:
        A dict that always contains ``"gpu"`` and ``"mem"``:
          - ``"gpu"``: ``100 - average rc6`` formatted as a percentage, or
            ``"-%"`` when no rc6 sample is found.
          - ``"mem"``: always ``"-%"``.
          - ``"compute"``: average ``busy`` of every ``Render/3D/0`` sample
            (only present when at least one sample matched).
          - ``"dec"``: average ``busy`` of every ``Video/<n>`` sample
            (only present when at least one sample matched).
    """

    def busy_samples(pattern: str, prefix_len: int) -> list[float]:
        """Return the ``busy`` value of every engine object matching *pattern*.

        *prefix_len* is the length of the leading ``"<engine name>":`` text
        that must be sliced off so the remainder parses as a JSON object.
        """
        samples = []
        for match in re.findall(pattern, reading):
            packet = json.loads(match[prefix_len:])
            samples.append(float(packet.get("busy", 0.0)))
        return samples

    # A no-op for a plain str; also flattens a list of string chunks.
    reading = "".join(output)
    results: dict[str, str] = {}

    # rc6 residency is the idle share, so usage is its complement. Average
    # every sample found rather than trusting only the first one.
    rc6_values = [
        float(value)
        for value in re.findall(r'"rc6":\{"value":([\d.]+)', reading)
    ]
    if rc6_values:
        rc6_avg = sum(rc6_values) / len(rc6_values)
        results["gpu"] = f"{round(100.0 - rc6_avg, 2)}%"
    else:
        results["gpu"] = "-%"

    results["mem"] = "-%"

    # Render/3D is the compute/encode engine; 14 == len('"Render/3D/0":').
    render = busy_samples(r'"Render/3D/0":{[a-z":\d.,%]+}', 14)
    if render:
        results["compute"] = f"{round(sum(render) / len(render), 2)}%"

    # Video engines are the fixed-function decode engines;
    # 10 == len('"Video/0":').
    video = busy_samples(r'"Video/\d":{[a-z":\d.,%]+}', 10)
    if video:
        results["dec"] = f"{round(sum(video) / len(video), 2)}%"

    return results

intel_gpu_top_command = [
Expand Down Expand Up @@ -336,59 +346,72 @@ def get_stats_manually(output: str) -> dict[str, str]:
return get_stats_manually(output)

results: dict[str, str] = {}
render = {"global": []}
video = {"global": []}
rc6_values = []
render_global = []
video_global = []
# per-client: {pid: [total_busy_per_sample, ...]}
client_usages: dict[str, list[float]] = {}

for block in data:
# rc6 residency: percentage of time GPU is idle
rc6 = block.get("rc6", {}).get("value")
if rc6 is not None:
rc6_values.append(float(rc6))

global_engine = block.get("engines")

if global_engine:
render_frame = global_engine.get("Render/3D/0", {}).get("busy")
video_frame = global_engine.get("Video/0", {}).get("busy")

if render_frame is not None:
render["global"].append(float(render_frame))
render_global.append(float(render_frame))

if video_frame is not None:
video["global"].append(float(video_frame))
video_global.append(float(video_frame))

clients = block.get("clients", {})

if clients and len(clients):
if clients:
for client_block in clients.values():
key = client_block["pid"]
pid = client_block["pid"]

if render.get(key) is None:
render[key] = []
video[key] = []
if pid not in client_usages:
client_usages[pid] = []

client_engine = client_block.get("engine-classes", {})
# Sum all engine-class busy values for this client
total_busy = 0.0
for engine in client_block.get("engine-classes", {}).values():
busy = engine.get("busy")
if busy is not None:
total_busy += float(busy)

render_frame = client_engine.get("Render/3D", {}).get("busy")
video_frame = client_engine.get("Video", {}).get("busy")
client_usages[pid].append(total_busy)

if render_frame is not None:
render[key].append(float(render_frame))
# Overall GPU usage from rc6 (idle) residency
if rc6_values:
rc6_avg = sum(rc6_values) / len(rc6_values)
results["gpu"] = f"{round(100.0 - rc6_avg, 2)}%"

if video_frame is not None:
video[key].append(float(video_frame))
results["mem"] = "-%"

if render["global"] and video["global"]:
results["gpu"] = (
f"{round(((sum(render['global']) / len(render['global'])) + (sum(video['global']) / len(video['global']))) / 2, 2)}%"
)
results["mem"] = "-%"
# Compute: Render/3D engine (compute/shader workloads and QSV encode)
if render_global:
results["compute"] = f"{round(sum(render_global) / len(render_global), 2)}%"

if len(render.keys()) > 1:
results["clients"] = {}
# Decoder: Video engine (fixed-function codec)
if video_global:
results["dec"] = f"{round(sum(video_global) / len(video_global), 2)}%"

for key in render.keys():
if key == "global" or not render[key] or not video[key]:
continue
# Per-client GPU usage (sum of all engines per process)
if client_usages:
results["clients"] = {}

results["clients"][key] = (
f"{round(((sum(render[key]) / len(render[key])) + (sum(video[key]) / len(video[key]))) / 2, 2)}%"
)
for pid, samples in client_usages.items():
if samples:
results["clients"][pid] = (
f"{round(sum(samples) / len(samples), 2)}%"
)

return results

Expand Down
2 changes: 2 additions & 0 deletions web/public/locales/en/views/system.json
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
"gpuUsage": "GPU Usage",
"gpuMemory": "GPU Memory",
"gpuEncoder": "GPU Encoder",
"gpuCompute": "GPU Compute / Encode",
"gpuDecoder": "GPU Decoder",
"gpuTemperature": "GPU Temperature",
"gpuInfo": {
Expand Down Expand Up @@ -188,6 +189,7 @@
"cameraFfmpeg": "{{camName}} FFmpeg",
"cameraCapture": "{{camName}} capture",
"cameraDetect": "{{camName}} detect",
"cameraGpu": "{{camName}} GPU",
"cameraFramesPerSecond": "{{camName}} frames per second",
"cameraDetectionsPerSecond": "{{camName}} detections per second",
"cameraSkippedDetectionsPerSecond": "{{camName}} skipped detections per second"
Expand Down
3 changes: 1 addition & 2 deletions web/src/components/Statusbar.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,7 @@ export default function Statusbar() {
case "amd-vaapi":
gpuTitle = "AMD GPU";
break;
case "intel-vaapi":
case "intel-qsv":
case "intel-gpu":
gpuTitle = "Intel GPU";
break;
case "rockchip":
Expand Down
2 changes: 2 additions & 0 deletions web/src/types/stats.ts
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,10 @@ export type GpuStats = {
mem: string;
enc?: string;
dec?: string;
compute?: string;
pstate?: string;
temp?: number;
clients?: { [pid: string]: string };
};

export type NpuStats = {
Expand Down
Loading
Loading