Skip to content

Commit b2f5301

Browse files
Add benchmark scripts for performance validation
Add hack/benchmark-perf.sh for automated performance benchmarking with CPU timing (via GNU time) and dynamic linker profiling (via ltrace). Add hack/benchmark-compare.sh for side-by-side comparison of benchmark result JSON files. These are temporary additions for validating the NVML Init/Shutdown optimization and will be removed before merge. Signed-off-by: Rajath Agasthya <ragasthya@nvidia.com>
1 parent 3dd1e82 commit b2f5301

File tree

2 files changed

+575
-0
lines changed

2 files changed

+575
-0
lines changed

hack/benchmark-compare.sh

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
#!/usr/bin/env bash
2+
3+
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
# benchmark-compare.sh — Compare two benchmark result JSON files.
18+
#
19+
# Reads the JSON files produced by benchmark-perf.sh and prints a
20+
# side-by-side comparison with deltas and percentage improvements
21+
# for CPU time, wall time, and dynamic linker call counts.
22+
#
23+
# Usage:
24+
# ./hack/benchmark-compare.sh <before.json> <after.json>
25+
#
26+
# Example:
27+
# ./hack/benchmark-compare.sh benchmark-results-20260303-111337.json benchmark-results-20260303-111809.json
28+
29+
set -euo pipefail

# Both positional arguments are mandatory: the ${N:?message} expansion aborts
# the script with the usage text when an argument is unset or empty.
BEFORE="${1:?Usage: $0 <before.json> <after.json>}"
AFTER="${2:?Usage: $0 <before.json> <after.json>}"

# Each argument must name an existing regular file.
if [[ ! -f "${BEFORE}" ]]; then
    echo "ERROR: File not found: ${BEFORE}" >&2
    exit 1
fi
if [[ ! -f "${AFTER}" ]]; then
    echo "ERROR: File not found: ${AFTER}" >&2
    exit 1
fi

# The comparison itself is written in Python, so an interpreter is required.
if ! command -v python3 >/dev/null 2>&1; then
    echo "ERROR: python3 is required" >&2
    exit 1
fi

# Run the program supplied on stdin ("-") with the two paths as its arguments.
# The quoted delimiter keeps the shell from expanding anything in the heredoc.
python3 - "${BEFORE}" "${AFTER}" <<'PYEOF'
40+
import json
41+
import sys
42+
43+
def load(path):
    """Parse the JSON document stored at *path* and return the decoded value.

    The file is always decoded as UTF-8 so the result does not depend on the
    locale of the machine running the comparison.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the content is not valid JSON.
    """
    with open(path, encoding="utf-8") as f:
        return json.load(f)
46+
47+
def _by_name(result):
    """Map each scenario's "name" to the scenario dict it came from."""
    return {entry["name"]: entry for entry in result["scenarios"]}


# Decode both result files named on the command line (before first, then after).
before = load(sys.argv[1])
after = load(sys.argv[2])

# Name-keyed views let the report pair up scenarios across the two runs.
before_map = _by_name(before)
after_map = _by_name(after)
52+
53+
def avg(scenario, key):
    """Return the value recorded for *key* in *scenario*, reduced to one number.

    A dict value is replaced by its "avg" entry; any other value is returned
    unchanged. The result is None when the key is absent, or when a dict value
    has no "avg" entry.
    """
    value = scenario.get(key)
    # A missing key gives None here, which is not a dict and is returned as is.
    return value.get("avg") if isinstance(value, dict) else value
60+
61+
def fmt_delta(old, new):
    """Format the change from *old* to *new* as (absolute, percentage) strings.

    The absolute change is signed with two decimals; the percentage is signed
    with one decimal and relative to *old*. Both are "-" when either input is
    None, and the percentage alone is "-" when *old* is zero.
    """
    if old is not None and new is not None:
        diff = new - old
        if old == 0:
            # A relative change against a zero baseline is undefined.
            change = "-"
        else:
            change = f"{(diff / old) * 100:+.1f}%"
        return f"{diff:+.2f}", change
    return "-", "-"
67+
68+
def fmt_delta_int(old, new):
    """Format the change between two integer counts as (absolute, percentage).

    The absolute change is a signed integer string; the percentage is signed
    with one decimal and relative to *old*. Both are "-" when either input is
    None. With a zero baseline the percentage is "0.0%" if *new* is also zero
    and "-" otherwise.
    """
    if old is None or new is None:
        return "-", "-"
    diff = new - old
    if old != 0:
        change = f"{(diff / old) * 100:+.1f}%"
    elif new == 0:
        # Zero before and zero after: report no change.
        change = "0.0%"
    else:
        # Growth from zero has no meaningful percentage.
        change = "-"
    return f"{diff:+d}", change
77+
78+
def speedup(old, new):
    """Return the ratio old / new formatted like "2.0x".

    The result is "-" when either value is None or when *new* is zero, since
    no ratio can be computed in those cases.
    """
    computable = old is not None and new is not None and new != 0
    return f"{old / new:.1f}x" if computable else "-"
82+
83+
def row(name, b_str, a_str, delta, pct, spd):
    """Print one table line built from six pre-formatted string cells.

    *name* is left-aligned in 33 columns, *b_str*, *a_str*, *delta* and *pct*
    are right-aligned in 8 columns each, and *spd* is right-aligned in 7.
    """
    line = f" {name:<33s} {b_str:>8s} {a_str:>8s} {delta:>8s} {pct:>8s} {spd:>7s}"
    print(line)
85+
86+
# Report banner: title, then the file, binary and GPU entry of each run.
banner = "=" * 90
print(banner)
print("nvidia-mig-parted benchmark comparison")
print(banner)

print(f"Before: {sys.argv[1]}")
print(f" Binary: {before.get('binary', '?')}")
print(f" GPUs: {before.get('gpus', '?')}")

print(f"After: {sys.argv[2]}")
print(f" Binary: {after.get('binary', '?')}")
print(f" GPUs: {after.get('gpus', '?')}")

print()

# Column headings, framed by dashed rules.
rule = "-" * 90
print(rule)
print(f" {'Scenario':<33s} {'Before':>8s} {'After':>8s} {'Delta':>8s} {'Change':>8s} {'Speedup':>7s}")
print(rule)
102+
103+
def _cpu_seconds(scenario):
    """Return averaged user + sys CPU seconds, or None if either is missing."""
    user_s = avg(scenario, "user_cpu_s")
    system_s = avg(scenario, "sys_cpu_s")
    if user_s is None or system_s is None:
        return None
    return user_s + system_s


# CPU time: one line per scenario that appears in both result files.
print("CPU time (user + sys, seconds):")
for name, before_entry in before_map.items():
    if name in after_map:
        cpu_before = _cpu_seconds(before_entry)
        cpu_after = _cpu_seconds(after_map[name])
        delta, pct = fmt_delta(cpu_before, cpu_after)
        # Missing measurements are shown as "-" instead of a number.
        before_cell = "-" if cpu_before is None else f"{cpu_before:.2f}"
        after_cell = "-" if cpu_after is None else f"{cpu_after:.2f}"
        row(name, before_cell, after_cell, delta, pct, speedup(cpu_before, cpu_after))
118+
119+
# Wall time: one line per scenario that appears in both result files.
print()
print("Wall time (seconds):")
for name, before_entry in before_map.items():
    if name in after_map:
        wall_before = avg(before_entry, "wall_s")
        wall_after = avg(after_map[name], "wall_s")
        delta, pct = fmt_delta(wall_before, wall_after)
        # Missing measurements are shown as "-" instead of a number.
        before_cell = "-" if wall_before is None else f"{wall_before:.2f}"
        after_cell = "-" if wall_after is None else f"{wall_after:.2f}"
        row(name, before_cell, after_cell, delta, pct, speedup(wall_before, wall_after))
132+
133+
def _call_count(raw):
    """Reduce a raw counter entry to an int, or None when there is no value.

    A dict entry is replaced by its "avg" member before conversion.
    """
    if isinstance(raw, dict):
        raw = raw.get("avg")
    return None if raw is None else int(raw)


# dlsym/dlopen/dlclose (if available): these sections are printed only when
# at least one scenario of the "before" file carries a "dlsym" key.
dl_sections = (
    ("dlsym", "dlsym calls"),
    ("dlopen", "dlopen calls"),
    ("dlclose", "dlclose calls"),
)
if any("dlsym" in scenario for scenario in before["scenarios"]):
    for dl_key, dl_label in dl_sections:
        print()
        print(f"{dl_label}:")
        for name, before_entry in before_map.items():
            if name not in after_map:
                continue
            raw_before = before_entry.get(dl_key)
            raw_after = after_map[name].get(dl_key)
            # Skip the scenario only when neither run recorded this counter;
            # the check is on the raw entries, before any "avg" unwrapping.
            if raw_before is None and raw_after is None:
                continue
            b_val = _call_count(raw_before)
            a_val = _call_count(raw_after)
            delta, pct = fmt_delta_int(b_val, a_val)
            row(name,
                "-" if b_val is None else str(b_val),
                "-" if a_val is None else str(a_val),
                delta, pct, speedup(b_val, a_val))

# Closing rule of the table.
print()
print("-" * 90)
161+
PYEOF

0 commit comments

Comments
 (0)