Skip to content

Commit 7c90fde

Browse files
authored
sai-cli diagnose: plot CPU usage if available (#75)
* sai-cli diagnose: plot CPU usage if available
* Clean up
* Fix typo
1 parent 836fc0a commit 7c90fde

File tree

3 files changed

+76
-12
lines changed

3 files changed

+76
-12
lines changed

python/cli/diagnose/diagnose.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ def generateReport(args):
4343
'magnetometer': {"v": [], "t": [], "td": []},
4444
'barometer': {"v": [], "t": [], "td": []},
4545
'gnss': {"v": [], "t": [], "td": []},
46-
'cpu': {"v": [], "t": []},
46+
'cpu': {"v": [], "t": [], "td": [], "processes": {}},
4747
'cameras': {}
4848
}
4949

@@ -120,8 +120,19 @@ def addMeasurement(type, t, v):
120120
if "features" in f: cameras[ind]["features"].append(len(f["features"]))
121121
cameras[ind]["t"].append(t)
122122
elif metrics is not None and 'cpu' in metrics:
123-
data["cpu"]["t"].append(t)
124-
data["cpu"]["v"].append(metrics['cpu'].get('systemTotalUsagePercent', 0))
123+
addMeasurement("cpu", t, metrics['cpu'].get('systemTotalUsagePercent', 0))
124+
usedProcessNames = {} # Track duplicate process names
125+
for process in metrics['cpu'].get('processes', []):
126+
name = process.get('name')
127+
if not name: continue
128+
129+
count = usedProcessNames.get(name, 0)
130+
usedProcessNames[name] = count + 1
131+
uniqueName = f"{name} {count + 1}" if count else name
132+
133+
processData = data['cpu']["processes"].setdefault(uniqueName, {"v": [], "t": []})
134+
processData['v'].append(process['usagePercent'])
135+
processData['t'].append(t)
125136

126137
if nSkipped > 0: print(f'Skipped {nSkipped} lines')
127138

@@ -131,7 +142,7 @@ def addMeasurement(type, t, v):
131142
diagnoseMagnetometer(data, output)
132143
diagnoseBarometer(data, output)
133144
diagnoseGNSS(data, output)
134-
diagnoseCpu(data, output)
145+
diagnoseCPU(data, output)
135146

136147
if os.path.dirname(args.output_html):
137148
os.makedirs(os.path.dirname(args.output_html), exist_ok=True)

python/cli/diagnose/html.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ def generateHtml(output, outputHtml):
166166
camera["frequency"],
167167
camera["count"])))
168168

169-
SENSOR_NAMES = ["accelerometer", "gyroscope", "magnetometer", "barometer", "GNSS"]
169+
SENSOR_NAMES = ["accelerometer", "gyroscope", "magnetometer", "barometer", "GNSS", "CPU"]
170170
for sensor in SENSOR_NAMES:
171171
if sensor not in output: continue
172172
kvPairs.append((

python/cli/diagnose/sensors.py

Lines changed: 60 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,7 @@ def highpass(signal, fs, cutoff, order=3):
294294
plt.ylabel(measurementUnit)
295295

296296
fig.suptitle(f"Preview of {sensorName} signal noise (mean={noiseScale:.1f}{measurementUnit}, threshold={noiseThreshold:.1f}{measurementUnit})")
297-
fig.tight_layout(rect=[0, 0.03, 1, 0.95])
297+
fig.tight_layout()
298298
self.images.append(base64(fig))
299299

300300
if noiseScale > noiseThreshold:
@@ -365,6 +365,50 @@ def analyzeAccelerometerSignalHasGravity(self, signal):
365365
"This suggests the signal may be missing gravitational acceleration."
366366
)
367367

368+
def analyzeCpuUsage(self, signal, timestamps, processes):
    """
    Check the overall CPU usage against a fixed threshold and plot it.

    At most one warning is recorded: the mean, the 95th percentile and the
    99th percentile of `signal` are compared against the threshold in that
    order, and only the first one that exceeds it is reported.

    A single figure is then rendered with the system total as a solid line
    and every non-empty per-process series as a dashed line. The encoded
    figure is appended to `self.images`.

    Args:
        signal: Sequence of system total CPU usage samples (plotted on an
            axis labelled "CPU usage (%)").
        timestamps: Sample times matching `signal` (plotted on an axis
            labelled "Time (s)").
        processes: Mapping from a process display name to a dict holding
            its usage samples under 'v' and their timestamps under 't'.

    Returns:
        None. Returns early, without adding an issue or an image, when
        `signal` is empty.
    """
    CPU_USAGE_THRESHOLD = 90.0

    # Statistics of an empty sample are undefined (NumPy warns or raises,
    # depending on version) and there would be nothing to plot either.
    if np.size(signal) == 0:
        return

    mean = np.mean(signal)
    p95 = np.percentile(signal, 95)
    p99 = np.percentile(signal, 99)

    # Ordered from the most to the least severe condition; only the first
    # statistic above the threshold is reported.
    checks = (
        ("Average", mean),
        ("95th percentile", p95),
        ("99th percentile", p99),
    )
    for description, value in checks:
        if value > CPU_USAGE_THRESHOLD:
            self.__addIssue(DiagnosisLevel.WARNING,
                f"{description} CPU usage {value:.1f}% is above the threshold ({CPU_USAGE_THRESHOLD:.1f}%)."
            )
            break

    import matplotlib.pyplot as plt
    fig, ax = plt.subplots(figsize=(8, 6))

    ax.plot(timestamps, signal, label="System total", linestyle='-')
    ax.set_title("CPU usage")
    ax.set_ylabel("CPU usage (%)")
    ax.set_xlabel("Time (s)")

    legend = ['System total']
    ylim = 100
    for name, data in processes.items():
        if len(data['v']) == 0:
            continue
        ax.plot(data['t'], data['v'], label=name, linestyle='--')
        legend.append(name)
        # A per-process value may exceed 100; grow the axis with 10%
        # headroom so that such a series is not clipped.
        ylim = max(ylim, np.max(data['v']) * 1.1)

    ax.set_ylim(0, ylim)

    leg = ax.legend(legend, fontsize='large', markerscale=10)
    # Thicken the legend samples so the line styles stay distinguishable.
    for line in leg.get_lines():
        line.set_linewidth(2)

    fig.tight_layout()
    self.images.append(base64(fig))
411+
368412
def serializeIssues(self):
369413
self.issues = sorted(self.issues, key=lambda x: x[0], reverse=True)
370414
return [{
@@ -705,13 +749,22 @@ def diagnoseGNSS(data, output):
705749
if status.diagnosis == DiagnosisLevel.ERROR:
706750
output["passed"] = False
707751

708-
def diagnoseCpu(data, output):
709-
data = data["cpu"]
710-
timestamps = np.array(data["t"])
711-
values = data["v"]
752+
def diagnoseCPU(data, output):
    """
    Analyze the recorded CPU usage and store the result in the report.

    Args:
        data: Dict of collected measurements. Only `data["cpu"]` is read:
            "t" (timestamps) and "v" (usage values) are required, "td" is
            required once there is at least one sample, and "processes"
            (per-process series) is optional.
        output: Report dict, modified in place. When CPU samples exist,
            `output["CPU"]` is set to a dict with the keys "diagnosis",
            "issues", "frequency", "count" and "images".

    Returns:
        None. `output` is left untouched when no CPU samples were recorded.
    """
    sensor = data["cpu"]
    timestamps = np.array(sensor["t"])

    # Bail out before reading any other key, so an empty CPU record is
    # skipped regardless of which optional fields it carries.
    if len(timestamps) == 0:
        return

    # NOTE(review): "td" presumably holds the time deltas between
    # consecutive samples - confirm against the code that fills it in.
    deltaTimes = np.array(sensor["td"])
    signal = np.array(sensor['v'])
    # Per-process series are optional; without them only the system total
    # is analyzed and plotted.
    processes = sensor.get("processes", {})

    status = Status()
    status.analyzeCpuUsage(signal, timestamps, processes)

    output["CPU"] = {
        "diagnosis": status.diagnosis.toString(),
        "issues": status.serializeIssues(),
        "frequency": computeSamplingRate(deltaTimes),
        "count": len(timestamps),
        "images": status.images
    }

0 commit comments

Comments
 (0)