Skip to content

Commit 1011b72

Browse files
feat: measure CPU load during inference to surface MLX vs GGUF difference
Two models can show identical tok/s yet feel very different: MLX offloads inference to the GPU via Metal, leaving the CPU free, while GGUF via llama.cpp saturates it.

This adds CPU load sampling (via systeminformation `currentLoad`) after each performance prompt, computes the average and peak, and displays the metric in the CLI table with a color-coded indicator. A warning fires when the average CPU load exceeds 90%.

No scoring change — CPU load is a comfort indicator, not a performance one.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 7c9bd83 commit 1011b72

9 files changed

Lines changed: 118 additions & 3 deletions

File tree

src/benchmarks/performance.ts

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { abortOngoingRequests, generateStream, listModels, listRunningModels, getRuntimeName } from "../core/runtime.js";
2-
import { getMemoryUsage, detectThermalPressure, detectBatteryPowered, getSwapUsedGB } from "../core/hardware.js";
2+
import { getMemoryUsage, detectThermalPressure, detectBatteryPowered, getSwapUsedGB, getCpuLoad } from "../core/hardware.js";
33
import type { PerformanceMetrics, BenchEnvironment } from "../types.js";
44
import { avg, stddev, withTimeout, hasThinkingContent, estimateTokenCount } from "../utils.js";
55
import { createSpinner, subStep } from "../ui/progress.js";
@@ -115,6 +115,7 @@ export async function runPerformanceBench(
115115
let failedPrompts = 0;
116116
let thinkingDetected = false;
117117
let totalThinkingTokens = 0;
118+
const cpuLoadSamples: number[] = [];
118119

119120
for (let i = 0; i < BENCH_PROMPTS.length; i++) {
120121
spinner.start(`Running performance test ${i + 1}/${BENCH_PROMPTS.length}...`);
@@ -180,6 +181,10 @@ export async function runPerformanceBench(
180181
subStep(
181182
` Prompt ${i + 1}: ${tps.toFixed(1)} tok/s, first chunk ${firstChunkTime ?? "?"}ms, TTFT ${firstTokenTime ?? "?"}ms`
182183
);
184+
185+
// Sample CPU load after each prompt
186+
const cpuSample = await optionalProbe(() => getCpuLoad(), -1);
187+
if (cpuSample >= 0) cpuLoadSamples.push(cpuSample);
183188
} catch (err) {
184189
failedPrompts++;
185190
const message = err instanceof Error ? err.message : String(err);
@@ -232,11 +237,21 @@ export async function runPerformanceBench(
232237
swapBeforeResult.available && swapAfterResult.available
233238
? +(swapAfterResult.value - swapBeforeResult.value).toFixed(2)
234239
: undefined;
240+
// CPU load metrics from prompt samples
241+
const cpuAvgLoad = cpuLoadSamples.length > 0
242+
? +(cpuLoadSamples.reduce((a, b) => a + b, 0) / cpuLoadSamples.length).toFixed(1)
243+
: undefined;
244+
const cpuPeakLoad = cpuLoadSamples.length > 0
245+
? +Math.max(...cpuLoadSamples).toFixed(1)
246+
: undefined;
247+
235248
const benchEnvironment: BenchEnvironment = {
236249
thermalPressureBefore: thermalBefore,
237250
thermalPressureAfter: thermalAfter,
238251
...(swapDeltaGB !== undefined && swapDeltaGB > 0 ? { swapDeltaGB } : {}),
239252
...(batteryPowered != null ? { batteryPowered } : {}),
253+
...(cpuAvgLoad !== undefined ? { cpuAvgLoad } : {}),
254+
...(cpuPeakLoad !== undefined ? { cpuPeakLoad } : {}),
240255
};
241256

242257
return {

src/commands/bench.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,7 @@ export async function benchCommand(options: BenchOptions): Promise<BenchOutcome>
213213
});
214214
const perf = perfResult.metrics;
215215
const benchEnvironment: BenchEnvironment | undefined = perfResult.benchEnvironment;
216-
if (!silent) printPerformanceTable(perf);
216+
if (!silent) printPerformanceTable(perf, benchEnvironment);
217217

218218
// Quality benchmarks (unless --perf-only)
219219
let quality: QualityMetrics | null = null;

src/core/hardware.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,15 @@ export async function getHardwareInfo(): Promise<HardwareInfo> {
189189
};
190190
}
191191

192+
export async function getCpuLoad(): Promise<number> {
193+
try {
194+
const load = await si.currentLoad();
195+
return +load.currentLoad.toFixed(1);
196+
} catch {
197+
return -1;
198+
}
199+
}
200+
192201
export async function getMemoryUsage(): Promise<{
193202
usedGB: number;
194203
totalGB: number;

src/scoring/fitness.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,12 @@ export function computeFitness(
183183
);
184184
}
185185

186+
if (benchEnv?.cpuAvgLoad != null && benchEnv.cpuAvgLoad > 90) {
187+
warnings.push(
188+
`High CPU load during inference (avg ${benchEnv.cpuAvgLoad.toFixed(0)}%). System may feel unresponsive — GPU-accelerated runtimes (MLX) can reduce CPU pressure.`
189+
);
190+
}
191+
186192
return {
187193
verdict,
188194
globalScore,

src/types.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,8 @@ export interface BenchEnvironment {
204204
thermalPressureAfter?: "nominal" | "moderate" | "heavy" | "critical" | "unknown";
205205
swapDeltaGB?: number;
206206
batteryPowered?: boolean;
207+
cpuAvgLoad?: number;
208+
cpuPeakLoad?: number;
207209
}
208210

209211
// ── Bench result ─────────────────────────────────────────

src/ui/results-table.ts

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import Table from "cli-table3";
22
import chalk from "chalk";
33
import type {
4+
BenchEnvironment,
45
BenchResult,
56
HardwareInfo,
67
PerformanceMetrics,
@@ -102,7 +103,7 @@ export function printHardwareTable(hw: HardwareInfo): void {
102103
console.log(table.toString());
103104
}
104105

105-
export function printPerformanceTable(perf: PerformanceMetrics): void {
106+
export function printPerformanceTable(perf: PerformanceMetrics, benchEnvironment?: BenchEnvironment): void {
106107
const table = new Table({
107108
head: [chalk.bold("Metric"), chalk.bold("Value")],
108109
style: { head: [], border: [] },
@@ -168,6 +169,17 @@ export function printPerformanceTable(perf: PerformanceMetrics): void {
168169
]);
169170
}
170171

172+
if (benchEnvironment?.cpuAvgLoad != null && benchEnvironment.cpuAvgLoad >= 0) {
173+
const cpuColor = benchEnvironment.cpuAvgLoad < 50 ? chalk.green
174+
: benchEnvironment.cpuAvgLoad < 80 ? chalk.yellow
175+
: chalk.red;
176+
table.push([
177+
"CPU Load During Bench",
178+
cpuColor(`avg ${benchEnvironment.cpuAvgLoad.toFixed(0)}%` +
179+
(benchEnvironment.cpuPeakLoad != null ? ` (peak ${benchEnvironment.cpuPeakLoad.toFixed(0)}%)` : "")),
180+
]);
181+
}
182+
171183
console.log(table.toString());
172184
}
173185

tests/hardware.test.ts

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ vi.mock("systeminformation", () => ({
5151
})),
5252
memLayout: vi.fn(async () => [{ type: "DDR5" }]),
5353
cpuCurrentSpeed: vi.fn(async () => ({ avg: 3.0 })),
54+
currentLoad: vi.fn(async () => ({ currentLoad: 42.5 })),
5455
battery: vi.fn(async () => ({
5556
hasBattery: true,
5657
acConnected: true,
@@ -264,6 +265,27 @@ describe("detectThermalPressure", () => {
264265
});
265266
});
266267

268+
describe("getCpuLoad", () => {
269+
beforeEach(() => {
270+
vi.clearAllMocks();
271+
});
272+
273+
it("returns CPU load percentage from systeminformation", async () => {
274+
const { getCpuLoad } = await import("../src/core/hardware.js");
275+
const load = await getCpuLoad();
276+
expect(load).toBe(42.5);
277+
});
278+
279+
it("returns -1 when currentLoad throws", async () => {
280+
const si = await import("systeminformation");
281+
vi.mocked(si.default.currentLoad).mockRejectedValueOnce(new Error("probe failed"));
282+
283+
const { getCpuLoad } = await import("../src/core/hardware.js");
284+
const load = await getCpuLoad();
285+
expect(load).toBe(-1);
286+
});
287+
});
288+
267289
describe("detectBatteryPowered", () => {
268290
const originalPlatform = process.platform;
269291

tests/performance-bench.test.ts

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ vi.mock("../src/core/hardware.js", () => ({
5353
detectThermalPressure: vi.fn(async () => "nominal"),
5454
detectBatteryPowered: vi.fn(async () => undefined),
5555
getSwapUsedGB: vi.fn(async () => 0),
56+
getCpuLoad: vi.fn(async () => 45.0),
5657
}));
5758

5859
vi.mock("../src/core/lm-studio-client.js", () => ({
@@ -258,6 +259,32 @@ describe("runPerformanceBench", () => {
258259
expect(result.benchEnvironment?.swapDeltaGB).toBeUndefined();
259260
});
260261

262+
it("reports cpuAvgLoad and cpuPeakLoad in benchEnvironment", async () => {
263+
generatePlan = ["ok", "ok", "ok", "ok", "ok", "ok"];
264+
265+
const result = await runPerformanceBench("test-model", {
266+
failOnPromptError: false,
267+
minSuccessfulPrompts: 3,
268+
});
269+
270+
expect(result.benchEnvironment?.cpuAvgLoad).toBe(45.0);
271+
expect(result.benchEnvironment?.cpuPeakLoad).toBe(45.0);
272+
});
273+
274+
it("omits cpuAvgLoad when all CPU probes fail", async () => {
275+
generatePlan = ["ok", "ok", "ok", "ok", "ok", "ok"];
276+
const getCpuLoadMock = vi.mocked(hardware.getCpuLoad);
277+
getCpuLoadMock.mockResolvedValue(-1);
278+
279+
const result = await runPerformanceBench("test-model", {
280+
failOnPromptError: false,
281+
minSuccessfulPrompts: 3,
282+
});
283+
284+
expect(result.benchEnvironment?.cpuAvgLoad).toBeUndefined();
285+
expect(result.benchEnvironment?.cpuPeakLoad).toBeUndefined();
286+
});
287+
261288
it("does not report swap delta when pre-bench swap probe fails but post-bench probe succeeds", async () => {
262289
generatePlan = ["ok", "ok", "ok", "ok", "ok", "ok"];
263290
const getSwapUsedGBMock = vi.mocked(hardware.getSwapUsedGB);

tests/scoring.test.ts

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -564,6 +564,28 @@ describe("computeFitness", () => {
564564
const fitness = computeFitness(perf, null, undefined, benchEnv);
565565
expect(fitness.warnings.some((w) => w.includes("battery power"))).toBe(false);
566566
});
567+
568+
it("adds high CPU load warning when cpuAvgLoad > 90%", () => {
569+
const perf = makePerf({ tokensPerSecond: 80, ttft: 300, memoryPercent: 20 });
570+
const benchEnv: BenchEnvironment = { cpuAvgLoad: 95.2 };
571+
const fitness = computeFitness(perf, null, undefined, benchEnv);
572+
expect(fitness.warnings.some((w) => w.includes("High CPU load"))).toBe(true);
573+
expect(fitness.warnings.some((w) => w.includes("95%"))).toBe(true);
574+
});
575+
576+
it("does not add CPU load warning when cpuAvgLoad <= 90%", () => {
577+
const perf = makePerf({ tokensPerSecond: 80, ttft: 300, memoryPercent: 20 });
578+
const benchEnv: BenchEnvironment = { cpuAvgLoad: 85.0 };
579+
const fitness = computeFitness(perf, null, undefined, benchEnv);
580+
expect(fitness.warnings.some((w) => w.includes("High CPU load"))).toBe(false);
581+
});
582+
583+
it("does not add CPU load warning when cpuAvgLoad is undefined", () => {
584+
const perf = makePerf({ tokensPerSecond: 80, ttft: 300, memoryPercent: 20 });
585+
const benchEnv: BenchEnvironment = {};
586+
const fitness = computeFitness(perf, null, undefined, benchEnv);
587+
expect(fitness.warnings.some((w) => w.includes("High CPU load"))).toBe(false);
588+
});
567589
});
568590

569591
// ── Time penalty tests ─────────────────────────────────────

0 commit comments

Comments
 (0)