feat: measure CPU load during inference to surface MLX vs GGUF difference

TheBlueHouse75 · claude · TheBlueHouse75 · commit 1011b72a3399 · 2026-03-03T15:43:26.000+01:00
Two models can show identical tok/s yet feel very different: MLX offloads
to GPU Metal leaving the CPU free, while GGUF via llama.cpp saturates it.
This adds CPU load sampling (via systeminformation currentLoad) after each
performance prompt, computes avg/peak, and displays the metric in the CLI
table with a color-coded indicator. A warning fires when avg CPU > 90%.

No scoring change — CPU load is a comfort indicator, not a performance one.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/src/benchmarks/performance.ts b/src/benchmarks/performance.ts
@@ -1,5 +1,5 @@
 import { abortOngoingRequests, generateStream, listModels, listRunningModels, getRuntimeName } from "../core/runtime.js";
-import { getMemoryUsage, detectThermalPressure, detectBatteryPowered, getSwapUsedGB } from "../core/hardware.js";
+import { getMemoryUsage, detectThermalPressure, detectBatteryPowered, getSwapUsedGB, getCpuLoad } from "../core/hardware.js";
 import type { PerformanceMetrics, BenchEnvironment } from "../types.js";
 import { avg, stddev, withTimeout, hasThinkingContent, estimateTokenCount } from "../utils.js";
 import { createSpinner, subStep } from "../ui/progress.js";
@@ -115,6 +115,7 @@ export async function runPerformanceBench(
     let failedPrompts = 0;
     let thinkingDetected = false;
     let totalThinkingTokens = 0;
+    const cpuLoadSamples: number[] = [];
 
     for (let i = 0; i < BENCH_PROMPTS.length; i++) {
       spinner.start(`Running performance test ${i + 1}/${BENCH_PROMPTS.length}...`);
@@ -180,6 +181,10 @@ export async function runPerformanceBench(
         subStep(
           `  Prompt ${i + 1}: ${tps.toFixed(1)} tok/s, first chunk ${firstChunkTime ?? "?"}ms, TTFT ${firstTokenTime ?? "?"}ms`
         );
+
+        // Sample CPU load after each prompt
+        const cpuSample = await optionalProbe(() => getCpuLoad(), -1);
+        if (cpuSample >= 0) cpuLoadSamples.push(cpuSample);
       } catch (err) {
         failedPrompts++;
         const message = err instanceof Error ? err.message : String(err);
@@ -232,11 +237,21 @@ export async function runPerformanceBench(
       swapBeforeResult.available && swapAfterResult.available
         ? +(swapAfterResult.value - swapBeforeResult.value).toFixed(2)
         : undefined;
+    // CPU load metrics from prompt samples
+    const cpuAvgLoad = cpuLoadSamples.length > 0
+      ? +(cpuLoadSamples.reduce((a, b) => a + b, 0) / cpuLoadSamples.length).toFixed(1)
+      : undefined;
+    const cpuPeakLoad = cpuLoadSamples.length > 0
+      ? +Math.max(...cpuLoadSamples).toFixed(1)
+      : undefined;
+
     const benchEnvironment: BenchEnvironment = {
       thermalPressureBefore: thermalBefore,
       thermalPressureAfter: thermalAfter,
       ...(swapDeltaGB !== undefined && swapDeltaGB > 0 ? { swapDeltaGB } : {}),
       ...(batteryPowered != null ? { batteryPowered } : {}),
+      ...(cpuAvgLoad !== undefined ? { cpuAvgLoad } : {}),
+      ...(cpuPeakLoad !== undefined ? { cpuPeakLoad } : {}),
     };
 
     return {
diff --git a/src/commands/bench.ts b/src/commands/bench.ts
@@ -213,7 +213,7 @@ export async function benchCommand(options: BenchOptions): Promise<BenchOutcome>
         });
         const perf = perfResult.metrics;
         const benchEnvironment: BenchEnvironment | undefined = perfResult.benchEnvironment;
-        if (!silent) printPerformanceTable(perf);
+        if (!silent) printPerformanceTable(perf, benchEnvironment);
 
         // Quality benchmarks (unless --perf-only)
         let quality: QualityMetrics | null = null;
diff --git a/src/core/hardware.ts b/src/core/hardware.ts
@@ -189,6 +189,15 @@ export async function getHardwareInfo(): Promise<HardwareInfo> {
   };
 }
 
+export async function getCpuLoad(): Promise<number> {
+  try {
+    const load = await si.currentLoad();
+    return +load.currentLoad.toFixed(1);
+  } catch {
+    return -1;
+  }
+}
+
 export async function getMemoryUsage(): Promise<{
   usedGB: number;
   totalGB: number;
diff --git a/src/scoring/fitness.ts b/src/scoring/fitness.ts
@@ -183,6 +183,12 @@ export function computeFitness(
     );
   }
 
+  if (benchEnv?.cpuAvgLoad != null && benchEnv.cpuAvgLoad > 90) {
+    warnings.push(
+      `High CPU load during inference (avg ${benchEnv.cpuAvgLoad.toFixed(0)}%). System may feel unresponsive — GPU-accelerated runtimes (MLX) can reduce CPU pressure.`
+    );
+  }
+
   return {
     verdict,
     globalScore,
diff --git a/src/types.ts b/src/types.ts
@@ -204,6 +204,8 @@ export interface BenchEnvironment {
   thermalPressureAfter?: "nominal" | "moderate" | "heavy" | "critical" | "unknown";
   swapDeltaGB?: number;
   batteryPowered?: boolean;
+  cpuAvgLoad?: number;
+  cpuPeakLoad?: number;
 }
 
 // ── Bench result ─────────────────────────────────────────
diff --git a/src/ui/results-table.ts b/src/ui/results-table.ts
@@ -1,6 +1,7 @@
 import Table from "cli-table3";
 import chalk from "chalk";
 import type {
+  BenchEnvironment,
   BenchResult,
   HardwareInfo,
   PerformanceMetrics,
@@ -102,7 +103,7 @@ export function printHardwareTable(hw: HardwareInfo): void {
   console.log(table.toString());
 }
 
-export function printPerformanceTable(perf: PerformanceMetrics): void {
+export function printPerformanceTable(perf: PerformanceMetrics, benchEnvironment?: BenchEnvironment): void {
   const table = new Table({
     head: [chalk.bold("Metric"), chalk.bold("Value")],
     style: { head: [], border: [] },
@@ -168,6 +169,17 @@ export function printPerformanceTable(perf: PerformanceMetrics): void {
     ]);
   }
 
+  if (benchEnvironment?.cpuAvgLoad != null && benchEnvironment.cpuAvgLoad >= 0) {
+    const cpuColor = benchEnvironment.cpuAvgLoad < 50 ? chalk.green
+      : benchEnvironment.cpuAvgLoad < 80 ? chalk.yellow
+      : chalk.red;
+    table.push([
+      "CPU Load During Bench",
+      cpuColor(`avg ${benchEnvironment.cpuAvgLoad.toFixed(0)}%` +
+        (benchEnvironment.cpuPeakLoad != null ? ` (peak ${benchEnvironment.cpuPeakLoad.toFixed(0)}%)` : "")),
+    ]);
+  }
+
   console.log(table.toString());
 }
 
diff --git a/tests/hardware.test.ts b/tests/hardware.test.ts
@@ -51,6 +51,7 @@ vi.mock("systeminformation", () => ({
     })),
     memLayout: vi.fn(async () => [{ type: "DDR5" }]),
     cpuCurrentSpeed: vi.fn(async () => ({ avg: 3.0 })),
+    currentLoad: vi.fn(async () => ({ currentLoad: 42.5 })),
     battery: vi.fn(async () => ({
       hasBattery: true,
       acConnected: true,
@@ -264,6 +265,27 @@ describe("detectThermalPressure", () => {
   });
 });
 
+describe("getCpuLoad", () => {
+  beforeEach(() => {
+    vi.clearAllMocks();
+  });
+
+  it("returns CPU load percentage from systeminformation", async () => {
+    const { getCpuLoad } = await import("../src/core/hardware.js");
+    const load = await getCpuLoad();
+    expect(load).toBe(42.5);
+  });
+
+  it("returns -1 when currentLoad throws", async () => {
+    const si = await import("systeminformation");
+    vi.mocked(si.default.currentLoad).mockRejectedValueOnce(new Error("probe failed"));
+
+    const { getCpuLoad } = await import("../src/core/hardware.js");
+    const load = await getCpuLoad();
+    expect(load).toBe(-1);
+  });
+});
+
 describe("detectBatteryPowered", () => {
   const originalPlatform = process.platform;
 
diff --git a/tests/performance-bench.test.ts b/tests/performance-bench.test.ts
@@ -53,6 +53,7 @@ vi.mock("../src/core/hardware.js", () => ({
   detectThermalPressure: vi.fn(async () => "nominal"),
   detectBatteryPowered: vi.fn(async () => undefined),
   getSwapUsedGB: vi.fn(async () => 0),
+  getCpuLoad: vi.fn(async () => 45.0),
 }));
 
 vi.mock("../src/core/lm-studio-client.js", () => ({
@@ -258,6 +259,32 @@ describe("runPerformanceBench", () => {
     expect(result.benchEnvironment?.swapDeltaGB).toBeUndefined();
   });
 
+  it("reports cpuAvgLoad and cpuPeakLoad in benchEnvironment", async () => {
+    generatePlan = ["ok", "ok", "ok", "ok", "ok", "ok"];
+
+    const result = await runPerformanceBench("test-model", {
+      failOnPromptError: false,
+      minSuccessfulPrompts: 3,
+    });
+
+    expect(result.benchEnvironment?.cpuAvgLoad).toBe(45.0);
+    expect(result.benchEnvironment?.cpuPeakLoad).toBe(45.0);
+  });
+
+  it("omits cpuAvgLoad when all CPU probes fail", async () => {
+    generatePlan = ["ok", "ok", "ok", "ok", "ok", "ok"];
+    const getCpuLoadMock = vi.mocked(hardware.getCpuLoad);
+    getCpuLoadMock.mockResolvedValue(-1);
+
+    const result = await runPerformanceBench("test-model", {
+      failOnPromptError: false,
+      minSuccessfulPrompts: 3,
+    });
+
+    expect(result.benchEnvironment?.cpuAvgLoad).toBeUndefined();
+    expect(result.benchEnvironment?.cpuPeakLoad).toBeUndefined();
+  });
+
   it("does not report swap delta when pre-bench swap probe fails but post-bench probe succeeds", async () => {
     generatePlan = ["ok", "ok", "ok", "ok", "ok", "ok"];
     const getSwapUsedGBMock = vi.mocked(hardware.getSwapUsedGB);
diff --git a/tests/scoring.test.ts b/tests/scoring.test.ts
@@ -564,6 +564,28 @@ describe("computeFitness", () => {
     const fitness = computeFitness(perf, null, undefined, benchEnv);
     expect(fitness.warnings.some((w) => w.includes("battery power"))).toBe(false);
   });
+
+  it("adds high CPU load warning when cpuAvgLoad > 90%", () => {
+    const perf = makePerf({ tokensPerSecond: 80, ttft: 300, memoryPercent: 20 });
+    const benchEnv: BenchEnvironment = { cpuAvgLoad: 95.2 };
+    const fitness = computeFitness(perf, null, undefined, benchEnv);
+    expect(fitness.warnings.some((w) => w.includes("High CPU load"))).toBe(true);
+    expect(fitness.warnings.some((w) => w.includes("95%"))).toBe(true);
+  });
+
+  it("does not add CPU load warning when cpuAvgLoad <= 90%", () => {
+    const perf = makePerf({ tokensPerSecond: 80, ttft: 300, memoryPercent: 20 });
+    const benchEnv: BenchEnvironment = { cpuAvgLoad: 85.0 };
+    const fitness = computeFitness(perf, null, undefined, benchEnv);
+    expect(fitness.warnings.some((w) => w.includes("High CPU load"))).toBe(false);
+  });
+
+  it("does not add CPU load warning when cpuAvgLoad is undefined", () => {
+    const perf = makePerf({ tokensPerSecond: 80, ttft: 300, memoryPercent: 20 });
+    const benchEnv: BenchEnvironment = {};
+    const fitness = computeFitness(perf, null, undefined, benchEnv);
+    expect(fitness.warnings.some((w) => w.includes("High CPU load"))).toBe(false);
+  });
 });
 
 // ── Time penalty tests ─────────────────────────────────────

Original file line number	Diff line number	Diff line change
`@@ -204,6 +204,8 @@ export interface BenchEnvironment {`
`204`	`204`	`thermalPressureAfter?: "nominal" \| "moderate" \| "heavy" \| "critical" \| "unknown";`
`205`	`205`	`swapDeltaGB?: number;`
`206`	`206`	`batteryPowered?: boolean;`
	`207`	`+ cpuAvgLoad?: number;`
	`208`	`+ cpuPeakLoad?: number;`
`207`	`209`	`}`
`208`	`210`
`209`	`211`	`// ── Bench result ─────────────────────────────────────────`