diff --git a/.fallowrc.json b/.fallowrc.json
index a1206627f..ce94eba6e 100644
--- a/.fallowrc.json
+++ b/.fallowrc.json
@@ -25,6 +25,7 @@
   ],
   "ignorePatterns": [
     "examples/test-app/**",
+    "scripts/perf/**",
     "ios-runner/AgentDeviceRunner/AgentDeviceRunnerUITests.xctestplan",
     "scripts/write-xcuitest-cache-metadata.mjs"
   ],
diff --git a/.github/workflows/perf-nightly.yml b/.github/workflows/perf-nightly.yml
new file mode 100644
index 000000000..d317d33be
--- /dev/null
+++ b/.github/workflows/perf-nightly.yml
@@ -0,0 +1,127 @@
+name: Perf Nightly
+
+# End-to-end command perf benchmark (scripts/perf). Scheduled + manual only — perf timing on
+# shared CI runners is noisy, so treat this as a trend/regression signal, not absolute numbers.
+# Reuses the same build artifacts as the device suites: the cached iOS XCUITest runner
+# (setup-apple-replay, ios-runner-prebuilt cache) and the Android replay host, and runs the CLI
+# from source via --experimental-strip-types (no dist build), matching the replay workflows.
+
+on:
+  schedule:
+    - cron: "0 4 * * *"
+  workflow_dispatch:
+    inputs:
+      rounds:
+        description: "Measured rounds per command (samples)"
+        required: false
+        default: "5"
+
+permissions:
+  contents: read
+
+concurrency:
+  group: ci-${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+env:
+  AGENT_DEVICE_PERF_CLI: "--experimental-strip-types src/bin.ts"
+  PERF_ROUNDS: ${{ github.event.inputs.rounds || '5' }}
+
+jobs:
+  perf-ios:
+    name: iOS Command Perf
+    runs-on: macos-26
+    timeout-minutes: 80
+    env:
+      IOS_RUNTIME_VERSION: "26.2"
+      AGENT_DEVICE_IOS_RUNNER_DERIVED_PATH: ${{ github.workspace }}/.tmp/ios-runner-derived
+    steps:
+      - name: Checkout
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+
+      - name: Setup toolchain
+        uses: ./.github/actions/setup-node-pnpm
+
+      - name: Setup Apple replay
+        id: apple-replay
+        uses: ./.github/actions/setup-apple-replay
+        with:
+          derived-path: ${{ env.AGENT_DEVICE_IOS_RUNNER_DERIVED_PATH }}
+          cache-key-prefix: ios-runner-prebuilt
+          cache-key-suffix: -ios-${{ env.IOS_RUNTIME_VERSION }}
+          build-command: sh ./scripts/build-xcuitest-apple.sh
+          xcuitest-platform: ios
+          xcuitest-destination: generic/platform=iOS Simulator
+          clean-derived: "1"
+
+      - name: Boot iOS test simulator
+        uses: ./.github/actions/boot-ios-test-simulator
+        with:
+          runtime-version: ${{ env.IOS_RUNTIME_VERSION }}
+          preferred-device-name: iPhone 17 Pro
+
+      - name: Run iOS command perf benchmark
+        run: |
+          pnpm clean:daemon
+          node --experimental-strip-types scripts/perf/run.ts \
+            --platform ios \
+            --device "iPhone 17 Pro" \
+            --n "$PERF_ROUNDS" --warmup 1 \
+            --out-dir "$GITHUB_WORKSPACE/perf-results"
+
+      - name: Upload iOS perf report
+        if: always()
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+        with:
+          name: perf-ios
+          path: perf-results/
+          if-no-files-found: warn
+
+  perf-android:
+    name: Android Command Perf
+    runs-on: ubuntu-latest
+    timeout-minutes: 80
+    steps:
+      - name: Checkout
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+
+      - name: Setup toolchain
+        uses: ./.github/actions/setup-node-pnpm
+
+      - name: Setup Android replay host
+        id: android-replay-host
+        uses: ./.github/actions/setup-android-replay-host
+
+      - name: Package npm-bundled Android helpers
+        run: |
+          pnpm package:android-snapshot-helper:npm
+          pnpm package:android-multitouch-helper:npm
+
+      - name: Run Android command perf benchmark
+        uses: reactivecircus/android-emulator-runner@b530d96654c385303d652368551fb075bc2f0b6b # v2.35.0
+        with:
+          api-level: 36
+          arch: x86_64
+          profile: pixel_7
+          target: google_apis_playstore
+          emulator-options: -no-window -gpu swiftshader_indirect -no-snapshot -noaudio -no-boot-anim -no-metrics
+          script: |
+            # NOTE: android-emulator-runner executes each non-comment line of this script as
+            # its own `sh -c` command. A `set -e` line therefore affects nothing after it, and
+            # a backslash continuation splits one command into several broken ones, so every
+            # command below stays on a single line.
+            #
+            # Disable animations up front so accessibility dumps don't time out (the harness
+            # also runs `settings animations off`, this is belt-and-suspenders).
+            adb -s emulator-5554 shell settings put global window_animation_scale 0 || true
+            adb -s emulator-5554 shell settings put global transition_animation_scale 0 || true
+            adb -s emulator-5554 shell settings put global animator_duration_scale 0 || true
+            node --experimental-strip-types scripts/perf/run.ts --platform android --serial emulator-5554 --n "$PERF_ROUNDS" --warmup 1 --out-dir "$GITHUB_WORKSPACE/perf-results"
+
+      - name: Upload Android perf report
+        if: always()
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+        with:
+          name: perf-android
+          path: perf-results/
+          if-no-files-found: warn
diff --git a/.gitignore b/.gitignore
index 1dc2da634..0f683df1c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 node_modules/
+scripts/perf/.results/
 .pnpm-store/
 .fallow/
 dist/
diff --git a/package.json b/package.json
index ed3bd7131..47428205b 100644
--- a/package.json
+++ b/package.json
@@ -98,6 +98,9 @@
     "ad": "node bin/agent-device.mjs",
     "size": "node scripts/size-report.mjs",
     "size:markdown": "node scripts/size-report.mjs --json .tmp/size-report.json --markdown .tmp/size-report.md",
+    "perf": "node --experimental-strip-types scripts/perf/run.ts",
+    "perf:ios": "node --experimental-strip-types scripts/perf/run.ts --platform ios",
+    "perf:android": "node --experimental-strip-types scripts/perf/run.ts --platform android",
     "lint": "oxlint . --deny-warnings",
     "format": "oxfmt --write src test skills package.json tsconfig.json tsconfig.lib.json rslib.config.ts vitest.config.ts .github/actions/setup-node-pnpm/action.yml .oxlintrc.json .oxfmtrc.json '!test/skillgym/.skillgym-results/**'",
     "fallow": "fallow --summary",
diff --git a/scripts/perf/cli.ts b/scripts/perf/cli.ts
new file mode 100644
index 000000000..486cc5eb8
--- /dev/null
+++ b/scripts/perf/cli.ts
@@ -0,0 +1,99 @@
+import { performance } from 'node:perf_hooks';
+import { runCmdSync } from '../../src/utils/exec.ts';
+import { resolveCliArgv, REPO_ROOT } from './config.ts';
+import type { BatchStepSpec } from './scenario.ts';
+import type { CliResult } from './types.ts';
+
+const MAX_BUFFER = 64 * 1024 * 1024;
+const CLI_ARGV = resolveCliArgv();
+
+function tryParseJson(stdout: string): unknown {
+  const text = stdout.trim();
+  if (text.length === 0) return undefined;
+  // Fast path: stdout as a whole is one JSON document.
+  try {
+    return JSON.parse(text);
+  } catch {
+    /* not pure JSON; try the brace-delimited span below */
+  }
+  // Extra output around the JSON: retry on the span from the first '{' to the last '}'.
+  const open = text.indexOf('{');
+  const close = text.lastIndexOf('}');
+  if (open < 0 || close <= open) return undefined;
+  try {
+    return JSON.parse(text.slice(open, close + 1));
+  } catch {
+    return undefined;
+  }
+}
+
+function jsonOk(json: unknown): boolean {
+  return json === null || typeof json !== 'object' || (json as { ok?: unknown }).ok !== false;
+}
+
+// Run the CLI once as a child Node process and time it. The entry point comes from
+// resolveCliArgv(); `args` is the command + positionals + flags, `baseFlags` the shared ones.
+export function invokeCli(args: string[], baseFlags: string[]): CliResult {
+  const full = [...CLI_ARGV, ...args, ...baseFlags, '--json'];
+  const t0 = performance.now();
+  let stdout = '';
+  let stderr = '';
+  let exitCode = -1;
+  try {
+    // allowFailure so non-zero exits are recorded as samples instead of thrown; maxBuffer
+    // raised because snapshot payloads exceed Node's ~1MB default.
+    const r = runCmdSync(process.execPath, full, {
+      cwd: REPO_ROOT,
+      maxBuffer: MAX_BUFFER,
+      allowFailure: true,
+    });
+    stdout = r.stdout;
+    stderr = r.stderr;
+    exitCode = r.exitCode;
+  } catch (error) {
+    // Spawn-level failures (missing executable, timeout): exitCode stays -1, a failed sample.
+    stderr = error instanceof Error ? error.message : String(error);
+  }
+  const wallClockMs = performance.now() - t0;
+  const json = tryParseJson(stdout);
+  return { exitCode, wallClockMs, stdout, stderr, json, ok: exitCode === 0 && jsonOk(json) };
+}
+
+// Run one command as a single-step `batch`, so the JSON response carries that step's
+// durationMs (read back by readBatchStepDurationMs).
+export function invokeBatchStep(spec: BatchStepSpec, baseFlags: string[]): CliResult {
+  const steps = JSON.stringify([spec]);
+  const outcome = invokeCli(['batch', '--steps', steps], baseFlags);
+  // A failed step normally fails the whole batch, which invokeCli already reports. This
+  // guard covers the remaining case: the batch reports ok while its only step reports
+  // ok:false. Trust the step there, so the sample is not counted as a success.
+  const stepFailed = firstBatchResult(outcome.json)?.ok === false;
+  if (!outcome.ok || !stepFailed) return outcome;
+  return { ...outcome, ok: false };
+}
+
+function firstBatchResult(json: unknown): Record<string, unknown> | undefined {
+  // Reads json.data.results[0]; any missing link or non-object entry yields undefined.
+  const head = (json as { data?: { results?: unknown[] } } | undefined)?.data?.results?.[0];
+  return !head || typeof head !== 'object' ? undefined : (head as Record<string, unknown>);
+}
+
+export function readBatchStepDurationMs(result: CliResult): number | undefined {
+  const reported: unknown = firstBatchResult(result.json)?.durationMs;
+  return typeof reported !== 'number' ? undefined : reported;
+}
+
+export function readBatchStepError(result: CliResult): { code?: string; message?: string } {
+  const envelope = result.json as { error?: { code?: string; message?: string } } | undefined;
+  return { code: envelope?.error?.code, message: envelope?.error?.message };
+}
+
+// Proxy for a11y-tree size: snapshot node count (falls back to distinct @eN refs).
+export function countElements(result: CliResult): number | undefined {
+  const stepData = firstBatchResult(result.json)?.data;
+  // `typeof null` is 'object', so null is rejected explicitly before `.nodes` is read off it.
+  if (stepData === null || typeof stepData !== 'object') return undefined;
+  const nodes = (stepData as { nodes?: unknown }).nodes;
+  if (Array.isArray(nodes)) return nodes.length;
+  return new Set(JSON.stringify(stepData).match(/@e\d+/g) ?? []).size;
+}
diff --git a/scripts/perf/config.ts b/scripts/perf/config.ts
new file mode 100644
index 000000000..10c3e8463
--- /dev/null
+++ b/scripts/perf/config.ts
@@ -0,0 +1,94 @@
+import path from 'node:path';
+import { fileURLToPath } from 'node:url';
+import type { Platform } from './types.ts';
+
+const HERE = path.dirname(fileURLToPath(import.meta.url));
+export const REPO_ROOT = path.resolve(HERE, '..', '..');
+const CLI_BIN = path.join(REPO_ROOT, 'bin', 'agent-device.mjs');
+const DEFAULT_OUT_DIR = path.join(HERE, '.results');
+
+export type PerfConfig = {
+  platform: Platform;
+  rounds: number; // measured rounds (samples per command)
+  warmup: number; // leading rounds dropped from stats
+  keepArtifacts: boolean; // keep temp state dir + leave device booted
+  outDir: string;
+  udid?: string; // iOS device override (UDID)
+  device?: string; // device override by name (e.g. "iPhone 17 Pro"); preferred over udid
+  serial?: string; // Android device override
+};
+
+// Argv prefix used to launch the CLI (passed to the Node executable by the caller).
+// Default: the built dist entry point, bin/agent-device.mjs. To run from source instead
+// (as the device workflows do, skipping the dist build), set e.g.
+//   AGENT_DEVICE_PERF_CLI="--experimental-strip-types src/bin.ts"
+export function resolveCliArgv(): string[] {
+  const fromEnv = (process.env.AGENT_DEVICE_PERF_CLI ?? '').trim();
+  // The override is split on whitespace, so it cannot carry arguments that contain spaces.
+  return fromEnv === '' ? [CLI_BIN] : fromEnv.split(/\s+/);
+}
+
+export function usesSourceCli(): boolean {
+  return (process.env.AGENT_DEVICE_PERF_CLI ?? '').trim().length > 0;
+}
+
+function readValue(argv: string[], i: number, flag: string): string {
+  const [value] = argv.slice(i + 1, i + 2);
+  if (value === undefined) throw new Error(`Missing value for ${flag}`);
+  return value;
+}
+
+function readIntValue(argv: string[], i: number, flag: string, min: number): number {
+  const raw = readValue(argv, i, flag);
+  // Number('') and Number('  ') are both 0, so a blank value would otherwise be accepted as
+  // a valid integer; map it to NaN so it is rejected with the same error as other junk.
+  const n = raw.trim() === '' ? Number.NaN : Number(raw);
+  if (Number.isInteger(n) && n >= min) return n;
+  throw new Error(`${flag} must be an integer >= ${min} (got ${JSON.stringify(raw)})`);
+}
+
+export function parseConfig(argv: string[]): PerfConfig {
+  // Defaults for a local ad-hoc run; each recognized flag below overrides one field.
+  // Unknown flags and missing/invalid values throw.
+  const cfg: PerfConfig = {
+    platform: 'ios',
+    rounds: 5,
+    warmup: 1,
+    keepArtifacts: false,
+    outDir: DEFAULT_OUT_DIR,
+  };
+  let i = 0;
+  while (i < argv.length) {
+    const flag = argv[i];
+    // --keep-artifacts is the only bare switch: it consumes a single token.
+    if (flag === '--keep-artifacts') {
+      cfg.keepArtifacts = true;
+      i += 1;
+      continue;
+    }
+    // Every other recognized flag takes the next token as its value; readValue and
+    // readIntValue throw when that token is missing or invalid.
+    if (flag === '--platform') {
+      const v = readValue(argv, i, flag);
+      if (v !== 'ios' && v !== 'android') throw new Error(`Unknown platform: ${v}`);
+      cfg.platform = v;
+    } else if (flag === '--n' || flag === '--rounds') {
+      cfg.rounds = readIntValue(argv, i, flag, 1);
+    } else if (flag === '--warmup') {
+      cfg.warmup = readIntValue(argv, i, flag, 0);
+    } else if (flag === '--out-dir') {
+      cfg.outDir = path.resolve(readValue(argv, i, flag));
+    } else if (flag === '--udid') {
+      cfg.udid = readValue(argv, i, flag);
+    } else if (flag === '--device') {
+      cfg.device = readValue(argv, i, flag);
+    } else if (flag === '--serial') {
+      cfg.serial = readValue(argv, i, flag);
+    } else {
+      throw new Error(`Unknown flag: ${flag}`);
+    }
+    // Skip past the flag and the value it consumed.
+    i += 2;
+  }
+  return cfg;
+}
diff --git a/scripts/perf/harness.ts b/scripts/perf/harness.ts
new file mode 100644
index 000000000..a70253ab3
--- /dev/null
+++ b/scripts/perf/harness.ts
@@ -0,0 +1,195 @@
+import fs from 'node:fs';
+import os from 'node:os';
+import path from 'node:path';
+import {
+  countElements,
+  invokeBatchStep,
+  invokeCli,
+  readBatchStepDurationMs,
+  readBatchStepError,
+} from './cli.ts';
+import type { PerfConfig } from './config.ts';
+import { resolveProfile, type ResolvedProfile } from './platform-profiles.ts';
+import { buildSettingsTour, type ScenarioStep } from './scenario.ts';
+import { summarize } from './stats.ts';
+import type { CliResult, Measurement, Sample } from './types.ts';
+
+export type IsolationContext = {
+  stateDir: string;
+  artifactsDir: string;
+  baseFlags: string[];
+  profile: ResolvedProfile;
+};
+
+function log(msg: string): void {
+  process.stderr.write('[perf] ' + msg + '\n');
+}
+
+export function setupIsolation(cfg: PerfConfig): IsolationContext {
+  const profile = resolveProfile(cfg);
+  const stateDir = fs.mkdtempSync(path.join(os.tmpdir(), 'agent-device-perf-'));
+  const artifactsDir = path.join(stateDir, 'artifacts');
+  fs.mkdirSync(artifactsDir, { recursive: true });
+  log(`state-dir: ${stateDir}`);
+  log(`device: ${profile.deviceName} (${profile.udid ?? profile.serial})`);
+  const baseFlags = ['--state-dir', stateDir, '--session', 'perf'].concat(profile.platformFlags);
+  return { stateDir, artifactsDir, baseFlags, profile };
+}
+
+export function teardownIsolation(ctx: IsolationContext, cfg: PerfConfig): void {
+  log('teardown: closing session');
+  // Best-effort close; --shutdown is only passed when the run does not keep artifacts.
+  try {
+    invokeCli(cfg.keepArtifacts ? ['close'] : ['close', '--shutdown'], ctx.baseFlags);
+  } catch {
+    /* best-effort */
+  }
+  if (cfg.keepArtifacts) {
+    log(`keep-artifacts: leaving ${ctx.stateDir} and device in place`);
+    return;
+  }
+  // Best-effort removal of the temp state dir (and the artifacts dir inside it).
+  try {
+    fs.rmSync(ctx.stateDir, { recursive: true, force: true });
+    log('teardown: removed temp state dir');
+  } catch {
+    /* best-effort */
+  }
+}
+
+function sampleError(r: CliResult): Pick<Sample, 'errorCode' | 'errorMessage'> {
+  const { code, message } = readBatchStepError(r);
+  // Prefer the structured error; otherwise fall back to the exit code and the last line
+  // of stderr. The message is capped at 200 characters.
+  const text = message ?? r.stderr.trim().split('\n').pop() ?? '';
+  return { errorCode: code ?? `exit:${r.exitCode}`, errorMessage: text.slice(0, 200) };
+}
+
+// Convert one CLI result into a Sample (round, wall-clock time, ok). Error code and message
+// are attached only when the invocation failed.
+function toSample(r: CliResult, round: number): Sample {
+  const base: Sample = { round, wallClockMs: r.wallClockMs, ok: r.ok };
+  return r.ok ? base : { ...base, ...sampleError(r) };
+}
+
+// Untimed throwaway `snapshot -i`. The first interaction after open/relaunch pays the iOS
+// XCUITest runner startup (~10s+ cold) and a per-relaunch first-AX-query settle cost; running
+// one here keeps that cost from being attributed to a measured command.
+function warmRunner(ctx: IsolationContext): void {
+  invokeCli(['snapshot', '-i'], ctx.baseFlags);
+}
+
+function runStep(step: ScenarioStep, ctx: IsolationContext, round: number): Sample {
+  const { baseFlags, profile } = ctx;
+  if (step.freshRoot) {
+    // Untimed: relaunch to a clean, top-of-list root, then absorb the post-relaunch warmup.
+    invokeCli(['open', profile.appTarget, '--relaunch'], baseFlags);
+    warmRunner(ctx);
+  }
+  if (step.execMode === 'standalone') {
+    return toSample(invokeCli(step.args, baseFlags), round);
+  }
+  // Batch steps additionally record the per-step durationMs and, for snapshots, tree size.
+  const r = invokeBatchStep(step.step, baseFlags);
+  const sample = toSample(r, round);
+  sample.daemonDurationMs = readBatchStepDurationMs(r);
+  if (step.isSnapshot) sample.elementCount = countElements(r);
+  return sample;
+}
+
+function buildMeasurement(
+  step: Pick<ScenarioStep, 'command' | 'label' | 'execMode'>,
+  platform: ResolvedProfile['platform'],
+  samples: Sample[],
+  warmupDropped: number,
+): Measurement {
+  // Stats cover successful samples only; failed ones are counted and their distinct error
+  // codes are listed in a note.
+  const passed = samples.filter((s) => s.ok);
+  const codes = [...new Set(samples.filter((s) => !s.ok).map((s) => s.errorCode))].join(', ');
+  const failures = samples.length - passed.length;
+  const notes = failures > 0 ? [`${failures}/${samples.length} samples failed: ${codes}`] : [];
+  const nums = (xs: (number | undefined)[]) => xs.filter((n): n is number => typeof n === 'number');
+  const { command, label, execMode } = step;
+  return {
+    command,
+    label,
+    platform,
+    execMode,
+    samples,
+    warmupDropped,
+    wallClock: summarize(passed.map((s) => s.wallClockMs)),
+    daemonDuration: summarize(nums(passed.map((s) => s.daemonDurationMs))),
+    elementCount: summarize(nums(passed.map((s) => s.elementCount))),
+    failures,
+    notes,
+  };
+}
+
+// Boot the device once and time it. `--session` is deliberately left out so that no session
+// lock policy applies and the device selectors are honored (locked sessions reject them).
+function bootOnce(ctx: IsolationContext): Measurement {
+  const { profile, stateDir } = ctx;
+  log('booting device (no session lock; sampled once)');
+  const sessionlessFlags = ['--state-dir', stateDir, ...profile.platformFlags];
+  const result = invokeCli(['boot', ...profile.selectorFlags], sessionlessFlags);
+  return buildMeasurement(
+    { command: 'boot', label: 'boot device', execMode: 'standalone' },
+    profile.platform,
+    [toSample(result, 0)],
+    0,
+  );
+}
+
+// Open Settings WITH device selectors to establish the session: open is the only interaction
+// command allowed to carry selectors on a fresh session. This locks the session to our
+// device, so every later call targets it via --session alone.
+function establishSession(ctx: IsolationContext): Measurement {
+  const { profile, baseFlags } = ctx;
+  log('establishing session (open with device selectors)');
+  const result = invokeCli(['open', profile.appTarget, ...profile.selectorFlags], baseFlags);
+  return buildMeasurement(
+    { command: 'open', label: 'open (establish + cold)', execMode: 'standalone' },
+    profile.platform,
+    [toSample(result, 0)],
+    0,
+  );
+}
+
+export function runScenario(ctx: IsolationContext, cfg: PerfConfig): Measurement[] {
+  const { profile, baseFlags } = ctx;
+  const steps = buildSettingsTour(profile, { artifactsDir: ctx.artifactsDir });
+  // Measured (non-warmup) samples, bucketed by step label.
+  const buckets = new Map<string, Sample[]>();
+  steps.forEach((step) => buckets.set(step.label, []));
+
+  const boot = bootOnce(ctx);
+  const establish = establishSession(ctx);
+  // Untimed: pay the one-time runner startup now rather than inside a measured command.
+  warmRunner(ctx);
+
+  if (profile.platform === 'android') {
+    // Untimed: Android accessibility dumps time out while the UI is animating, so turn
+    // animations off before snapshot/get/is/fill need to read an idle hierarchy.
+    log('disabling animations (android)');
+    invokeCli(['settings', 'animations', 'off'], baseFlags);
+  }
+
+  const totalRounds = cfg.warmup + cfg.rounds;
+  for (let round = 0; round < totalRounds; round += 1) {
+    const isWarmup = round < cfg.warmup;
+    log(`round ${round + 1}/${totalRounds}${isWarmup ? ' (warmup, dropped)' : ''}`);
+    for (const step of steps) {
+      const sample = runStep(step, ctx, round);
+      if (!isWarmup) buckets.get(step.label)!.push(sample);
+      // The round's reset-open relaunches the app; warm the runner again (untimed) so the
+      // next measured read (snapshot -i) doesn't pay the post-relaunch first-AX-query cost.
+      if (step.command === 'open' && step.execMode === 'standalone') warmRunner(ctx);
+    }
+  }
+
+  const tour = steps.map((step) =>
+    buildMeasurement(step, profile.platform, buckets.get(step.label)!, cfg.warmup),
+  );
+  return [boot, establish, ...tour];
+}
diff --git a/scripts/perf/platform-profiles.ts b/scripts/perf/platform-profiles.ts
new file mode 100644
index 000000000..3d02ff87d
--- /dev/null
+++ b/scripts/perf/platform-profiles.ts
@@ -0,0 +1,77 @@
+import type { PerfConfig } from './config.ts';
+import type { Platform } from './types.ts';
+
+// Local-convenience defaults for ad-hoc runs; CI always overrides them (--device / --serial).
+// The iOS UDID is a specific local "iPhone 17" sim; the Android serial is a dedicated emulator
+// port. Pass --udid/--device/--serial to target your own device.
+const DEFAULT_IOS_UDID = 'D74E0B66-57EB-4EC1-92DC-DA0A30581FE7';
+const DEFAULT_ANDROID_SERIAL = 'emulator-5556';
+
+export type ProfileSelectors = {
+  // A row on the Settings root that pushes a large sub-screen (big a11y tree).
+  deepScreen: string;
+  // The Settings search field (for press/focus; auto-picks a match).
+  searchField: string;
+  // A selector that uniquely targets the EDITABLE search field (for fill).
+  searchFieldEditable: string;
+  // iOS exposes an editable search field at the Settings root (fill works without focusing
+  // first; focusing then filling can hang). Android only reveals the editable after tapping
+  // the search card, so it must press the search entry before fill/type.
+  searchEditableAtRoot: boolean;
+  // A label reliably visible on the Settings root, for get/is (selector form).
+  anchorLabel: string;
+  // Plain text of the anchor, for wait text / find (not a selector).
+  anchorText: string;
+};
+
+export type ResolvedProfile = {
+  platform: Platform;
+  deviceName: string;
+  udid?: string;
+  serial?: string;
+  platformFlags: string[]; // --platform; applied to every call (only conflicts if it mismatches a locked session)
+  selectorFlags: string[]; // device selectors — ONLY on the session-establishing open / selectorless boot
+  appTarget: string; // `open` target for Settings
+  selectors: ProfileSelectors;
+};
+
+export function resolveProfile(cfg: PerfConfig): ResolvedProfile {
+  if (cfg.platform === 'ios') {
+    // Target by device name when given (CI boots a named simulator); otherwise by UDID.
+    const udid = cfg.udid ?? DEFAULT_IOS_UDID;
+    // Only the default UDID is known to be an "iPhone 17": label an explicit --udid by its id.
+    return {
+      platform: 'ios',
+      deviceName: cfg.device ?? (cfg.udid === undefined ? 'iPhone 17' : `ios (${udid})`),
+      udid: cfg.device === undefined ? udid : undefined,
+      platformFlags: ['--platform', 'ios'],
+      selectorFlags: cfg.device === undefined ? ['--udid', udid] : ['--device', cfg.device],
+      appTarget: 'settings',
+      selectors: {
+        deepScreen: 'label="General"',
+        searchField: 'label="Search"',
+        searchFieldEditable: 'label="Search" editable',
+        searchEditableAtRoot: true,
+        anchorLabel: 'label="General"',
+        anchorText: 'General',
+      },
+    };
+  }
+  const serial = cfg.serial ?? DEFAULT_ANDROID_SERIAL;
+  return {
+    platform: 'android',
+    deviceName: cfg.serial ? `android (${serial})` : 'Pixel_9_Pro_XL_API_37',
+    serial,
+    platformFlags: ['--platform', 'android'],
+    selectorFlags: ['--serial', serial, '--android-device-allowlist', serial],
+    appTarget: 'com.android.settings',
+    selectors: {
+      deepScreen: 'text="Network & internet"',
+      searchField: 'text="Search Settings"',
+      searchFieldEditable: 'editable',
+      searchEditableAtRoot: false,
+      anchorLabel: 'label="Network & internet"',
+      anchorText: 'Network & internet',
+    },
+  };
+}
diff --git a/scripts/perf/report.ts b/scripts/perf/report.ts
new file mode 100644
index 000000000..3d863a5ff
--- /dev/null
+++ b/scripts/perf/report.ts
@@ -0,0 +1,67 @@
+import fs from 'node:fs';
+import path from 'node:path';
+import type { Measurement, RunResult, Stat } from './types.ts';
+
+function ms(n: number | undefined): string {
+  return typeof n !== 'number' || !Number.isFinite(n) ? '–' : n.toFixed(0);
+}
+
+function wallCells(s: Stat | null): string {
+  const cells = s ? [s.min, s.median, s.p95, s.max].map((v) => ms(v)) : ['–', '–', '–', '–'];
+  return cells.join(' | ');
+}
+
+function stampName(platform: string, startedAt: string): string {
+  return ['perf', platform, startedAt.replace(/[:.]/g, '-')].join('-');
+}
+
+function measurementRow(m: Measurement): string {
+  const median = (s: Stat | null | undefined): string => (s ? ms(s.median) : '–');
+  const head = [m.label, m.command, m.execMode, m.wallClock?.n ?? 0, wallCells(m.wallClock)];
+  const tail = [median(m.daemonDuration), median(m.elementCount), m.notes.join('; ')];
+  return `| ${[...head, ...tail].join(' | ')} |`;
+}
+
+function toMarkdown(run: RunResult): string {
+  const { device, config } = run;
+  // Header, explanatory legend, then one table row per measurement.
+  const lines: string[] = [
+    `# agent-device command perf — ${run.platform}`,
+    '',
+    `- **Device**: ${device.name} (${device.udid ?? device.serial ?? '?'})`,
+    `- **agent-device**: ${run.agentDeviceVersion}`,
+    `- **Rounds**: ${config.rounds} (warmup ${config.warmup} dropped)`,
+    `- **Started**: ${run.startedAt}`,
+    `- **Finished**: ${run.finishedAt}`,
+    '',
+    'All times in milliseconds. `wall-clock` includes process spawn + socket overhead;',
+    '`daemon` is the batch step round-trip (spawn overhead ≈ wall-median − daemon-median).',
+    '`elements` = node count in the snapshot payload (tree-size proxy).',
+    'An untimed warmup interaction runs after each open/relaunch, so measured commands',
+    'do not pay the one-time iOS-runner startup or post-relaunch first-AX-query cost.',
+    '',
+    '| command | cli | mode | n | wall min | wall median | wall p95 | wall max | daemon median | elements | notes |',
+    '|---|---|---|---|---|---|---|---|---|---|---|',
+    ...run.measurements.map((m) => measurementRow(m)),
+    '',
+  ];
+  const failed = run.measurements.filter((m) => m.failures > 0);
+  if (failed.length === 0) return lines.join('\n');
+  lines.push('## Failures', '');
+  for (const m of failed) {
+    const detail = m.samples.find((s) => !s.ok)?.errorMessage;
+    lines.push(`- **${m.label}** — ${m.notes.join('; ')}${detail ? ` — ${detail}` : ''}`);
+  }
+  lines.push('');
+  return lines.join('\n');
+}
+
+export function writeReports(run: RunResult, outDir: string): { jsonPath: string; mdPath: string } {
+  const base = stampName(run.platform, run.startedAt);
+  const reportPath = (ext: string): string => path.join(outDir, `${base}.${ext}`);
+  const paths = { jsonPath: reportPath('json'), mdPath: reportPath('md') };
+  fs.mkdirSync(outDir, { recursive: true });
+  fs.writeFileSync(paths.jsonPath, JSON.stringify(run, null, 2));
+  fs.writeFileSync(paths.mdPath, toMarkdown(run));
+  return paths;
+}
diff --git a/scripts/perf/run.ts b/scripts/perf/run.ts
new file mode 100644
index 000000000..9a0010536
--- /dev/null
+++ b/scripts/perf/run.ts
@@ -0,0 +1,67 @@
+import fs from 'node:fs';
+import path from 'node:path';
+import { parseConfig, REPO_ROOT, usesSourceCli } from './config.ts';
+import { runScenario, setupIsolation, teardownIsolation, type IsolationContext } from './harness.ts';
+import { writeReports } from './report.ts';
+import type { RunResult } from './types.ts';
+
+function readVersion(): string {
+  try {
+    const { version } = JSON.parse(fs.readFileSync(path.join(REPO_ROOT, 'package.json'), 'utf8'));
+    return typeof version !== 'string' ? 'unknown' : version;
+  } catch {
+    return 'unknown';
+  }
+}
+
+function main(): void {
+  const cfg = parseConfig(process.argv.slice(2));
+  // The dist binary needs a build; running from source (AGENT_DEVICE_PERF_CLI) does not.
+  if (!usesSourceCli() && !fs.existsSync(path.join(REPO_ROOT, 'dist', 'src'))) {
+    process.stderr.write('[perf] dist/ is missing — run `pnpm build` first.\n');
+    process.exit(1);
+  }
+
+  const startedAt = new Date().toISOString();
+  let ctx: IsolationContext | null = null;
+  let exitCode = 0;
+
+  // Idempotent: ctx is cleared after the first teardown, so later calls are no-ops.
+  const cleanup = (): void => {
+    if (ctx) teardownIsolation(ctx, cfg);
+    ctx = null;
+  };
+  // NOTE(review): main() is synchronous and ends in process.exit(); confirm these can fire.
+  for (const [signal, code] of [['SIGINT', 130], ['SIGTERM', 143]] as const) {
+    process.on(signal, () => {
+      cleanup();
+      process.exit(code);
+    });
+  }
+
+  try {
+    ctx = setupIsolation(cfg);
+    const measurements = runScenario(ctx, cfg);
+    const run: RunResult = {
+      startedAt,
+      finishedAt: new Date().toISOString(),
+      platform: cfg.platform,
+      device: { udid: ctx.profile.udid, serial: ctx.profile.serial, name: ctx.profile.deviceName },
+      config: { rounds: cfg.rounds, warmup: cfg.warmup, keepArtifacts: cfg.keepArtifacts },
+      agentDeviceVersion: readVersion(),
+      measurements,
+    };
+    const { jsonPath, mdPath } = writeReports(run, cfg.outDir);
+    process.stderr.write(`\n[perf] report: ${mdPath}\n[perf] json:   ${jsonPath}\n`);
+  } catch (e) {
+    // A thrown value may be null/undefined or a non-Error, so read `.stack` defensively.
+    const detail = (e as { stack?: string } | null | undefined)?.stack ?? String(e);
+    process.stderr.write(`[perf] error: ${detail}\n`);
+    exitCode = 1;
+  } finally {
+    cleanup();
+  }
+  process.exit(exitCode);
+}
+
+main();
diff --git a/scripts/perf/scenario.ts b/scripts/perf/scenario.ts
new file mode 100644
index 000000000..8c63ac08b
--- /dev/null
+++ b/scripts/perf/scenario.ts
@@ -0,0 +1,102 @@
+import path from 'node:path';
+import type { ResolvedProfile } from './platform-profiles.ts';
+
+// A legacy-form batch step: maps through the exact documented CLI grammar.
+// `flags` uses internal CliFlags field names (e.g. snapshotInteractiveOnly).
+export type BatchStepSpec = {
+  command: string; // command name, e.g. 'snapshot', 'press', 'fill'
+  positionals?: string[]; // positional arguments in CLI order, e.g. ['mark', 'perf-mark']
+  flags?: Record<string, unknown>; // optional flags keyed by CliFlags field name
+};
+
+type ScenarioStepBase = { // fields shared by both ScenarioStep variants
+  label: string; // display label for the step, e.g. 'snapshot -i (root)'
+  command: string; // command the step exercises, e.g. 'snapshot'
+  // When set, the harness runs an untimed `open --relaunch` (reset to root, top of list)
+  // before timing this step. Used for steps whose precondition is a clean root, since
+  // earlier commands (find/is, search) leave the list scrolled or in a different surface.
+  freshRoot?: boolean;
+};
+
+// Discriminated on execMode so the invoker gets the right payload without `!`/`?? []`:
+// standalone carries full CLI args; batch carries one legacy batch step.
+export type ScenarioStep =
+  | (ScenarioStepBase & { execMode: 'standalone'; args: string[] }) // args: e.g. ['record', 'stop']
+  | (ScenarioStepBase & { execMode: 'batch'; step: BatchStepSpec; isSnapshot?: boolean }); // isSnapshot: set on snapshot steps
+
+export type StepContext = { artifactsDir: string }; // directory that artifact-producing steps write into
+
+function std(label: string, command: string, args: string[]): ScenarioStep {
+  return { label, command, execMode: 'standalone', args }; // standalone step: `args` carries the full CLI arguments
+}
+
+function bat(
+  label: string,
+  command: string,
+  step: BatchStepSpec, // the single legacy batch step this scenario step runs
+  opts: { isSnapshot?: boolean; freshRoot?: boolean } = {}, // optional flags, spread onto the step as-is
+): ScenarioStep {
+  return { label, command, execMode: 'batch' as const, step, ...opts }; // batch-mode scenario step
+}
+
+// A single ordered tour through Settings. The harness replays it for each measured round
+// (plus warmup rounds); every pass starts with `open --relaunch`, which returns the app to
+// its root, so rounds share a known starting state while commands keep their natural order.
+export function buildSettingsTour(p: ResolvedProfile, ctx: StepContext): ScenarioStep[] {
+  const { selectors: sel, appTarget } = p;
+  const artifact = (fileName: string): string => path.join(ctx.artifactsDir, fileName); // path inside the artifacts dir
+
+  // Text entry is platform-specific. iOS has an editable search field at root, so it is filled
+  // directly (focusing it first can hang; freshRoot resets scroll). Android must press the
+  // search entry to reveal an editable field, then types into it and fills it.
+  let textEntry: ScenarioStep[];
+  if (sel.searchEditableAtRoot) {
+    textEntry = [
+      bat('fill search', 'fill', { command: 'fill', positionals: [sel.searchFieldEditable, 'general'] }, { freshRoot: true }),
+      bat('type', 'type', { command: 'type', positionals: ['wifi'] }),
+    ];
+  } else {
+    textEntry = [
+      bat('press search field', 'press', { command: 'press', positionals: [sel.searchField] }, { freshRoot: true }),
+      bat('type', 'type', { command: 'type', positionals: ['wifi'] }),
+      bat('fill search', 'fill', { command: 'fill', positionals: [sel.searchFieldEditable, 'general'] }),
+    ];
+  }
+
+  return [
+    // --- relaunch so the tour starts at the app root ---
+    std('open (relaunch → root)', 'open', ['open', appTarget, '--relaunch']),
+
+    // --- root-tree reads (snapshots go first; the anchor label is visible here) ---
+    bat('snapshot -i (root)', 'snapshot', { command: 'snapshot', flags: { snapshotInteractiveOnly: true } }, { isSnapshot: true }),
+    bat('snapshot (root)', 'snapshot', { command: 'snapshot' }, { isSnapshot: true }),
+
+    // --- enter a sub-screen from a fresh root (freshRoot resets scroll, keeping the
+    //     deep-screen row in view), read it, then go back ---
+    bat('press → deep screen', 'press', { command: 'press', positionals: [sel.deepScreen] }, { freshRoot: true }),
+    bat('snapshot (deep)', 'snapshot', { command: 'snapshot' }, { isSnapshot: true }),
+    bat('snapshot -i (deep)', 'snapshot', { command: 'snapshot', flags: { snapshotInteractiveOnly: true } }, { isSnapshot: true }),
+    bat('back', 'back', { command: 'back' }),
+
+    // --- targeted reads of the visible anchor (freshRoot puts the anchor on screen) ---
+    bat('wait text', 'wait', { command: 'wait', positionals: ['text', sel.anchorText, '3000'] }, { freshRoot: true }),
+    bat('find', 'find', { command: 'find', positionals: [sel.anchorText] }),
+    bat('get text', 'get', { command: 'get', positionals: ['text', sel.anchorLabel] }),
+    bat('is visible', 'is', { command: 'is', positionals: ['visible', sel.anchorLabel] }),
+
+    // --- text entry (platform-specific order, built above), then scroll the results ---
+    ...textEntry,
+    bat('scroll down', 'scroll', { command: 'scroll', positionals: ['down'] }),
+
+    // --- artifact producers; record start/stop bracket the rest so the clip holds >1s of
+    //     footage (an instant start→stop makes simctl recordVideo fail to finalize) ---
+    std('record start', 'record', ['record', 'start', artifact('rec.mp4'), '--hide-touches']),
+    bat('screenshot', 'screenshot', { command: 'screenshot', positionals: [artifact('shot.png')] }),
+    bat('logs mark', 'logs', { command: 'logs', positionals: ['mark', 'perf-mark'] }),
+    bat('logs clear', 'logs', { command: 'logs', positionals: ['clear'] }),
+    std('trace start', 'trace', ['trace', 'start', artifact('trace.log')]),
+    std('trace stop', 'trace', ['trace', 'stop']),
+    bat('perf', 'perf', { command: 'perf' }),
+    std('record stop', 'record', ['record', 'stop']),
+  ];
+}
diff --git a/scripts/perf/stats.ts b/scripts/perf/stats.ts
new file mode 100644
index 000000000..5a41ac4ea
--- /dev/null
+++ b/scripts/perf/stats.ts
@@ -0,0 +1,22 @@
+import type { Stat } from './types.ts';
+
+// Nearest-rank percentile (p on a 0..100 scale); `sorted` must already be in ascending order.
+function percentile(sorted: number[], p: number): number {
+  const count = sorted.length;
+  if (count === 0) return Number.NaN; // no data -> NaN rather than an out-of-range read
+  const rank = Math.ceil((p / 100) * count); // 1-based nearest rank
+  return sorted[Math.min(count - 1, Math.max(0, rank - 1))]; // clamped to a valid 0-based index
+}
+
+export function summarize(values: number[]): Stat | null {
+  const ordered = values.filter((v) => Number.isFinite(v)).sort((a, b) => a - b); // filter() copies, so `values` is untouched
+  const count = ordered.length;
+  if (count === 0) return null; // no finite values -> nothing to summarize
+  return {
+    n: count,
+    min: ordered[0],
+    median: percentile(ordered, 50), // nearest-rank: the lower-middle value when count is even
+    p95: percentile(ordered, 95),
+    max: ordered[count - 1],
+  };
+}
diff --git a/scripts/perf/types.ts b/scripts/perf/types.ts
new file mode 100644
index 000000000..175481a88
--- /dev/null
+++ b/scripts/perf/types.ts
@@ -0,0 +1,50 @@
+// Shared data shapes for the e2e perf benchmark harness.
+
+export type Platform = 'ios' | 'android'; // platform a benchmark run targets
+
+export type ExecMode = 'batch' | 'standalone'; // how a step is invoked: one batch step, or a standalone CLI call
+
+export type CliResult = {
+  exitCode: number; // exit status of the CLI child process
+  wallClockMs: number; // measured by the harness around the child process
+  stdout: string; // presumably the child's captured standard output — confirm in the invoker
+  stderr: string; // presumably the child's captured standard error — confirm in the invoker
+  json: unknown; // parsed --json payload (or undefined when not parseable)
+  ok: boolean; // exit 0 AND (json.ok !== false)
+};
+
+export type Sample = {
+  round: number; // which round produced this sample
+  wallClockMs: number; // wall-clock time in milliseconds (see CliResult.wallClockMs)
+  daemonDurationMs?: number; // from batch results[0].durationMs (batch mode only)
+  elementCount?: number; // for snapshot rows: parsed @eN count, a tree-size proxy
+  ok: boolean; // whether the command succeeded for this sample
+  errorCode?: string; // presumably set only when ok is false — confirm in the harness
+  errorMessage?: string; // presumably set only when ok is false — confirm in the harness
+};
+
+export type Stat = { n: number; min: number; median: number; p95: number; max: number }; // summary over n finite values (built by summarize in stats.ts)
+
+export type Measurement = {
+  command: string; // command name of the measured scenario step
+  label: string; // display label of the measured scenario step
+  platform: Platform;
+  execMode: ExecMode;
+  samples: Sample[]; // kept samples only (warmup rounds dropped)
+  warmupDropped: number; // presumably how many warmup samples were discarded — confirm in the harness
+  wallClock: Stat | null; // summary of wall-clock times; presumably null when there is nothing to summarize
+  daemonDuration: Stat | null; // null for standalone or when no ok samples
+  elementCount: Stat | null; // null unless snapshot row
+  failures: number; // presumably the count of samples with ok === false — confirm in the harness
+  notes: string[]; // free-form annotations attached to this row
+};
+
+export type RunResult = {
+  startedAt: string; // ISO-8601 timestamp (run.ts sets it via Date#toISOString)
+  finishedAt: string; // ISO-8601 timestamp taken once the scenario has returned
+  platform: Platform;
+  device: { udid?: string; serial?: string; name: string }; // copied from the resolved device profile
+  config: { rounds: number; warmup: number; keepArtifacts: boolean }; // settings the run was made with
+  agentDeviceVersion: string; // package.json version, or 'unknown' if it could not be read
+  measurements: Measurement[]; // rows returned by runScenario
+};
diff --git a/src/utils/exec.ts b/src/utils/exec.ts
index 6191bb706..018a383c5 100644
--- a/src/utils/exec.ts
+++ b/src/utils/exec.ts
@@ -23,6 +23,8 @@ export type ExecOptions = {
   timeoutMs?: number;
   detached?: boolean;
   signal?: AbortSignal;
+  /** Max stdout/stderr bytes for synchronous runs (default Node ~1MB). */
+  maxBuffer?: number;
 };
 
 type ExecStreamOptions = ExecOptions & {
@@ -151,27 +153,29 @@ function runSpawnedCommand(
     child.on('error', (err) => {
       if (timeoutHandle) clearTimeout(timeoutHandle);
       abort.dispose();
-      reject(
-        abort.didAbort
-          ? createCommandCanceledError(executable, cmd, args)
-          : createSpawnError(executable, cmd, args, err),
-      );
+      reject(spawnRejectionError(abort, executable, cmd, args, err));
     });
 
     child.on('close', (code) => {
       if (timeoutHandle) clearTimeout(timeoutHandle);
       abort.dispose();
       const exitCode = code ?? 1;
-      if (abort.didAbort) {
-        reject(createCommandCanceledError(executable, cmd, args));
-        return;
-      }
-      if (didTimeout && timeoutMs) {
+      if (!abort.didAbort && didTimeout && timeoutMs) {
         reject(createTimeoutError(executable, cmd, args, timeoutMs, exitCode, stdout, stderr));
         return;
       }
-      if (exitCode !== 0 && !options.allowFailure) {
-        reject(createExitError(executable, cmd, args, exitCode, stdout, stderr));
+      const failure = commandCloseFailure(
+        abort,
+        executable,
+        cmd,
+        args,
+        exitCode,
+        options.allowFailure,
+        stdout,
+        stderr,
+      );
+      if (failure) {
+        reject(failure);
         return;
       }
       resolve({
@@ -251,6 +255,7 @@ export function runCmdSync(cmd: string, args: string[], options: ExecOptions = {
     timeout: normalizeTimeoutMs(options.timeoutMs),
     windowsHide: true,
     shell: false,
+    ...(options.maxBuffer !== undefined ? { maxBuffer: options.maxBuffer } : {}),
   });
 
   if (result.error) {
@@ -347,21 +352,23 @@ export function runCmdBackground(
   const wait = new Promise<ExecResult>((resolve, reject) => {
     child.on('error', (err) => {
       abort.dispose();
-      reject(
-        abort.didAbort
-          ? createCommandCanceledError(executable, cmd, args)
-          : createSpawnError(executable, cmd, args, err),
-      );
+      reject(spawnRejectionError(abort, executable, cmd, args, err));
     });
     child.on('close', (code) => {
       abort.dispose();
       const exitCode = code ?? 1;
-      if (abort.didAbort) {
-        reject(createCommandCanceledError(executable, cmd, args));
-        return;
-      }
-      if (exitCode !== 0 && !options.allowFailure) {
-        reject(createExitError(executable, cmd, args, exitCode, stdout, stderr));
+      const failure = commandCloseFailure(
+        abort,
+        executable,
+        cmd,
+        args,
+        exitCode,
+        options.allowFailure,
+        stdout,
+        stderr,
+      );
+      if (failure) {
+        reject(failure);
         return;
       }
       resolve({ stdout, stderr, exitCode });
@@ -463,6 +470,40 @@ function createExitError(
   });
 }
 
+type CommandAbort = { readonly didAbort: boolean }; // minimal view of the abort handle: the helpers below read only this flag
+
+// Picks the rejection reason for a spawned child's `error` event. An abort we requested takes
+// precedence (reported as a cancellation); any other failure is wrapped as a spawn error.
+function spawnRejectionError(
+  abort: CommandAbort,
+  executable: string,
+  cmd: string,
+  args: string[],
+  err: Error,
+): AppError {
+  if (abort.didAbort) return createCommandCanceledError(executable, cmd, args);
+  return createSpawnError(executable, cmd, args, err);
+}
+
+// Turns the outcome of a spawned child's `close` event into a failure, or null on success.
+// Precedence: an abort we requested is always a cancellation. Otherwise the exit is accepted
+// when the code is zero or `allowFailure` is set; any other exit becomes an exit error that
+// carries the collected stdout/stderr.
+function commandCloseFailure(
+  abort: CommandAbort,
+  executable: string,
+  cmd: string,
+  args: string[],
+  exitCode: number,
+  allowFailure: boolean | undefined,
+  stdout: string,
+  stderr: string,
+): AppError | null {
+  if (abort.didAbort) return createCommandCanceledError(executable, cmd, args);
+  const exitAccepted = exitCode === 0 || Boolean(allowFailure);
+  return exitAccepted ? null : createExitError(executable, cmd, args, exitCode, stdout, stderr);
+}
+
 function normalizeOverridePath(
   rawPath: string | undefined,
   envName: string,