Skip to content

Commit 1b052df

Browse files
authored
feat(core): implement Windows sandbox dynamic expansion Phase 1 and 2.1 (#23691)
1 parent f11bd3d commit 1b052df

18 files changed

+1162
-522
lines changed

packages/core/src/config/config.ts

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1197,10 +1197,7 @@ export class Config implements McpContext, AgentLoopContext {
11971197
...params.policyEngineConfig,
11981198
approvalMode: engineApprovalMode,
11991199
disableAlwaysAllow: this.disableAlwaysAllow,
1200-
toolSandboxEnabled: this.getSandboxEnabled(),
1201-
sandboxApprovedTools:
1202-
this.sandboxPolicyManager?.getModeConfig(engineApprovalMode)
1203-
?.approvedTools ?? [],
1200+
sandboxManager: this._sandboxManager,
12041201
},
12051202
checkerRunner,
12061203
);
@@ -2392,10 +2389,7 @@ export class Config implements McpContext, AgentLoopContext {
23922389
);
23932390
}
23942391

2395-
this.policyEngine.setApprovalMode(
2396-
mode,
2397-
this.sandboxPolicyManager?.getModeConfig(mode)?.approvedTools ?? [],
2398-
);
2392+
this.policyEngine.setApprovalMode(mode);
23992393
this.refreshSandboxManager();
24002394

24012395
const isPlanModeTransition =

packages/core/src/policy/policy-engine.test.ts

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,11 @@ import { SafetyCheckDecision } from '../safety/protocol.js';
2222
import type { CheckerRunner } from '../safety/checker-runner.js';
2323
import { initializeShellParsers } from '../utils/shell-utils.js';
2424
import { buildArgsPatterns } from './utils.js';
25+
import {
26+
NoopSandboxManager,
27+
LocalSandboxManager,
28+
type SandboxManager,
29+
} from '../services/sandboxManager.js';
2530

2631
// Mock shell-utils to ensure consistent behavior across platforms (especially Windows CI)
2732
// We want to test PolicyEngine logic, not the shell parser's ability to parse commands
@@ -96,7 +101,10 @@ describe('PolicyEngine', () => {
96101
runChecker: vi.fn(),
97102
} as unknown as CheckerRunner;
98103
engine = new PolicyEngine(
99-
{ approvalMode: ApprovalMode.DEFAULT },
104+
{
105+
approvalMode: ApprovalMode.DEFAULT,
106+
sandboxManager: new NoopSandboxManager(),
107+
},
100108
mockCheckerRunner,
101109
);
102110
});
@@ -332,7 +340,7 @@ describe('PolicyEngine', () => {
332340
engine = new PolicyEngine({
333341
rules,
334342
approvalMode: ApprovalMode.AUTO_EDIT,
335-
toolSandboxEnabled: true,
343+
sandboxManager: new LocalSandboxManager(),
336344
});
337345
expect((await engine.check({ name: 'edit' }, undefined)).decision).toBe(
338346
PolicyDecision.ALLOW,
@@ -345,6 +353,29 @@ describe('PolicyEngine', () => {
345353
);
346354
});
347355

356+
it('should respect tools approved by the SandboxManager', async () => {
357+
const mockSandboxManager = {
358+
enabled: true,
359+
prepareCommand: vi.fn(),
360+
isDangerousCommand: vi.fn().mockReturnValue(false),
361+
isKnownSafeCommand: vi
362+
.fn()
363+
.mockImplementation((args) => args[0] === 'npm'),
364+
} as unknown as SandboxManager;
365+
366+
engine = new PolicyEngine({
367+
sandboxManager: mockSandboxManager,
368+
defaultDecision: PolicyDecision.ASK_USER,
369+
});
370+
371+
const { decision } = await engine.check(
372+
{ name: 'run_shell_command', args: { command: 'npm install' } },
373+
undefined,
374+
);
375+
376+
expect(decision).toBe(PolicyDecision.ALLOW);
377+
});
378+
348379
it('should return ALLOW by default in YOLO mode when no rules match', async () => {
349380
engine = new PolicyEngine({ approvalMode: ApprovalMode.YOLO });
350381

@@ -1576,7 +1607,10 @@ describe('PolicyEngine', () => {
15761607
},
15771608
];
15781609

1579-
engine = new PolicyEngine({ rules, toolSandboxEnabled: true });
1610+
engine = new PolicyEngine({
1611+
rules,
1612+
sandboxManager: new LocalSandboxManager(),
1613+
});
15801614
engine.setApprovalMode(ApprovalMode.AUTO_EDIT);
15811615

15821616
const result = await engine.check(

packages/core/src/policy/policy-engine.ts

Lines changed: 20 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,12 @@
66

77
import { type FunctionCall } from '@google/genai';
88
import {
9-
isDangerousCommand,
10-
isKnownSafeCommand,
11-
} from '../sandbox/macos/commandSafety.js';
9+
SHELL_TOOL_NAMES,
10+
initializeShellParsers,
11+
splitCommands,
12+
hasRedirection,
13+
extractStringFromParseEntry,
14+
} from '../utils/shell-utils.js';
1215
import { parse as shellParse } from 'shell-quote';
1316
import {
1417
PolicyDecision,
@@ -24,12 +27,6 @@ import { stableStringify } from './stable-stringify.js';
2427
import { debugLogger } from '../utils/debugLogger.js';
2528
import type { CheckerRunner } from '../safety/checker-runner.js';
2629
import { SafetyCheckDecision } from '../safety/protocol.js';
27-
import {
28-
SHELL_TOOL_NAMES,
29-
initializeShellParsers,
30-
splitCommands,
31-
hasRedirection,
32-
} from '../utils/shell-utils.js';
3330
import { getToolAliases } from '../tools/tool-names.js';
3431
import {
3532
MCP_TOOL_PREFIX,
@@ -38,6 +35,10 @@ import {
3835
formatMcpToolName,
3936
isMcpToolName,
4037
} from '../tools/mcp-tool.js';
38+
import {
39+
type SandboxManager,
40+
NoopSandboxManager,
41+
} from '../services/sandboxManager.js';
4142

4243
function isWildcardPattern(name: string): boolean {
4344
return name === '*' || name.includes('*');
@@ -197,8 +198,7 @@ export class PolicyEngine {
197198
private readonly disableAlwaysAllow: boolean;
198199
private readonly checkerRunner?: CheckerRunner;
199200
private approvalMode: ApprovalMode;
200-
private toolSandboxEnabled: boolean;
201-
private sandboxApprovedTools: string[];
201+
private readonly sandboxManager: SandboxManager;
202202

203203
constructor(config: PolicyEngineConfig = {}, checkerRunner?: CheckerRunner) {
204204
this.rules = (config.rules ?? []).sort(
@@ -249,18 +249,14 @@ export class PolicyEngine {
249249
this.disableAlwaysAllow = config.disableAlwaysAllow ?? false;
250250
this.checkerRunner = checkerRunner;
251251
this.approvalMode = config.approvalMode ?? ApprovalMode.DEFAULT;
252-
this.toolSandboxEnabled = config.toolSandboxEnabled ?? false;
253-
this.sandboxApprovedTools = config.sandboxApprovedTools ?? [];
252+
this.sandboxManager = config.sandboxManager ?? new NoopSandboxManager();
254253
}
255254

256255
/**
257256
* Update the current approval mode.
258257
*/
259-
setApprovalMode(mode: ApprovalMode, sandboxApprovedTools?: string[]): void {
258+
setApprovalMode(mode: ApprovalMode): void {
260259
this.approvalMode = mode;
261-
if (sandboxApprovedTools !== undefined) {
262-
this.sandboxApprovedTools = sandboxApprovedTools;
263-
}
264260
}
265261

266262
/**
@@ -285,8 +281,9 @@ export class PolicyEngine {
285281
if (!hasRedirection(command)) return false;
286282

287283
// Do not downgrade (do not ask user) if sandboxing is enabled and in AUTO_EDIT or YOLO
284+
const sandboxEnabled = !(this.sandboxManager instanceof NoopSandboxManager);
288285
if (
289-
this.toolSandboxEnabled &&
286+
sandboxEnabled &&
290287
(this.approvalMode === ApprovalMode.AUTO_EDIT ||
291288
this.approvalMode === ApprovalMode.YOLO)
292289
) {
@@ -299,27 +296,24 @@ export class PolicyEngine {
299296
/**
300297
* Check if a shell command is allowed.
301298
*/
302-
303299
private async applyShellHeuristics(
304300
command: string,
305301
decision: PolicyDecision,
306302
): Promise<PolicyDecision> {
307303
await initializeShellParsers();
308304
try {
309305
const parsedObjArgs = shellParse(command);
310-
if (parsedObjArgs.some((arg) => typeof arg === 'object')) return decision;
311-
const parsedArgs = parsedObjArgs.map(String);
312-
if (isDangerousCommand(parsedArgs)) {
306+
const parsedArgs = parsedObjArgs.map(extractStringFromParseEntry);
307+
308+
if (this.sandboxManager.isDangerousCommand(parsedArgs)) {
313309
debugLogger.debug(
314310
`[PolicyEngine.check] Command evaluated as dangerous, forcing ASK_USER: ${command}`,
315311
);
316312
return PolicyDecision.ASK_USER;
317313
}
318-
const isApprovedBySandbox =
319-
this.toolSandboxEnabled &&
320-
this.sandboxApprovedTools.includes(parsedArgs[0]);
314+
321315
if (
322-
(isKnownSafeCommand(parsedArgs) || isApprovedBySandbox) &&
316+
this.sandboxManager.isKnownSafeCommand(parsedArgs) &&
323317
decision === PolicyDecision.ASK_USER
324318
) {
325319
debugLogger.debug(

packages/core/src/policy/types.ts

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
*/
66

77
import type { SafetyCheckInput } from '../safety/protocol.js';
8+
import type { SandboxManager } from '../services/sandboxManager.js';
89

910
export enum PolicyDecision {
1011
ALLOW = 'allow',
@@ -311,13 +312,9 @@ export interface PolicyEngineConfig {
311312
approvalMode?: ApprovalMode;
312313

313314
/**
314-
* Whether tool sandboxing is enabled.
315+
* The sandbox manager instance.
315316
*/
316-
toolSandboxEnabled?: boolean;
317-
/**
318-
* List of tools approved by the sandbox policy for the current mode.
319-
*/
320-
sandboxApprovedTools?: string[];
317+
sandboxManager?: SandboxManager;
321318
}
322319

323320
export interface PolicySettings {

packages/core/src/sandbox/linux/LinuxSandboxManager.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,12 +99,25 @@ function touch(filePath: string, isDirectory: boolean) {
9999
}
100100
}
101101

102+
import {
103+
isKnownSafeCommand,
104+
isDangerousCommand,
105+
} from '../macos/commandSafety.js';
106+
102107
/**
103108
* A SandboxManager implementation for Linux that uses Bubblewrap (bwrap).
104109
*/
105110
export class LinuxSandboxManager implements SandboxManager {
106111
constructor(private readonly options: GlobalSandboxOptions) {}
107112

113+
isKnownSafeCommand(args: string[]): boolean {
114+
return isKnownSafeCommand(args);
115+
}
116+
117+
isDangerousCommand(args: string[]): boolean {
118+
return isDangerousCommand(args);
119+
}
120+
108121
async prepareCommand(req: SandboxRequest): Promise<SandboxedCommand> {
109122
const sanitizationConfig = getSecureSanitizationConfig(
110123
req.policy?.sanitizationConfig,

packages/core/src/sandbox/macos/MacOsSandboxManager.test.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ describe('MacOsSandboxManager', () => {
6969
allowedPaths: mockAllowedPaths,
7070
networkAccess: mockNetworkAccess,
7171
forbiddenPaths: undefined,
72-
workspaceWrite: false,
72+
workspaceWrite: true,
7373
additionalPermissions: {
7474
fileSystem: {
7575
read: [],

packages/core/src/sandbox/macos/MacOsSandboxManager.ts

Lines changed: 21 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -14,23 +14,20 @@ import {
1414
import {
1515
sanitizeEnvironment,
1616
getSecureSanitizationConfig,
17-
type EnvironmentSanitizationConfig,
1817
} from '../../services/environmentSanitization.js';
1918
import { buildSeatbeltArgs } from './seatbeltArgsBuilder.js';
2019
import {
21-
getCommandRoots,
2220
initializeShellParsers,
23-
splitCommands,
24-
stripShellWrapper,
21+
getCommandName,
2522
} from '../../utils/shell-utils.js';
26-
import { isKnownSafeCommand } from './commandSafety.js';
27-
import { parse as shellParse } from 'shell-quote';
23+
import {
24+
isKnownSafeCommand,
25+
isDangerousCommand,
26+
isStrictlyApproved,
27+
} from './commandSafety.js';
2828
import { type SandboxPolicyManager } from '../../policy/sandboxPolicyManager.js';
29-
import path from 'node:path';
3029

3130
export interface MacOsSandboxOptions extends GlobalSandboxOptions {
32-
/** Optional base sanitization config. */
33-
sanitizationConfig?: EnvironmentSanitizationConfig;
3431
/** The current sandbox mode behavior from config. */
3532
modeConfig?: {
3633
readonly?: boolean;
@@ -48,52 +45,17 @@ export interface MacOsSandboxOptions extends GlobalSandboxOptions {
4845
export class MacOsSandboxManager implements SandboxManager {
4946
constructor(private readonly options: MacOsSandboxOptions) {}
5047

51-
private async isStrictlyApproved(req: SandboxRequest): Promise<boolean> {
52-
const approvedTools = this.options.modeConfig?.approvedTools;
53-
if (!approvedTools || approvedTools.length === 0) {
54-
return false;
55-
}
56-
57-
await initializeShellParsers();
58-
59-
const fullCmd = [req.command, ...req.args].join(' ');
60-
const stripped = stripShellWrapper(fullCmd);
61-
62-
const roots = getCommandRoots(stripped);
63-
if (roots.length === 0) return false;
64-
65-
const allRootsApproved = roots.every((root) =>
66-
approvedTools.includes(root),
67-
);
68-
if (allRootsApproved) {
48+
isKnownSafeCommand(args: string[]): boolean {
49+
const toolName = args[0];
50+
const approvedTools = this.options.modeConfig?.approvedTools ?? [];
51+
if (toolName && approvedTools.includes(toolName)) {
6952
return true;
7053
}
71-
72-
const pipelineCommands = splitCommands(stripped);
73-
if (pipelineCommands.length === 0) return false;
74-
75-
// For safety, every command in the pipeline must be considered safe.
76-
for (const cmdString of pipelineCommands) {
77-
const parsedArgs = shellParse(cmdString).map(String);
78-
if (!isKnownSafeCommand(parsedArgs)) {
79-
return false;
80-
}
81-
}
82-
83-
return true;
54+
return isKnownSafeCommand(args);
8455
}
8556

86-
private async getCommandName(req: SandboxRequest): Promise<string> {
87-
await initializeShellParsers();
88-
const fullCmd = [req.command, ...req.args].join(' ');
89-
const stripped = stripShellWrapper(fullCmd);
90-
const roots = getCommandRoots(stripped).filter(
91-
(r) => r !== 'shopt' && r !== 'set',
92-
);
93-
if (roots.length > 0) {
94-
return roots[0];
95-
}
96-
return path.basename(req.command);
57+
isDangerousCommand(args: string[]): boolean {
58+
return isDangerousCommand(args);
9759
}
9860

9961
async prepareCommand(req: SandboxRequest): Promise<SandboxedCommand> {
@@ -122,15 +84,19 @@ export class MacOsSandboxManager implements SandboxManager {
12284

12385
// If not in readonly mode OR it's a strictly approved pipeline, allow workspace writes
12486
const isApproved = allowOverrides
125-
? await this.isStrictlyApproved(req)
87+
? await isStrictlyApproved(
88+
req.command,
89+
req.args,
90+
this.options.modeConfig?.approvedTools,
91+
)
12692
: false;
12793

12894
const workspaceWrite = !isReadonlyMode || isApproved;
129-
const networkAccess =
95+
const defaultNetwork =
13096
this.options.modeConfig?.network ?? req.policy?.networkAccess ?? false;
13197

13298
// Fetch persistent approvals for this command
133-
const commandName = await this.getCommandName(req);
99+
const commandName = await getCommandName(req.command, req.args);
134100
const persistentPermissions = allowOverrides
135101
? this.options.policyManager?.getCommandPermissions(commandName)
136102
: undefined;
@@ -148,7 +114,7 @@ export class MacOsSandboxManager implements SandboxManager {
148114
],
149115
},
150116
network:
151-
networkAccess ||
117+
defaultNetwork ||
152118
persistentPermissions?.network ||
153119
req.policy?.additionalPermissions?.network ||
154120
false,

0 commit comments

Comments
 (0)