Skip to content

Commit 4da2ae8

Browse files
CopilotlpcoxCopilot
authored
feat(docker-manager): add --diagnostic-logs flag for container failure diagnostics (#1906)
* Initial plan * feat: implement --diagnostic-logs flag for container failure diagnostics Agent-Logs-Url: https://github.com/github/gh-aw-firewall/sessions/86fd7c63-7633-4d6a-981d-659d13b69098 * fix: address code review feedback on diagnostic-logs implementation Agent-Logs-Url: https://github.com/github/gh-aw-firewall/sessions/86fd7c63-7633-4d6a-981d-659d13b69098 * fix: use \\w* in redaction regex and clean up help text Agent-Logs-Url: https://github.com/github/gh-aw-firewall/sessions/86fd7c63-7633-4d6a-981d-659d13b69098 * Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> * Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> * Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> * Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: Landon Cox <landon.cox@microsoft.com> Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
1 parent b5b5c5a commit 4da2ae8

File tree

6 files changed

+521
-1
lines changed

6 files changed

+521
-1
lines changed

src/cli-workflow.test.ts

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,4 +224,90 @@ describe('runMainWorkflow', () => {
224224
expect(logger.warn).toHaveBeenCalledWith('Command completed with exit code: 42');
225225
expect(logger.success).not.toHaveBeenCalled();
226226
});
227+
228+
it('calls collectDiagnosticLogs before cleanup on non-zero exit when diagnosticLogs is enabled', async () => {
229+
const callOrder: string[] = [];
230+
const configWithDiagnostics: WrapperConfig = {
231+
...baseConfig,
232+
diagnosticLogs: true,
233+
};
234+
const dependencies: WorkflowDependencies = {
235+
ensureFirewallNetwork: jest.fn().mockResolvedValue({ squidIp: '172.30.0.10' }),
236+
setupHostIptables: jest.fn().mockResolvedValue(undefined),
237+
writeConfigs: jest.fn().mockResolvedValue(undefined),
238+
startContainers: jest.fn().mockResolvedValue(undefined),
239+
runAgentCommand: jest.fn().mockImplementation(async () => {
240+
callOrder.push('runAgentCommand');
241+
return { exitCode: 1 };
242+
}),
243+
collectDiagnosticLogs: jest.fn().mockImplementation(async () => {
244+
callOrder.push('collectDiagnosticLogs');
245+
}),
246+
};
247+
const performCleanup = jest.fn().mockImplementation(async () => {
248+
callOrder.push('performCleanup');
249+
});
250+
const logger = createLogger();
251+
252+
await runMainWorkflow(configWithDiagnostics, dependencies, { logger, performCleanup });
253+
254+
expect(callOrder).toEqual(['runAgentCommand', 'collectDiagnosticLogs', 'performCleanup']);
255+
expect(dependencies.collectDiagnosticLogs).toHaveBeenCalledWith(configWithDiagnostics.workDir);
256+
});
257+
258+
it('does not call collectDiagnosticLogs when diagnosticLogs is disabled', async () => {
259+
const collectDiagnosticLogs = jest.fn().mockResolvedValue(undefined);
260+
const dependencies: WorkflowDependencies = {
261+
ensureFirewallNetwork: jest.fn().mockResolvedValue({ squidIp: '172.30.0.10' }),
262+
setupHostIptables: jest.fn().mockResolvedValue(undefined),
263+
writeConfigs: jest.fn().mockResolvedValue(undefined),
264+
startContainers: jest.fn().mockResolvedValue(undefined),
265+
runAgentCommand: jest.fn().mockResolvedValue({ exitCode: 1 }),
266+
collectDiagnosticLogs,
267+
};
268+
const logger = createLogger();
269+
270+
await runMainWorkflow(baseConfig, dependencies, { logger, performCleanup: jest.fn() });
271+
272+
expect(collectDiagnosticLogs).not.toHaveBeenCalled();
273+
});
274+
275+
it('does not call collectDiagnosticLogs on zero exit even when diagnosticLogs is enabled', async () => {
276+
const collectDiagnosticLogs = jest.fn().mockResolvedValue(undefined);
277+
const configWithDiagnostics: WrapperConfig = {
278+
...baseConfig,
279+
diagnosticLogs: true,
280+
};
281+
const dependencies: WorkflowDependencies = {
282+
ensureFirewallNetwork: jest.fn().mockResolvedValue({ squidIp: '172.30.0.10' }),
283+
setupHostIptables: jest.fn().mockResolvedValue(undefined),
284+
writeConfigs: jest.fn().mockResolvedValue(undefined),
285+
startContainers: jest.fn().mockResolvedValue(undefined),
286+
runAgentCommand: jest.fn().mockResolvedValue({ exitCode: 0 }),
287+
collectDiagnosticLogs,
288+
};
289+
const logger = createLogger();
290+
291+
await runMainWorkflow(configWithDiagnostics, dependencies, { logger, performCleanup: jest.fn() });
292+
293+
expect(collectDiagnosticLogs).not.toHaveBeenCalled();
294+
});
295+
296+
it('does not call collectDiagnosticLogs when dependency is not provided', async () => {
297+
const configWithDiagnostics: WrapperConfig = {
298+
...baseConfig,
299+
diagnosticLogs: true,
300+
};
301+
const dependencies: WorkflowDependencies = {
302+
ensureFirewallNetwork: jest.fn().mockResolvedValue({ squidIp: '172.30.0.10' }),
303+
setupHostIptables: jest.fn().mockResolvedValue(undefined),
304+
writeConfigs: jest.fn().mockResolvedValue(undefined),
305+
startContainers: jest.fn().mockResolvedValue(undefined),
306+
runAgentCommand: jest.fn().mockResolvedValue({ exitCode: 1 }),
307+
// collectDiagnosticLogs not provided
308+
};
309+
const logger = createLogger();
310+
311+
await expect(runMainWorkflow(configWithDiagnostics, dependencies, { logger, performCleanup: jest.fn() })).resolves.toBe(1);
312+
});
227313
});

src/cli-workflow.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ export interface WorkflowDependencies {
1414
proxyLogsDir?: string,
1515
agentTimeoutMinutes?: number
1616
) => Promise<{ exitCode: number }>;
17+
collectDiagnosticLogs?: (workDir: string) => Promise<void>;
1718
}
1819

1920
export interface WorkflowCallbacks {
@@ -76,6 +77,16 @@ export async function runMainWorkflow(
7677
// Step 3: Wait for agent to complete
7778
const result = await dependencies.runAgentCommand(config.workDir, config.allowedDomains, config.proxyLogsDir, config.agentTimeout);
7879

80+
// Step 3.5: Collect diagnostic logs before containers are stopped
81+
// Must run BEFORE performCleanup() which calls docker compose down -v.
82+
if (config.diagnosticLogs && result.exitCode !== 0 && dependencies.collectDiagnosticLogs) {
83+
try {
84+
await dependencies.collectDiagnosticLogs(config.workDir);
85+
} catch (error) {
86+
logger.warn('Failed to collect diagnostic logs; continuing with cleanup.', error);
87+
}
88+
}
89+
7990
// Step 4: Cleanup (logs will be preserved automatically if they exist)
8091
await performCleanup();
8192

src/cli.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import {
1515
cleanup,
1616
preserveIptablesAudit,
1717
fastKillAgentContainer,
18+
collectDiagnosticLogs,
1819
} from './docker-manager';
1920
import {
2021
ensureFirewallNetwork,
@@ -1484,6 +1485,13 @@ program
14841485
'--session-state-dir <path>',
14851486
'Directory to save Copilot CLI session state (events.jsonl, session data)'
14861487
)
1488+
.option(
1489+
'--diagnostic-logs',
1490+
'Collect container logs, exit state, and sanitized config on non-zero exit.\n' +
1491+
' Useful for debugging container startup failures (e.g. Squid crashes in DinD).\n' +
1492+
' Written to <workDir>/diagnostics/ (or <audit-dir>/diagnostics/ when set).',
1493+
false
1494+
)
14871495
.argument('[args...]', 'Command and arguments to execute (use -- to separate from options)')
14881496
.action(async (args: string[], options) => {
14891497
// Require -- separator for passing command arguments
@@ -1826,6 +1834,7 @@ program
18261834
difcProxyHost: options.difcProxyHost,
18271835
difcProxyCaCert: options.difcProxyCaCert,
18281836
githubToken: process.env.GITHUB_TOKEN || process.env.GH_TOKEN,
1837+
diagnosticLogs: options.diagnosticLogs || false,
18291838
};
18301839

18311840
// Parse and validate --agent-timeout
@@ -2006,6 +2015,7 @@ program
20062015
writeConfigs,
20072016
startContainers,
20082017
runAgentCommand,
2018+
collectDiagnosticLogs,
20092019
},
20102020
{
20112021
logger,

src/docker-manager.test.ts

Lines changed: 198 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { generateDockerCompose, subnetsOverlap, writeConfigs, startContainers, stopContainers, fastKillAgentContainer, isAgentExternallyKilled, resetAgentExternallyKilled, AGENT_CONTAINER_NAME, cleanup, runAgentCommand, validateIdNotInSystemRange, getSafeHostUid, getSafeHostGid, getRealUserHome, extractGhHostFromServerUrl, readGitHubPathEntries, mergeGitHubPathEntries, readEnvFile, MIN_REGULAR_UID, ACT_PRESET_BASE_IMAGE, stripScheme } from './docker-manager';
1+
import { generateDockerCompose, subnetsOverlap, writeConfigs, startContainers, stopContainers, fastKillAgentContainer, isAgentExternallyKilled, resetAgentExternallyKilled, AGENT_CONTAINER_NAME, cleanup, runAgentCommand, validateIdNotInSystemRange, getSafeHostUid, getSafeHostGid, getRealUserHome, extractGhHostFromServerUrl, readGitHubPathEntries, mergeGitHubPathEntries, readEnvFile, MIN_REGULAR_UID, ACT_PRESET_BASE_IMAGE, stripScheme, collectDiagnosticLogs } from './docker-manager';
22
import { WrapperConfig } from './types';
33
import * as fs from 'fs';
44
import * as path from 'path';
@@ -4222,4 +4222,201 @@ describe('docker-manager', () => {
42224222
expect(() => readEnvFile(path.join(tmpDir, 'missing.env'))).toThrow();
42234223
});
42244224
});
4225+
4226+
describe('collectDiagnosticLogs', () => {
4227+
let testDir: string;
4228+
4229+
beforeEach(() => {
4230+
testDir = fs.mkdtempSync(path.join(os.tmpdir(), 'awf-'));
4231+
jest.clearAllMocks();
4232+
});
4233+
4234+
afterEach(() => {
4235+
if (fs.existsSync(testDir)) {
4236+
fs.rmSync(testDir, { recursive: true, force: true });
4237+
}
4238+
});
4239+
4240+
it('should create diagnostics directory and write container logs', async () => {
4241+
// Mock docker logs returning content
4242+
mockExecaFn
4243+
.mockResolvedValueOnce({ stdout: 'squid log output', stderr: '', exitCode: 0 }) // docker logs awf-squid
4244+
.mockResolvedValueOnce({ stdout: '0 ', stderr: '', exitCode: 0 }) // docker inspect state awf-squid
4245+
.mockResolvedValueOnce({ stdout: '[{"Type":"bind"}]', stderr: '', exitCode: 0 }) // docker inspect mounts awf-squid
4246+
.mockResolvedValueOnce({ stdout: 'agent log output', stderr: '', exitCode: 0 }) // docker logs awf-agent
4247+
.mockResolvedValueOnce({ stdout: '1 container crashed', stderr: '', exitCode: 0 }) // docker inspect state awf-agent
4248+
.mockResolvedValueOnce({ stdout: '[{"Type":"volume"}]', stderr: '', exitCode: 0 }) // docker inspect mounts awf-agent
4249+
.mockResolvedValueOnce({ stdout: '', stderr: '', exitCode: 1 }) // docker logs awf-api-proxy (not started)
4250+
.mockResolvedValueOnce({ stdout: '', stderr: '', exitCode: 1 }) // docker inspect state awf-api-proxy
4251+
.mockResolvedValueOnce({ stdout: 'null', stderr: '', exitCode: 0 }) // docker inspect mounts awf-api-proxy (null)
4252+
.mockResolvedValueOnce({ stdout: '', stderr: '', exitCode: 0 }) // docker logs awf-iptables-init
4253+
.mockResolvedValueOnce({ stdout: '0 ', stderr: '', exitCode: 0 }) // docker inspect state awf-iptables-init
4254+
.mockResolvedValueOnce({ stdout: '[]', stderr: '', exitCode: 0 }); // docker inspect mounts awf-iptables-init
4255+
4256+
// Create a docker-compose.yml with a secret env var
4257+
const composeContent = [
4258+
'services:',
4259+
' squid:',
4260+
' environment:',
4261+
' AWF_SQUID_CONFIG_B64: secretvalue',
4262+
' GITHUB_TOKEN: ghp_abc123',
4263+
' SOME_KEY: mykey',
4264+
' NORMAL_VAR: normalvalue',
4265+
].join('\n');
4266+
fs.writeFileSync(path.join(testDir, 'docker-compose.yml'), composeContent);
4267+
4268+
await collectDiagnosticLogs(testDir);
4269+
4270+
const diagnosticsDir = path.join(testDir, 'diagnostics');
4271+
expect(fs.existsSync(diagnosticsDir)).toBe(true);
4272+
4273+
// awf-squid.log should have content
4274+
expect(fs.existsSync(path.join(diagnosticsDir, 'awf-squid.log'))).toBe(true);
4275+
expect(fs.readFileSync(path.join(diagnosticsDir, 'awf-squid.log'), 'utf8')).toContain('squid log output');
4276+
4277+
// awf-agent.log should have content
4278+
expect(fs.existsSync(path.join(diagnosticsDir, 'awf-agent.log'))).toBe(true);
4279+
expect(fs.readFileSync(path.join(diagnosticsDir, 'awf-agent.log'), 'utf8')).toContain('agent log output');
4280+
4281+
// State files should be written
4282+
expect(fs.existsSync(path.join(diagnosticsDir, 'awf-squid.state'))).toBe(true);
4283+
expect(fs.existsSync(path.join(diagnosticsDir, 'awf-agent.state'))).toBe(true);
4284+
4285+
// Mounts files for containers that returned non-null JSON
4286+
expect(fs.existsSync(path.join(diagnosticsDir, 'awf-squid.mounts.json'))).toBe(true);
4287+
expect(fs.existsSync(path.join(diagnosticsDir, 'awf-agent.mounts.json'))).toBe(true);
4288+
// awf-api-proxy returned 'null' so no mounts file
4289+
expect(fs.existsSync(path.join(diagnosticsDir, 'awf-api-proxy.mounts.json'))).toBe(false);
4290+
4291+
// Sanitized docker-compose.yml should exist with secrets redacted
4292+
const sanitizedCompose = fs.readFileSync(path.join(diagnosticsDir, 'docker-compose.yml'), 'utf8');
4293+
expect(sanitizedCompose).toContain('[REDACTED]');
4294+
// GITHUB_TOKEN and SOME_KEY contain TOKEN/KEY → redacted
4295+
expect(sanitizedCompose).not.toContain('ghp_abc123');
4296+
expect(sanitizedCompose).not.toContain('mykey');
4297+
// AWF_SQUID_CONFIG_B64 does not contain TOKEN/KEY/SECRET → preserved
4298+
expect(sanitizedCompose).toContain('secretvalue');
4299+
// Non-secret env vars should be unchanged
4300+
expect(sanitizedCompose).toContain('NORMAL_VAR: normalvalue');
4301+
});
4302+
4303+
it('should not write log file when docker logs returns empty output', async () => {
4304+
mockExecaFn
4305+
.mockResolvedValue({ stdout: '', stderr: '', exitCode: 0 }); // all containers return empty
4306+
4307+
await collectDiagnosticLogs(testDir);
4308+
4309+
const diagnosticsDir = path.join(testDir, 'diagnostics');
4310+
// No .log files should be written for empty output
4311+
const files = fs.existsSync(diagnosticsDir) ? fs.readdirSync(diagnosticsDir) : [];
4312+
const logFiles = files.filter(f => f.endsWith('.log'));
4313+
expect(logFiles).toHaveLength(0);
4314+
});
4315+
4316+
it('should skip docker-compose.yml sanitization when file does not exist', async () => {
4317+
mockExecaFn.mockResolvedValue({ stdout: '', stderr: '', exitCode: 0 });
4318+
4319+
// No docker-compose.yml created in testDir
4320+
await expect(collectDiagnosticLogs(testDir)).resolves.not.toThrow();
4321+
4322+
const diagnosticsDir = path.join(testDir, 'diagnostics');
4323+
expect(fs.existsSync(path.join(diagnosticsDir, 'docker-compose.yml'))).toBe(false);
4324+
});
4325+
4326+
it('should handle docker command failures gracefully', async () => {
4327+
// All docker commands throw errors
4328+
mockExecaFn.mockRejectedValue(new Error('docker not found'));
4329+
4330+
await expect(collectDiagnosticLogs(testDir)).resolves.not.toThrow();
4331+
});
4332+
4333+
it('should redact lowercase and mixed-case secret env var names', async () => {
4334+
mockExecaFn.mockResolvedValue({ stdout: '', stderr: '', exitCode: 0 });
4335+
4336+
const composeContent = [
4337+
'services:',
4338+
' agent:',
4339+
' environment:',
4340+
' github_token: ghp_lowercase',
4341+
' Api_Key: mixedcase_value',
4342+
' OAUTH_SECRET: uppercase_secret',
4343+
' not_sensitive: keepme',
4344+
].join('\n');
4345+
fs.writeFileSync(path.join(testDir, 'docker-compose.yml'), composeContent);
4346+
4347+
await collectDiagnosticLogs(testDir);
4348+
4349+
const sanitized = fs.readFileSync(path.join(testDir, 'diagnostics', 'docker-compose.yml'), 'utf8');
4350+
// All three secret patterns should be redacted
4351+
expect(sanitized).not.toContain('ghp_lowercase');
4352+
expect(sanitized).not.toContain('mixedcase_value');
4353+
expect(sanitized).not.toContain('uppercase_secret');
4354+
// Non-secret var must be preserved
4355+
expect(sanitized).toContain('not_sensitive: keepme');
4356+
});
4357+
});
4358+
4359+
describe('cleanup - diagnostics preservation', () => {
4360+
let testDir: string;
4361+
4362+
beforeEach(() => {
4363+
testDir = fs.mkdtempSync(path.join(os.tmpdir(), 'awf-'));
4364+
jest.clearAllMocks();
4365+
mockExecaSync.mockReturnValue({ stdout: '', stderr: '', exitCode: 0 });
4366+
});
4367+
4368+
afterEach(() => {
4369+
if (fs.existsSync(testDir)) {
4370+
fs.rmSync(testDir, { recursive: true, force: true });
4371+
}
4372+
const timestamp = path.basename(testDir).replace('awf-', '');
4373+
const diagDir = path.join(os.tmpdir(), `awf-diagnostics-${timestamp}`);
4374+
if (fs.existsSync(diagDir)) {
4375+
fs.rmSync(diagDir, { recursive: true, force: true });
4376+
}
4377+
});
4378+
4379+
it('should preserve diagnostics to /tmp when no auditDir is specified', async () => {
4380+
const diagnosticsDir = path.join(testDir, 'diagnostics');
4381+
fs.mkdirSync(diagnosticsDir, { recursive: true });
4382+
fs.writeFileSync(path.join(diagnosticsDir, 'awf-squid.log'), 'squid crashed\n');
4383+
4384+
await cleanup(testDir, false);
4385+
4386+
const timestamp = path.basename(testDir).replace('awf-', '');
4387+
const preserved = path.join(os.tmpdir(), `awf-diagnostics-${timestamp}`);
4388+
expect(fs.existsSync(preserved)).toBe(true);
4389+
expect(fs.readFileSync(path.join(preserved, 'awf-squid.log'), 'utf8')).toBe('squid crashed\n');
4390+
});
4391+
4392+
it('should co-locate diagnostics under auditDir/diagnostics when auditDir is specified', async () => {
4393+
const auditDir = fs.mkdtempSync(path.join(os.tmpdir(), 'awf-audit-test-'));
4394+
try {
4395+
const diagnosticsDir = path.join(testDir, 'diagnostics');
4396+
fs.mkdirSync(diagnosticsDir, { recursive: true });
4397+
fs.writeFileSync(path.join(diagnosticsDir, 'awf-agent.log'), 'agent output\n');
4398+
4399+
await cleanup(testDir, false, undefined, auditDir);
4400+
4401+
const auditDiagnosticsDir = path.join(auditDir, 'diagnostics');
4402+
expect(fs.existsSync(auditDiagnosticsDir)).toBe(true);
4403+
expect(fs.readFileSync(path.join(auditDiagnosticsDir, 'awf-agent.log'), 'utf8')).toBe('agent output\n');
4404+
expect(mockExecaSync).toHaveBeenCalledWith('chmod', ['-R', 'a+rX', auditDiagnosticsDir]);
4405+
} finally {
4406+
fs.rmSync(auditDir, { recursive: true, force: true });
4407+
}
4408+
});
4409+
4410+
it('should not create diagnostics destination when diagnostics dir is empty', async () => {
4411+
// Empty diagnostics dir
4412+
const diagnosticsDir = path.join(testDir, 'diagnostics');
4413+
fs.mkdirSync(diagnosticsDir, { recursive: true });
4414+
4415+
await cleanup(testDir, false);
4416+
4417+
const timestamp = path.basename(testDir).replace('awf-', '');
4418+
const preserved = path.join(os.tmpdir(), `awf-diagnostics-${timestamp}`);
4419+
expect(fs.existsSync(preserved)).toBe(false);
4420+
});
4421+
});
42254422
});

0 commit comments

Comments
 (0)