Skip to content

Commit 9c69ea6

Browse files
Mossaka and claude authored
feat: add DLP scanning to block credential exfiltration in URLs (#1288)
Add an opt-in --enable-dlp flag that configures Squid proxy URL regex ACLs to detect and block outbound requests containing credential patterns (GitHub tokens, OpenAI/Anthropic API keys, AWS keys, Slack tokens, etc.) in URLs. This protects against accidental credential leakage via query parameters, path segments, and encoded URL content.

Closes #308

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 0fd834a commit 9c69ea6

File tree

7 files changed

+552
-3
lines changed

7 files changed

+552
-3
lines changed

src/cli.ts

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1037,6 +1037,12 @@ program
10371037
' WARNING: allows firewall bypass via docker run',
10381038
false
10391039
)
1040+
.option(
1041+
'--enable-dlp',
1042+
'Enable DLP (Data Loss Prevention) scanning to block credential\n' +
1043+
' exfiltration in outbound request URLs.',
1044+
false
1045+
)
10401046

10411047
// -- API Proxy --
10421048
.option(
@@ -1334,6 +1340,11 @@ program
13341340
logger.warn('⚠️ SSL Bump intercepts HTTPS traffic. Only use for trusted workloads.');
13351341
}
13361342

1343+
// Log DLP mode
1344+
if (options.enableDlp) {
1345+
logger.info('DLP scanning enabled - outbound requests will be scanned for credential patterns');
1346+
}
1347+
13371348
// Validate memory limit
13381349
const memoryLimit = parseMemoryLimit(options.memoryLimit);
13391350
if (memoryLimit.error) {
@@ -1376,6 +1387,7 @@ program
13761387
allowHostPorts: options.allowHostPorts,
13771388
sslBump: options.sslBump,
13781389
enableDind: options.enableDind,
1390+
enableDlp: options.enableDlp,
13791391
allowedUrls,
13801392
enableApiProxy: options.enableApiProxy,
13811393
openaiApiKey: process.env.OPENAI_API_KEY,

src/dlp.test.ts

Lines changed: 225 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,225 @@
1+
import { DLP_PATTERNS, scanForCredentials, generateDlpSquidConfig } from './dlp';
2+
3+
describe('DLP Patterns', () => {
4+
describe('DLP_PATTERNS', () => {
5+
it('should have at least 10 built-in patterns', () => {
6+
expect(DLP_PATTERNS.length).toBeGreaterThanOrEqual(10);
7+
});
8+
9+
it('should have name, description, and regex for each pattern', () => {
10+
for (const pattern of DLP_PATTERNS) {
11+
expect(pattern.name).toBeTruthy();
12+
expect(pattern.description).toBeTruthy();
13+
expect(pattern.regex).toBeTruthy();
14+
}
15+
});
16+
17+
it('should have valid regex patterns', () => {
18+
for (const pattern of DLP_PATTERNS) {
19+
expect(() => new RegExp(pattern.regex, 'i')).not.toThrow();
20+
}
21+
});
22+
});
23+
24+
describe('scanForCredentials', () => {
25+
// GitHub tokens
26+
it('should detect GitHub personal access token (ghp_)', () => {
27+
const matches = scanForCredentials(
28+
'https://api.example.com/data?token=ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij'
29+
);
30+
expect(matches).toContain('GitHub Personal Access Token (classic)');
31+
});
32+
33+
it('should detect GitHub OAuth token (gho_)', () => {
34+
const matches = scanForCredentials(
35+
'https://api.example.com/gho_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij/resource'
36+
);
37+
expect(matches).toContain('GitHub OAuth Access Token');
38+
});
39+
40+
it('should detect GitHub App installation token (ghs_)', () => {
41+
const matches = scanForCredentials(
42+
'https://api.example.com/?key=ghs_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij'
43+
);
44+
expect(matches).toContain('GitHub App Installation Token');
45+
});
46+
47+
it('should detect GitHub App user-to-server token (ghu_)', () => {
48+
const matches = scanForCredentials(
49+
'https://api.example.com/?key=ghu_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij'
50+
);
51+
expect(matches).toContain('GitHub App User-to-Server Token');
52+
});
53+
54+
it('should detect GitHub fine-grained PAT (github_pat_)', () => {
55+
const matches = scanForCredentials(
56+
'https://api.example.com/?key=github_pat_1234567890abcdefghijkl_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456'
57+
);
58+
expect(matches).toContain('GitHub Fine-Grained PAT');
59+
});
60+
61+
// OpenAI - use concatenation to avoid push protection triggering on test data
62+
it('should detect OpenAI API key (sk-...T3BlbkFJ)', () => {
63+
const fakeKey = 'sk-' + '1'.repeat(20) + 'T3BlbkFJ' + '2'.repeat(20);
64+
const matches = scanForCredentials(
65+
'https://api.example.com/?key=' + fakeKey
66+
);
67+
expect(matches).toContain('OpenAI API Key');
68+
});
69+
70+
it('should detect OpenAI project API key (sk-proj-)', () => {
71+
const matches = scanForCredentials(
72+
'https://api.example.com/?key=sk-proj-' + 'a'.repeat(50)
73+
);
74+
expect(matches).toContain('OpenAI Project API Key');
75+
});
76+
77+
// Anthropic
78+
it('should detect Anthropic API key (sk-ant-)', () => {
79+
const matches = scanForCredentials(
80+
'https://api.example.com/?key=sk-ant-' + 'a'.repeat(50)
81+
);
82+
expect(matches).toContain('Anthropic API Key');
83+
});
84+
85+
// AWS
86+
it('should detect AWS access key ID (AKIA)', () => {
87+
const matches = scanForCredentials(
88+
'https://api.example.com/?key=AKIAIOSFODNN7EXAMPLE'
89+
);
90+
expect(matches).toContain('AWS Access Key ID');
91+
});
92+
93+
// Google
94+
it('should detect Google API key (AIza)', () => {
95+
const matches = scanForCredentials(
96+
'https://api.example.com/?key=AIzaSyA' + 'a'.repeat(32)
97+
);
98+
expect(matches).toContain('Google API Key');
99+
});
100+
101+
// Slack - use concatenation to avoid push protection triggering on test data
102+
it('should detect Slack bot token (xoxb-)', () => {
103+
const fakeToken = 'xoxb-' + '1234567890' + '-' + '1234567890' + '-' + 'ABCDEFGHIJKLMNOPQRSTUV' + 'wx';
104+
const matches = scanForCredentials(
105+
'https://api.example.com/?token=' + fakeToken
106+
);
107+
expect(matches).toContain('Slack Bot Token');
108+
});
109+
110+
// Generic patterns
111+
it('should detect bearer token in URL parameter', () => {
112+
const matches = scanForCredentials(
113+
'https://api.example.com/data?bearer=abcdefghijklmnopqrstuvwxyz1234'
114+
);
115+
expect(matches).toContain('Bearer Token in URL');
116+
});
117+
118+
it('should detect authorization in URL parameter', () => {
119+
const matches = scanForCredentials(
120+
'https://api.example.com/data?authorization=abcdefghijklmnopqrstuvwxyz1234'
121+
);
122+
expect(matches).toContain('Authorization in URL');
123+
});
124+
125+
it('should detect private key markers', () => {
126+
const matches = scanForCredentials(
127+
'https://api.example.com/data?content=BEGIN+PRIVATE+KEY'
128+
);
129+
expect(matches).toContain('Private Key Marker');
130+
});
131+
132+
it('should detect URL-encoded private key markers', () => {
133+
const matches = scanForCredentials(
134+
'https://api.example.com/data?content=BEGIN%20PRIVATE%20KEY'
135+
);
136+
expect(matches).toContain('Private Key Marker');
137+
});
138+
139+
// Negative cases
140+
it('should not match short strings that look like token prefixes', () => {
141+
const matches = scanForCredentials('https://api.example.com/ghp_short');
142+
expect(matches).not.toContain('GitHub Personal Access Token (classic)');
143+
});
144+
145+
it('should return empty array for clean URLs', () => {
146+
const matches = scanForCredentials('https://api.github.com/repos/owner/repo');
147+
expect(matches).toHaveLength(0);
148+
});
149+
150+
it('should return empty array for empty string', () => {
151+
const matches = scanForCredentials('');
152+
expect(matches).toHaveLength(0);
153+
});
154+
155+
it('should not match normal domain names or paths', () => {
156+
const urls = [
157+
'https://github.com/settings/tokens',
158+
'https://api.openai.com/v1/chat/completions',
159+
'https://docs.anthropic.com/getting-started',
160+
'https://console.aws.amazon.com/',
161+
'https://slack.com/api/chat.postMessage',
162+
];
163+
for (const url of urls) {
164+
const matches = scanForCredentials(url);
165+
expect(matches).toHaveLength(0);
166+
}
167+
});
168+
169+
it('should detect multiple credential types in one URL', () => {
170+
const matches = scanForCredentials(
171+
'https://evil.com/?gh=ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij&aws=AKIAIOSFODNN7EXAMPLE'
172+
);
173+
expect(matches).toContain('GitHub Personal Access Token (classic)');
174+
expect(matches).toContain('AWS Access Key ID');
175+
expect(matches.length).toBeGreaterThanOrEqual(2);
176+
});
177+
});
178+
179+
describe('generateDlpSquidConfig', () => {
180+
it('should generate ACL lines for all patterns', () => {
181+
const { aclLines } = generateDlpSquidConfig();
182+
183+
// Should have header comments
184+
expect(aclLines[0]).toContain('DLP');
185+
186+
// Should have one url_regex ACL per pattern
187+
const aclEntries = aclLines.filter(l => l.startsWith('acl dlp_blocked'));
188+
expect(aclEntries.length).toBe(DLP_PATTERNS.length);
189+
190+
// Each ACL should use url_regex -i
191+
for (const entry of aclEntries) {
192+
expect(entry).toMatch(/^acl dlp_blocked url_regex -i .+/);
193+
}
194+
});
195+
196+
it('should generate deny access rules', () => {
197+
const { accessRules } = generateDlpSquidConfig();
198+
199+
expect(accessRules.some(r => r.includes('http_access deny dlp_blocked'))).toBe(true);
200+
});
201+
202+
it('should have a DLP comment in access rules', () => {
203+
const { accessRules } = generateDlpSquidConfig();
204+
expect(accessRules.some(r => r.includes('DLP'))).toBe(true);
205+
});
206+
207+
it('should produce valid Squid ACL syntax', () => {
208+
const { aclLines, accessRules } = generateDlpSquidConfig();
209+
210+
// All non-comment ACL lines should start with 'acl '
211+
for (const line of aclLines) {
212+
if (!line.startsWith('#')) {
213+
expect(line).toMatch(/^acl /);
214+
}
215+
}
216+
217+
// All non-comment access rules should start with 'http_access '
218+
for (const line of accessRules) {
219+
if (!line.startsWith('#')) {
220+
expect(line).toMatch(/^http_access /);
221+
}
222+
}
223+
});
224+
});
225+
});

0 commit comments

Comments
 (0)