From 4cf1569fe80f0350d92859cd067c17ccfc44669d Mon Sep 17 00:00:00 2001 From: Steven C Date: Thu, 20 Nov 2025 12:51:23 -0500 Subject: [PATCH 1/7] More robust allow list matching --- .../unit/checks/keywords-urls.test.ts | 221 ++++++++++++- src/checks/urls.ts | 308 ++++++++++++++---- 2 files changed, 458 insertions(+), 71 deletions(-) diff --git a/src/__tests__/unit/checks/keywords-urls.test.ts b/src/__tests__/unit/checks/keywords-urls.test.ts index 8f5cfa5..59e7b8b 100644 --- a/src/__tests__/unit/checks/keywords-urls.test.ts +++ b/src/__tests__/unit/checks/keywords-urls.test.ts @@ -4,7 +4,7 @@ import { describe, it, expect } from 'vitest'; import { keywordsCheck, KeywordsConfig } from '../../../checks/keywords'; -import { urls } from '../../../checks/urls'; +import { urls, UrlsConfig } from '../../../checks/urls'; import { competitorsCheck } from '../../../checks/competitors'; import { GuardrailResult } from '../../../types'; @@ -34,6 +34,16 @@ describe('keywords guardrail', () => { }); }); +describe('UrlsConfig', () => { + it('normalizes allowed scheme inputs', () => { + const config = UrlsConfig.parse({ + allowed_schemes: ['HTTPS://', 'http:', ' https '], + }); + + expect(Array.from(config.allowed_schemes).sort()).toEqual(['http', 'https']); + }); +}); + describe('urls guardrail', () => { it('allows https URLs listed in the allow list', async () => { const result = await urls( @@ -92,6 +102,215 @@ describe('urls guardrail', () => { expect(result.info?.blocked).toContain('https://other.com'); expect(result.tripwireTriggered).toBe(true); }); + + it('allows full URLs with explicit paths in the allow list', async () => { + const text = [ + 'https://suntropy.es', + 'https://api.example.com/v1/tools?id=2', + 'https://api.example.com/v2', + ].join(' '); + + const result = await urls( + {}, + text, + { + url_allow_list: ['https://suntropy.es', 'https://api.example.com/v1'], + allowed_schemes: new Set(['https']), + allow_subdomains: false, + block_userinfo: true, + } + ); + + expect(result.info?.allowed).toEqual( + expect.arrayContaining([ + 'https://suntropy.es', + 'https://api.example.com/v1/tools?id=2', + ]) + ); + expect(result.info?.blocked).toContain('https://api.example.com/v2'); + }); + + it('respects path segment boundaries to avoid prefix bypasses', async () => { + const text = [ + 'https://example.com/api', + 'https://example.com/api/users', + 'https://example.com/api2', + 'https://example.com/api-v2', + ].join(' '); + + const result = await urls( + {}, + text, + { + url_allow_list: ['https://example.com/api'], + allowed_schemes: new Set(['https']), + allow_subdomains: false, + block_userinfo: true, + } + ); + + expect(result.info?.allowed).toEqual( + expect.arrayContaining([ + 'https://example.com/api', + 'https://example.com/api/users', + ]) + ); + expect(result.info?.blocked).toEqual( + expect.arrayContaining([ + 'https://example.com/api2', + 'https://example.com/api-v2', + ]) + ); + }); + + it('matches scheme-less allow list entries across configured schemes', async () => { + const text = ['https://example.com', 'http://example.com'].join(' '); + + const result = await urls( + {}, + text, + { + url_allow_list: ['example.com'], + allowed_schemes: new Set(['https', 'http']), + allow_subdomains: false, + block_userinfo: true, + } + ); + + expect(result.info?.allowed).toEqual( + expect.arrayContaining(['https://example.com', 'http://example.com']) + ); + expect(result.info?.blocked).toEqual([]); + }); + + it('enforces explicit scheme matches when allow list entries include schemes', async () => { + const text = ['https://bank.example.com', 'http://bank.example.com'].join(' '); + + const result = await urls( + {}, + text, + { + url_allow_list: ['https://bank.example.com'], + allowed_schemes: new Set(['https', 'http']), + allow_subdomains: false, + block_userinfo: true, + } + ); + + expect(result.info?.allowed).toEqual(expect.arrayContaining(['https://bank.example.com'])); + expect(result.info?.blocked).toContain('http://bank.example.com'); + }); + + it('supports CIDR ranges and explicit port matching', async () => { + const text = [ + 'https://10.5.5.5', + 'https://192.168.1.100', + 'https://192.168.2.1', + 'https://example.com:8443', + 'https://example.com', + 'https://api.internal.com:9000', + ].join(' '); + + const result = await urls( + {}, + text, + { + url_allow_list: ['10.0.0.0/8', '192.168.1.0/24', 'https://example.com:8443', 'api.internal.com'], + allowed_schemes: new Set(['https']), + allow_subdomains: false, + block_userinfo: true, + } + ); + + expect(result.info?.allowed).toEqual( + expect.arrayContaining([ + 'https://10.5.5.5', + 'https://192.168.1.100', + 'https://example.com:8443', + 'https://api.internal.com:9000', + ]) + ); + expect(result.info?.blocked).toEqual( + expect.arrayContaining(['https://192.168.2.1', 'https://example.com']) + ); + }); + + it('requires query strings and fragments to match exactly when configured', async () => { + const text = [ + 'https://example.com/search?q=test', + 'https://example.com/search?q=other', + 'https://example.com/docs#intro', + 'https://example.com/docs#outro', + ].join(' '); + + const result = await urls( + {}, + text, + { + url_allow_list: [ + 'https://example.com/search?q=test', + 'https://example.com/docs#intro', + ], + allowed_schemes: new Set(['https']), + allow_subdomains: false, + block_userinfo: true, + } + ); + + expect(result.info?.allowed).toEqual( + expect.arrayContaining([ + 'https://example.com/search?q=test', + 'https://example.com/docs#intro', + ]) + ); + expect(result.info?.blocked).toEqual( + expect.arrayContaining([ + 'https://example.com/search?q=other', + 'https://example.com/docs#outro', + ]) + ); + }); + + it('blocks URLs containing only a password in userinfo when configured', async () => { + const result = await urls( + {}, + 'https://:secret@example.com', + { + url_allow_list: ['example.com'], + allowed_schemes: new Set(['https']), + allow_subdomains: false, + block_userinfo: true, + } + ); + + expect(result.info?.blocked).toContain('https://:secret@example.com'); + expect( + (result.info?.blocked_reasons as string[]).some((reason) => reason.includes('userinfo')) + ).toBe(true); + }); + + it('handles malformed ports gracefully without crashing', async () => { + const text = [ + 'https://example.com:99999', + 'https://example.com:abc', + 'https://example.com:-1', + ].join(' '); + + const result = await urls( + {}, + text, + { + url_allow_list: ['example.com'], + allowed_schemes: new Set(['https']), + allow_subdomains: false, + block_userinfo: true, + } + ); + + expect(result.tripwireTriggered).toBe(true); + expect(result.info?.blocked).toHaveLength(3); + expect(result.info?.blocked_reasons).toHaveLength(3); + }); }); describe('competitors guardrail', () => { diff --git a/src/checks/urls.ts b/src/checks/urls.ts index 5725830..621057b 100644 --- a/src/checks/urls.ts +++ b/src/checks/urls.ts @@ -9,6 +9,55 @@ import { z } from 'zod'; import { CheckFn } from '../types'; import { defaultSpecRegistry } from '../registry'; +const DEFAULT_PORTS: Record = { + http: 80, + https: 443, +}; + +const SCHEME_PREFIX_RE = /^[a-z][a-z0-9+.-]*:\/\//i; +const HOSTLESS_SCHEMES = new Set(['data', 'javascript', 'vbscript', 'mailto']); + +function normalizeAllowedSchemes(value: unknown): Set { + if (value === undefined || value === null) { + return new Set(['https']); + } + + let rawValues: unknown[]; + if (typeof value === 'string') { + rawValues = [value]; + } else if (value instanceof Set) { + rawValues = Array.from(value.values()); + } else if (Array.isArray(value)) { + rawValues = value; + } else { + throw new Error('allowed_schemes entries must be strings'); + } + + const normalized = new Set(); + for (const entry of rawValues) { + if (typeof entry !== 'string') { + throw new Error('allowed_schemes entries must be strings'); + } + let cleaned = entry.trim().toLowerCase(); + if (!cleaned) { + continue; + } + if (cleaned.endsWith('://')) { + cleaned = cleaned.slice(0, -3); + } + cleaned = cleaned.replace(/:+$/, ''); + if (cleaned) { + normalized.add(cleaned); + } + } + + if (normalized.size === 0) { + throw new Error('allowed_schemes must include at least one scheme'); + } + + return normalized; +} + /** * Configuration schema for URL filtering. */ @@ -17,7 +66,7 @@ export const UrlsConfig = z.object({ url_allow_list: z.array(z.string()).default([]), /** Allowed URL schemes/protocols (default: HTTPS only for security) */ allowed_schemes: z - .preprocess((val) => (Array.isArray(val) ? new Set(val) : val), z.set(z.string())) + .preprocess((val) => normalizeAllowedSchemes(val), z.set(z.string())) .default(new Set(['https'])), /** Block URLs with userinfo (user:pass@domain) to prevent credential injection */ block_userinfo: z.boolean().default(true), @@ -45,6 +94,35 @@ function ipToInt(ip: string): number { return (parts[0] << 24) + (parts[1] << 16) + (parts[2] << 8) + parts[3]; } +function extractHostCandidate(url: string): string | null { + if (!url.includes('://')) { + return null; + } + + const [, rest] = url.split('://', 2); + if (!rest) { + return null; + } + + const hostAndRest = rest.split(/[/?#]/, 1)[0]; + const withoutCreds = hostAndRest.includes('@') + ? hostAndRest.split('@').pop() ?? '' + : hostAndRest; + if (!withoutCreds) { + return null; + } + + if (withoutCreds.startsWith('[')) { + const closingIndex = withoutCreds.indexOf(']'); + if (closingIndex !== -1) { + return withoutCreds.slice(0, closingIndex + 1); + } + return withoutCreds; + } + + return withoutCreds.split(':', 1)[0]; +} + /** * Detect URLs in text using robust regex patterns. */ @@ -129,8 +207,12 @@ function detectUrls(text: string): string[] { schemeUrlDomains.add(bareDomain); } } catch { - // Skip URLs with parsing errors (malformed URLs, encoding issues) - // This is expected for edge cases and doesn't require logging + const fallbackHost = extractHostCandidate(url); + if (fallbackHost) { + const normalizedHost = fallbackHost.toLowerCase(); + schemeUrlDomains.add(normalizedHost); + schemeUrlDomains.add(normalizedHost.replace(/^www\./, '')); + } } finalUrls.push(url); } @@ -152,7 +234,11 @@ function detectUrls(text: string): string[] { } /** - * Validate URL against security configuration. + * Validate URL security properties using WHATWG URL parsing. + * + * Ensures scheme compliance, hostname presence (for host-based schemes), and + * blocks userinfo when configured. Returns structured errors for guardrail + * reporting while keeping the parsed URL when valid. */ function validateUrlSecurity( urlString: string, @@ -166,14 +252,14 @@ function validateUrlSecurity( if (urlString.includes('://')) { // Standard URL with double-slash scheme (http://, https://, ftp://, etc.) parsedUrl = new URL(urlString); - originalScheme = parsedUrl.protocol.replace(':', ''); + originalScheme = parsedUrl.protocol.replace(/:$/, ''); } else if ( urlString.includes(':') && urlString.split(':', 1)[0].match(/^(data|javascript|vbscript|mailto)$/) ) { // Special single-colon schemes parsedUrl = new URL(urlString); - originalScheme = parsedUrl.protocol.replace(':', ''); + originalScheme = parsedUrl.protocol.replace(/:$/, ''); } else { // Add http scheme for parsing, but remember this is a default parsedUrl = new URL(`http://${urlString}`); @@ -186,17 +272,19 @@ function validateUrlSecurity( } // Special schemes like data: and javascript: don't need hostname - const specialSchemes = new Set(['data:', 'javascript:', 'vbscript:', 'mailto:']); - if (!specialSchemes.has(parsedUrl.protocol) && !parsedUrl.hostname) { + const parsedScheme = parsedUrl.protocol.replace(/:$/, '').toLowerCase(); + if (!HOSTLESS_SCHEMES.has(parsedScheme) && !parsedUrl.hostname) { return { parsedUrl: null, reason: 'Invalid URL format' }; } // Security validations - use original scheme - if (!config.allowed_schemes.has(originalScheme)) { - return { parsedUrl: null, reason: `Blocked scheme: ${originalScheme}` }; + const normalizedScheme = originalScheme.toLowerCase(); + + if (!config.allowed_schemes.has(normalizedScheme)) { + return { parsedUrl: null, reason: `Blocked scheme: ${normalizedScheme}` }; } - if (config.block_userinfo && parsedUrl.username) { + if (config.block_userinfo && (parsedUrl.username || parsedUrl.password)) { return { parsedUrl: null, reason: 'Contains userinfo (potential credential injection)' }; } @@ -204,8 +292,37 @@ function validateUrlSecurity( return { parsedUrl, reason: '' }; } catch (error) { // Provide specific error information for debugging + const errorName = error instanceof Error ? error.name : 'Error'; const errorMessage = error instanceof Error ? error.message : String(error); - return { parsedUrl: null, reason: `Invalid URL format: ${errorMessage}` }; + return { parsedUrl: null, reason: `URL parsing error: ${errorName}: ${errorMessage}` }; + } +} + +function safeGetPort(parsed: URL, scheme: string): number | null { + if (parsed.port) { + const portNumber = Number(parsed.port); + if (Number.isInteger(portNumber) && portNumber >= 0 && portNumber <= 65535) { + return portNumber; + } + return null; + } + + if (scheme) { + const defaultPort = DEFAULT_PORTS[scheme as keyof typeof DEFAULT_PORTS]; + if (typeof defaultPort === 'number') { + return defaultPort; + } + } + + return null; +} + +function isIpv4Address(value: string): boolean { + try { + ipToInt(value); + return true; + } catch { + return false; } } @@ -222,82 +339,133 @@ function isUrlAllowed(parsedUrl: URL, allowList: string[], allowSubdomains: bool return false; } - for (const allowedEntry of allowList) { - const entry = allowedEntry.toLowerCase().trim(); + const urlDomain = urlHost.replace(/^www\./, ''); + const schemeLower = parsedUrl.protocol ? parsedUrl.protocol.replace(/:$/, '').toLowerCase() : ''; + const urlPort = safeGetPort(parsedUrl, schemeLower); + const hostIndicatesPort = Boolean(parsedUrl.host) && parsedUrl.host.includes(':') && !parsedUrl.host.startsWith('['); + if (urlPort === null && hostIndicatesPort) { + return false; + } - // Handle full URLs with specific paths - if (entry.includes('://')) { - try { - const allowedUrl = new URL(entry); - const allowedHost = allowedUrl.hostname?.toLowerCase(); - const allowedPath = allowedUrl.pathname; - - if (urlHost === allowedHost) { - // Check if the URL path starts with the allowed path - if (!allowedPath || allowedPath === '/' || parsedUrl.pathname.startsWith(allowedPath)) { - return true; - } - } - } catch (error) { - // Invalid URL in allow list - log warning for configuration issues - console.warn( - `Warning: Invalid URL in allow list: "${entry}" - ${error instanceof Error ? error.message : error}` - ); - } + const urlPath = parsedUrl.pathname || '/'; + const urlQuery = parsedUrl.search ? parsedUrl.search.slice(1) : ''; + const urlFragment = parsedUrl.hash ? parsedUrl.hash.slice(1) : ''; + const urlIsIp = isIpv4Address(urlHost); + const urlIpInt = urlIsIp ? ipToInt(urlHost) : null; + + for (const allowedEntry of allowList) { + const normalizedEntry = allowedEntry.toLowerCase().trim(); + if (!normalizedEntry) { continue; } - // Handle IP addresses and CIDR blocks + const hasExplicitScheme = SCHEME_PREFIX_RE.test(normalizedEntry); + + let parsedAllowed: URL; try { - // Basic IP pattern check - if (/^\d+\.\d+\.\d+\.\d+/.test(entry.split('/')[0])) { - if (entry === urlHost) { - return true; + parsedAllowed = hasExplicitScheme + ? new URL(normalizedEntry) + : new URL(`http://${normalizedEntry}`); + } catch (error) { + console.warn( + `Warning: Invalid URL in allow list: "${normalizedEntry}" - ${error instanceof Error ? error.message : error}` + ); + continue; + } + + const allowedHost = (parsedAllowed.hostname || '').toLowerCase(); + if (!allowedHost) { + continue; + } + + const allowedScheme = hasExplicitScheme ? parsedAllowed.protocol.replace(/:$/, '').toLowerCase() : ''; + const allowedPort = safeGetPort(parsedAllowed, allowedScheme); + const allowIndicatesPort = parsedAllowed.host.includes(':') && !parsedAllowed.host.startsWith('['); + if (allowedPort === null && allowIndicatesPort) { + continue; + } + + const allowedPath = parsedAllowed.pathname || ''; + const allowedQuery = parsedAllowed.search ? parsedAllowed.search.slice(1) : ''; + const allowedFragment = parsedAllowed.hash ? parsedAllowed.hash.slice(1) : ''; + + const allowedHostIsIp = isIpv4Address(allowedHost); + if (allowedHostIsIp) { + if (!urlIsIp || urlIpInt === null) { + continue; + } + + if (hasExplicitScheme && allowedScheme && allowedScheme !== schemeLower) { + continue; + } + + if (allowedPort !== null) { + if (urlPort === null || allowedPort !== urlPort) { + continue; } - // Proper CIDR validation - if (entry.includes('/') && urlHost.match(/^\d+\.\d+\.\d+\.\d+$/)) { - const [network, prefixStr] = entry.split('/'); - const prefix = parseInt(prefixStr); + } - if (prefix >= 0 && prefix <= 32) { - // Convert IPs to 32-bit integers for bitwise comparison - const networkInt = ipToInt(network); - const hostInt = ipToInt(urlHost); + if (ipToInt(allowedHost) === urlIpInt) { + return true; + } - // Create subnet mask - const mask = (0xffffffff << (32 - prefix)) >>> 0; + let networkSpec = allowedHost; + if (allowedPath && allowedPath !== '/') { + networkSpec = `${networkSpec}${allowedPath}`; + } - // Check if host is in the network - if ((networkInt & mask) === (hostInt & mask)) { + if (networkSpec.includes('/')) { + const [network, prefixStr] = networkSpec.split('/'); + const prefix = Number(prefixStr); + if (Number.isInteger(prefix) && prefix >= 0 && prefix <= 32) { + try { + const networkInt = ipToInt(network); + const mask = prefix === 0 ? 0 : (0xffffffff << (32 - prefix)) >>> 0; + if ((networkInt & mask) === (urlIpInt & mask)) { return true; } + } catch { + // Invalid CIDR entry; ignore. } } - continue; } - } catch (error) { - // Expected: entry is not an IP address/CIDR, continue to domain matching - // Only log if it looks like it was intended to be an IP but failed parsing - if (/^\d+\.\d+/.test(entry)) { - console.warn( - `Warning: Malformed IP address in allow list: "${entry}" - ${error instanceof Error ? error.message : error}` - ); + + continue; + } + + const allowedDomain = allowedHost.replace(/^www\./, ''); + + if (allowedPort !== null) { + if (urlPort === null || allowedPort !== urlPort) { + continue; } } - // Handle domain matching - const allowedDomain = entry.replace(/^www\./, ''); - const urlDomain = urlHost.replace(/^www\./, ''); + const hostMatches = + urlDomain === allowedDomain || (allowSubdomains && urlDomain.endsWith(`.${allowedDomain}`)); + if (!hostMatches) { + continue; + } + + if (hasExplicitScheme && allowedScheme && allowedScheme !== schemeLower) { + continue; + } + + if (allowedPath && allowedPath !== '/') { + if (urlPath !== allowedPath && !urlPath.startsWith(`${allowedPath}/`)) { + continue; + } + } - // Exact match always allowed - if (urlDomain === allowedDomain) { - return true; + if (allowedQuery && allowedQuery !== urlQuery) { + continue; } - // Subdomain matching if enabled - if (allowSubdomains && urlDomain.endsWith(`.${allowedDomain}`)) { - return true; + if (allowedFragment && allowedFragment !== urlFragment) { + continue; } + + return true; } return false; @@ -329,8 +497,8 @@ export const urls: CheckFn = async (ctx, data, // Check against allow list // Special schemes (data:, javascript:, mailto:) don't have meaningful hosts // so they only need scheme validation, not host-based allow list checking - const hostlessSchemes = new Set(['data:', 'javascript:', 'vbscript:', 'mailto:']); - if (hostlessSchemes.has(parsedUrl.protocol)) { + const parsedScheme = parsedUrl.protocol.replace(/:$/, '').toLowerCase(); + if (HOSTLESS_SCHEMES.has(parsedScheme)) { // For hostless schemes, only scheme permission matters (no allow list needed) // They were already validated for scheme permission in validateUrlSecurity allowed.push(urlString); @@ -349,7 +517,7 @@ export const urls: CheckFn = async (ctx, data, return { tripwireTriggered: tripwireTriggered, info: { - guardrail_name: 'URL Filter (Direct Config)', + guardrail_name: 'URL Filter', config: { allowed_schemes: Array.from(actualConfig.allowed_schemes), block_userinfo: actualConfig.block_userinfo, From da7fc94a20dd1e62ec20c083bba094a26f2ae611 Mon Sep 17 00:00:00 2001 From: Steven C Date: Thu, 20 Nov 2025 12:57:48 -0500 Subject: [PATCH 2/7] Handle trailing slashes in allow list --- .../unit/checks/keywords-urls.test.ts | 29 +++++++++++++++++++ src/checks/urls.ts | 8 ++++- 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/src/__tests__/unit/checks/keywords-urls.test.ts b/src/__tests__/unit/checks/keywords-urls.test.ts index 59e7b8b..1288d62 100644 --- a/src/__tests__/unit/checks/keywords-urls.test.ts +++ b/src/__tests__/unit/checks/keywords-urls.test.ts @@ -311,6 +311,35 @@ describe('urls guardrail', () => { expect(result.info?.blocked).toHaveLength(3); expect(result.info?.blocked_reasons).toHaveLength(3); }); + + it('handles trailing slashes in allow list paths correctly', async () => { + // Regression test: allow list entries with trailing slashes should match subpaths + // Previously, '/api/' + '/' created '/api//' which wouldn't match '/api/users' + const text = [ + 'https://example.com/api/users', + 'https://example.com/api/v2/data', + 'https://example.com/other', + ].join(' '); + + const result = await urls( + {}, + text, + { + url_allow_list: ['https://example.com/api/'], + allowed_schemes: new Set(['https']), + allow_subdomains: false, + block_userinfo: true, + } + ); + + expect(result.info?.allowed).toEqual( + expect.arrayContaining([ + 'https://example.com/api/users', + 'https://example.com/api/v2/data', + ]) + ); + expect(result.info?.blocked).toContain('https://example.com/other'); + }); }); describe('competitors guardrail', () => { diff --git a/src/checks/urls.ts b/src/checks/urls.ts index 621057b..63fd7fc 100644 --- a/src/checks/urls.ts +++ b/src/checks/urls.ts @@ -452,7 +452,13 @@ function isUrlAllowed(parsedUrl: URL, allowList: string[], allowSubdomains: bool } if (allowedPath && allowedPath !== '/') { - if (urlPath !== allowedPath && !urlPath.startsWith(`${allowedPath}/`)) { + // Normalize trailing slashes to avoid double-slash issues when checking subpaths + // e.g., if allowedPath is "/api/", we normalize to "/api" before adding "/" + // so we check "/api/" not "/api//" when matching "/api/users" + const normalizedAllowedPath = allowedPath.replace(/\/+$/, ''); + const normalizedUrlPath = urlPath.replace(/\/+$/, ''); + + if (normalizedUrlPath !== normalizedAllowedPath && !normalizedUrlPath.startsWith(`${normalizedAllowedPath}/`)) { continue; } } From bf90130d48f82c0d669611fe260341c314f6e003 Mon Sep 17 00:00:00 2001 From: Steven C Date: Thu, 20 Nov 2025 13:48:16 -0500 Subject: [PATCH 3/7] Support schemeless matching --- .../unit/checks/keywords-urls.test.ts | 44 ++++++++++++++++++ src/checks/urls.ts | 46 +++++++++++++------ 2 files changed, 75 insertions(+), 15 deletions(-) diff --git a/src/__tests__/unit/checks/keywords-urls.test.ts b/src/__tests__/unit/checks/keywords-urls.test.ts index 1288d62..cf69e4a 100644 --- a/src/__tests__/unit/checks/keywords-urls.test.ts +++ b/src/__tests__/unit/checks/keywords-urls.test.ts @@ -340,6 +340,50 @@ describe('urls guardrail', () => { ); expect(result.info?.blocked).toContain('https://example.com/other'); }); + + it('matches scheme-less URLs against scheme-qualified allow list entries', async () => { + // Test exact behavior: scheme-qualified allow list vs scheme-less/explicit URLs + const config = { + url_allow_list: ['https://suntropy.es'], + allowed_schemes: new Set(['https']), + allow_subdomains: false, + block_userinfo: true, + }; + + // Test scheme-less URL (should be allowed) + const result1 = await urls({}, 'Visit suntropy.es', config); + expect(result1.info?.allowed).toContain('suntropy.es'); + expect(result1.tripwireTriggered).toBe(false); + + // Test HTTPS URL (should match allow list scheme) + const result2 = await urls({}, 'Visit https://suntropy.es', config); + expect(result2.info?.allowed).toContain('https://suntropy.es'); + expect(result2.tripwireTriggered).toBe(false); + + // Test HTTP URL (wrong explicit scheme should be blocked) + const result3 = await urls({}, 'Visit http://suntropy.es', config); + expect(result3.info?.blocked).toContain('http://suntropy.es'); + expect(result3.tripwireTriggered).toBe(true); + }); + + it('blocks subdomains and paths correctly with scheme-qualified allow list', async () => { + // Verify subdomains and paths are still blocked according to allow list rules + const config = { + url_allow_list: ['https://suntropy.es'], + allowed_schemes: new Set(['https']), + allow_subdomains: false, + block_userinfo: true, + }; + + const text = 'Visit help-suntropy.es and help.suntropy.es'; + const result = await urls({}, text, config); + + // Both should be blocked - not in allow list + expect(result.tripwireTriggered).toBe(true); + expect(result.info?.blocked).toHaveLength(2); + expect(result.info?.blocked).toContain('help-suntropy.es'); + expect(result.info?.blocked).toContain('help.suntropy.es'); + }); }); describe('competitors guardrail', () => { diff --git a/src/checks/urls.ts b/src/checks/urls.ts index 63fd7fc..9f03a4d 100644 --- a/src/checks/urls.ts +++ b/src/checks/urls.ts @@ -243,16 +243,18 @@ function detectUrls(text: string): string[] { function validateUrlSecurity( urlString: string, config: UrlsConfig -): { parsedUrl: URL | null; reason: string } { +): { parsedUrl: URL | null; reason: string; hadScheme: boolean } { try { let parsedUrl: URL; let originalScheme: string; + let hadScheme: boolean; // Parse URL - preserve original scheme for validation if (urlString.includes('://')) { // Standard URL with double-slash scheme (http://, https://, ftp://, etc.) parsedUrl = new URL(urlString); originalScheme = parsedUrl.protocol.replace(/:$/, ''); + hadScheme = true; } else if ( urlString.includes(':') && urlString.split(':', 1)[0].match(/^(data|javascript|vbscript|mailto)$/) @@ -260,41 +262,44 @@ function validateUrlSecurity( // Special single-colon schemes parsedUrl = new URL(urlString); originalScheme = parsedUrl.protocol.replace(/:$/, ''); + hadScheme = true; } else { // Add http scheme for parsing, but remember this is a default parsedUrl = new URL(`http://${urlString}`); originalScheme = 'http'; // Default scheme for scheme-less URLs + hadScheme = false; } // Basic validation: must have scheme and hostname (except for special schemes) if (!parsedUrl.protocol) { - return { parsedUrl: null, reason: 'Invalid URL format' }; + return { parsedUrl: null, reason: 'Invalid URL format', hadScheme: false }; } // Special schemes like data: and javascript: don't need hostname const parsedScheme = parsedUrl.protocol.replace(/:$/, '').toLowerCase(); if (!HOSTLESS_SCHEMES.has(parsedScheme) && !parsedUrl.hostname) { - return { parsedUrl: null, reason: 'Invalid URL format' }; + return { parsedUrl: null, reason: 'Invalid URL format', hadScheme }; } // Security validations - use original scheme + // Only check allowed_schemes if the URL explicitly had a scheme const normalizedScheme = originalScheme.toLowerCase(); - if (!config.allowed_schemes.has(normalizedScheme)) { - return { parsedUrl: null, reason: `Blocked scheme: ${normalizedScheme}` }; + if (hadScheme && !config.allowed_schemes.has(normalizedScheme)) { + return { parsedUrl: null, reason: `Blocked scheme: ${normalizedScheme}`, hadScheme }; } if (config.block_userinfo && (parsedUrl.username || parsedUrl.password)) { - return { parsedUrl: null, reason: 'Contains userinfo (potential credential injection)' }; + return { parsedUrl: null, reason: 'Contains userinfo (potential credential injection)', hadScheme }; } // Everything else (IPs, localhost, private IPs) goes through allow list logic - return { parsedUrl, reason: '' }; + return { parsedUrl, reason: '', hadScheme }; } catch (error) { // Provide specific error information for debugging const errorName = error instanceof Error ? error.name : 'Error'; const errorMessage = error instanceof Error ? error.message : String(error); - return { parsedUrl: null, reason: `URL parsing error: ${errorName}: ${errorMessage}` }; + return { parsedUrl: null, reason: `URL parsing error: ${errorName}: ${errorMessage}`, hadScheme: false }; } } @@ -328,8 +333,13 @@ function isIpv4Address(value: string): boolean { /** * Check if URL is allowed based on the allow list configuration. + * + * @param parsedUrl - The parsed URL to check + * @param allowList - List of allowed URL patterns + * @param allowSubdomains - Whether to allow subdomains + * @param hadScheme - Whether the original URL had an explicit scheme */ -function isUrlAllowed(parsedUrl: URL, allowList: string[], allowSubdomains: boolean): boolean { +function isUrlAllowed(parsedUrl: URL, allowList: string[], allowSubdomains: boolean, hadScheme: boolean): boolean { if (allowList.length === 0) { return false; } @@ -395,11 +405,14 @@ function isUrlAllowed(parsedUrl: URL, allowList: string[], allowSubdomains: bool continue; } - if (hasExplicitScheme && allowedScheme && allowedScheme !== schemeLower) { + // Scheme matching for IPs: only enforce when BOTH allow list entry AND URL have explicit schemes + if (hasExplicitScheme && hadScheme && allowedScheme && allowedScheme !== schemeLower) { continue; } - if (allowedPort !== null) { + // Port matching: only enforce when allow list entry explicitly specifies a port + // Check parsedAllowed.port (empty string when no port specified) not allowedPort (always has default) + if (parsedAllowed.port) { if (urlPort === null || allowedPort !== urlPort) { continue; } @@ -435,7 +448,9 @@ function isUrlAllowed(parsedUrl: URL, allowList: string[], allowSubdomains: bool const allowedDomain = allowedHost.replace(/^www\./, ''); - if (allowedPort !== null) { + // Port matching: only enforce when allow list entry explicitly specifies a port + // Check parsedAllowed.port (empty string when no port specified) not allowedPort (always has default) + if (parsedAllowed.port) { if (urlPort === null || allowedPort !== urlPort) { continue; } @@ -447,7 +462,8 @@ function isUrlAllowed(parsedUrl: URL, allowList: string[], allowSubdomains: bool continue; } - if (hasExplicitScheme && allowedScheme && allowedScheme !== schemeLower) { + // Scheme matching for domains: only enforce when BOTH allow list entry AND URL have explicit schemes + if (hasExplicitScheme && hadScheme && allowedScheme && allowedScheme !== schemeLower) { continue; } @@ -492,7 +508,7 @@ export const urls: CheckFn = async (ctx, data, for (const urlString of detectedUrls) { // Validate URL with security checks - const { parsedUrl, reason } = validateUrlSecurity(urlString, actualConfig); + const { parsedUrl, reason, hadScheme } = validateUrlSecurity(urlString, actualConfig); if (parsedUrl === null) { blocked.push(urlString); @@ -509,7 +525,7 @@ export const urls: CheckFn = async (ctx, data, // They were already validated for scheme permission in validateUrlSecurity allowed.push(urlString); } else if ( - isUrlAllowed(parsedUrl, actualConfig.url_allow_list, actualConfig.allow_subdomains) + isUrlAllowed(parsedUrl, actualConfig.url_allow_list, actualConfig.allow_subdomains, hadScheme) ) { allowed.push(urlString); } else { From f52fb8946600d38548e260e39e6983c41f729639 Mon Sep 17 00:00:00 2001 From: Steven C Date: Thu, 20 Nov 2025 14:27:34 -0500 Subject: [PATCH 4/7] Port normalization and null-safety check --- .../unit/checks/keywords-urls.test.ts | 38 +++++++++++++++++++ src/checks/urls.ts | 32 +++++++++++----- 2 files changed, 61 insertions(+), 9 deletions(-) diff --git a/src/__tests__/unit/checks/keywords-urls.test.ts b/src/__tests__/unit/checks/keywords-urls.test.ts index cf69e4a..6ba3627 100644 --- a/src/__tests__/unit/checks/keywords-urls.test.ts +++ b/src/__tests__/unit/checks/keywords-urls.test.ts @@ -384,6 +384,44 @@ describe('urls guardrail', () => { expect(result.info?.blocked).toContain('help-suntropy.es'); expect(result.info?.blocked).toContain('help.suntropy.es'); }); + + it('treats explicit default ports as equivalent to no port', async () => { + // URLs with explicit default ports should match allow list entries without ports + const config = { + url_allow_list: ['example.com'], + allowed_schemes: new Set(['https', 'http']), + allow_subdomains: false, + block_userinfo: true, + }; + + const text = 'Visit https://example.com:443 and http://example.com:80'; + const result = await urls({}, text, config); + + // Both should be allowed (443 is default for https, 80 is default for http) + expect(result.tripwireTriggered).toBe(false); + expect(result.info?.allowed).toContain('https://example.com:443'); + expect(result.info?.allowed).toContain('http://example.com:80'); + expect(result.info?.blocked).toEqual([]); + }); + + it('blocks non-default ports when allow list has no port', async () => { + // URLs with non-default ports should be blocked when allow list doesn't specify a port + const config = { + url_allow_list: ['example.com'], + allowed_schemes: new Set(['https']), + allow_subdomains: false, + block_userinfo: true, + }; + + const text = 'Visit https://example.com:8443 and https://example.com:9000'; + const result = await urls({}, text, config); + + // Both should be allowed - when allow list has no port, any port is OK + // (This matches the behavior: no port restriction when not specified) + expect(result.tripwireTriggered).toBe(false); + expect(result.info?.allowed).toContain('https://example.com:8443'); + expect(result.info?.allowed).toContain('https://example.com:9000'); + }); }); describe('competitors guardrail', () => { diff --git a/src/checks/urls.ts b/src/checks/urls.ts index 9f03a4d..b631d59 100644 --- a/src/checks/urls.ts +++ b/src/checks/urls.ts @@ -390,7 +390,7 @@ function isUrlAllowed(parsedUrl: URL, allowList: string[], allowSubdomains: bool const allowedScheme = hasExplicitScheme ? parsedAllowed.protocol.replace(/:$/, '').toLowerCase() : ''; const allowedPort = safeGetPort(parsedAllowed, allowedScheme); - const allowIndicatesPort = parsedAllowed.host.includes(':') && !parsedAllowed.host.startsWith('['); + const allowIndicatesPort = Boolean(parsedAllowed.host) && parsedAllowed.host.includes(':') && !parsedAllowed.host.startsWith('['); if (allowedPort === null && allowIndicatesPort) { continue; } @@ -410,10 +410,17 @@ function isUrlAllowed(parsedUrl: URL, allowList: string[], allowSubdomains: bool continue; } - // Port matching: only enforce when allow list entry explicitly specifies a port - // Check parsedAllowed.port (empty string when no port specified) not allowedPort (always has default) - if (parsedAllowed.port) { - if (urlPort === null || allowedPort !== urlPort) { + // Port matching: only enforce when allow list entry explicitly specifies a non-default port + // Explicit default ports (e.g., :443 for https) should be treated as no port specified + const allowedHasNonDefaultPort = parsedAllowed.port && + (allowedPort !== DEFAULT_PORTS[allowedScheme as keyof typeof DEFAULT_PORTS]); + + if (allowedHasNonDefaultPort) { + // Allow list has explicit non-default port, so URL must match exactly + const urlHasNonDefaultPort = parsedUrl.port && + (urlPort !== DEFAULT_PORTS[schemeLower as keyof typeof DEFAULT_PORTS]); + + if (!urlHasNonDefaultPort || allowedPort !== urlPort) { continue; } } @@ -448,10 +455,17 @@ function isUrlAllowed(parsedUrl: URL, allowList: string[], allowSubdomains: bool const allowedDomain = allowedHost.replace(/^www\./, ''); - // Port matching: only enforce when allow list entry explicitly specifies a port - // Check parsedAllowed.port (empty string when no port specified) not allowedPort (always has default) - if (parsedAllowed.port) { - if (urlPort === null || allowedPort !== urlPort) { + // Port matching: only enforce when allow list entry explicitly specifies a non-default port + // Explicit default ports (e.g., :443 for https) should be treated as no port specified + const allowedHasNonDefaultPort = parsedAllowed.port && + (allowedPort !== DEFAULT_PORTS[allowedScheme as keyof typeof DEFAULT_PORTS]); + + if (allowedHasNonDefaultPort) { + // Allow list has explicit non-default port, so URL must match exactly + const urlHasNonDefaultPort = parsedUrl.port && + (urlPort !== DEFAULT_PORTS[schemeLower as keyof typeof DEFAULT_PORTS]); + + if (!urlHasNonDefaultPort || allowedPort !== urlPort) { continue; } } From 28cabf9c62b4066d72db7bebff4e1fc9c01dbd57 Mon Sep 17 00:00:00 2001 From: Steven C Date: Thu, 20 Nov 2025 14:53:46 -0500 Subject: [PATCH 5/7] Address copilot comments --- src/checks/urls.ts | 76 +++++++++++++++++++++++++++++----------------- 1 file changed, 48 insertions(+), 28 deletions(-) diff --git a/src/checks/urls.ts b/src/checks/urls.ts index b631d59..7a8cbb9 100644 --- a/src/checks/urls.ts +++ b/src/checks/urls.ts @@ -14,7 +14,7 @@ const DEFAULT_PORTS: Record = { https: 443, }; -const SCHEME_PREFIX_RE = /^[a-z][a-z0-9+.-]*:\/\//i; +const SCHEME_PREFIX_RE = /^[a-z][a-z0-9+.-]*:\/\//; const HOSTLESS_SCHEMES = new Set(['data', 'javascript', 'vbscript', 'mailto']); function normalizeAllowedSchemes(value: unknown): Set { @@ -30,7 +30,7 @@ function normalizeAllowedSchemes(value: unknown): Set { } else if (Array.isArray(value)) { rawValues = value; } else { - throw new Error('allowed_schemes entries must be strings'); + throw new Error('allowed_schemes must be a string, Set, or Array'); } const normalized = new Set(); @@ -331,6 +331,44 @@ function isIpv4Address(value: string): boolean { } } +/** + * Check if port matching should block the URL. + * + * Only enforces port matching when the allow list entry explicitly specifies + * a non-default port. Explicit default ports (e.g., :443 for https) are + * treated as equivalent to no port being specified. + * + * @param urlPort - The URL's port number (or default for its scheme) + * @param urlParsed - The parsed URL object + * @param allowedPort - The allow list entry's port number (or default for its scheme) + * @param allowedParsed - The parsed allow list entry URL object + * @param urlScheme - The URL's scheme + * @param allowedScheme - The allow list entry's scheme + * @returns true if the port doesn't match and should be blocked, false otherwise + */ +function shouldBlockDueToPortMismatch( + urlPort: number | null, + urlParsed: URL, + allowedPort: number | null, + allowedParsed: URL, + urlScheme: string, + allowedScheme: string +): boolean { + // Only enforce port matching when allow list entry explicitly specifies a non-default port + const allowedHasNonDefaultPort = allowedParsed.port && + (allowedPort !== DEFAULT_PORTS[allowedScheme as keyof typeof DEFAULT_PORTS]); + + if (!allowedHasNonDefaultPort) { + return false; // No port restriction when allow list has no non-default port + } + + // Allow list has explicit non-default port, so URL must match exactly + const urlHasNonDefaultPort = urlParsed.port && + (urlPort !== DEFAULT_PORTS[urlScheme as keyof typeof DEFAULT_PORTS]); + + return !urlHasNonDefaultPort || allowedPort !== urlPort; +} + /** * Check if URL is allowed based on the allow list configuration. * @@ -406,23 +444,13 @@ function isUrlAllowed(parsedUrl: URL, allowList: string[], allowSubdomains: bool } // Scheme matching for IPs: only enforce when BOTH allow list entry AND URL have explicit schemes - if (hasExplicitScheme && hadScheme && allowedScheme && allowedScheme !== schemeLower) { + if (hasExplicitScheme && hadScheme && allowedScheme !== schemeLower) { continue; } // Port matching: only enforce when allow list entry explicitly specifies a non-default port - // Explicit default ports (e.g., :443 for https) should be treated as no port specified - const allowedHasNonDefaultPort = parsedAllowed.port && - (allowedPort !== DEFAULT_PORTS[allowedScheme as keyof typeof DEFAULT_PORTS]); - - if (allowedHasNonDefaultPort) { - // Allow list has explicit non-default port, so URL must match exactly - const urlHasNonDefaultPort = parsedUrl.port && - (urlPort !== DEFAULT_PORTS[schemeLower as keyof typeof DEFAULT_PORTS]); - - if (!urlHasNonDefaultPort || allowedPort !== urlPort) { - continue; - } + if (shouldBlockDueToPortMismatch(urlPort, parsedUrl, allowedPort, parsedAllowed, schemeLower, allowedScheme)) { + continue; } if (ipToInt(allowedHost) === urlIpInt) { @@ -456,18 +484,8 @@ function isUrlAllowed(parsedUrl: URL, allowList: string[], allowSubdomains: bool const allowedDomain = allowedHost.replace(/^www\./, ''); // Port matching: only enforce when allow list entry explicitly specifies a non-default port - // Explicit default ports (e.g., :443 for https) should be treated as no port specified - const allowedHasNonDefaultPort = parsedAllowed.port && - (allowedPort !== DEFAULT_PORTS[allowedScheme as keyof typeof DEFAULT_PORTS]); - - if (allowedHasNonDefaultPort) { - // Allow list has explicit non-default port, so URL must match exactly - const urlHasNonDefaultPort = parsedUrl.port && - (urlPort !== DEFAULT_PORTS[schemeLower as keyof typeof DEFAULT_PORTS]); - - if (!urlHasNonDefaultPort || allowedPort !== urlPort) { - continue; - } + if (shouldBlockDueToPortMismatch(urlPort, parsedUrl, allowedPort, parsedAllowed, schemeLower, allowedScheme)) { + continue; } const hostMatches = @@ -477,10 +495,12 @@ function isUrlAllowed(parsedUrl: URL, allowList: string[], allowSubdomains: bool } // Scheme matching for domains: only enforce when BOTH allow list entry AND URL have explicit schemes - if (hasExplicitScheme && hadScheme && allowedScheme && allowedScheme !== schemeLower) { + if (hasExplicitScheme && hadScheme && allowedScheme !== schemeLower) { continue; } + // Path matching: only enforce when allow list entry explicitly specifies a non-root path + // Note: Empty string ('') and root ('/') are both treated as "no path restriction" if (allowedPath && allowedPath !== '/') { // Normalize trailing slashes to avoid double-slash issues when checking subpaths // e.g., if allowedPath is "/api/", we normalize to "/api" before adding "/" From ce6b65d2ffc8e65c16deb76ae70381144e0642c4 Mon Sep 17 00:00:00 2001 From: Steven C Date: Thu, 20 Nov 2025 15:05:57 -0500 Subject: [PATCH 6/7] Better CIDR handling --- .../unit/checks/keywords-urls.test.ts | 52 +++++++++++++++ src/checks/urls.ts | 63 ++++++++++++------- 2 files changed, 94 insertions(+), 21 deletions(-) diff --git a/src/__tests__/unit/checks/keywords-urls.test.ts b/src/__tests__/unit/checks/keywords-urls.test.ts index 6ba3627..5e62658 100644 --- a/src/__tests__/unit/checks/keywords-urls.test.ts +++ b/src/__tests__/unit/checks/keywords-urls.test.ts @@ -422,6 +422,58 @@ describe('urls guardrail', () => { expect(result.info?.allowed).toContain('https://example.com:8443'); expect(result.info?.allowed).toContain('https://example.com:9000'); }); + + it('accepts CIDR /0 with 0.0.0.0 network address', async () => { + // 0.0.0.0/0 should match all IPs + const config = { + url_allow_list: ['0.0.0.0/0'], + allowed_schemes: new Set(['https']), + allow_subdomains: false, + block_userinfo: true, + }; + + const text = 'Visit https://1.2.3.4 and https://192.168.1.1'; + const result = await urls({}, text, config); + + expect(result.tripwireTriggered).toBe(false); + expect(result.info?.allowed).toContain('https://1.2.3.4'); + expect(result.info?.allowed).toContain('https://192.168.1.1'); + }); + + it('rejects CIDR /0 with non-zero network address', async () => { + // 10.0.0.0/0 is ambiguous - /0 should only use 0.0.0.0 + const config = { + url_allow_list: ['10.0.0.0/0'], + allowed_schemes: new Set(['https']), + allow_subdomains: false, + block_userinfo: true, + }; + + const text = 'Visit https://10.5.5.5 and https://192.168.1.1'; + const result = await urls({}, text, config); + + // Should block both because 10.0.0.0/0 is invalid (emits warning) + expect(result.tripwireTriggered).toBe(true); + expect(result.info?.blocked).toContain('https://10.5.5.5'); + expect(result.info?.blocked).toContain('https://192.168.1.1'); + }); + + it('rejects invalid CIDR prefix values', async () => { + // Test various invalid CIDR prefixes + const config = { + url_allow_list: ['10.0.0.0/33', '192.168.0.0/-1', '172.16.0.0/abc'], + allowed_schemes: new Set(['https']), + allow_subdomains: false, + block_userinfo: true, + }; + + const text = 'Visit https://10.5.5.5 and https://192.168.1.1 and https://172.16.1.1'; + const result = await urls({}, text, config); + + // All should be blocked due to invalid CIDR configurations + expect(result.tripwireTriggered).toBe(true); + expect(result.info?.blocked).toHaveLength(3); + }); }); describe('competitors guardrail', () => { diff --git a/src/checks/urls.ts b/src/checks/urls.ts index 7a8cbb9..c712d3d 100644 --- a/src/checks/urls.ts +++ b/src/checks/urls.ts @@ -407,6 +407,47 @@ function isUrlAllowed(parsedUrl: URL, allowList: string[], allowSubdomains: bool continue; } + // Handle CIDR notation before URL parsing + // CIDR blocks like "10.0.0.0/8" should not be parsed as URLs + const cidrMatch = normalizedEntry.match(/^(\d+\.\d+\.\d+\.\d+)\/(\d+)$/); + if (cidrMatch) { + // Only match against IP URLs + if (!urlIsIp || urlIpInt === null) { + continue; + } + + const [, network, prefixStr] = cidrMatch; + const prefix = Number(prefixStr); + + if (!Number.isInteger(prefix) || prefix < 0 || prefix > 32) { + console.warn(`Warning: Invalid CIDR prefix in allow list: "${normalizedEntry}"`); + continue; + } + + // Validate /0 must use 0.0.0.0 for clarity + // Any other network address with /0 is ambiguous and likely a configuration error + if (prefix === 0 && network !== '0.0.0.0') { + console.warn( + `Warning: CIDR /0 prefix must use 0.0.0.0, not "${network}". Entry: "${normalizedEntry}"` + ); + continue; + } + + try { + const networkInt = ipToInt(network); + const mask = prefix === 0 ? 0 : (0xffffffff << (32 - prefix)) >>> 0; + if ((networkInt & mask) === (urlIpInt & mask)) { + return true; + } + } catch (error) { + console.warn( + `Warning: Invalid CIDR network address in allow list: "${normalizedEntry}" - ${error instanceof Error ? error.message : error}` + ); + } + + continue; // Skip URL parsing for CIDR entries + } + const hasExplicitScheme = SCHEME_PREFIX_RE.test(normalizedEntry); let parsedAllowed: URL; @@ -453,31 +494,11 @@ function isUrlAllowed(parsedUrl: URL, allowList: string[], allowSubdomains: bool continue; } + // Exact IP match if (ipToInt(allowedHost) === urlIpInt) { return true; } - let networkSpec = allowedHost; - if (allowedPath && allowedPath !== '/') { - networkSpec = `${networkSpec}${allowedPath}`; - } - - if (networkSpec.includes('/')) { - const [network, prefixStr] = networkSpec.split('/'); - const prefix = Number(prefixStr); - if (Number.isInteger(prefix) && prefix >= 0 && prefix <= 32) { - try { - const networkInt = ipToInt(network); - const mask = prefix === 0 ? 0 : (0xffffffff << (32 - prefix)) >>> 0; - if ((networkInt & mask) === (urlIpInt & mask)) { - return true; - } - } catch { - // Invalid CIDR entry; ignore. - } - } - } - continue; } From 8f51f79919b79e54503e39775de88382f1e25348 Mon Sep 17 00:00:00 2001 From: Steven C Date: Thu, 20 Nov 2025 15:13:14 -0500 Subject: [PATCH 7/7] Update test description --- src/__tests__/unit/checks/keywords-urls.test.ts | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/__tests__/unit/checks/keywords-urls.test.ts b/src/__tests__/unit/checks/keywords-urls.test.ts index 5e62658..6a293f9 100644 --- a/src/__tests__/unit/checks/keywords-urls.test.ts +++ b/src/__tests__/unit/checks/keywords-urls.test.ts @@ -404,8 +404,8 @@ describe('urls guardrail', () => { expect(result.info?.blocked).toEqual([]); }); - it('blocks non-default ports when allow list has no port', async () => { - // URLs with non-default ports should be blocked when allow list doesn't specify a port + it('allows any port when allow list entry has no port specification', async () => { + // When the allow list entry omits a port, URLs with any port (default or non-default) are allowed const config = { url_allow_list: ['example.com'], allowed_schemes: new Set(['https']), @@ -417,7 +417,6 @@ describe('urls guardrail', () => { const result = await urls({}, text, config); // Both should be allowed - when allow list has no port, any port is OK - // (This matches the behavior: no port restriction when not specified) expect(result.tripwireTriggered).toBe(false); expect(result.info?.allowed).toContain('https://example.com:8443'); expect(result.info?.allowed).toContain('https://example.com:9000');