-
Notifications
You must be signed in to change notification settings - Fork 9
Enhance URL allow list matching #49
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 4 commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
4cf1569
More robust allow list matching
steven10a da7fc94
Handle trailing slashes in allow list
steven10a bf90130
Support schemeless matching
steven10a f52fb89
Port normalization and null-safety check
steven10a 28cabf9
Address copilot comments
steven10a ce6b65d
Better CIDR handling
steven10a 8f51f79
Update test description
steven10a File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,7 +4,7 @@ | |
|
|
||
| import { describe, it, expect } from 'vitest'; | ||
| import { keywordsCheck, KeywordsConfig } from '../../../checks/keywords'; | ||
| import { urls } from '../../../checks/urls'; | ||
| import { urls, UrlsConfig } from '../../../checks/urls'; | ||
| import { competitorsCheck } from '../../../checks/competitors'; | ||
| import { GuardrailResult } from '../../../types'; | ||
|
|
||
|
|
@@ -34,6 +34,16 @@ describe('keywords guardrail', () => { | |
| }); | ||
| }); | ||
|
|
||
| describe('UrlsConfig', () => { | ||
| it('normalizes allowed scheme inputs', () => { | ||
| const config = UrlsConfig.parse({ | ||
| allowed_schemes: ['HTTPS://', 'http:', ' https '], | ||
| }); | ||
|
|
||
| expect(Array.from(config.allowed_schemes).sort()).toEqual(['http', 'https']); | ||
| }); | ||
| }); | ||
|
|
||
| describe('urls guardrail', () => { | ||
| it('allows https URLs listed in the allow list', async () => { | ||
| const result = await urls( | ||
|
|
@@ -92,6 +102,326 @@ describe('urls guardrail', () => { | |
| expect(result.info?.blocked).toContain('https://other.com'); | ||
| expect(result.tripwireTriggered).toBe(true); | ||
| }); | ||
|
|
||
| it('allows full URLs with explicit paths in the allow list', async () => { | ||
| const text = [ | ||
| 'https://suntropy.es', | ||
| 'https://api.example.com/v1/tools?id=2', | ||
| 'https://api.example.com/v2', | ||
| ].join(' '); | ||
|
|
||
| const result = await urls( | ||
| {}, | ||
| text, | ||
| { | ||
| url_allow_list: ['https://suntropy.es', 'https://api.example.com/v1'], | ||
| allowed_schemes: new Set(['https']), | ||
| allow_subdomains: false, | ||
| block_userinfo: true, | ||
| } | ||
| ); | ||
|
|
||
| expect(result.info?.allowed).toEqual( | ||
| expect.arrayContaining([ | ||
| 'https://suntropy.es', | ||
| 'https://api.example.com/v1/tools?id=2', | ||
| ]) | ||
| ); | ||
| expect(result.info?.blocked).toContain('https://api.example.com/v2'); | ||
| }); | ||
|
|
||
| it('respects path segment boundaries to avoid prefix bypasses', async () => { | ||
| const text = [ | ||
| 'https://example.com/api', | ||
| 'https://example.com/api/users', | ||
| 'https://example.com/api2', | ||
| 'https://example.com/api-v2', | ||
| ].join(' '); | ||
|
|
||
| const result = await urls( | ||
| {}, | ||
| text, | ||
| { | ||
| url_allow_list: ['https://example.com/api'], | ||
| allowed_schemes: new Set(['https']), | ||
| allow_subdomains: false, | ||
| block_userinfo: true, | ||
| } | ||
| ); | ||
|
|
||
| expect(result.info?.allowed).toEqual( | ||
| expect.arrayContaining([ | ||
| 'https://example.com/api', | ||
| 'https://example.com/api/users', | ||
| ]) | ||
| ); | ||
| expect(result.info?.blocked).toEqual( | ||
| expect.arrayContaining([ | ||
| 'https://example.com/api2', | ||
| 'https://example.com/api-v2', | ||
| ]) | ||
| ); | ||
| }); | ||
|
|
||
| it('matches scheme-less allow list entries across configured schemes', async () => { | ||
| const text = ['https://example.com', 'http://example.com'].join(' '); | ||
|
|
||
| const result = await urls( | ||
| {}, | ||
| text, | ||
| { | ||
| url_allow_list: ['example.com'], | ||
| allowed_schemes: new Set(['https', 'http']), | ||
| allow_subdomains: false, | ||
| block_userinfo: true, | ||
| } | ||
| ); | ||
|
|
||
| expect(result.info?.allowed).toEqual( | ||
| expect.arrayContaining(['https://example.com', 'http://example.com']) | ||
| ); | ||
| expect(result.info?.blocked).toEqual([]); | ||
| }); | ||
|
|
||
| it('enforces explicit scheme matches when allow list entries include schemes', async () => { | ||
| const text = ['https://bank.example.com', 'http://bank.example.com'].join(' '); | ||
|
|
||
| const result = await urls( | ||
| {}, | ||
| text, | ||
| { | ||
| url_allow_list: ['https://bank.example.com'], | ||
| allowed_schemes: new Set(['https', 'http']), | ||
| allow_subdomains: false, | ||
| block_userinfo: true, | ||
| } | ||
| ); | ||
|
|
||
| expect(result.info?.allowed).toEqual(expect.arrayContaining(['https://bank.example.com'])); | ||
| expect(result.info?.blocked).toContain('http://bank.example.com'); | ||
| }); | ||
|
|
||
| it('supports CIDR ranges and explicit port matching', async () => { | ||
| const text = [ | ||
| 'https://10.5.5.5', | ||
| 'https://192.168.1.100', | ||
| 'https://192.168.2.1', | ||
| 'https://example.com:8443', | ||
| 'https://example.com', | ||
| 'https://api.internal.com:9000', | ||
| ].join(' '); | ||
|
|
||
| const result = await urls( | ||
| {}, | ||
| text, | ||
| { | ||
| url_allow_list: ['10.0.0.0/8', '192.168.1.0/24', 'https://example.com:8443', 'api.internal.com'], | ||
| allowed_schemes: new Set(['https']), | ||
| allow_subdomains: false, | ||
| block_userinfo: true, | ||
| } | ||
| ); | ||
|
|
||
| expect(result.info?.allowed).toEqual( | ||
| expect.arrayContaining([ | ||
| 'https://10.5.5.5', | ||
| 'https://192.168.1.100', | ||
| 'https://example.com:8443', | ||
| 'https://api.internal.com:9000', | ||
| ]) | ||
| ); | ||
| expect(result.info?.blocked).toEqual( | ||
| expect.arrayContaining(['https://192.168.2.1', 'https://example.com']) | ||
| ); | ||
| }); | ||
|
|
||
| it('requires query strings and fragments to match exactly when configured', async () => { | ||
| const text = [ | ||
| 'https://example.com/search?q=test', | ||
| 'https://example.com/search?q=other', | ||
| 'https://example.com/docs#intro', | ||
| 'https://example.com/docs#outro', | ||
| ].join(' '); | ||
|
|
||
| const result = await urls( | ||
| {}, | ||
| text, | ||
| { | ||
| url_allow_list: [ | ||
| 'https://example.com/search?q=test', | ||
| 'https://example.com/docs#intro', | ||
| ], | ||
| allowed_schemes: new Set(['https']), | ||
| allow_subdomains: false, | ||
| block_userinfo: true, | ||
| } | ||
| ); | ||
|
|
||
| expect(result.info?.allowed).toEqual( | ||
| expect.arrayContaining([ | ||
| 'https://example.com/search?q=test', | ||
| 'https://example.com/docs#intro', | ||
| ]) | ||
| ); | ||
| expect(result.info?.blocked).toEqual( | ||
| expect.arrayContaining([ | ||
| 'https://example.com/search?q=other', | ||
| 'https://example.com/docs#outro', | ||
| ]) | ||
| ); | ||
| }); | ||
|
|
||
| it('blocks URLs containing only a password in userinfo when configured', async () => { | ||
| const result = await urls( | ||
| {}, | ||
| 'https://:[email protected]', | ||
| { | ||
| url_allow_list: ['example.com'], | ||
| allowed_schemes: new Set(['https']), | ||
| allow_subdomains: false, | ||
| block_userinfo: true, | ||
| } | ||
| ); | ||
|
|
||
| expect(result.info?.blocked).toContain('https://:[email protected]'); | ||
| expect( | ||
| (result.info?.blocked_reasons as string[]).some((reason) => reason.includes('userinfo')) | ||
| ).toBe(true); | ||
| }); | ||
|
|
||
| it('handles malformed ports gracefully without crashing', async () => { | ||
| const text = [ | ||
| 'https://example.com:99999', | ||
| 'https://example.com:abc', | ||
| 'https://example.com:-1', | ||
| ].join(' '); | ||
|
|
||
| const result = await urls( | ||
| {}, | ||
| text, | ||
| { | ||
| url_allow_list: ['example.com'], | ||
| allowed_schemes: new Set(['https']), | ||
| allow_subdomains: false, | ||
| block_userinfo: true, | ||
| } | ||
| ); | ||
|
|
||
| expect(result.tripwireTriggered).toBe(true); | ||
| expect(result.info?.blocked).toHaveLength(3); | ||
| expect(result.info?.blocked_reasons).toHaveLength(3); | ||
| }); | ||
|
|
||
| it('handles trailing slashes in allow list paths correctly', async () => { | ||
| // Regression test: allow list entries with trailing slashes should match subpaths | ||
| // Previously, '/api/' + '/' created '/api//' which wouldn't match '/api/users' | ||
| const text = [ | ||
| 'https://example.com/api/users', | ||
| 'https://example.com/api/v2/data', | ||
| 'https://example.com/other', | ||
| ].join(' '); | ||
|
|
||
| const result = await urls( | ||
| {}, | ||
| text, | ||
| { | ||
| url_allow_list: ['https://example.com/api/'], | ||
| allowed_schemes: new Set(['https']), | ||
| allow_subdomains: false, | ||
| block_userinfo: true, | ||
| } | ||
| ); | ||
|
|
||
| expect(result.info?.allowed).toEqual( | ||
| expect.arrayContaining([ | ||
| 'https://example.com/api/users', | ||
| 'https://example.com/api/v2/data', | ||
| ]) | ||
| ); | ||
| expect(result.info?.blocked).toContain('https://example.com/other'); | ||
| }); | ||
|
|
||
| it('matches scheme-less URLs against scheme-qualified allow list entries', async () => { | ||
| // Test exact behavior: scheme-qualified allow list vs scheme-less/explicit URLs | ||
| const config = { | ||
| url_allow_list: ['https://suntropy.es'], | ||
| allowed_schemes: new Set(['https']), | ||
| allow_subdomains: false, | ||
| block_userinfo: true, | ||
| }; | ||
|
|
||
| // Test scheme-less URL (should be allowed) | ||
| const result1 = await urls({}, 'Visit suntropy.es', config); | ||
| expect(result1.info?.allowed).toContain('suntropy.es'); | ||
| expect(result1.tripwireTriggered).toBe(false); | ||
|
|
||
| // Test HTTPS URL (should match allow list scheme) | ||
| const result2 = await urls({}, 'Visit https://suntropy.es', config); | ||
| expect(result2.info?.allowed).toContain('https://suntropy.es'); | ||
| expect(result2.tripwireTriggered).toBe(false); | ||
|
|
||
| // Test HTTP URL (wrong explicit scheme should be blocked) | ||
| const result3 = await urls({}, 'Visit http://suntropy.es', config); | ||
| expect(result3.info?.blocked).toContain('http://suntropy.es'); | ||
| expect(result3.tripwireTriggered).toBe(true); | ||
| }); | ||
|
|
||
| it('blocks subdomains and paths correctly with scheme-qualified allow list', async () => { | ||
| // Verify subdomains and paths are still blocked according to allow list rules | ||
| const config = { | ||
| url_allow_list: ['https://suntropy.es'], | ||
| allowed_schemes: new Set(['https']), | ||
| allow_subdomains: false, | ||
| block_userinfo: true, | ||
| }; | ||
|
|
||
| const text = 'Visit help-suntropy.es and help.suntropy.es'; | ||
| const result = await urls({}, text, config); | ||
|
|
||
| // Both should be blocked - not in allow list | ||
| expect(result.tripwireTriggered).toBe(true); | ||
| expect(result.info?.blocked).toHaveLength(2); | ||
| expect(result.info?.blocked).toContain('help-suntropy.es'); | ||
| expect(result.info?.blocked).toContain('help.suntropy.es'); | ||
| }); | ||
|
|
||
| it('treats explicit default ports as equivalent to no port', async () => { | ||
| // URLs with explicit default ports should match allow list entries without ports | ||
| const config = { | ||
| url_allow_list: ['example.com'], | ||
| allowed_schemes: new Set(['https', 'http']), | ||
| allow_subdomains: false, | ||
| block_userinfo: true, | ||
| }; | ||
|
|
||
| const text = 'Visit https://example.com:443 and http://example.com:80'; | ||
| const result = await urls({}, text, config); | ||
|
|
||
| // Both should be allowed (443 is default for https, 80 is default for http) | ||
| expect(result.tripwireTriggered).toBe(false); | ||
| expect(result.info?.allowed).toContain('https://example.com:443'); | ||
| expect(result.info?.allowed).toContain('http://example.com:80'); | ||
| expect(result.info?.blocked).toEqual([]); | ||
| }); | ||
|
|
||
| it('blocks non-default ports when allow list has no port', async () => { | ||
| // URLs with non-default ports should be blocked when allow list doesn't specify a port | ||
| const config = { | ||
| url_allow_list: ['example.com'], | ||
| allowed_schemes: new Set(['https']), | ||
| allow_subdomains: false, | ||
| block_userinfo: true, | ||
| }; | ||
|
|
||
| const text = 'Visit https://example.com:8443 and https://example.com:9000'; | ||
| const result = await urls({}, text, config); | ||
|
|
||
| // Both should be allowed - when allow list has no port, any port is OK | ||
| // (This matches the behavior: no port restriction when not specified) | ||
| expect(result.tripwireTriggered).toBe(false); | ||
| expect(result.info?.allowed).toContain('https://example.com:8443'); | ||
| expect(result.info?.allowed).toContain('https://example.com:9000'); | ||
| }); | ||
| }); | ||
|
|
||
| describe('competitors guardrail', () => { | ||
|
|
||
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.