Skip to content

Commit 75f8b1f

Browse files
sgomes and sirreal authored
Add URL type detection to lib/url. (#37030)
* Add URL type detection to lib/url.

  It adds a new method, `determineUrlType`, which categorises a given URL into one of five types: absolute, protocol relative, root relative, path relative, and invalid.

* Change type names to follow spec. See https://url.spec.whatwg.org/#urls
* Add some more tests based on spec examples.
* Add tests for naked params and fragments.
* Add explicit return type to determineUrlType

Co-Authored-By: Jon Surrell <[email protected]>
1 parent fcf11c3 commit 75f8b1f

File tree

3 files changed

+141
-0
lines changed

3 files changed

+141
-0
lines changed

client/lib/url/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ export { addQueryArgs } from 'lib/route';
1717
export { withoutHttp, urlToSlug, urlToDomainAndPath } from './http-utils';
1818
export { default as isExternal } from './is-external';
1919
export { default as resemblesUrl } from './resembles-url';
20+
export { URL_TYPE, determineUrlType } from './url-type';
2021

2122
/**
2223
* Check if a URL is located outside of Calypso.

client/lib/url/test/url-type.js

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
/**
2+
* @jest-environment jsdom
3+
*/
4+
5+
/**
6+
* Internal dependencies
7+
*/
8+
import { determineUrlType } from '../url-type';
9+
10+
// Exercises every URL_TYPE classification returned by `determineUrlType`, using both
// hand-written cases and the example URLs listed at https://url.spec.whatwg.org/#urls
describe( 'determineUrlType', () => {
	test( 'should detect the correct type for absolute URLs', () => {
		expect( determineUrlType( 'http://example.com/' ) ).toBe( 'ABSOLUTE' );
		expect( determineUrlType( 'http://www.example.com' ) ).toBe( 'ABSOLUTE' );
		expect( determineUrlType( 'http://example.com/bar' ) ).toBe( 'ABSOLUTE' );
		expect( determineUrlType( 'http://example.com/bar?baz=1' ) ).toBe( 'ABSOLUTE' );
		// Native URL objects are accepted as well as strings.
		expect( determineUrlType( new URL( 'http://example.com' ) ) ).toBe( 'ABSOLUTE' );
		// From https://url.spec.whatwg.org/#urls
		expect( determineUrlType( 'https:example.org' ) ).toBe( 'ABSOLUTE' );
		expect( determineUrlType( 'https://////example.com///' ) ).toBe( 'ABSOLUTE' );
		expect( determineUrlType( 'https://example.com/././foo' ) ).toBe( 'ABSOLUTE' );
		expect( determineUrlType( 'hello:world' ) ).toBe( 'ABSOLUTE' );
		expect( determineUrlType( 'file:///C|/demo' ) ).toBe( 'ABSOLUTE' );
		expect( determineUrlType( 'file://loc%61lhost/' ) ).toBe( 'ABSOLUTE' );
		// URL carrying credentials in its authority component.
		expect( determineUrlType( 'https://user:password@example.org/' ) ).toBe( 'ABSOLUTE' );
		expect( determineUrlType( 'https://example.org/foo bar' ) ).toBe( 'ABSOLUTE' );
		expect( determineUrlType( 'https://EXAMPLE.com/../x' ) ).toBe( 'ABSOLUTE' );
	} );

	test( 'should detect the correct type for protocol-relative URLs', () => {
		expect( determineUrlType( '//example.com/' ) ).toBe( 'SCHEME_RELATIVE' );
		expect( determineUrlType( '//www.example.com' ) ).toBe( 'SCHEME_RELATIVE' );
		expect( determineUrlType( '//example.com/bar' ) ).toBe( 'SCHEME_RELATIVE' );
		expect( determineUrlType( '//example.com/bar?baz=1' ) ).toBe( 'SCHEME_RELATIVE' );
	} );

	test( 'should detect the correct type for root-relative URLs', () => {
		expect( determineUrlType( '/' ) ).toBe( 'PATH_ABSOLUTE' );
		expect( determineUrlType( '/bar' ) ).toBe( 'PATH_ABSOLUTE' );
		expect( determineUrlType( '/bar?baz=1' ) ).toBe( 'PATH_ABSOLUTE' );
	} );

	test( 'should detect the correct type for path-relative URLs', () => {
		// The empty string refers to the current resource.
		expect( determineUrlType( '' ) ).toBe( 'PATH_RELATIVE' );
		expect( determineUrlType( 'bar' ) ).toBe( 'PATH_RELATIVE' );
		expect( determineUrlType( 'bar?baz=1' ) ).toBe( 'PATH_RELATIVE' );
		expect( determineUrlType( 'bar#anchor' ) ).toBe( 'PATH_RELATIVE' );
		// Naked query strings and fragments.
		expect( determineUrlType( '?query=param' ) ).toBe( 'PATH_RELATIVE' );
		expect( determineUrlType( '#fragment' ) ).toBe( 'PATH_RELATIVE' );
	} );

	test( 'should detect the correct type for invalid URLs', () => {
		// Falsey values other than the empty string.
		expect( determineUrlType( null ) ).toBe( 'INVALID' );
		expect( determineUrlType( 0 ) ).toBe( 'INVALID' );
		expect( determineUrlType( '///' ) ).toBe( 'INVALID' );
		// From https://url.spec.whatwg.org/#urls
		expect( determineUrlType( 'https://ex ample.org/' ) ).toBe( 'INVALID' );
		expect( determineUrlType( 'https://example.com:demo' ) ).toBe( 'INVALID' );
		expect( determineUrlType( 'http://[www.example.com]/' ) ).toBe( 'INVALID' );
	} );
} );

client/lib/url/url-type.ts

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
/**
2+
* External dependencies
3+
*/
4+
import { URL as URLString } from 'types';
5+
import { Falsey } from 'utility-types';
6+
7+
/**
 * Classification of a URL with regards to its completeness.
 *
 * For complete definitions of these classifications, see:
 * https://url.spec.whatwg.org/#urls
 */
export enum URL_TYPE {
	/**
	 * A URL that carries its own scheme and can be parsed without a base URL.
	 * E.g. `http://example.com`, `http://example.com/path` or `hello:world`.
	 */
	ABSOLUTE = 'ABSOLUTE',
	/**
	 * A URL with no protocol, but with a host.
	 * E.g. `//example.com` or `//example.com/path`
	 */
	SCHEME_RELATIVE = 'SCHEME_RELATIVE',
	/**
	 * A URL with no protocol or host, but with a path starting at the root.
	 * E.g. `/` or `/path`
	 */
	PATH_ABSOLUTE = 'PATH_ABSOLUTE',
	/**
	 * A URL with no protocol or host, but with a path relative to the current resource.
	 * E.g. `../foo` or `bar`
	 */
	PATH_RELATIVE = 'PATH_RELATIVE',
	/**
	 * Any invalid URL.
	 * E.g. `///`
	 */
	INVALID = 'INVALID',
}

// Sentinel base used to resolve URLs that cannot be parsed on their own. If the parsed
// result still has this hostname, the input (normally) did not provide a host itself.
const BASE_HOSTNAME = '__domain__.invalid';
const BASE_URL = `http://${ BASE_HOSTNAME }`;

// Second sentinel base, used only to tell apart an input that inherited BASE_HOSTNAME from
// the base from one that literally spells out BASE_HOSTNAME as its own host.
const ALTERNATE_BASE_URL = 'http://__alternate__.invalid';

/**
 * Parse a URL string, optionally against a base, without throwing.
 *
 * @param url the URL string to parse
 * @param base optional base URL to resolve `url` against
 *
 * @returns the parsed URL, or `null` if the parser rejected it
 */
function tryParseUrl( url: string, base?: string ): URL | null {
	try {
		return new URL( url, base );
	} catch {
		return null;
	}
}

/**
 * Determine the type of a URL, with regards to its completeness.
 *
 * The classification is done by feeding the input to the native `URL` parser, first on its
 * own and then against a sentinel base URL, and observing which parts the input supplied.
 * The distinction between `PATH_ABSOLUTE` and `PATH_RELATIVE` is made on the raw string
 * (whether it starts with `/`).
 *
 * @param url the URL to analyze
 *
 * @returns the type of the URL; never throws, unparseable or non-URL input yields
 *          `URL_TYPE.INVALID`
 */
export function determineUrlType( url: URLString | URL | Falsey ): URL_TYPE {
	// As a URL, the empty string means "the current resource".
	if ( url === '' ) {
		return URL_TYPE.PATH_RELATIVE;
	}

	// Any other falsey value is an invalid URL.
	if ( ! url ) {
		return URL_TYPE.INVALID;
	}

	// The native URL object can only represent absolute URLs.
	if ( url instanceof URL ) {
		return URL_TYPE.ABSOLUTE;
	}

	// Untyped (plain JS) callers may hand us other truthy values, such as numbers. Those are
	// not URLs, and letting them through would crash on the string operations below.
	if ( typeof url !== 'string' ) {
		return URL_TYPE.INVALID;
	}

	// If we can parse the URL without a base, it's an absolute URL.
	if ( tryParseUrl( url ) ) {
		return URL_TYPE.ABSOLUTE;
	}

	// If it can't be parsed even with a base URL, it's an invalid URL.
	const parsed = tryParseUrl( url, BASE_URL );
	if ( ! parsed ) {
		return URL_TYPE.INVALID;
	}

	// If we couldn't parse it without a base, but it didn't take the hostname we provided, that
	// means it's a protocol-relative URL.
	if ( parsed.hostname !== BASE_HOSTNAME ) {
		return URL_TYPE.SCHEME_RELATIVE;
	}

	// The hostname matches our sentinel. Usually that means it was inherited from the base, but
	// the input itself may be `//__domain__.invalid/...`. Resolving against a different base
	// disambiguates: an inherited host changes with the base, an explicit one does not.
	const reparsed = tryParseUrl( url, ALTERNATE_BASE_URL );
	if ( reparsed && reparsed.hostname === BASE_HOSTNAME ) {
		return URL_TYPE.SCHEME_RELATIVE;
	}

	// Otherwise, it's a relative URL of some sort.
	if ( url.startsWith( '/' ) ) {
		return URL_TYPE.PATH_ABSOLUTE;
	}
	return URL_TYPE.PATH_RELATIVE;
}

0 commit comments

Comments
 (0)