Skip to content
This repository was archived by the owner on Sep 21, 2021. It is now read-only.

Commit 815265c

Browse files
Zhang-Junzhi authored and jasonLaster committed
Introduces unicode-url shim (#1049)
1 parent 03e9ff8 commit 815265c

File tree

4 files changed

+350
-1
lines changed

4 files changed

+350
-1
lines changed

packages/devtools-modules/index.js

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ const KeyShortcuts = require("./src/key-shortcuts");
1010
const { ZoomKeys } = require("./src/zoom-keys");
1111
const EventEmitter = require("./src/utils/event-emitter");
1212
const SourceUtils = require("./src/source-utils");
13+
const { getUnicodeHostname, getUnicodeUrlPath, getUnicodeUrl } =
14+
require("./src/unicode-url");
1315

1416
module.exports = {
1517
KeyShortcuts,
@@ -20,4 +22,7 @@ module.exports = {
2022
ZoomKeys,
2123
EventEmitter,
2224
SourceUtils,
25+
getUnicodeHostname,
26+
getUnicodeUrlPath,
27+
getUnicodeUrl,
2328
};

packages/devtools-modules/package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@
1010
"author": "",
1111
"license": "MPL-2.0",
1212
"dependencies": {
13-
"jest": "^19.0.2"
13+
"jest": "^19.0.2",
14+
"punycode": "^2.1.0"
1415
},
1516
"files": ["src"],
1617
"jest": {
Lines changed: 228 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,228 @@
1+
/* This Source Code Form is subject to the terms of the Mozilla Public
2+
* License, v. 2.0. If a copy of the MPL was not distributed with this
3+
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4+
5+
const { getUnicodeUrl, getUnicodeUrlPath, getUnicodeHostname } =
6+
require("../unicode-url");
7+
8+
// Jest suite for the unicode-url helpers. Each table below pairs a `raw`
// input with the `expectedUnicode` string the matching helper must return.
describe("unicode-url", () => {
  // List of URLs used to test Unicode URL conversion
  const TEST_URLS = [
    // Type: Readable ASCII URLs
    // Expected: Every Unicode version should equal the raw value.
    {
      raw: "https://example.org",
      expectedUnicode: "https://example.org",
    },
    {
      raw: "http://example.org",
      expectedUnicode: "http://example.org",
    },
    {
      raw: "ftp://example.org",
      expectedUnicode: "ftp://example.org",
    },
    {
      raw: "https://example.org.",
      expectedUnicode: "https://example.org.",
    },
    {
      raw: "https://example.org/",
      expectedUnicode: "https://example.org/",
    },
    {
      raw: "https://example.org/test",
      expectedUnicode: "https://example.org/test",
    },
    {
      raw: "https://example.org/test.html",
      expectedUnicode: "https://example.org/test.html",
    },
    {
      raw: "https://example.org/test.html?one=1&two=2",
      expectedUnicode: "https://example.org/test.html?one=1&two=2",
    },
    {
      raw: "https://example.org/test.html#here",
      expectedUnicode: "https://example.org/test.html#here",
    },
    {
      raw: "https://example.org/test.html?one=1&two=2#here",
      expectedUnicode: "https://example.org/test.html?one=1&two=2#here",
    },
    // Type: Unreadable URLs with either Punycode domain names or URI-encoded
    //       paths
    // Expected: Unreadable domain names and URI-encoded paths should be
    //           converted to readable Unicode.
    {
      raw: "https://xn--g6w.xn--8pv/test.html",
      // Do not type Unicode characters directly, because this test file isn't
      // specified with a known encoding.
      expectedUnicode: "https://\u6e2c.\u672c/test.html",
    },
    {
      raw: "https://example.org/%E6%B8%AC%E8%A9%A6.html",
      // Do not type Unicode characters directly, because this test file isn't
      // specified with a known encoding.
      expectedUnicode: "https://example.org/\u6e2c\u8a66.html",
    },
    {
      raw: "https://example.org/test.html?One=%E4%B8%80",
      // Do not type Unicode characters directly, because this test file isn't
      // specified with a known encoding.
      expectedUnicode: "https://example.org/test.html?One=\u4e00",
    },
    {
      raw: "https://example.org/test.html?%E4%B8%80=1",
      // Do not type Unicode characters directly, because this test file isn't
      // specified with a known encoding.
      expectedUnicode: "https://example.org/test.html?\u4e00=1",
    },
    {
      raw: "https://xn--g6w.xn--8pv/%E6%B8%AC%E8%A9%A6.html" +
           "?%E4%B8%80=%E4%B8%80" +
           "#%E6%AD%A4",
      // Do not type Unicode characters directly, because this test file isn't
      // specified with a known encoding.
      expectedUnicode: "https://\u6e2c.\u672c/\u6e2c\u8a66.html" +
                       "?\u4e00=\u4e00" +
                       "#\u6b64",
    },
    // Type: data: URIs
    // Expected: All should not be converted.
    {
      raw: "data:text/plain;charset=UTF-8;Hello%20world",
      expectedUnicode: "data:text/plain;charset=UTF-8;Hello%20world",
    },
    {
      raw: "data:text/plain;charset=UTF-8;%E6%B8%AC%20%E8%A9%A6",
      expectedUnicode: "data:text/plain;charset=UTF-8;%E6%B8%AC%20%E8%A9%A6",
    },
    {
      // NOTE(review): as written, this value has no `data:` prefix, so it is
      // a scheme-less string rather than a data: URI. It still must come back
      // unchanged, but confirm the intended leading segment of this fixture.
      raw: "" +
           "ANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAHElEQVQI12P4" +
           "//8/w38GIAXDIBKE0DHxgljNBAAO9TXL0Y4OHwAAAABJRU" +
           "5ErkJggg==",
      expectedUnicode: "" +
                       "ANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAHElEQVQI12P4" +
                       "//8/w38GIAXDIBKE0DHxgljNBAAO9TXL0Y4OHwAAAABJRU" +
                       "5ErkJggg==",
    },
    // Type: Malformed URLs
    // Expected: All should not be converted.
    {
      raw: "://example.org/test",
      expectedUnicode: "://example.org/test",
    },
    {
      raw: "://xn--g6w.xn--8pv/%E6%B8%AC%E8%A9%A6.html" +
           "?%E4%B8%80=%E4%B8%80",
      expectedUnicode: "://xn--g6w.xn--8pv/%E6%B8%AC%E8%A9%A6.html" +
                       "?%E4%B8%80=%E4%B8%80",
    },
    {
      // %E8%A9 isn't a complete UTF-8 sequence, so this URL is malformed.
      raw: "https://xn--g6w.xn--8pv/%E6%B8%AC%E8%A9",
      expectedUnicode: "https://xn--g6w.xn--8pv/%E6%B8%AC%E8%A9",
    },
  ];

  // List of hostnames used to test Unicode hostname conversion
  const TEST_HOSTNAMES = [
    // Type: Readable ASCII hostnames
    // Expected: Every Unicode version should equal the raw value.
    {
      raw: "example",
      expectedUnicode: "example",
    },
    {
      raw: "example.org",
      expectedUnicode: "example.org",
    },
    // Type: Unreadable Punycode hostnames
    // Expected: Punycode should be converted to readable Unicode.
    {
      raw: "xn--g6w",
      // Do not type Unicode characters directly, because this test file isn't
      // specified with a known encoding.
      expectedUnicode: "\u6e2c",
    },
    {
      raw: "xn--g6w.xn--8pv",
      // Do not type Unicode characters directly, because this test file isn't
      // specified with a known encoding.
      expectedUnicode: "\u6e2c.\u672c",
    },
  ];

  // List of URL paths used to test Unicode URL path conversion
  const TEST_URL_PATHS = [
    // Type: Readable ASCII URL paths
    // Expected: Every Unicode version should equal the raw value.
    {
      raw: "test",
      expectedUnicode: "test",
    },
    {
      raw: "/",
      expectedUnicode: "/",
    },
    {
      raw: "/test",
      expectedUnicode: "/test",
    },
    {
      raw: "/test.html?one=1&two=2#here",
      expectedUnicode: "/test.html?one=1&two=2#here",
    },
    // Type: Unreadable URI-encoded URL paths
    // Expected: URL paths should be converted to readable Unicode.
    {
      raw: "/%E6%B8%AC%E8%A9%A6",
      // Do not type Unicode characters directly, because this test file isn't
      // specified with a known encoding.
      expectedUnicode: "/\u6e2c\u8a66",
    },
    {
      raw: "/%E6%B8%AC%E8%A9%A6.html",
      // Do not type Unicode characters directly, because this test file isn't
      // specified with a known encoding.
      expectedUnicode: "/\u6e2c\u8a66.html",
    },
    {
      raw: "/%E6%B8%AC%E8%A9%A6.html" +
           "?%E4%B8%80=%E4%B8%80&%E4%BA%8C=%E4%BA%8C" +
           "#%E6%AD%A4",
      // Do not type Unicode characters directly, because this test file isn't
      // specified with a known encoding.
      expectedUnicode: "/\u6e2c\u8a66.html" +
                       "?\u4e00=\u4e00&\u4e8c=\u4e8c" +
                       "#\u6b64",
    },
    // Type: Malformed URL paths
    // Expected: All should not be converted.
    {
      // %E8%A9 isn't a complete UTF-8 sequence, so this URL path is malformed.
      raw: "/%E6%B8%AC%E8%A9",
      expectedUnicode: "/%E6%B8%AC%E8%A9",
    },
  ];

  it("Get Unicode URLs", () => {
    for (const url of TEST_URLS) {
      expect(getUnicodeUrl(url.raw)).toBe(url.expectedUnicode);
    }
  });

  it("Get Unicode hostnames", () => {
    for (const hostname of TEST_HOSTNAMES) {
      expect(getUnicodeHostname(hostname.raw)).toBe(hostname.expectedUnicode);
    }
  });

  it("Get Unicode URL paths", () => {
    for (const urlPath of TEST_URL_PATHS) {
      expect(getUnicodeUrlPath(urlPath.raw)).toBe(urlPath.expectedUnicode);
    }
  });
});
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
/* This Source Code Form is subject to the terms of the Mozilla Public
2+
* License, v. 2.0. If a copy of the MPL was not distributed with this
3+
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4+
5+
// This file is a chrome-API-free version of the module
6+
// devtools/client/shared/unicode-url.js in the mozilla-central repository, so
7+
// that it can be used in Chrome-API-free applications, such as the Launchpad.
8+
// Because of this, it cannot rely on chrome APIs and must implement the
9+
// equivalent functionality on its own.
10+
//
11+
// Please keep in mind that if the feature in this file has changed, don't
12+
// forget to also change that accordingly in
13+
// devtools/client/shared/unicode-url.js in the mozilla-central repository.
14+
15+
"use strict";
16+
17+
const punycode = require("punycode");
18+
19+
/**
 * Gets a readable Unicode hostname from a hostname.
 *
 * If the `hostname` is a readable ASCII hostname, such as example.org, then
 * this function will simply return the original `hostname`.
 *
 * If the `hostname` is a Punycode hostname representing a Unicode domain name,
 * such as xn--g6w.xn--8pv, then this function will return the readable Unicode
 * domain name by decoding the Punycode hostname.
 *
 * If `punycode.toUnicode` throws for the given value, the value is returned
 * unchanged; this function itself never throws.
 *
 * @param {string} hostname
 *                 the hostname from which the Unicode hostname will be
 *                 parsed, such as example.org, xn--g6w.xn--8pv.
 * @return {string} The Unicode hostname. It may be the same as the `hostname`
 *                  passed to this function if the `hostname` itself is
 *                  a readable ASCII hostname or a Unicode hostname.
 */
function getUnicodeHostname(hostname) {
  try {
    return punycode.toUnicode(hostname);
  } catch (err) {
    // Deliberate best effort: when decoding fails, hand the caller back
    // exactly what it passed in rather than propagating the error.
    return hostname;
  }
}
43+
44+
/**
 * Gets a readable Unicode URL pathname from a URL pathname.
 *
 * If the `urlPath` is a readable ASCII URL pathname, such as /a/b/c.js, then
 * this function will simply return the original `urlPath`.
 *
 * If the `urlPath` is a URI-encoded pathname, such as %E8%A9%A6/%E6%B8%AC.js,
 * then this function will return the readable Unicode pathname.
 *
 * If the `urlPath` is a malformed URL pathname (one that makes
 * `decodeURIComponent` throw), then this function will simply return the
 * original `urlPath`.
 *
 * Note that `decodeURIComponent` decodes every percent-escape, including
 * escaped reserved characters such as %2F.
 *
 * @param {string} urlPath
 *                 the URL path from which the Unicode URL path will be parsed,
 *                 such as /a/b/c.js, %E8%A9%A6/%E6%B8%AC.js.
 * @return {string} The Unicode URL Path. It may be the same as the `urlPath`
 *                  passed to this function if the `urlPath` itself is a
 *                  readable ASCII url or a Unicode url.
 */
function getUnicodeUrlPath(urlPath) {
  try {
    return decodeURIComponent(urlPath);
  } catch (err) {
    // Deliberate best effort: a path that cannot be decoded is returned
    // untouched instead of surfacing the error to the caller.
    return urlPath;
  }
}
70+
71+
/**
 * Replaces the host portion of an already-decoded URL string.
 *
 * The search is restricted to the authority part (after "://" and after any
 * "userinfo@" prefix) so that an identical string appearing earlier in the URL
 * is not replaced by mistake, and it ignores ASCII case because the parsed
 * `hostname` may be lower-cased while the raw URL is not.
 *
 * @param {string} url
 *                 the decoded URL text.
 * @param {string} hostname
 *                 the hostname as reported by the URL parser.
 * @param {string} readableHostname
 *                 the text to put in place of `hostname`.
 * @return {string} `url` with the host replaced, or `url` unchanged when the
 *                  hostname cannot be located.
 */
function replaceHostInAuthority(url, hostname, readableHostname) {
  // Lower-casing only A-Z never changes the string length, so indices found
  // in the lowered copy are valid indices into `url` as well.
  const toAsciiLower = (text) =>
    text.replace(/[A-Z]/g, (ch) => ch.toLowerCase());
  const lowered = toAsciiLower(url);

  const schemeEnd = lowered.indexOf("://");
  let searchFrom = schemeEnd === -1 ? 0 : schemeEnd + 3;

  // The authority ends at the first "/", "?" or "#" after the scheme.
  let authorityEnd = lowered.length;
  for (const delimiter of ["/", "?", "#"]) {
    const index = lowered.indexOf(delimiter, searchFrom);
    if (index !== -1 && index < authorityEnd) {
      authorityEnd = index;
    }
  }

  // Skip "user:password@" so that the userinfo is never rewritten.
  const userinfoEnd = lowered.lastIndexOf("@", authorityEnd - 1);
  if (userinfoEnd >= searchFrom) {
    searchFrom = userinfoEnd + 1;
  }

  const hostStart = lowered.indexOf(toAsciiLower(hostname), searchFrom);
  if (hostStart === -1) {
    return url;
  }
  // Plain concatenation (rather than String.prototype.replace with a string
  // replacement) keeps any "$" in `readableHostname` literal.
  return url.slice(0, hostStart) + readableHostname +
         url.slice(hostStart + hostname.length);
}

/**
 * Gets a readable Unicode URL from a URL.
 *
 * If the `url` is a readable ASCII URL, such as http://example.org/a/b/c.js,
 * then this function will simply return the original `url`.
 *
 * If the `url` includes either an unreadable Punycode domain name or an
 * unreadable URI-encoded pathname, such as
 * http://xn--g6w.xn--8pv/%E8%A9%A6/%E6%B8%AC.js, then this function will return
 * the readable URL by decoding all its unreadable URL components to Unicode
 * characters.
 *
 * If the `url` is a malformed URL (it cannot be parsed by `URL`, or it
 * contains percent-escapes that `decodeURIComponent` rejects), then this
 * function will return the original `url`.
 *
 * If the `url` is a data: URI, then this function will return the original
 * `url`.
 *
 * Note that the whole URL is passed through `decodeURIComponent`, so escaped
 * reserved characters are decoded too.
 *
 * @param {string} url
 *                 the full URL, or a data: URI, from which the readable URL
 *                 will be parsed, such as http://example.org/a/b/c.js,
 *                 http://xn--g6w.xn--8pv/%E8%A9%A6/%E6%B8%AC.js
 * @return {string} The readable URL. It may be the same as the `url` passed to
 *                  this function if the `url` itself is readable.
 */
function getUnicodeUrl(url) {
  try {
    const { protocol, hostname } = new URL(url);
    if (protocol === "data:") {
      // Never convert a data: URI.
      return url;
    }
    const readableHostname = getUnicodeHostname(hostname);
    // Throws a URIError on malformed escapes; handled by the catch below.
    const decodedUrl = decodeURIComponent(url);
    if (hostname === "" || readableHostname === hostname) {
      // Nothing to substitute in the host part.
      return decodedUrl;
    }
    return replaceHostInAuthority(decodedUrl, hostname, readableHostname);
  } catch (err) {
    // Deliberate best effort: unparseable or undecodable input is returned
    // exactly as it was given.
    return url;
  }
}
110+
111+
module.exports = {
112+
getUnicodeHostname,
113+
getUnicodeUrlPath,
114+
getUnicodeUrl,
115+
};

0 commit comments

Comments
 (0)