Introduces unicode-url shim (#1049)

Zhang-Junzhi · jasonLaster · commit 815265c6af0b · 2018-04-19T08:32:45.000-04:00
diff --git a/packages/devtools-modules/index.js b/packages/devtools-modules/index.js
@@ -10,6 +10,8 @@ const KeyShortcuts = require("./src/key-shortcuts");
 const { ZoomKeys } = require("./src/zoom-keys");
 const EventEmitter = require("./src/utils/event-emitter");
 const SourceUtils = require("./src/source-utils");
+const { getUnicodeHostname, getUnicodeUrlPath, getUnicodeUrl } =
+  require("./src/unicode-url");
 
 module.exports = {
   KeyShortcuts,
@@ -20,4 +22,7 @@ module.exports = {
   ZoomKeys,
   EventEmitter,
   SourceUtils,
+  getUnicodeHostname,
+  getUnicodeUrlPath,
+  getUnicodeUrl,
 };
diff --git a/packages/devtools-modules/package.json b/packages/devtools-modules/package.json
@@ -10,7 +10,8 @@
   "author": "",
   "license": "MPL-2.0",
   "dependencies": {
-    "jest": "^19.0.2"
+    "jest": "^19.0.2",
+    "punycode": "^2.1.0"
   },
   "files": ["src"],
   "jest": {
diff --git a/packages/devtools-modules/src/tests/unicode-url.js b/packages/devtools-modules/src/tests/unicode-url.js
@@ -0,0 +1,228 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+const { getUnicodeUrl, getUnicodeUrlPath, getUnicodeHostname } =
+  require("../unicode-url");
+
+describe("unicode-url", () => {
+  // List of URLs used to test Unicode URL conversion
+  const TEST_URLS = [
+    // Type:     Readable ASCII URLs
+    // Expected: All Unicode versions should equal the raw.
+    {
+      raw: "https://example.org",
+      expectedUnicode: "https://example.org",
+    },
+    {
+      raw: "http://example.org",
+      expectedUnicode: "http://example.org",
+    },
+    {
+      raw: "ftp://example.org",
+      expectedUnicode: "ftp://example.org",
+    },
+    {
+      raw: "https://example.org.",
+      expectedUnicode: "https://example.org.",
+    },
+    {
+      raw: "https://example.org/",
+      expectedUnicode: "https://example.org/",
+    },
+    {
+      raw: "https://example.org/test",
+      expectedUnicode: "https://example.org/test",
+    },
+    {
+      raw: "https://example.org/test.html",
+      expectedUnicode: "https://example.org/test.html",
+    },
+    {
+      raw: "https://example.org/test.html?one=1&two=2",
+      expectedUnicode: "https://example.org/test.html?one=1&two=2",
+    },
+    {
+      raw: "https://example.org/test.html#here",
+      expectedUnicode: "https://example.org/test.html#here",
+    },
+    {
+      raw: "https://example.org/test.html?one=1&two=2#here",
+      expectedUnicode: "https://example.org/test.html?one=1&two=2#here",
+    },
+    // Type:     Unreadable URLs with either Punycode domain names or URI-encoded
+    //           paths
+    // Expected: Unreadable domain names and URI-encoded paths should be converted
+    //           to readable Unicode.
+    {
+      raw: "https://xn--g6w.xn--8pv/test.html",
+      // Do not type Unicode characters directly, because this test file isn't
+      // specified with a known encoding.
+      expectedUnicode: "https://\u6e2c.\u672c/test.html",
+    },
+    {
+      raw: "https://example.org/%E6%B8%AC%E8%A9%A6.html",
+      // Do not type Unicode characters directly, because this test file isn't
+      // specified with a known encoding.
+      expectedUnicode: "https://example.org/\u6e2c\u8a66.html",
+    },
+    {
+      raw: "https://example.org/test.html?One=%E4%B8%80",
+      // Do not type Unicode characters directly, because this test file isn't
+      // specified with a known encoding.
+      expectedUnicode: "https://example.org/test.html?One=\u4e00",
+    },
+    {
+      raw: "https://example.org/test.html?%E4%B8%80=1",
+      // Do not type Unicode characters directly, because this test file isn't
+      // specified with a known encoding.
+      expectedUnicode: "https://example.org/test.html?\u4e00=1",
+    },
+    {
+      raw: "https://xn--g6w.xn--8pv/%E6%B8%AC%E8%A9%A6.html" +
+           "?%E4%B8%80=%E4%B8%80" +
+           "#%E6%AD%A4",
+      // Do not type Unicode characters directly, because this test file isn't
+      // specified with a known encoding.
+      expectedUnicode: "https://\u6e2c.\u672c/\u6e2c\u8a66.html" +
+                       "?\u4e00=\u4e00" +
+                       "#\u6b64",
+    },
+    // Type:     data: URIs
+    // Expected: All should not be converted.
+    {
+      raw: "data:text/plain;charset=UTF-8;Hello%20world",
+      expectedUnicode: "data:text/plain;charset=UTF-8;Hello%20world",
+    },
+    {
+      raw: "data:text/plain;charset=UTF-8;%E6%B8%AC%20%E8%A9%A6",
+      expectedUnicode: "data:text/plain;charset=UTF-8;%E6%B8%AC%20%E8%A9%A6",
+    },
+    {
+      raw: "data:image/png;base64,iVBORw0KGgoAAA" +
+           "ANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAHElEQVQI12P4" +
+           "//8/w38GIAXDIBKE0DHxgljNBAAO9TXL0Y4OHwAAAABJRU" +
+           "5ErkJggg==",
+      expectedUnicode: "data:image/png;base64,iVBORw0KGgoAAA" +
+                       "ANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAHElEQVQI12P4" +
+                       "//8/w38GIAXDIBKE0DHxgljNBAAO9TXL0Y4OHwAAAABJRU" +
+                       "5ErkJggg==",
+    },
+    // Type:     Malformed URLs
+    // Expected: All should not be converted.
+    {
+      raw: "://example.org/test",
+      expectedUnicode: "://example.org/test",
+    },
+    {
+      raw: "://xn--g6w.xn--8pv/%E6%B8%AC%E8%A9%A6.html" +
+           "?%E4%B8%80=%E4%B8%80",
+      expectedUnicode: "://xn--g6w.xn--8pv/%E6%B8%AC%E8%A9%A6.html" +
+                       "?%E4%B8%80=%E4%B8%80",
+    },
+    {
+      // %E8%A9 is an incomplete UTF-8 sequence, so this URL is malformed.
+      raw: "https://xn--g6w.xn--8pv/%E6%B8%AC%E8%A9",
+      expectedUnicode: "https://xn--g6w.xn--8pv/%E6%B8%AC%E8%A9",
+    },
+  ];
+
+  // List of hostnames used to test Unicode hostname conversion
+  const TEST_HOSTNAMES = [
+    // Type:     Readable ASCII hostnames
+    // Expected: All Unicode versions should equal the raw.
+    {
+      raw: "example",
+      expectedUnicode: "example",
+    },
+    {
+      raw: "example.org",
+      expectedUnicode: "example.org",
+    },
+    // Type:     Unreadable Punycode hostnames
+    // Expected: Punycode should be converted to readable Unicode.
+    {
+      raw: "xn--g6w",
+      // Do not type Unicode characters directly, because this test file isn't
+      // specified with a known encoding.
+      expectedUnicode: "\u6e2c",
+    },
+    {
+      raw: "xn--g6w.xn--8pv",
+      // Do not type Unicode characters directly, because this test file isn't
+      // specified with a known encoding.
+      expectedUnicode: "\u6e2c.\u672c",
+    },
+  ];
+
+  // List of URL paths used to test Unicode URL path conversion
+  const TEST_URL_PATHS = [
+    // Type:     Readable ASCII URL paths
+    // Expected: All Unicode versions should equal the raw.
+    {
+      raw: "test",
+      expectedUnicode: "test",
+    },
+    {
+      raw: "/",
+      expectedUnicode: "/",
+    },
+    {
+      raw: "/test",
+      expectedUnicode: "/test",
+    },
+    {
+      raw: "/test.html?one=1&two=2#here",
+      expectedUnicode: "/test.html?one=1&two=2#here",
+    },
+    // Type:     Unreadable URI-encoded URL paths
+    // Expected: URL paths should be converted to readable Unicode.
+    {
+      raw: "/%E6%B8%AC%E8%A9%A6",
+      // Do not type Unicode characters directly, because this test file isn't
+      // specified with a known encoding.
+      expectedUnicode: "/\u6e2c\u8a66",
+    },
+    {
+      raw: "/%E6%B8%AC%E8%A9%A6.html",
+      // Do not type Unicode characters directly, because this test file isn't
+      // specified with a known encoding.
+      expectedUnicode: "/\u6e2c\u8a66.html",
+    },
+    {
+      raw: "/%E6%B8%AC%E8%A9%A6.html" +
+           "?%E4%B8%80=%E4%B8%80&%E4%BA%8C=%E4%BA%8C" +
+           "#%E6%AD%A4",
+      // Do not type Unicode characters directly, because this test file isn't
+      // specified with a known encoding.
+      expectedUnicode: "/\u6e2c\u8a66.html" +
+                       "?\u4e00=\u4e00&\u4e8c=\u4e8c" +
+                       "#\u6b64",
+    },
+    // Type:     Malformed URL paths
+    // Expected: All should not be converted.
+    {
+      // %E8%A9 is an incomplete UTF-8 sequence, so this URL is malformed.
+      raw: "/%E6%B8%AC%E8%A9",
+      expectedUnicode: "/%E6%B8%AC%E8%A9",
+    },
+  ];
+
+  it("Get Unicode URLs", () => {
+    for (let url of TEST_URLS) {
+      expect(getUnicodeUrl(url.raw)).toBe(url.expectedUnicode);
+    }
+  });
+
+  it("Get Unicode hostnames", () => {
+    for (let hostname of TEST_HOSTNAMES) {
+      expect(getUnicodeHostname(hostname.raw)).toBe(hostname.expectedUnicode);
+    }
+  });
+
+  it("Get Unicode URL paths", () => {
+    for (let urlPath of TEST_URL_PATHS) {
+      expect(getUnicodeUrlPath(urlPath.raw)).toBe(urlPath.expectedUnicode);
+    }
+  });
+});
diff --git a/packages/devtools-modules/src/unicode-url.js b/packages/devtools-modules/src/unicode-url.js
@@ -0,0 +1,115 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// This file is a chrome-API-free version of the module
+// devtools/client/shared/unicode-url.js in the mozilla-central repository, so
+// that it can be used in Chrome-API-free applications, such as the Launchpad.
+// But because of this, it cannot take advantage of utilizing chrome APIs and
+// should implement the similar functionalities on its own.
+//
+// Please keep in mind that if the feature in this file has changed, don't
+// forget to also change that accordingly in
+// devtools/client/shared/unicode-url.js in the mozilla-central repository.
+
+"use strict";
+
+const punycode = require("punycode");
+
+/**
+ * Gets a readable Unicode hostname from a hostname.
+ *
+ * If the `hostname` is a readable ASCII hostname, such as example.org, then
+ * this function will simply return the original `hostname`.
+ *
+ * If the `hostname` is a Punycode hostname representing a Unicode domain name,
+ * such as xn--g6w.xn--8pv, then this function will return the readable Unicode
+ * domain name by decoding the Punycode hostname with `punycode.toUnicode`.
+ *
+ * @param {string}  hostname
+ *                  the hostname from which the Unicode hostname will be
+ *                  parsed, such as example.org, xn--g6w.xn--8pv.
+ * @return {string} The Unicode hostname. It is the original `hostname` when
+ *                  that is already readable ASCII or Unicode, or when
+ *                  `punycode.toUnicode` throws while decoding it.
+ */
+function getUnicodeHostname(hostname) {
+  try {
+    return punycode.toUnicode(hostname);
+  } catch (err) { // Deliberately ignored: fall through to the raw hostname.
+  }
+  return hostname;
+}
+
+/**
+ * Gets a readable Unicode URL pathname from a URL pathname.
+ *
+ * If the `urlPath` is a readable ASCII URL pathname, such as /a/b/c.js, then
+ * this function will simply return the original `urlPath`.
+ *
+ * If the `urlPath` is a URI-encoded pathname, such as %E8%A9%A6/%E6%B8%AC.js,
+ * then this function will return the readable Unicode pathname.
+ *
+ * If the `urlPath` is a malformed URL pathname, then this function will simply
+ * return the original `urlPath`.
+ *
+ * @param {string}  urlPath
+ *                  the URL path from which the Unicode URL path will be parsed,
+ *                  such as /a/b/c.js, %E8%A9%A6/%E6%B8%AC.js.
+ * @return {string} The Unicode URL Path. It may be the same as the `urlPath`
+ *                  passed to this function if the `urlPath` itself is a readable
+ *                  ASCII url or a Unicode url.
+ */
+function getUnicodeUrlPath(urlPath) {
+  try {
+    return decodeURIComponent(urlPath);
+  } catch (err) { // Malformed escape sequence: fall through to the raw path.
+  }
+  return urlPath;
+}
+
+/**
+ * Gets a readable Unicode URL from a URL.
+ *
+ * If the `url` is a readable ASCII URL, such as http://example.org/a/b/c.js,
+ * then this function will simply return the original `url`.
+ *
+ * If the `url` includes either an unreadable Punycode domain name or an
+ * unreadable URI-encoded pathname, such as
+ * http://xn--g6w.xn--8pv/%E8%A9%A6/%E6%B8%AC.js, then this function will return
+ * the readable URL by decoding all its unreadable URL components to Unicode
+ * characters.
+ *
+ * If the `url` is a malformed URL, then this function will return the original
+ * `url`.
+ *
+ * If the `url` is a data: URI, then this function will return the original
+ * `url`.
+ *
+ * @param {string}  url
+ *                  the full URL, or a data: URI, from which the readable URL
+ *                  will be parsed, such as http://example.org/a/b/c.js,
+ *                  http://xn--g6w.xn--8pv/%E8%A9%A6/%E6%B8%AC.js
+ * @return {string} The readable URL. It may be the same as the `url` passed to
+ *                  this function if the `url` itself is readable.
+ */
+function getUnicodeUrl(url) {
+  try {
+    const { protocol, hostname } = new URL(url); // Throws if `url` cannot be parsed.
+    if (protocol === "data:") {
+      // Never convert a data: URI.
+      return url;
+    }
+    const readableHostname = getUnicodeHostname(hostname);
+    url = decodeURIComponent(url); // Whole string; a throw leaves `url` untouched.
+    return url.replace(hostname, readableHostname); // First occurrence only.
+  } catch (err) { // Unparsable or malformed input: return `url` as it stands.
+  }
+  return url;
+}
+
+module.exports = { // Re-exported by packages/devtools-modules/index.js.
+  getUnicodeHostname,
+  getUnicodeUrlPath,
+  getUnicodeUrl,
+};