|
| 1 | +const escapeUnicode = require("escape-unicode"); |
| 2 | + |
/**
 * Highest char code that still belongs to ASCII.
 * ASCII defines 128 characters whose char codes run from 0 to 127 (0x7F).
 *
 * @see https://en.wikipedia.org/wiki/ASCII
 * @type {number}
 */
const CHAR_CODE_OF_LAST_ASCII_CHARACTER = 0x7F;
| 10 | + |
// Cache of already escaped inputs, so escapeUnicode runs at most once per distinct input.
// A Map is used instead of a plain object: a plain object lookup also finds inherited
// members (e.g. "constructor", "__proto__") and would return them as cached results.
const memoizeEscapeUnicodeMap = new Map();

/**
 * Memoized wrapper around escapeUnicode.
 *
 * @param {string} sChar input to escape; used as the cache key
 * @returns {string} result of escapeUnicode(sChar), taken from the cache when available
 */
const memoizeEscapeUnicode = function(sChar) {
	let sEscaped = memoizeEscapeUnicodeMap.get(sChar);
	if (sEscaped === undefined) {
		// first occurrence of this input: compute once and remember the result
		sEscaped = escapeUnicode(sChar);
		memoizeEscapeUnicodeMap.set(sChar, sEscaped);
	}
	return sEscaped;
};
| 20 | + |
/**
 * Replaces every non ASCII character of the input with its unicode escape sequence.
 * The input is walked one UTF-16 code unit at a time; units with a char code above
 * the ASCII range are passed to memoizeEscapeUnicode, all others are copied as they are.
 *
 * @see https://en.wikipedia.org/wiki/ASCII
 * @see https://tools.ietf.org/html/rfc5137#section-6.1
 *
 * @param {string} string input string with non ascii characters, e.g. L♥VE
 * @returns {{string: (string), modified: boolean}} <code>string</code> is the output with all
 *   non ascii characters escaped, e.g. L\u2665VE; <code>modified</code> is true if at least
 *   one character was escaped
 */
const escapeNonAscii = function(string) {
	const pieces = [];
	let modified = false;
	for (let index = 0; index < string.length; index += 1) {
		const unit = string[index];
		// anything above the last ASCII char code needs an escape sequence
		const needsEscape = string.charCodeAt(index) > CHAR_CODE_OF_LAST_ASCII_CHARACTER;
		if (!needsEscape) {
			pieces.push(unit);
			continue;
		}
		pieces.push(memoizeEscapeUnicode(unit));
		modified = true;
	}
	return {
		modified,
		string: pieces.join("")
	};
};
| 51 | + |
| 52 | +/** |
| 53 | + * Escapes non ASCII characters with unicode escape sequences. |
| 54 | + * |
| 55 | + * @example |
| 56 | + * const encoding = nonAsciiEscaper.getEncodingFromAlias("ISO-8859-1"); |
| 57 | + * nonAsciiEscaper({resources, options: {encoding}}); |
| 58 | + * |
| 59 | + * |
| 60 | + * @public |
| 61 | + * @alias module:@ui5/builder.processors.nonAsciiEscaper |
| 62 | + * @param {Object} parameters Parameters |
| 63 | + * @param {module:@ui5/fs.Resource[]} parameters.resources List of resources to be processed |
| 64 | + * @param {Object} [parameters.options] Options |
| 65 | + * @param {string} [parameters.options.encoding="utf8"] resource file encoding (node.js based encodings). Use #getEncodingFromAlias to get the encoding string |
| 66 | + * {@link https://nodejs.org/api/buffer.html#buffer_buffers_and_character_encodings Node.js character encodings}; |
| 67 | + * @returns {Promise<module:@ui5/fs.Resource[]>} Promise resolving with the processed resources |
| 68 | + */ |
| 69 | +module.exports = async function nonAsciiEscaper({resources, options={}}) { |
| 70 | + const encoding = options.encoding || "utf8"; |
| 71 | + |
| 72 | + async function processResource(resource) { |
| 73 | + const resourceString = (await resource.getBuffer()).toString(encoding); |
| 74 | + const escaped = escapeNonAscii(resourceString); |
| 75 | + // only modify the resource's string if it was changed |
| 76 | + if (escaped.modified) { |
| 77 | + resource.setString(escaped.string); |
| 78 | + } |
| 79 | + return resource; |
| 80 | + } |
| 81 | + |
| 82 | + return Promise.all(resources.map(processResource)); |
| 83 | +}; |
| 84 | + |
// Supported encoding aliases and the Node.js encoding name each one maps to.
const encodingMap = {
	"UTF-8": "utf8",
	"ISO-8859-1": "latin1",
};

/**
 * Provides a mapping from user-friendly encoding name (alias) such as "UTF-8" and "ISO-8859-1" to node
 * specific encoding name such as "utf8" or "latin1". Simplifies usage of nonAsciiEscaper encoding
 * option such that it can be used standalone without the respective task (e.g. in Splitter, Bundler and related projects).
 *
 * @param {string} encoding encoding labels: "UTF-8" and "ISO-8859-1"
 * @returns {string} node.js character encoding string, e.g. utf8 and latin1
 * @throws {Error} if the given alias is not one of the supported encoding labels
 */
module.exports.getEncodingFromAlias = function(encoding) {
	// Only own keys count as aliases. A plain property lookup would also resolve inherited
	// members such as "constructor" or "toString" and return them instead of throwing.
	if (!Object.prototype.hasOwnProperty.call(encodingMap, encoding)) {
		throw new Error(`Encoding "${encoding}" is not supported. Only ${Object.keys(encodingMap).join(", ")} are allowed values`);
	}
	return encodingMap[encoding];
};
0 commit comments