|
| 1 | +/* |
| 2 | + * Copyright 2017 Sam Thorogood. All rights reserved. |
| 3 | + * |
| 4 | + * Licensed under the Apache License, Version 2.0 (the "License"); you may not |
| 5 | + * use this file except in compliance with the License. You may obtain a copy of |
| 6 | + * the License at |
| 7 | + * |
| 8 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | + * |
| 10 | + * Unless required by applicable law or agreed to in writing, software |
| 11 | + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| 12 | + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
| 13 | + * License for the specific language governing permissions and limitations under |
| 14 | + * the License. |
| 15 | + */ |
| 16 | + |
| 17 | +/** |
| 18 | + * @fileoverview Polyfill for TextEncoder and TextDecoder. |
| 19 | + * |
| 20 | + * You probably want `text.min.js`, and not this file directly. |
| 21 | + */ |
| 22 | + |
// Resolve the global object for hosts that expose neither `window` nor a
// Node-style `global` (the original marker here was "JSC" - presumably
// JavaScriptCore; TODO confirm). `globalThis` is preferred because indirect
// eval throws under a Content-Security-Policy that lacks 'unsafe-eval'; the
// eval form is kept only as a fallback for engines without `globalThis`.
const global = typeof globalThis !== 'undefined' ? globalThis : (0, eval)('this');
| 25 | + |
| 26 | +(function(scope) { |
| 27 | +'use strict'; |
| 28 | + |
| 29 | +// fail early |
| 30 | +if (scope['TextEncoder'] && scope['TextDecoder']) { |
| 31 | + return false; |
| 32 | +} |
| 33 | + |
| 34 | +// used for FastTextDecoder |
| 35 | +const validUtfLabels = ['utf-8', 'utf8', 'unicode-1-1-utf-8']; |
| 36 | + |
/**
 * UTF-8 only stand-in for the native `TextEncoder`.
 *
 * @constructor
 */
function FastTextEncoder() {
  // This does not accept an encoding, and always uses UTF-8:
  //   https://www.w3.org/TR/encoding/#dom-textencoder
}

Object.defineProperty(FastTextEncoder.prototype, 'encoding', {value: 'utf-8'});

/**
 * Encodes a string as UTF-8.
 *
 * The input is coerced with `String()`, and a missing argument encodes as the
 * empty string. Unpaired surrogates (high or low) are written as U+FFFD
 * (EF BF BD) so that the output is always well-formed UTF-8.
 *
 * @param {string=} string text to encode
 * @param {{stream: boolean}=} options only `stream: false` is supported
 * @return {!Uint8Array} the encoded bytes, sized exactly to the output
 * @throws {Error} if `options.stream` is truthy
 */
FastTextEncoder.prototype['encode'] = function(string = '', options = {stream: false}) {
  if (options && options.stream) {
    throw new Error(`Failed to encode: the 'stream' option is unsupported.`);
  }

  const input = String(string);
  const len = input.length;
  let pos = 0;

  let at = 0;  // output position
  let tlen = Math.max(32, len + (len >>> 1) + 7);  // 1.5x size
  let target = new Uint8Array((tlen >>> 3) << 3);  // ... rounded down to 8 bytes

  while (pos < len) {
    let value = input.charCodeAt(pos++);

    if ((value & 0xf800) === 0xd800) {
      // Surrogate code unit: only a high surrogate immediately followed by a
      // low surrogate forms a code point; anything else becomes U+FFFD.
      const next = pos < len ? input.charCodeAt(pos) : 0;
      if (value < 0xdc00 && (next & 0xfc00) === 0xdc00) {
        ++pos;
        value = ((value & 0x3ff) << 10) + (next & 0x3ff) + 0x10000;
      } else {
        value = 0xfffd;
      }
    }

    // Expand the buffer if we couldn't write 4 bytes (the longest sequence).
    if (at + 4 > target.length) {
      tlen += 8;  // minimum extra
      tlen *= (1.0 + (pos / len) * 2);  // grow proportionally to progress
      tlen = (tlen >>> 3) << 3;  // 8 byte multiple

      const update = new Uint8Array(tlen);
      update.set(target);
      target = update;
    }

    if (value < 0x80) {  // 1-byte (ASCII)
      target[at++] = value;
    } else if (value < 0x800) {  // 2-byte
      target[at++] = (value >>> 6) | 0xc0;
      target[at++] = (value & 0x3f) | 0x80;
    } else if (value < 0x10000) {  // 3-byte
      target[at++] = (value >>> 12) | 0xe0;
      target[at++] = ((value >>> 6) & 0x3f) | 0x80;
      target[at++] = (value & 0x3f) | 0x80;
    } else {  // 4-byte (value is at most 0x10ffff here)
      target[at++] = (value >>> 18) | 0xf0;
      target[at++] = ((value >>> 12) & 0x3f) | 0x80;
      target[at++] = ((value >>> 6) & 0x3f) | 0x80;
      target[at++] = (value & 0x3f) | 0x80;
    }
  }

  // Use subarray if slice isn't supported (IE11). This will use more memory
  // because the original array still exists.
  return target.slice ? target.slice(0, at) : target.subarray(0, at);
};
| 114 | + |
/**
 * UTF-8 only stand-in for the native `TextDecoder`.
 *
 * The label is matched after string coercion, whitespace trimming and
 * lower-casing, so values such as ' UTF-8 ' are accepted and any unsupported
 * or non-string label is reported as a RangeError.
 *
 * @constructor
 * @param {string=} utfLabel encoding label; must name UTF-8
 * @param {{fatal: boolean}=} options accepted for API compatibility only
 * @throws {RangeError} if the label is not one of the supported UTF-8 labels
 */
function FastTextDecoder(utfLabel = 'utf-8', options = {fatal: false}) {
  const label = String(utfLabel).trim().toLowerCase();
  if (validUtfLabels.indexOf(label) === -1) {
    throw new RangeError(
      `Failed to construct 'TextDecoder': The encoding label provided ('${utfLabel}') is invalid.`);
  }
  // `options.fatal` is deliberately not rejected: it is ignored, and every
  // instance reports `fatal === false` through the prototype property below.
}

Object.defineProperty(FastTextDecoder.prototype, 'encoding', {value: 'utf-8'});

Object.defineProperty(FastTextDecoder.prototype, 'fatal', {value: false});

Object.defineProperty(FastTextDecoder.prototype, 'ignoreBOM', {value: false});
| 135 | + |
/**
 * Decodes UTF-8 bytes with Node's `Buffer`.
 *
 * The Buffer is created over the same memory as `bytes` (no copy). A leading
 * UTF-8 byte order mark (EF BB BF) is skipped, because `Buffer#toString` would
 * otherwise keep it as U+FEFF while the decoder reports `ignoreBOM === false`.
 *
 * @param {!Uint8Array} bytes
 * @return {string}
 */
function decodeBuffer(bytes) {
  let offset = bytes.byteOffset;
  let length = bytes.byteLength;

  if (length >= 3 && bytes[0] === 0xef && bytes[1] === 0xbb && bytes[2] === 0xbf) {
    offset += 3;
    length -= 3;
  }

  return Buffer.from(bytes.buffer, offset, length).toString('utf-8');
}
| 143 | + |
/**
 * Decodes bytes by wrapping them in a UTF-8 text Blob and reading that Blob
 * back through a synchronous XMLHttpRequest on an object URL. If the request
 * throws, the result of `decodeFallback` is returned instead. The object URL
 * is always revoked before returning.
 *
 * @param {!Uint8Array} bytes
 * @return {string}
 */
function decodeSyncXHR(bytes) {
  const blob = new Blob([bytes], {type: 'text/plain;charset=UTF-8'});
  const objectUrl = URL.createObjectURL(blob);

  // Sync XHRs are disabled in non-Edgium Edge (and possibly elsewhere), so a
  // failure here must fall through to the pure-JS decoder.
  try {
    const request = new XMLHttpRequest();
    request.open('GET', objectUrl, false);
    request.send();
    return request.responseText;
  } catch (e) {
    return decodeFallback(bytes);
  } finally {
    URL.revokeObjectURL(objectUrl);
  }
}
| 165 | + |
/**
 * Pure-JS UTF-8 decoder used when no faster host facility is available.
 *
 * Malformed input is handled like the native decoder in non-fatal mode:
 * invalid lead bytes, unexpected or missing continuation bytes, overlong
 * forms, encoded surrogates and values above U+10FFFF each yield U+FFFD
 * rather than being parsed blindly. A leading byte order mark (EF BB BF) is
 * skipped, matching `ignoreBOM === false`.
 *
 * @param {!Uint8Array} bytes
 * @return {string}
 */
function decodeFallback(bytes) {
  const length = bytes.length;

  // Working buffer of UTF-16 code units. Every input byte yields at most one
  // code unit, so small inputs never need more than `length + 1` slots; large
  // inputs are emitted in chunks to keep the `.apply` argument list bounded.
  const pendingSize = Math.min(256 * 256, length + 1);
  const pending = new Uint16Array(pendingSize);
  const chunks = [];
  let pendingIndex = 0;

  // Decoder state: the code point being assembled, how many continuation
  // bytes it needs / has seen, and the allowed range for the next byte.
  let codePoint = 0;
  let bytesNeeded = 0;
  let bytesSeen = 0;
  let lower = 0x80;
  let upper = 0xbf;

  const hasBom = length >= 3 && bytes[0] === 0xef && bytes[1] === 0xbb && bytes[2] === 0xbf;
  let inputIndex = hasBom ? 3 : 0;

  while (inputIndex < length) {
    // Always keep room for two code units so that a surrogate pair is never
    // split across chunks.
    if (pendingIndex >= pendingSize - 1) {
      // nb. .apply and friends are *really slow*, but expanding the call by
      // hand makes the code grow very quickly.
      chunks.push(String.fromCharCode.apply(null, pending.subarray(0, pendingIndex)));
      pendingIndex = 0;
    }

    const byte = bytes[inputIndex];

    if (bytesNeeded === 0) {
      ++inputIndex;
      if (byte < 0x80) {  // ASCII
        pending[pendingIndex++] = byte;
      } else if (byte >= 0xc2 && byte <= 0xdf) {  // 2-byte lead
        bytesNeeded = 1;
        codePoint = byte & 0x1f;
      } else if (byte >= 0xe0 && byte <= 0xef) {  // 3-byte lead
        if (byte === 0xe0) {
          lower = 0xa0;  // rejects overlong 3-byte forms
        } else if (byte === 0xed) {
          upper = 0x9f;  // rejects encoded surrogates
        }
        bytesNeeded = 2;
        codePoint = byte & 0x0f;
      } else if (byte >= 0xf0 && byte <= 0xf4) {  // 4-byte lead
        if (byte === 0xf0) {
          lower = 0x90;  // rejects overlong 4-byte forms
        } else if (byte === 0xf4) {
          upper = 0x8f;  // rejects values above U+10FFFF
        }
        bytesNeeded = 3;
        codePoint = byte & 0x07;
      } else {
        pending[pendingIndex++] = 0xfffd;  // invalid lead byte
      }
      continue;
    }

    if (byte < lower || byte > upper) {
      // Bad continuation: replace the partial sequence and re-read this byte
      // as a potential lead byte (inputIndex is intentionally not advanced).
      codePoint = 0;
      bytesNeeded = 0;
      bytesSeen = 0;
      lower = 0x80;
      upper = 0xbf;
      pending[pendingIndex++] = 0xfffd;
      continue;
    }

    ++inputIndex;
    lower = 0x80;
    upper = 0xbf;
    codePoint = (codePoint << 6) | (byte & 0x3f);
    if (++bytesSeen !== bytesNeeded) {
      continue;
    }

    if (codePoint > 0xffff) {
      // Outside the BMP: emit a surrogate pair.
      codePoint -= 0x10000;
      pending[pendingIndex++] = 0xd800 | (codePoint >>> 10);
      pending[pendingIndex++] = 0xdc00 | (codePoint & 0x3ff);
    } else {
      pending[pendingIndex++] = codePoint;
    }
    codePoint = 0;
    bytesNeeded = 0;
    bytesSeen = 0;
  }

  if (bytesNeeded !== 0) {
    // Input ended inside a multi-byte sequence. The final loop iteration wrote
    // nothing in this case, so a free slot is guaranteed.
    pending[pendingIndex++] = 0xfffd;
  }

  chunks.push(String.fromCharCode.apply(null, pending.subarray(0, pendingIndex)));
  return chunks.join('');
}
| 238 | + |
// Pick the decoding strategy once, up front. The pure-JS decoder is slow, so
// a host-provided route is chosen whenever one is detected.
let decodeImpl;
if (typeof Buffer === 'function' && Buffer.from) {
  // Node-style host: decode through Buffer.
  decodeImpl = decodeBuffer;
} else if (typeof Blob === 'function' && typeof URL === 'function' && typeof URL.createObjectURL === 'function') {
  // Browser-style host with Blob and object URLs: decode through a sync XHR.
  decodeImpl = decodeSyncXHR;
} else {
  decodeImpl = decodeFallback;
}
| 249 | + |
/**
 * Decodes UTF-8 bytes into a string.
 *
 * Accepts a Uint8Array, any other ArrayBuffer view (typed array or DataView),
 * an ArrayBuffer, or an array-like of byte values. For views, only the window
 * described by `byteOffset` / `byteLength` is decoded, not the whole backing
 * buffer. A missing or falsy input decodes to the empty string.
 *
 * @param {(!ArrayBuffer|!ArrayBufferView)=} buffer
 * @param {{stream: boolean}=} options only `stream: false` is supported
 * @return {string}
 * @throws {Error} if `options.stream` is truthy
 */
FastTextDecoder.prototype['decode'] = function(buffer, options = {stream: false}) {
  if (options && options['stream']) {
    throw new Error(`Failed to decode: the 'stream' option is unsupported.`);
  }

  if (!buffer) {
    return '';
  }

  let bytes;

  if (buffer instanceof Uint8Array) {
    // Accept Uint8Array instances as-is.
    bytes = buffer;
  } else if (buffer.buffer instanceof ArrayBuffer) {
    // ArrayBufferView isn't a real type, but covers every TypedArray plus
    // DataView; all of them expose ".buffer" as an ArrayBuffer. Respect the
    // view's own offset and length so bytes outside the view are not decoded.
    bytes = new Uint8Array(buffer.buffer, buffer.byteOffset, buffer.byteLength);
  } else {
    // The only other valid argument here is that "buffer" is an ArrayBuffer.
    // We also try to convert anything else passed to a Uint8Array, as this
    // catches anything that's array-like. Native code would throw here.
    bytes = new Uint8Array(buffer);
  }

  return decodeImpl(/** @type {!Uint8Array} */ (bytes));
};
| 282 | + |
// Install only what is missing: when the host ships one native class but not
// the other, the native one is kept rather than replaced by the polyfill.
scope['TextEncoder'] = scope['TextEncoder'] || FastTextEncoder;
scope['TextDecoder'] = scope['TextDecoder'] || FastTextDecoder;
| 285 | + |
| 286 | +}(typeof window !== 'undefined' ? window : (typeof global !== 'undefined' ? global : this))); |
0 commit comments