/*
* DAPjs
* Copyright Arm Limited 2020
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/

// https://github.com/anonyco/FastestSmallestTextEncoderDecoder

/** Matches a lead byte at the very end of the data, plus any continuation bytes that follow it. */
const PARTIAL_CHAR_TEST = /[\xc0-\xff][\x80-\xbf]*$/;
/** Matches a lead byte with its continuation bytes, or a run of continuation bytes that has no lead byte. */
const DOUBLE_BYTE_REPLACE = /[\xc0-\xff][\x80-\xbf]*|[\x80-\xbf]+/g;
/** U+FFFD, emitted once for every byte of a malformed sequence. */
const REPLACEMENT_CHAR = "\ufffd";
/** Maximum number of bytes passed to a single String.fromCharCode call. */
const BYTE_CHUNK_SIZE = 0x8000;

/**
 * Streaming UTF-8 decoder.
 *
 * Bytes are first widened one-to-one into a "binary" string (one UTF-16 code
 * unit per byte) and multi-byte sequences are then collapsed into the
 * characters they encode. A multi-byte sequence that is cut off at the end of
 * one buffer is kept and completed by the next call to `decode`. There is no
 * flush operation: an incomplete sequence at the end of the final buffer
 * stays buffered and is never emitted.
 */
export class TextDecoder {

    /** Bytes (as a binary string) of a multi-byte sequence still waiting for its remaining bytes */
    private partialChar: string | undefined;

    /**
     * Decode an ArrayBuffer to a string, handling multi-byte characters
     *
     * Malformed input (stray continuation bytes, invalid lead bytes, overlong
     * encodings, encoded surrogates, values above U+10FFFF) yields one U+FFFD
     * per offending byte.
     *
     * @param input The ArrayBuffer to decode
     * @returns The characters that could be fully decoded so far
     */
    public decode(input: ArrayBuffer): string {
        const bytes = new Uint8Array(input);

        // Start from whatever the previous call had to hold back
        let data = this.partialChar || "";
        this.partialChar = undefined;

        // Convert in bounded chunks: passing every byte as a separate argument in
        // one call overflows the engine's argument limit for large buffers
        for (let offset = 0; offset < bytes.length; offset += BYTE_CHUNK_SIZE) {
            const chunk: number[] = Array.prototype.slice.call(bytes, offset, offset + BYTE_CHUNK_SIZE);
            data += String.fromCharCode.apply(undefined, chunk);
        }

        const match = PARTIAL_CHAR_TEST.exec(data);
        if (match) {
            const tail = match[0];
            const expected = TextDecoder.sequenceLength(tail.charCodeAt(0));
            // Only hold the tail back when it is genuinely incomplete; a complete
            // (or irrecoverably invalid) sequence is decoded straight away
            if (expected !== 0 && tail.length < expected) {
                this.partialChar = tail;
                data = data.slice(0, match.index);
            }
        }

        return data.replace(DOUBLE_BYTE_REPLACE, sequence => TextDecoder.decodeSequence(sequence));
    }

    /**
     * Number of bytes announced by a UTF-8 lead byte
     * @param byte The first byte of a sequence
     * @returns 2, 3 or 4 for a valid lead byte, otherwise 0
     */
    private static sequenceLength(byte: number): number {
        // The count of leading one bits in the lead byte is the sequence length
        const leadingOnes = Math.clz32(~(byte << 24));
        return leadingOnes >= 2 && leadingOnes <= 4 ? leadingOnes : 0;
    }

    /**
     * Decode one run matched by DOUBLE_BYTE_REPLACE
     * @param encoded A lead byte followed by continuation bytes, or a run of stray continuation bytes
     * @returns The decoded character followed by one U+FFFD per surplus byte,
     * or one U+FFFD per byte when the run is not a well-formed sequence
     */
    private static decodeSequence(encoded: string): string {
        const length = TextDecoder.sequenceLength(encoded.charCodeAt(0));

        if (length !== 0 && encoded.length >= length) {
            // Payload bits of the lead byte, then six bits from each continuation byte
            let codePoint = encoded.charCodeAt(0) & (0xff >> (length + 1));
            for (let index = 1; index < length; index = index + 1) {
                codePoint = (codePoint << 6) | (encoded.charCodeAt(index) & 0x3f);
            }

            // Smallest value that legitimately needs this many bytes (rejects overlong forms)
            const minimum = length === 2 ? 0x80 : length === 3 ? 0x800 : 0x10000;
            const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;

            if (codePoint >= minimum && codePoint <= 0x10ffff && !isSurrogate) {
                // Continuation bytes beyond the announced length are invalid on their own
                return String.fromCodePoint(codePoint) + REPLACEMENT_CHAR.repeat(encoded.length - length);
            }
        }

        return REPLACEMENT_CHAR.repeat(encoded.length);
    }
}
0 commit comments