|
1822 | 1822 | ByteBuffer.prototype.writeString = ByteBuffer.prototype.writeUTF8String;
|
1823 | 1823 |
|
1824 | 1824 | /**
|
1825 |
| - * Calculates the length of a string when being encoded as UTF8. JavaScript itself uses UTF-16, so that a string's |
1826 |
| - * `length` property does not reflect its actual UTF8 length if it contains code points larger than 0xFFFF. |
| 1825 | + * Calculates the number of UTF8 characters of a string. JavaScript itself uses UTF-16, so that a string's |
| 1826 | + * `length` property does not reflect its actual UTF8 size if it contains code points larger than 0xFFFF. |
1827 | 1827 | * @function
|
1828 | 1828 | * @param {string} str String to calculate
|
1829 |
| - * @returns {number} UTF8 length |
| 1829 | + * @returns {number} Number of UTF8 characters |
1830 | 1830 | * @expose
|
1831 | 1831 | */
|
1832 |
| - ByteBuffer.calculateUTF8String = function(str) { |
| 1832 | + ByteBuffer.calculateUTF8Chars = function(str) { |
1833 | 1833 | var i = 0, n = 0;
|
1834 | 1834 | while (i < str.length) {
|
1835 | 1835 | i += str.codePointAt(i) < 0xFFFF ? 1 : 2;
|
|
1838 | 1838 | return n;
|
1839 | 1839 | };
|
1840 | 1840 |
|
/**
 * Computes how many bytes a string occupies once it is encoded as UTF8.
 * @param {string} str String to measure
 * @returns {number} Byte count as reported by `utf8_calc_string`
 * @expose
 */
ByteBuffer.calculateUTF8Bytes = function(str) {
    var byteCount = utf8_calc_string(str);
    return byteCount;
};
| 1850 | + |
1841 | 1851 | /**
|
1842 | 1852 | * Reads an UTF8 encoded string.
|
1843 | 1853 | * @param {number} length Number of characters or bytes to read
|
|
3279 | 3289 | c = bb.view.getUint8(offset++);
|
3280 | 3290 | d = bb.view.getUint8(offset++);
|
3281 | 3291 | codePoint = ((a&0x07)<<18) | ((b&0x3F)<<12) | ((c&0x3F)<<6) | (d&0x3F);
|
3282 |
| - } else if ((a&0xFC) === 0xF8) { |
3283 |
| - b = bb.view.getUint8(offset++); |
3284 |
| - c = bb.view.getUint8(offset++); |
3285 |
| - d = bb.view.getUint8(offset++); |
3286 |
| - e = bb.view.getUint8(offset++); |
3287 |
| - codePoint = ((a&0x03)<<24) | ((b&0x3F)<<18) | ((c&0x3F)<<12) | ((d&0x3F)<<6) | (e&0x3F); |
3288 |
| - } else if ((a&0xFE) === 0xFC) { |
3289 |
| - b = bb.view.getUint8(offset++); |
3290 |
| - c = bb.view.getUint8(offset++); |
3291 |
| - d = bb.view.getUint8(offset++); |
3292 |
| - e = bb.view.getUint8(offset++); |
3293 |
| - f = bb.view.getUint8(offset++); |
3294 |
| - codePoint = ((a&0x01)<<30) | ((b&0x3F)<<24) | ((c&0x3F)<<18) | ((d&0x3F)<<12) | ((e&0x3F)<<6) | (f&0x3F); |
3295 | 3292 | } else
|
3296 | 3293 | throw(new RangeError("Illegal code point at offset "+offset+": 0x"+a.toString(16)));
|
3297 | 3294 | return {
|
|
3300 | 3297 | };
|
3301 | 3298 | }
|
3302 | 3299 |
|
/**
 * Calculates the number of bytes required to encode a single code point as UTF8.
 * Code points below 0x80 take 1 byte, below 0x800 take 2 bytes, below 0x10000 take 3 bytes and below 0x110000
 * take 4 bytes.
 * @param {number} codePoint Code point to encode
 * @returns {number} Number of bytes required to encode the specified code point (1 to 4)
 * @throws {RangeError} If `codePoint` is negative, is 0x110000 or larger, or does not compare as a number
 * @inner
 * @see http://en.wikipedia.org/wiki/UTF-8#Description
 */
function utf8_calc_char(codePoint) {
    if (codePoint < 0)
        throw(new RangeError("Illegal code point: -0x"+(-codePoint).toString(16)));
    if (codePoint < 0x80) return 1;
    else if (codePoint < 0x800) return 2;
    else if (codePoint < 0x10000) return 3;
    else if (codePoint < 0x110000) return 4;
    // Values such as undefined or NaN fail every comparison above and land here. Convert through Number() before
    // formatting so that building the message cannot itself raise a TypeError (undefined has no toString).
    else throw(new RangeError("Illegal code point: 0x"+Number(codePoint).toString(16)));
}
| 3316 | + |
/**
 * Calculates the number of bytes required to store an UTF8 encoded string.
 * Walks the string code point by code point and sums the per-code-point byte counts from `utf8_calc_char`.
 * @param {string} str String to calculate
 * @returns {number} Number of bytes required (0 for an empty string)
 * @inner
 */
function utf8_calc_string(str) {
    var i = 0, cp, n = 0;
    while (i < str.length) {
        n += utf8_calc_char(cp = str.codePointAt(i));
        // Code points up to and including 0xFFFF occupy a single UTF-16 unit; only larger ones are stored as a
        // surrogate pair. Using `<` here would skip the character that follows U+FFFF.
        i += cp <= 0xFFFF ? 1 : 2;
    }
    return n;
}
| 3331 | + |
3303 | 3332 | /**
|
3304 | 3333 | * Encodes a single UTF8 character to the specified ByteBuffer backed by an ArrayBuffer. The ByteBuffer's offsets are
|
3305 | 3334 | * not modified.
|
|
3323 | 3352 | bb.view.setUint8(offset++, ((codePoint>>12)&0x0F)|0xE0);
|
3324 | 3353 | bb.view.setUint8(offset++, ((codePoint>>6 )&0x3F)|0x80);
|
3325 | 3354 | bb.view.setUint8(offset++, ( codePoint &0x3F)|0x80);
|
3326 |
| - } else if (codePoint < 0x200000) { |
| 3355 | + } else if (codePoint < 0x110000) { |
3327 | 3356 | bb.view.setUint8(offset++, ((codePoint>>18)&0x07)|0xF0);
|
3328 | 3357 | bb.view.setUint8(offset++, ((codePoint>>12)&0x3F)|0x80);
|
3329 | 3358 | bb.view.setUint8(offset++, ((codePoint>>6 )&0x3F)|0x80);
|
3330 | 3359 | bb.view.setUint8(offset++, ( codePoint &0x3F)|0x80);
|
3331 |
| - } else if (codePoint < 0x4000000) { |
3332 |
| - bb.view.setUint8(offset++, ((codePoint>>24)&0x03)|0xF8); |
3333 |
| - bb.view.setUint8(offset++, ((codePoint>>18)&0x3F)|0x80); |
3334 |
| - bb.view.setUint8(offset++, ((codePoint>>12)&0x3F)|0x80); |
3335 |
| - bb.view.setUint8(offset++, ((codePoint>>6 )&0x3F)|0x80); |
3336 |
| - bb.view.setUint8(offset++, ( codePoint &0x3F)|0x80); |
3337 |
| - } else if (codePoint < 0x80000000) { |
3338 |
| - bb.view.setUint8(offset++, ((codePoint>>30)&0x01)|0xFC); |
3339 |
| - bb.view.setUint8(offset++, ((codePoint>>24)&0x3F)|0x80); |
3340 |
| - bb.view.setUint8(offset++, ((codePoint>>18)&0x3F)|0x80); |
3341 |
| - bb.view.setUint8(offset++, ((codePoint>>12)&0x3F)|0x80); |
3342 |
| - bb.view.setUint8(offset++, ((codePoint>>6 )&0x3F)|0x80); |
3343 |
| - bb.view.setUint8(offset++, ( codePoint &0x3F)|0x80); |
3344 | 3360 | } else
|
3345 | 3361 | throw(new RangeError("Illegal code point: 0x"+codePoint.toString(16)));
|
3346 | 3362 | return offset - start;
|
3347 | 3363 | }
|
3348 | 3364 |
|
3349 |
| - /** |
3350 |
| - * Calculates the actual number of bytes required to encode the specified char code. |
3351 |
| - * @param {number} codePoint Code point to encode |
3352 |
| - * @returns {number} Number of bytes required to encode the specified code point |
3353 |
| - * @inner |
3354 |
| - * @see http://en.wikipedia.org/wiki/UTF-8#Description |
3355 |
| - */ |
3356 |
| - function utf8_calc_char(codePoint) { |
3357 |
| - if (codePoint < 0) |
3358 |
| - throw(new RangeError("Illegal code point: -0x"+(-codePoint).toString(16))); |
3359 |
| - if (codePoint < 0x80) return 1; |
3360 |
| - else if (codePoint < 0x800) return 2; |
3361 |
| - else if (codePoint < 0x10000) return 3; |
3362 |
| - else if (codePoint < 0x200000) return 4; |
3363 |
| - else if (codePoint < 0x4000000) return 5; |
3364 |
| - else if (codePoint < 0x80000000) return 6; |
3365 |
| - else throw(new RangeError("Illegal code point: 0x"+codePoint.toString(16))); |
3366 |
| - } |
3367 |
| - |
3368 |
| - /** |
3369 |
| - * Calculates the number of bytes required to store an UTF8 encoded string. |
3370 |
| - * @param {string} str String to calculate |
3371 |
| - * @returns {number} Number of bytes required |
3372 |
| - * @inner |
3373 |
| - */ |
3374 |
| - function utf8_calc_string(str) { |
3375 |
| - var i = 0, cp, n = 0; |
3376 |
| - while (i < str.length) { |
3377 |
| - n += utf8_calc_char(cp = str.codePointAt(i)); |
3378 |
| - i += cp < 0xFFFF ? 1 : 2; |
3379 |
| - } |
3380 |
| - return n; |
3381 |
| - } |
3382 |
| - |
3383 | 3365 |
|
3384 | 3366 | // encodings/utf8
|
3385 | 3367 |
|
|
0 commit comments