Skip to content

Commit 109404f

Browse files
committed
Use max. 4 UTF8 bytes, see #19
1 parent ac00eff commit 109404f

File tree

9 files changed

+342
-259
lines changed

9 files changed

+342
-259
lines changed

README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ support, you can skip the Long.js include.
5353

5454
```html
5555
<script src="Long.min.js"></script>
56-
<script src="ByteBuffer.min.js"></script>
56+
<script src="ByteBufferAB.min.js"></script>
5757
```
5858

5959
```javascript
@@ -72,13 +72,13 @@ support, you can skip the Long.js config. [Require.js](http://requirejs.org/) ex
7272
require.config({
7373
"paths": {
7474
"Long": "/path/to/Long.js",
75-
"ByteBuffer": "/path/to/ByteBuffer.js"
75+
"ByteBuffer": "/path/to/ByteBufferAB.js"
7676
}
7777
});
7878
require(["ByteBuffer"], function(ByteBuffer) {
7979
var bb = new ByteBuffer();
80-
bb.writeLString("Hello world!").flip();
81-
alert(bb.readLString()+" from ByteBuffer.js");
80+
bb.writeIString("Hello world!").flip();
81+
alert(bb.readIString()+" from ByteBuffer.js");
8282
});
8383
```
8484

dist/ByteBufferAB.js

Lines changed: 47 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -1822,14 +1822,14 @@
18221822
ByteBuffer.prototype.writeString = ByteBuffer.prototype.writeUTF8String;
18231823

18241824
/**
1825-
* Calculates the length of a string when being encoded as UTF8. JavaScript itself uses UTF-16, so that a string's
1826-
* `length` property does not reflect its actual UTF8 length if it contains code points larger than 0xFFFF.
1825+
* Calculates the number of UTF8 characters (code points) of a string. JavaScript itself uses UTF-16, so a string's
1826+
* `length` property counts UTF-16 code units and overstates the character count if the string contains code points larger than 0xFFFF.
18271827
* @function
18281828
* @param {string} str String to calculate
1829-
* @returns {number} UTF8 length
1829+
* @returns {number} Number of UTF8 characters
18301830
* @expose
18311831
*/
1832-
ByteBuffer.calculateUTF8String = function(str) {
1832+
ByteBuffer.calculateUTF8Chars = function(str) {
18331833
var i = 0, n = 0;
18341834
while (i < str.length) {
18351835
i += str.codePointAt(i) < 0xFFFF ? 1 : 2;
@@ -1838,6 +1838,16 @@
18381838
return n;
18391839
};
18401840

1841+
/**
1842+
* Calculates the number of bytes required to encode a string as UTF8 by delegating to the inner utf8_calc_string helper.
1843+
* @param {string} str String to calculate
1844+
* @returns {number} Number of UTF8 bytes
1845+
* @expose
1846+
*/
1847+
ByteBuffer.calculateUTF8Bytes = function(str) {
1848+
return utf8_calc_string(str);
1849+
};
1850+
18411851
/**
18421852
* Reads an UTF8 encoded string.
18431853
* @param {number} length Number of characters or bytes to read
@@ -3279,19 +3289,6 @@
32793289
c = bb.view.getUint8(offset++);
32803290
d = bb.view.getUint8(offset++);
32813291
codePoint = ((a&0x07)<<18) | ((b&0x3F)<<12) | ((c&0x3F)<<6) | (d&0x3F);
3282-
} else if ((a&0xFC) === 0xF8) {
3283-
b = bb.view.getUint8(offset++);
3284-
c = bb.view.getUint8(offset++);
3285-
d = bb.view.getUint8(offset++);
3286-
e = bb.view.getUint8(offset++);
3287-
codePoint = ((a&0x03)<<24) | ((b&0x3F)<<18) | ((c&0x3F)<<12) | ((d&0x3F)<<6) | (e&0x3F);
3288-
} else if ((a&0xFE) === 0xFC) {
3289-
b = bb.view.getUint8(offset++);
3290-
c = bb.view.getUint8(offset++);
3291-
d = bb.view.getUint8(offset++);
3292-
e = bb.view.getUint8(offset++);
3293-
f = bb.view.getUint8(offset++);
3294-
codePoint = ((a&0x01)<<30) | ((b&0x3F)<<24) | ((c&0x3F)<<18) | ((d&0x3F)<<12) | ((e&0x3F)<<6) | (f&0x3F);
32953292
} else
32963293
throw(new RangeError("Illegal code point at offset "+offset+": 0x"+a.toString(16)));
32973294
return {
@@ -3300,6 +3297,38 @@
33003297
};
33013298
}
33023299

3300+
/**
 * Calculates the actual number of bytes required to encode the specified code point as UTF8.
 * @param {number} codePoint Code point to encode
 * @returns {number} Number of bytes required to encode the specified code point (1 to 4)
 * @throws {RangeError} If the code point is negative or not less than 0x110000
 * @inner
 * @see http://en.wikipedia.org/wiki/UTF-8#Description
 */
function utf8_calc_char(codePoint) {
    if (codePoint < 0)
        throw(new RangeError("Illegal code point: -0x"+(-codePoint).toString(16)));
    if      (codePoint < 0x80) return 1;
    else if (codePoint < 0x800) return 2;
    else if (codePoint < 0x10000) return 3;
    else if (codePoint < 0x110000) return 4;
    else throw(new RangeError("Illegal code point: 0x"+codePoint.toString(16)));
}

/**
 * Calculates the number of bytes required to store an UTF8 encoded string.
 * @param {string} str String to calculate
 * @returns {number} Number of bytes required
 * @throws {RangeError} If utf8_calc_char rejects one of the string's code points
 * @inner
 */
function utf8_calc_string(str) {
    var i = 0, cp, n = 0;
    while (i < str.length) {
        n += utf8_calc_char(cp = str.codePointAt(i));
        // Only code points above 0xFFFF occupy two UTF-16 code units (a surrogate pair).
        // U+FFFF itself is a single unit, so the comparison must be inclusive; otherwise
        // the character following U+FFFF would be skipped and the result undercounted.
        i += cp <= 0xFFFF ? 1 : 2;
    }
    return n;
}
3331+
33033332
/**
33043333
* Encodes a single UTF8 character to the specified ByteBuffer backed by an ArrayBuffer. The ByteBuffer's offsets are
33053334
* not modified.
@@ -3323,63 +3352,16 @@
33233352
bb.view.setUint8(offset++, ((codePoint>>12)&0x0F)|0xE0);
33243353
bb.view.setUint8(offset++, ((codePoint>>6 )&0x3F)|0x80);
33253354
bb.view.setUint8(offset++, ( codePoint &0x3F)|0x80);
3326-
} else if (codePoint < 0x200000) {
3355+
} else if (codePoint < 0x110000) {
33273356
bb.view.setUint8(offset++, ((codePoint>>18)&0x07)|0xF0);
33283357
bb.view.setUint8(offset++, ((codePoint>>12)&0x3F)|0x80);
33293358
bb.view.setUint8(offset++, ((codePoint>>6 )&0x3F)|0x80);
33303359
bb.view.setUint8(offset++, ( codePoint &0x3F)|0x80);
3331-
} else if (codePoint < 0x4000000) {
3332-
bb.view.setUint8(offset++, ((codePoint>>24)&0x03)|0xF8);
3333-
bb.view.setUint8(offset++, ((codePoint>>18)&0x3F)|0x80);
3334-
bb.view.setUint8(offset++, ((codePoint>>12)&0x3F)|0x80);
3335-
bb.view.setUint8(offset++, ((codePoint>>6 )&0x3F)|0x80);
3336-
bb.view.setUint8(offset++, ( codePoint &0x3F)|0x80);
3337-
} else if (codePoint < 0x80000000) {
3338-
bb.view.setUint8(offset++, ((codePoint>>30)&0x01)|0xFC);
3339-
bb.view.setUint8(offset++, ((codePoint>>24)&0x3F)|0x80);
3340-
bb.view.setUint8(offset++, ((codePoint>>18)&0x3F)|0x80);
3341-
bb.view.setUint8(offset++, ((codePoint>>12)&0x3F)|0x80);
3342-
bb.view.setUint8(offset++, ((codePoint>>6 )&0x3F)|0x80);
3343-
bb.view.setUint8(offset++, ( codePoint &0x3F)|0x80);
33443360
} else
33453361
throw(new RangeError("Illegal code point: 0x"+codePoint.toString(16)));
33463362
return offset - start;
33473363
}
33483364

3349-
/**
3350-
* Calculates the actual number of bytes required to encode the specified char code.
3351-
* @param {number} codePoint Code point to encode
3352-
* @returns {number} Number of bytes required to encode the specified code point
3353-
* @inner
3354-
* @see http://en.wikipedia.org/wiki/UTF-8#Description
3355-
*/
3356-
function utf8_calc_char(codePoint) {
3357-
if (codePoint < 0)
3358-
throw(new RangeError("Illegal code point: -0x"+(-codePoint).toString(16)));
3359-
if (codePoint < 0x80) return 1;
3360-
else if (codePoint < 0x800) return 2;
3361-
else if (codePoint < 0x10000) return 3;
3362-
else if (codePoint < 0x200000) return 4;
3363-
else if (codePoint < 0x4000000) return 5;
3364-
else if (codePoint < 0x80000000) return 6;
3365-
else throw(new RangeError("Illegal code point: 0x"+codePoint.toString(16)));
3366-
}
3367-
3368-
/**
3369-
* Calculates the number of bytes required to store an UTF8 encoded string.
3370-
* @param {string} str String to calculate
3371-
* @returns {number} Number of bytes required
3372-
* @inner
3373-
*/
3374-
function utf8_calc_string(str) {
3375-
var i = 0, cp, n = 0;
3376-
while (i < str.length) {
3377-
n += utf8_calc_char(cp = str.codePointAt(i));
3378-
i += cp < 0xFFFF ? 1 : 2;
3379-
}
3380-
return n;
3381-
}
3382-
33833365

33843366
// encodings/utf8
33853367

0 commit comments

Comments
 (0)