Skip to content

Commit 1c10732

Browse files
committed
Preparations to replace utfx with something efficient eventually, see #60
1 parent ffd2d49 commit 1c10732

File tree

4 files changed

+150
-0
lines changed

4 files changed

+150
-0
lines changed

dist/bytebuffer-dataview.min.js.gz

0 Bytes
Binary file not shown.

dist/bytebuffer.min.js.gz

0 Bytes
Binary file not shown.

src/encodings/impl/utf8.js

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
// encodings/impl/utf8
2+
3+
/**
 * Writes the UTF8 representation of a range of a JavaScript (UTF16) string into a ByteBuffer.
 * The bytes are stored through the SET macro, which is expanded at build time.
 * @param {string} src String to read char codes from
 * @param {number} srcOffset Index of the first char code to read
 * @param {!ByteBuffer} dst ByteBuffer receiving the bytes
 * @param {number} dstOffset Offset within `dst` of the first byte written
 * @param {number} count How many char codes to consume
 * @returns {number} How many bytes were produced
 * @throws {Error} If the range ends directly after the first unit of a surrogate pair
 * @inner
 */
function bytebuffer_utf8_encode(src, srcOffset, dst, dstOffset, count) {
    var produced = 0,
        cc, low;
    if (count === 0)
        return produced;
    //? // SET(varValue, varOffset, varTarget) with varTarget referencing a ByteBuffer
    for (;;) {
        cc = src.charCodeAt(srcOffset++);
        --count;
        if (cc < 0x80) {
            // one byte: plain ASCII
            //? SET('cc', 'dstOffset++', 'dst');
            produced += 1;
        } else if (cc < 0x800) {
            // two bytes
            //? SET('0xC0 | (cc >> 6)', 'dstOffset++', 'dst');
            //? SET('0x80 | (cc & 0x3F)', 'dstOffset++', 'dst');
            produced += 2;
        } else if (cc < 0xD800 || cc >= 0xE000) {
            // three bytes: rest of the BMP outside the surrogate range
            //? SET('0xE0 | (cc >> 12)', 'dstOffset++', 'dst');
            //? SET('0x80 | ((cc >> 6) & 0x3F)', 'dstOffset++', 'dst');
            //? SET('0x80 | (cc & 0x3F)', 'dstOffset++', 'dst');
            produced += 3;
        } else {
            // four bytes: combine this unit with the following one into a single code point
            if (count === 0)
                throw Error("truncated utf8 surrogate");
            low = src.charCodeAt(srcOffset++);
            --count;
            cc = 0x10000 + (((cc & 0x3FF) << 10) | (low & 0x3FF));
            //? SET('0xF0 | (cc >> 18)', 'dstOffset++', 'dst');
            //? SET('0x80 | ((cc >> 12) & 0x3F)', 'dstOffset++', 'dst');
            //? SET('0x80 | ((cc >> 6) & 0x3F)', 'dstOffset++', 'dst');
            //? SET('0x80 | (cc & 0x3F)', 'dstOffset++', 'dst');
            produced += 4;
        }
        // at least one pass always runs; stop once nothing is left to consume
        if (!(count > 0))
            break;
    }
    return produced;
}
47+
48+
/**
49+
* Decodes UTF8 bytes to a standard JavaScript string (UTF16).
50+
* @param {!ByteBuffer} src Source ByteBuffer
51+
* @param {number} srcOffset Source offset
52+
* @param {number} count Number of bytes to decode
53+
* @returns {string} Decoded string
54+
* @inner
55+
*/
56+
function bytebuffer_utf8_decode(src, srcOffset, count) {
57+
if (count === 0)
58+
return "";
59+
var parts = [], // readily assembled parts
60+
batch = []; // char codes for batch processing
61+
//? // GET(varOffset, varTarget) with varTarget referencing a ByteBuffer
62+
while (count--) {
63+
var c = /*? GET('srcOffset++', 'src') */,
64+
c2, c3;
65+
switch (c >> 4) {
66+
case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
67+
batch.push(c);
68+
break;
69+
case 12: case 13:
70+
if (count < 1)
71+
throw Error("truncated utf8 sequence");
72+
c2 = /*? GET('srcOffset++', 'src') */;
73+
--count;
74+
batch.push(((c & 0x1F) << 6) | (c2 & 0x3F));
75+
break;
76+
case 14:
77+
if (count < 2)
78+
throw Error("truncated utf8 sequence");
79+
c2 = /*? GET('srcOffset++', 'src') */;
80+
c3 = /*? GET('srcOffset++', 'src') */;
81+
count -= 2;
82+
batch.push(((c & 0x0F) << 12) | ((c2 & 0x3F) << 6) | ((c3 & 0x3F) << 0));
83+
break;
84+
}
85+
if (batch.length > 1023) {
86+
parts.push(String.fromCharCode.apply(String, batch));
87+
batch.length = 0;
88+
}
89+
}
90+
if (batch.length > 0) {
91+
if (parts.length === 0)
92+
return String.fromCharCode.apply(String, batch);
93+
parts.push(String.fromCharCode.apply(String, batch));
94+
}
95+
return parts.join('');
96+
}
97+
98+
/**
 * Calculates the number of UTF8 bytes required to store a standard JavaScript string (UTF16).
 * A surrogate pair occupies two char codes but is counted once, as four bytes, so the result
 * matches what bytebuffer_utf8_encode returns for the same range.
 * @param {string} src Source string
 * @param {number} srcOffset Source offset
 * @param {number} count Number of char codes to calculate
 * @returns {number} Number of bytes required
 * @throws {Error} If the range ends directly after the first unit of a surrogate pair
 * @inner
 */
function bytebuffer_utf8_calculate(src, srcOffset, count) {
    if (count === 0)
        return 0;
    var n = 0;
    do {
        var cc = src.charCodeAt(srcOffset++);
        --count;
        if (cc < 0x80) {
            n += 1;
        } else if (cc < 0x800) {
            n += 2;
        } else if (cc < 0xD800 || cc >= 0xE000) {
            n += 3;
        } else { // surrogate
            if (count === 0)
                throw Error("truncated utf8 surrogate");
            // consume the second unit of the pair as well; otherwise it would be looked at
            // again on the next pass and either counted a second time or reported as truncated
            ++srcOffset;
            --count;
            n += 4;
        }
    } while (count > 0);
    return n;
}

src/macros.js

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,4 +195,28 @@ WRITE_UINT32_ARRAY = function(varValue, varOffset, varTarget, varEndian) {
195195
if (VERBOSE_MS) writeln(__+'// </WRITE_UINT32>');
196196
};
197197

198+
/**
 * Build-time macro: emits one statement that stores a byte value into a ByteBuffer.
 * All arguments are source code fragments (strings) that are pasted into the output.
 * Which backing store is addressed depends on the NODE and DATAVIEW build flags.
 * @param {string=} varValue Expression yielding the byte to store, defaults to 'value'
 * @param {string=} varOffset Expression yielding the offset to store at, defaults to 'offset'
 * @param {string=} varTarget Expression referencing the ByteBuffer, defaults to 'this'
 */
SET = function(varValue, varOffset, varTarget) { // with varTarget referencing a ByteBuffer
    if (typeof varValue === 'undefined') varValue = 'value';
    if (typeof varOffset === 'undefined') varOffset = 'offset';
    if (typeof varTarget === 'undefined') varTarget = 'this';
    // `__` is prepended to every emitted line (presumably the current indentation; confirm
    // against the build tooling)
    if (NODE) {
        writeln(__+varTarget+'.buffer['+varOffset+'] = '+varValue+';');
    } else if (DATAVIEW) {
        // DataView#setUint8 expects (byteOffset, value): the offset goes first
        writeln(__+varTarget+'.view.setUint8('+varOffset+', '+varValue+');');
    } else {
        writeln(__+varTarget+'.view['+varOffset+'] = '+varValue+';');
    }
};
210+
211+
/**
 * Build-time macro: emits an expression (no trailing newline or semicolon) that reads one
 * byte from a ByteBuffer. Both arguments are source code fragments (strings) that are pasted
 * into the output. Which backing store is addressed depends on the NODE and DATAVIEW build flags.
 * @param {string=} varOffset Expression yielding the offset to read from, defaults to 'offset'
 * @param {string=} varTarget Expression referencing the ByteBuffer, defaults to 'this'
 */
GET = function(varOffset, varTarget) { // with varTarget referencing a ByteBuffer
    var offsetExpr = typeof varOffset === 'undefined' ? 'offset' : varOffset,
        targetExpr = typeof varTarget === 'undefined' ? 'this' : varTarget,
        accessor;
    if (NODE)
        accessor = '.buffer[' + offsetExpr + ']';
    else if (DATAVIEW)
        accessor = '.view.getUint8(' + offsetExpr + ')';
    else
        accessor = '.view[' + offsetExpr + ']';
    write(targetExpr + accessor);
};
198222
//?.

0 commit comments

Comments
 (0)