Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
* Runtime: make Obj.dup work with floats and boxed numbers (#1871)
* Runtime: delete BigStringReader, one should use UInt8ArrayReader instead
* Runtime: less conversion during un-marshalling (#1889)
* Runtime: use TextEncoder/TextDecoder for utf8-utf16 conversions
* Runtime/wasm: implement BLAKE2b primitives for Wasm (#1873)
* Runtime/wasm: support jsoo_env and keep track of backtrace status (#1881)
* Runtime/wasm: support unmarshaling compressed data (#1898)
Expand Down
162 changes: 39 additions & 123 deletions runtime/js/mlBytes.js
Original file line number Diff line number Diff line change
Expand Up @@ -79,115 +79,6 @@ function caml_sub_uint8_array_to_jsbytes(a, i, len) {
return s;
}

//Provides: caml_utf8_of_utf16
function caml_utf8_of_utf16(s) {
  // Re-encodes the UTF-16 code units of `s` as UTF-8 and returns the result
  // as a string holding one character (code 0..255) per output byte.
  // A surrogate that is not part of a well-formed high/low pair is replaced
  // by the UTF-8 encoding of U+FFFD.
  //
  // Output is gathered in two stages: `chunk` collects recent pieces and is
  // moved into `flushed` once it grows past 1024 characters; ASCII runs
  // longer than 512 characters bypass `chunk` entirely.
  var total = s.length;
  var flushed = "";
  var chunk = "";
  var unit;
  var low;
  var pos = 0;
  while (pos < total) {
    unit = s.charCodeAt(pos);
    if (unit < 0x80) {
      // Find the end of the ASCII run; on exit `unit` is either the first
      // non-ASCII unit (at `stop`) or the last ASCII unit of the input.
      var stop = pos + 1;
      while (stop < total) {
        unit = s.charCodeAt(stop);
        if (unit >= 0x80) break;
        stop++;
      }
      var run = s.slice(pos, stop);
      if (stop - pos > 512) {
        // The result of slice is discarded on purpose; presumably a hint for
        // the engine to flatten the concatenated string -- TODO confirm.
        chunk.slice(0, 1);
        flushed += chunk;
        chunk = "";
        flushed += run;
      } else {
        chunk += run;
      }
      if (stop === total) break;
      pos = stop;
    }
    // From here on `unit` is >= 0x80.
    if (unit < 0x800) {
      // Two-byte sequence.
      chunk += String.fromCharCode(0xc0 | (unit >> 6), 0x80 | (unit & 0x3f));
    } else if (unit < 0xd800 || unit > 0xdfff) {
      // Three-byte sequence (any BMP unit that is not a surrogate).
      chunk += String.fromCharCode(
        0xe0 | (unit >> 12),
        0x80 | ((unit >> 6) & 0x3f),
        0x80 | (unit & 0x3f),
      );
    } else {
      // Surrogate: valid only as a high surrogate directly followed by a low
      // surrogate.
      var paired = unit <= 0xdbff && pos + 1 !== total;
      if (paired) {
        low = s.charCodeAt(pos + 1);
        paired = low >= 0xdc00 && low <= 0xdfff;
      }
      if (paired) {
        pos++;
        // (hi << 10) + lo - 0x35fdc00 == 0x10000 + ((hi - 0xd800) << 10) + (lo - 0xdc00)
        var point = (unit << 10) + low - 0x35fdc00;
        chunk += String.fromCharCode(
          0xf0 | (point >> 18),
          0x80 | ((point >> 12) & 0x3f),
          0x80 | ((point >> 6) & 0x3f),
          0x80 | (point & 0x3f),
        );
      } else {
        // Unmatched surrogate, replaced by \ufffd (replacement character)
        chunk += "\xef\xbf\xbd";
      }
    }
    if (chunk.length > 1024) {
      chunk.slice(0, 1);
      flushed += chunk;
      chunk = "";
    }
    pos++;
  }
  return flushed + chunk;
}

//Provides: caml_utf16_of_utf8
function caml_utf16_of_utf8(s) {
// Decodes `s`, whose character codes are read one by one as UTF-8 bytes, into
// a regular JavaScript (UTF-16) string. Every byte that starts an invalid,
// truncated or overlong sequence produces one U+FFFD and decoding resumes on
// the byte right after it.
//
// Locals:
//   b  - output already flushed
//   t  - output chunk being built (moved into b once longer than 1024)
//   c1 - lead byte of the current sequence
//   c2 - most recently read continuation byte
//   c  - raw accumulation of the bytes read so far (tag bits still included)
//   v  - either a rewind count 1..3 (invalid sequence) or the decoded code
//        point (always >= 0x80 when valid, hence the `v < 4` test below)
for (var b = "", t = "", c, c1, c2, v, i = 0, l = s.length; i < l; i++) {
c1 = s.charCodeAt(i);
if (c1 < 0x80) {
// Scan to the end of the ASCII run; on exit c1 is the first non-ASCII byte
// (at index j) or the last ASCII byte of the input.
for (var j = i + 1; j < l && (c1 = s.charCodeAt(j)) < 0x80; j++);
if (j - i > 512) {
// Long run: flush t and append the run directly to b.
// The slice result is discarded; presumably a hint for the engine to flatten
// the concatenated string -- TODO confirm.
t.slice(0, 1);
b += t;
t = "";
b += s.slice(i, j);
} else t += s.slice(i, j);
if (j === l) break;
i = j;
}
// Assume failure after consuming one extra byte until proven otherwise.
v = 1;
// (x & -64) === 128 checks that the top two bits are 10, i.e. a continuation byte.
if (++i < l && ((c2 = s.charCodeAt(i)) & -64) === 128) {
c = c2 + (c1 << 6);
if (c1 < 0xe0) {
// Two-byte form: 0x3080 === (0xc0 << 6) + 0x80 removes the tag bits.
v = c - 0x3080;
// Below 0x80 means an overlong encoding or a lead byte below 0xc2.
if (v < 0x80) v = 1;
} else {
v = 2;
if (++i < l && ((c2 = s.charCodeAt(i)) & -64) === 128) {
c = c2 + (c << 6);
if (c1 < 0xf0) {
// Three-byte form: 0xe2080 === (0xe0 << 12) + (0x80 << 6) + 0x80.
v = c - 0xe2080;
// Reject overlong encodings and encoded surrogates.
if (v < 0x800 || (v > 0xd7ff && v < 0xe000)) v = 2;
} else {
v = 3;
if (
++i < l &&
((c2 = s.charCodeAt(i)) & -64) === 128 &&
c1 < 0xf5
) {
// Four-byte form:
// 0x3c82080 === (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80.
v = c2 - 0x3c82080 + (c << 6);
// Reject overlong encodings and values above U+10FFFF.
if (v < 0x10000 || v > 0x10ffff) v = 3;
}
}
}
}
}
if (v < 4) {
// Invalid sequence: v is the number of positions i advanced past the lead
// byte, so rewinding by v puts i back on the lead byte and the loop's i++
// resumes on the byte that follows it.
i -= v;
t += "\ufffd";
} else if (v > 0xffff)
// Supplementary plane: emit a surrogate pair.
// 0xd7c0 === 0xd800 - (0x10000 >> 10).
t += String.fromCharCode(0xd7c0 + (v >> 10), 0xdc00 + (v & 0x3ff));
else t += String.fromCharCode(v);
if (t.length > 1024) {
t.slice(0, 1);
b += t;
t = "";
}
}
return b + t;
}

//Provides: jsoo_is_ascii
function jsoo_is_ascii(s) {
// The regular expression gets better at around this point for all browsers
Expand Down Expand Up @@ -384,17 +275,28 @@ function caml_bytes_set(s, i, c) {
return caml_bytes_unsafe_set(s, i, c);
}

//Provides: jsoo_text_encoder
// Single shared TextEncoder instance (TextEncoder always produces UTF-8).
// Other primitives in this file call its encode() method to turn a
// JavaScript string into a Uint8Array of UTF-8 bytes.
var jsoo_text_encoder = new TextEncoder();

//Provides: jsoo_text_decoder
// Single shared UTF-8 TextDecoder instance used to turn UTF-8 bytes into a
// JavaScript string. Malformed input is replaced by U+FFFD (non-fatal mode).
//
// `ignoreBOM: true` is required for a faithful conversion: with the default
// (`ignoreBOM: false`) the decoder silently drops a leading EF BB BF from the
// input, so a byte string that starts with U+FEFF would lose that character
// when converted to a JavaScript string.
var jsoo_text_decoder = new TextDecoder("utf-8", { ignoreBOM: true });

//Provides: caml_bytes_of_utf16_jsstring
//Requires: MlBytes, jsoo_text_encoder
//Requires: jsoo_is_ascii
function caml_bytes_of_utf16_jsstring(s) {
  // Builds an MlBytes value holding the UTF-8 encoding of the JavaScript
  // string `s`.
  //
  // The pre-change statements (tag variable + caml_utf8_of_utf16) used to sit
  // in front of this logic and returned unconditionally, which left the
  // TextEncoder path unreachable; only the TextEncoder-based version is kept.
  if (jsoo_is_ascii(s)) {
    // ASCII text is its own UTF-8 encoding: wrap the string as-is.
    return new MlBytes(9 /* BYTES | ASCII */, s, s.length);
  }
  // Non-ASCII: encode to a Uint8Array and wrap it with tag 4
  // (presumably the array-backed representation -- confirm against MlBytes).
  var a = jsoo_text_encoder.encode(s);
  return new MlBytes(4, a, a.length);
}

//Provides: MlBytes
//Requires: caml_convert_string_to_bytes, jsoo_is_ascii, caml_utf16_of_utf8
//Requires: caml_convert_string_to_bytes, jsoo_is_ascii
//Requires: caml_uint8_array_of_bytes
//Requires: jsoo_text_decoder
class MlBytes {
constructor(tag, contents, length) {
this.t = tag;
Expand All @@ -420,9 +322,9 @@ class MlBytes {
}

toUtf16() {
var r = this.toString();
if (this.t === 9) return r;
return caml_utf16_of_utf8(r);
if (this.t === 9) return this.c;
var a = caml_uint8_array_of_bytes(this);
return jsoo_text_decoder.decode(a);
}

slice() {
Expand Down Expand Up @@ -750,20 +652,35 @@ function caml_jsbytes_of_string(x) {
return x;
}

//Provides: jsoo_text_decoder_buff
// 1024-byte ArrayBuffer allocated once at load time. caml_jsstring_of_string
// wraps it in a Uint8Array view as staging space for the bytes it hands to
// the text decoder.
var jsoo_text_decoder_buff = new ArrayBuffer(1024);

//Provides: caml_jsstring_of_string const
//Requires: jsoo_is_ascii
//Requires: jsoo_text_decoder
//Requires: jsoo_text_decoder_buff
//If: js-string
function caml_jsstring_of_string(s) {
  // Converts `s`, read through charCodeAt as one UTF-8 byte per character,
  // into a regular JavaScript (UTF-16) string.
  //
  // The pre-change `return caml_utf16_of_utf8(s)` used to sit in front of the
  // decoder logic and made it unreachable; only the TextDecoder path is kept.
  if (jsoo_is_ascii(s)) return s;
  // Stage the bytes in the shared scratch buffer when they fit, otherwise in
  // a fresh array. An ArrayBuffer exposes its size as `byteLength`; it has no
  // `length` property, so comparing against `.length` was always false and
  // the scratch buffer was never reused.
  var a =
    s.length <= jsoo_text_decoder_buff.byteLength
      ? new Uint8Array(jsoo_text_decoder_buff, 0, s.length)
      : new Uint8Array(s.length);
  for (var i = 0; i < s.length; i++) {
    a[i] = s.charCodeAt(i);
  }
  // decode() returns a new string, so reusing the scratch buffer on the next
  // call cannot affect this result.
  return jsoo_text_decoder.decode(a);
}

//Provides: caml_string_of_jsstring const
//Requires: caml_string_of_array
//Requires: jsoo_text_encoder
//Requires: jsoo_is_ascii, caml_string_of_jsbytes
//If: js-string
function caml_string_of_jsstring(s) {
  // Converts the JavaScript (UTF-16) string `s` into the runtime's string
  // representation, encoded as UTF-8.
  //
  // The pre-change `else return caml_string_of_jsbytes(caml_utf8_of_utf16(s))`
  // used to sit in front of the encoder logic and made it unreachable; only
  // the TextEncoder path is kept.

  // ASCII text is already valid UTF-8 and is passed through unchanged.
  if (jsoo_is_ascii(s)) return caml_string_of_jsbytes(s);
  var a = jsoo_text_encoder.encode(s);
  return caml_string_of_array(a);
}

//Provides: caml_bytes_of_jsbytes const
Expand Down Expand Up @@ -883,7 +800,6 @@ function caml_ml_bytes_content(s) {
}

//Provides: caml_is_ml_string
//Requires: jsoo_is_ascii
//If: js-string
function caml_is_ml_string(s) {
// biome-ignore lint/suspicious/noControlCharactersInRegex: expected
Expand Down
Loading