Skip to content

Commit 290eb2d

Browse files
committed
Runtime: rely on TextEncoder and TextDecoder
1 parent ef03e6f commit 290eb2d

File tree

1 file changed

+33
-124
lines changed

1 file changed

+33
-124
lines changed

runtime/js/mlBytes.js

Lines changed: 33 additions & 124 deletions
Original file line numberDiff line numberDiff line change
@@ -96,115 +96,6 @@ function caml_sub_uint8_array_to_jsbytes(a, i, len) {
9696
return s;
9797
}
9898

99-
/**
 * Encode a JavaScript (UTF-16) string as UTF-8, returned as a "byte string":
 * a JavaScript string in which every char code is one UTF-8 byte (0..255).
 *
 * Well-formed surrogate pairs become 4-byte sequences. A surrogate that is
 * not part of a well-formed pair is replaced by U+FFFD (bytes EF BF BD).
 *
 * @param {string} s - UTF-16 input.
 * @returns {string} UTF-8 bytes, one byte per char code.
 */
//Provides: caml_utf8_of_utf16
function caml_utf8_of_utf16(s) {
  var out = "";
  for (var i = 0, len = s.length; i < len; i++) {
    var c = s.charCodeAt(i);
    if (c < 0x80) {
      // Copy a whole run of ASCII at once.
      var j = i + 1;
      while (j < len && s.charCodeAt(j) < 0x80) j++;
      out += s.slice(i, j);
      i = j - 1; // the loop increment moves us to j
    } else if (c < 0x800) {
      out += String.fromCharCode(0xc0 | (c >> 6), 0x80 | (c & 0x3f));
    } else if (c < 0xd800 || c > 0xdfff) {
      // BMP code unit outside the surrogate range 0xD800..0xDFFF (inclusive).
      out += String.fromCharCode(
        0xe0 | (c >> 12),
        0x80 | ((c >> 6) & 0x3f),
        0x80 | (c & 0x3f),
      );
    } else {
      // `c` is a surrogate; 0 stands in for "no next code unit".
      var d = i + 1 < len ? s.charCodeAt(i + 1) : 0;
      if (c > 0xdbff || d < 0xdc00 || d > 0xdfff) {
        // Unmatched surrogate, replaced by \ufffd (replacement character)
        out += "\xef\xbf\xbd";
      } else {
        i++; // consume the low surrogate as well
        var cp = 0x10000 + ((c - 0xd800) << 10) + (d - 0xdc00);
        out += String.fromCharCode(
          0xf0 | (cp >> 18),
          0x80 | ((cp >> 12) & 0x3f),
          0x80 | ((cp >> 6) & 0x3f),
          0x80 | (cp & 0x3f),
        );
      }
    }
  }
  return out;
}
149-
150-
/**
 * Decode a UTF-8 "byte string" (a JavaScript string whose char codes are the
 * UTF-8 bytes) into a regular UTF-16 JavaScript string.
 *
 * A lead byte that does not start a well-formed sequence (bad continuation
 * byte, truncated input, overlong form, encoded surrogate, value above
 * U+10FFFF, or an invalid lead byte) yields one U+FFFD, and decoding resumes
 * at the very next byte.
 *
 * @param {string} s - UTF-8 bytes, one byte per char code.
 * @returns {string} The decoded UTF-16 string.
 */
//Provides: caml_utf16_of_utf8
function caml_utf16_of_utf8(s) {
  var len = s.length;
  var out = "";

  // Payload (low 6 bits) of the continuation byte at index k, or -1 when
  // there is no byte there or it is not in 0x80..0xBF.
  function continuation(k) {
    var c = k < len ? s.charCodeAt(k) : -1;
    return c >= 0x80 && c <= 0xbf ? c & 0x3f : -1;
  }

  var i = 0;
  while (i < len) {
    var c1 = s.charCodeAt(i);
    if (c1 < 0x80) {
      // Copy a whole run of ASCII at once.
      var j = i + 1;
      while (j < len && s.charCodeAt(j) < 0x80) j++;
      out += s.slice(i, j);
      i = j;
      continue;
    }

    var cp = -1; // decoded code point, or -1 if the sequence is invalid
    var size = 1; // number of bytes consumed
    var b2 = continuation(i + 1);
    var b3;
    var b4;
    var v;
    if (c1 >= 0xc2 && c1 <= 0xdf) {
      // Two bytes; leads 0xC0/0xC1 would be overlong and are rejected.
      if (b2 >= 0) {
        cp = ((c1 & 0x1f) << 6) | b2;
        size = 2;
      }
    } else if (c1 >= 0xe0 && c1 <= 0xef) {
      b3 = continuation(i + 2);
      if (b2 >= 0 && b3 >= 0) {
        v = ((c1 & 0x0f) << 12) | (b2 << 6) | b3;
        // Reject overlong forms and the surrogate range 0xD800..0xDFFF.
        if (v >= 0x800 && (v < 0xd800 || v > 0xdfff)) {
          cp = v;
          size = 3;
        }
      }
    } else if (c1 >= 0xf0 && c1 <= 0xf4) {
      b3 = continuation(i + 2);
      b4 = continuation(i + 3);
      if (b2 >= 0 && b3 >= 0 && b4 >= 0) {
        v = ((c1 & 0x07) << 18) | (b2 << 12) | (b3 << 6) | b4;
        // Reject overlong forms and values beyond U+10FFFF.
        if (v >= 0x10000 && v <= 0x10ffff) {
          cp = v;
          size = 4;
        }
      }
    }

    if (cp < 0) {
      // Invalid sequence
      out += "\ufffd";
      i += 1;
    } else {
      out += String.fromCodePoint(cp);
      i += size;
    }
  }
  return out;
}
207-
20899
//Provides: jsoo_is_ascii
209100
function jsoo_is_ascii(s) {
210101
// The regular expression gets better at around this point for all browsers
@@ -401,17 +292,23 @@ function caml_bytes_set(s, i, c) {
401292
return caml_bytes_unsafe_set(s, i, c);
402293
}
403294

295+
// Shared UTF-8 encoder (JavaScript string -> Uint8Array of UTF-8 bytes).
//Provides: jsoo_text_encoder
var jsoo_text_encoder = new TextEncoder();

// Shared UTF-8 decoder (UTF-8 bytes -> JavaScript string).
// `ignoreBOM: true` keeps a leading EF BB BF in the output as U+FEFF; the
// default would silently drop it, so a string starting with a BOM would not
// survive a decode/encode round trip.
//Provides: jsoo_text_decoder
var jsoo_text_decoder = new TextDecoder("utf-8", { ignoreBOM: true });
300+
404301
// Builds an MlBytes value from a JavaScript string by encoding it with the
// shared text encoder. The result is constructed with tag 4 and the encoded
// Uint8Array as contents.
// NOTE(review): tag 4 presumably marks array-backed contents; confirm against
// the MlBytes tag definitions.
//Provides: caml_bytes_of_utf16_jsstring
//Requires: MlBytes, jsoo_text_encoder
function caml_bytes_of_utf16_jsstring(s) {
  const utf8 = jsoo_text_encoder.encode(s);
  const byteCount = utf8.length;
  return new MlBytes(4, utf8, byteCount);
}
412307

413308
//Provides: MlBytes
414-
//Requires: caml_convert_string_to_bytes, jsoo_is_ascii, caml_utf16_of_utf8
309+
//Requires: caml_convert_string_to_bytes, jsoo_is_ascii
310+
//Requires: caml_uint8_array_of_bytes
311+
//Requires: jsoo_text_decoder
415312
class MlBytes {
416313
constructor(tag, contents, length) {
417314
this.t = tag;
@@ -437,9 +334,9 @@ class MlBytes {
437334
}
438335

439336
toUtf16() {
440-
var r = this.toString();
441-
if (this.t === 9) return r;
442-
return caml_utf16_of_utf8(r);
337+
if (this.t === 9) return this.c;
338+
var a = caml_uint8_array_of_bytes(this);
339+
return jsoo_text_decoder.decode(a);
443340
}
444341

445342
slice() {
@@ -767,20 +664,33 @@ function caml_jsbytes_of_string(x) {
767664
return x;
768665
}
769666

667+
// Scratch buffer reused by caml_jsstring_of_string for inputs of at most
// 1024 bytes, so short conversions do not allocate a fresh backing store.
//Provides: jsoo_text_decoder_buff
var jsoo_text_decoder_buff = new ArrayBuffer(1024);

// Converts a byte string (one UTF-8 byte per char code) to a JavaScript
// string. ASCII-only input is returned unchanged; otherwise the bytes are
// copied into a Uint8Array and decoded with the shared text decoder.
//Provides: caml_jsstring_of_string const
//Requires: jsoo_is_ascii
//Requires: jsoo_text_decoder
//Requires: jsoo_text_decoder_buff
//If: js-string
function caml_jsstring_of_string(s) {
  if (jsoo_is_ascii(s)) return s;
  var n = s.length;
  // An ArrayBuffer exposes its size as `byteLength`; it has no `length`
  // property, so comparing against `.length` would never pick the scratch
  // buffer.
  var a =
    n <= jsoo_text_decoder_buff.byteLength
      ? new Uint8Array(jsoo_text_decoder_buff, 0, n)
      : new Uint8Array(n);
  for (var i = 0; i < n; i++) {
    a[i] = s.charCodeAt(i);
  }
  return jsoo_text_decoder.decode(a);
}
777686

778687
// Converts a JavaScript string to a runtime string by encoding it with the
// shared text encoder and handing the resulting byte array to
// caml_string_of_array.
//Provides: caml_string_of_jsstring const
//Requires: caml_string_of_array
//Requires: jsoo_text_encoder
//If: js-string
function caml_string_of_jsstring(s) {
  return caml_string_of_array(jsoo_text_encoder.encode(s));
}
785695

786696
//Provides: caml_bytes_of_jsbytes const
@@ -900,7 +810,6 @@ function caml_ml_bytes_content(s) {
900810
}
901811

902812
//Provides: caml_is_ml_string
903-
//Requires: jsoo_is_ascii
904813
//If: js-string
905814
function caml_is_ml_string(s) {
906815
// biome-ignore lint/suspicious/noControlCharactersInRegex: expected

0 commit comments

Comments
 (0)