Skip to content

Commit 86494da

Browse files
committed
Runtime: rely on TextEncoder and TextDecoder
1 parent 66c1115 commit 86494da

File tree

1 file changed

+29
-130
lines changed

1 file changed

+29
-130
lines changed

runtime/mlBytes.js

Lines changed: 29 additions & 130 deletions
Original file line number | Diff line number | Diff line change
@@ -82,111 +82,6 @@ function caml_subarray_to_jsbytes(a, i, len) {
8282
return s;
8383
}
8484

85-
//Provides: caml_utf8_of_utf16
86-
function caml_utf8_of_utf16(s) {
87-
for (var b = "", t = b, c, d, i = 0, l = s.length; i < l; i++) {
88-
c = s.charCodeAt(i);
89-
if (c < 0x80) {
90-
for (var j = i + 1; j < l && (c = s.charCodeAt(j)) < 0x80; j++);
91-
if (j - i > 512) {
92-
t.substr(0, 1);
93-
b += t;
94-
t = "";
95-
b += s.slice(i, j);
96-
} else t += s.slice(i, j);
97-
if (j == l) break;
98-
i = j;
99-
}
100-
if (c < 0x800) {
101-
t += String.fromCharCode(0xc0 | (c >> 6));
102-
t += String.fromCharCode(0x80 | (c & 0x3f));
103-
} else if (c < 0xd800 || c >= 0xdfff) {
104-
t += String.fromCharCode(
105-
0xe0 | (c >> 12),
106-
0x80 | ((c >> 6) & 0x3f),
107-
0x80 | (c & 0x3f),
108-
);
109-
} else if (
110-
c >= 0xdbff ||
111-
i + 1 == l ||
112-
(d = s.charCodeAt(i + 1)) < 0xdc00 ||
113-
d > 0xdfff
114-
) {
115-
// Unmatched surrogate pair, replaced by \ufffd (replacement character)
116-
t += "\xef\xbf\xbd";
117-
} else {
118-
i++;
119-
c = (c << 10) + d - 0x35fdc00;
120-
t += String.fromCharCode(
121-
0xf0 | (c >> 18),
122-
0x80 | ((c >> 12) & 0x3f),
123-
0x80 | ((c >> 6) & 0x3f),
124-
0x80 | (c & 0x3f),
125-
);
126-
}
127-
if (t.length > 1024) {
128-
t.substr(0, 1);
129-
b += t;
130-
t = "";
131-
}
132-
}
133-
return b + t;
134-
}
135-
136-
//Provides: caml_utf16_of_utf8
137-
function caml_utf16_of_utf8(s) {
138-
for (var b = "", t = "", c, c1, c2, v, i = 0, l = s.length; i < l; i++) {
139-
c1 = s.charCodeAt(i);
140-
if (c1 < 0x80) {
141-
for (var j = i + 1; j < l && (c1 = s.charCodeAt(j)) < 0x80; j++);
142-
if (j - i > 512) {
143-
t.substr(0, 1);
144-
b += t;
145-
t = "";
146-
b += s.slice(i, j);
147-
} else t += s.slice(i, j);
148-
if (j == l) break;
149-
i = j;
150-
}
151-
v = 1;
152-
if (++i < l && ((c2 = s.charCodeAt(i)) & -64) == 128) {
153-
c = c2 + (c1 << 6);
154-
if (c1 < 0xe0) {
155-
v = c - 0x3080;
156-
if (v < 0x80) v = 1;
157-
} else {
158-
v = 2;
159-
if (++i < l && ((c2 = s.charCodeAt(i)) & -64) == 128) {
160-
c = c2 + (c << 6);
161-
if (c1 < 0xf0) {
162-
v = c - 0xe2080;
163-
if (v < 0x800 || (v >= 0xd7ff && v < 0xe000)) v = 2;
164-
} else {
165-
v = 3;
166-
if (++i < l && ((c2 = s.charCodeAt(i)) & -64) == 128 && c1 < 0xf5) {
167-
v = c2 - 0x3c82080 + (c << 6);
168-
if (v < 0x10000 || v > 0x10ffff) v = 3;
169-
}
170-
}
171-
}
172-
}
173-
}
174-
if (v < 4) {
175-
// Invalid sequence
176-
i -= v;
177-
t += "\ufffd";
178-
} else if (v > 0xffff)
179-
t += String.fromCharCode(0xd7c0 + (v >> 10), 0xdc00 + (v & 0x3ff));
180-
else t += String.fromCharCode(v);
181-
if (t.length > 1024) {
182-
t.substr(0, 1);
183-
b += t;
184-
t = "";
185-
}
186-
}
187-
return b + t;
188-
}
189-
19085
//Provides: jsoo_is_ascii
19186
function jsoo_is_ascii(s) {
19287
// The regular expression gets better at around this point for all browsers
@@ -426,20 +321,18 @@ function caml_bytes_set(s, i, c) {
426321
}
427322

428323
//Provides: caml_bytes_of_utf16_jsstring
429-
//Requires: jsoo_is_ascii, caml_utf8_of_utf16, MlBytes
430-
function caml_bytes_of_utf16_jsstring(s) {
431-
var tag = 9 /* BYTES | ASCII */;
432-
if (!jsoo_is_ascii(s))
433-
(tag = 8) /* BYTES | NOT_ASCII */, (s = caml_utf8_of_utf16(s));
434-
return new MlBytes(tag, s, s.length);
324+
//Requires: MlBytes
325+
function caml_bytes_of_utf16_jsstring (s) {
326+
var e = new TextEncoder();
327+
var a = e.encode(s);
328+
return new MlBytes(4, a, a.length);
435329
}
436330

437331
//Provides: MlBytes
438-
//Requires: caml_convert_string_to_bytes, jsoo_is_ascii, caml_utf16_of_utf8
439-
function MlBytes(tag, contents, length) {
440-
this.t = tag;
441-
this.c = contents;
442-
this.l = length;
332+
//Requires: caml_convert_string_to_bytes, jsoo_is_ascii
333+
//Requires: caml_uint8_array_of_bytes
334+
function MlBytes (tag, contents, length) {
335+
this.t=tag; this.c=contents; this.l=length;
443336
}
444337
MlBytes.prototype.toString = function () {
445338
switch (this.t) {
@@ -457,12 +350,13 @@ MlBytes.prototype.toString = function () {
457350
return this.c;
458351
}
459352
};
460-
MlBytes.prototype.toUtf16 = function () {
461-
var r = this.toString();
462-
if (this.t == 9) return r;
463-
return caml_utf16_of_utf8(r);
464-
};
465-
MlBytes.prototype.slice = function () {
353+
MlBytes.prototype.toUtf16 = function (){
354+
if(this.t == 9) return this.c;
355+
var a = caml_uint8_array_of_bytes(this);
356+
let d = new TextDecoder();
357+
return d.decode(a);
358+
}
359+
MlBytes.prototype.slice = function (){
466360
var content = this.t == 4 ? this.c.slice() : this.c;
467361
return new MlBytes(this.t, content, this.l);
468362
};
@@ -775,19 +669,25 @@ function caml_jsbytes_of_string(x) {
775669
}
776670

777671
//Provides: caml_jsstring_of_string const
778-
//Requires: jsoo_is_ascii, caml_utf16_of_utf8
672+
//Requires: jsoo_is_ascii
779673
//If: js-string
780674
function caml_jsstring_of_string(s) {
781-
if (jsoo_is_ascii(s)) return s;
782-
return caml_utf16_of_utf8(s);
675+
if(jsoo_is_ascii(s)) return s;
676+
var a = new Uint8Array(s.length);
677+
for(var i = 0; i < s.length; i++){
678+
a[i] = s.charCodeAt(i);
679+
}
680+
var d = new TextDecoder();
681+
return d.decode(a);
783682
}
784683

785684
//Provides: caml_string_of_jsstring const
786-
//Requires: jsoo_is_ascii, caml_utf8_of_utf16, caml_string_of_jsbytes
685+
//Requires: caml_string_of_array
787686
//If: js-string
788-
function caml_string_of_jsstring(s) {
789-
if (jsoo_is_ascii(s)) return caml_string_of_jsbytes(s);
790-
else return caml_string_of_jsbytes(caml_utf8_of_utf16(s));
687+
function caml_string_of_jsstring (s) {
688+
var e = new TextEncoder();
689+
var a = e.encode(s);
690+
return caml_string_of_array(a);
791691
}
792692

793693
//Provides: caml_bytes_of_jsbytes const
@@ -907,7 +807,6 @@ function caml_ml_bytes_content(s) {
907807
}
908808

909809
//Provides: caml_is_ml_string
910-
//Requires: jsoo_is_ascii
911810
//If: js-string
912811
function caml_is_ml_string(s) {
913812
// biome-ignore lint/suspicious/noControlCharactersInRegex: expected

0 commit comments

Comments
 (0)