Skip to content

Commit e7b1865

Browse files
GearsDatapacks authored and lpil committed
Add support for utf16 and utf32 on the JavaScript target
1 parent f960226 commit e7b1865

File tree

3 files changed

+123
-9
lines changed

3 files changed

+123
-9
lines changed

compiler-core/src/javascript.rs

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -187,19 +187,35 @@ impl<'a> Generator<'a> {
187187

188188
if self.tracker.sized_integer_segment_used {
189189
self.register_prelude_usage(&mut imports, "sizedInt", None);
190-
};
190+
}
191191

192192
if self.tracker.string_bit_array_segment_used {
193193
self.register_prelude_usage(&mut imports, "stringBits", None);
194-
};
194+
}
195+
196+
if self.tracker.string_utf16_bit_array_segment_used {
197+
self.register_prelude_usage(&mut imports, "stringToUtf16", None);
198+
}
199+
200+
if self.tracker.string_utf32_bit_array_segment_used {
201+
self.register_prelude_usage(&mut imports, "stringToUtf32", None);
202+
}
195203

196204
if self.tracker.codepoint_bit_array_segment_used {
197205
self.register_prelude_usage(&mut imports, "codepointBits", None);
198-
};
206+
}
207+
208+
// Codepoint segments must import the helper that the expression generator
// actually emits a call to (`codepointToUtf16(...)` / `codepointToUtf32(...)`).
// Registering the string helpers here would leave the generated module
// calling a function it never imported.
if self.tracker.codepoint_utf16_bit_array_segment_used {
    self.register_prelude_usage(&mut imports, "codepointToUtf16", None);
}

if self.tracker.codepoint_utf32_bit_array_segment_used {
    self.register_prelude_usage(&mut imports, "codepointToUtf32", None);
}
199215

200216
if self.tracker.float_bit_array_segment_used {
201217
self.register_prelude_usage(&mut imports, "sizedFloat", None);
202-
};
218+
}
203219

204220
let echo = if self.tracker.echo_used {
205221
if StdlibPackage::Present == self.stdlib_package {
@@ -861,7 +877,11 @@ pub(crate) struct UsageTracker {
861877
pub bit_array_slice_to_int_used: bool,
862878
pub sized_integer_segment_used: bool,
863879
pub string_bit_array_segment_used: bool,
880+
pub string_utf16_bit_array_segment_used: bool,
881+
pub string_utf32_bit_array_segment_used: bool,
864882
pub codepoint_bit_array_segment_used: bool,
883+
pub codepoint_utf16_bit_array_segment_used: bool,
884+
pub codepoint_utf32_bit_array_segment_used: bool,
865885
pub float_bit_array_segment_used: bool,
866886
pub echo_used: bool,
867887
}

compiler-core/src/javascript/expression.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -410,6 +410,30 @@ impl<'module, 'a> Generator<'module, 'a> {
410410
Ok(docvec!["codepointBits(", value, ")"])
411411
}
412412

413+
// UTF16 strings
414+
[Opt::Utf16 { .. }] => {
415+
self.tracker.string_utf16_bit_array_segment_used = true;
416+
Ok(docvec!["stringToUtf16(", value, ")"])
417+
}
418+
419+
// UTF16 codepoints
420+
[Opt::Utf16Codepoint { .. }] => {
421+
self.tracker.codepoint_utf16_bit_array_segment_used = true;
422+
Ok(docvec!["codepointToUtf16(", value, ")"])
423+
}
424+
425+
// UTF32 strings
426+
[Opt::Utf32 { .. }] => {
427+
self.tracker.string_utf32_bit_array_segment_used = true;
428+
Ok(docvec!["stringToUtf32(", value, ")"])
429+
}
430+
431+
// UTF32 codepoints
432+
[Opt::Utf32Codepoint { .. }] => {
433+
self.tracker.codepoint_utf32_bit_array_segment_used = true;
434+
Ok(docvec!["codepointToUtf32(", value, ")"])
435+
}
436+
413437
// Bit arrays
414438
[Opt::Bits { .. }] => Ok(value),
415439

compiler-core/templates/prelude.mjs

Lines changed: 75 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ class ListIterator {
8181
}
8282
}
8383

84-
export class Empty extends List {}
84+
// `List` subclass that declares no fields or methods of its own.
// NOTE(review): presumably the empty-list terminator — confirm against `List`.
export class Empty extends List { }
8585

8686
export class NonEmpty extends List {
8787
constructor(head, tail) {
@@ -586,7 +586,7 @@ export function toBitArray(segments) {
586586
return new BitArray(segment);
587587
}
588588

589-
return new BitArray(new Uint8Array(/** @type {number[]} */ (segments)));
589+
return new BitArray(new Uint8Array(/** @type {number[]} */(segments)));
590590
}
591591

592592
// Count the total number of bits and check if all segments are numbers, i.e.
@@ -608,7 +608,7 @@ export function toBitArray(segments) {
608608
// If all segments are numbers then pass the segments array directly to the
609609
// Uint8Array constructor
610610
if (areAllSegmentsNumbers) {
611-
return new BitArray(new Uint8Array(/** @type {number[]} */ (segments)));
611+
return new BitArray(new Uint8Array(/** @type {number[]} */(segments)));
612612
}
613613

614614
// Pack the segments into a Uint8Array
@@ -1234,7 +1234,7 @@ function intFromUnalignedSliceUsingBigInt(
12341234

12351235
/**
12361236
* Interprets a 16-bit unsigned integer value as a 16-bit floating point value.
1237-
*
1237+
*
12381238
* @param {number} intValue
12391239
* @returns {number}
12401240
*/
@@ -1356,6 +1356,76 @@ export function codepointBits(codepoint) {
13561356
return stringBits(String.fromCodePoint(codepoint.value));
13571357
}
13581358

1359+
/**
 * @internal
 *
 * Returns the UTF-16 bytes for a string.
 *
 * The bytes are written through a `DataView` with an explicit byte order.
 * Writing through a `Uint16Array` would use the host platform's byte order,
 * making the output platform-dependent (little-endian on most hardware).
 *
 * @param {string} string
 * @param {boolean} [isBigEndian=true] Byte order of each 16-bit code unit.
 *   Defaults to big-endian, the default byte order of bit array segments.
 * @returns {Uint8Array}
 */
export function stringToUtf16(string, isBigEndian = true) {
  const buffer = new ArrayBuffer(string.length * 2);
  const view = new DataView(buffer);

  // JavaScript strings are sequences of UTF-16 code units, so each unit is
  // copied as-is; only the byte order within the unit has to be chosen.
  for (let i = 0; i < string.length; i++) {
    view.setUint16(i * 2, string.charCodeAt(i), !isBigEndian);
  }

  return new Uint8Array(buffer);
}
1377+
1378+
/**
 * @internal
 *
 * Returns the UTF-16 bytes for a single UTF codepoint.
 *
 * Exported as `codepointToUtf16` (lower-case "p"), which is the name the
 * JavaScript code generator emits calls to and matches the sibling
 * `codepointBits` helper.
 *
 * @param {UtfCodepoint} codepoint
 * @returns {Uint8Array}
 */
export function codepointToUtf16(codepoint) {
  return stringToUtf16(String.fromCodePoint(codepoint.value));
}

// Backward-compatible alias for the original spelling of this helper.
export const codePointToUtf16 = codepointToUtf16;
1389+
1390+
/**
 * @internal
 *
 * Returns the UTF-32 bytes for a string.
 *
 * The bytes are written through a `DataView` with an explicit byte order.
 * Writing through a `Uint32Array` would use the host platform's byte order,
 * making the output platform-dependent (little-endian on most hardware).
 *
 * @param {string} string
 * @param {boolean} [isBigEndian=true] Byte order of each 32-bit codepoint.
 *   Defaults to big-endian, the default byte order of bit array segments.
 * @returns {Uint8Array}
 */
export function stringToUtf32(string, isBigEndian = true) {
  // Iterating a string yields one element per codepoint (a surrogate pair
  // is a single element), so no manual index skipping is needed and the
  // buffer can be sized exactly.
  const codePoints = Array.from(string, (character) =>
    character.codePointAt(0),
  );

  const buffer = new ArrayBuffer(codePoints.length * 4);
  const view = new DataView(buffer);

  codePoints.forEach((codePoint, index) => {
    view.setUint32(index * 4, codePoint, !isBigEndian);
  });

  return new Uint8Array(buffer);
}
1416+
1417+
/**
 * @internal
 *
 * Returns the UTF-32 bytes for a single UTF codepoint.
 *
 * Exported as `codepointToUtf32` (lower-case "p"), which is the name the
 * JavaScript code generator emits calls to and matches the sibling
 * `codepointBits` helper.
 *
 * @param {UtfCodepoint} codepoint
 * @returns {Uint8Array}
 */
export function codepointToUtf32(codepoint) {
  return stringToUtf32(String.fromCodePoint(codepoint.value));
}

// Backward-compatible alias for the original spelling of this helper.
export const codePointToUtf32 = codepointToUtf32;
1428+
13591429
export class Result extends CustomType {
13601430
// @internal
13611431
static isResult(data) {
@@ -1411,7 +1481,7 @@ export function isEqual(x, y) {
14111481
try {
14121482
if (a.equals(b)) continue;
14131483
else return false;
1414-
} catch {}
1484+
} catch { }
14151485
}
14161486

14171487
let [keys, get] = getters(a);

0 commit comments

Comments
 (0)