Skip to content

Commit 3016aaa

Browse files
GearsDatapacks authored and lpil committed
Support endianness for non-utf8 segments on js
1 parent 09a2b99 commit 3016aaa

File tree

3 files changed

+83
-26
lines changed

3 files changed

+83
-26
lines changed

compiler-core/src/javascript/expression.rs

Lines changed: 48 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -411,27 +411,67 @@ impl<'module, 'a> Generator<'module, 'a> {
411411
}
412412

413413
// UTF16 strings
414-
[Opt::Utf16 { .. }] => {
414+
[Opt::Utf16 { .. }]
415+
| [Opt::Utf16 { .. }, Opt::Big { .. }]
416+
| [Opt::Big { .. }, Opt::Utf16 { .. }] => {
415417
self.tracker.string_utf16_bit_array_segment_used = true;
416-
Ok(docvec!["stringToUtf16(", value, ")"])
418+
let is_big = "true".to_doc();
419+
Ok(docvec!["stringToUtf16(", value, ", ", is_big, ")"])
420+
}
421+
422+
[Opt::Utf16 { .. }, Opt::Little { .. }]
423+
| [Opt::Little { .. }, Opt::Utf16 { .. }] => {
424+
self.tracker.string_utf16_bit_array_segment_used = true;
425+
let is_big = "false".to_doc();
426+
Ok(docvec!["stringToUtf16(", value, ", ", is_big, ")"])
417427
}
418428

419429
// UTF16 codepoints
420-
[Opt::Utf16Codepoint { .. }] => {
430+
[Opt::Utf16Codepoint { .. }]
431+
| [Opt::Utf16Codepoint { .. }, Opt::Big { .. }]
432+
| [Opt::Big { .. }, Opt::Utf16Codepoint { .. }] => {
433+
self.tracker.codepoint_utf16_bit_array_segment_used = true;
434+
let is_big = "true".to_doc();
435+
Ok(docvec!["codepointToUtf16(", value, ", ", is_big, ")"])
436+
}
437+
438+
[Opt::Utf16Codepoint { .. }, Opt::Little { .. }]
439+
| [Opt::Little { .. }, Opt::Utf16Codepoint { .. }] => {
421440
self.tracker.codepoint_utf16_bit_array_segment_used = true;
422-
Ok(docvec!["codepointToUtf16(", value, ")"])
441+
let is_big = "false".to_doc();
442+
Ok(docvec!["codepointToUtf16(", value, ", ", is_big, ")"])
423443
}
424444

425445
// UTF32 strings
426-
[Opt::Utf32 { .. }] => {
446+
[Opt::Utf32 { .. }]
447+
| [Opt::Utf32 { .. }, Opt::Big { .. }]
448+
| [Opt::Big { .. }, Opt::Utf32 { .. }] => {
427449
self.tracker.string_utf32_bit_array_segment_used = true;
428-
Ok(docvec!["stringToUtf32(", value, ")"])
450+
let is_big = "true".to_doc();
451+
Ok(docvec!["stringToUtf32(", value, ", ", is_big, ")"])
452+
}
453+
454+
[Opt::Utf32 { .. }, Opt::Little { .. }]
455+
| [Opt::Little { .. }, Opt::Utf32 { .. }] => {
456+
self.tracker.string_utf32_bit_array_segment_used = true;
457+
let is_big = "false".to_doc();
458+
Ok(docvec!["stringToUtf32(", value, ", ", is_big, ")"])
429459
}
430460

431461
// UTF32 codepoints
432-
[Opt::Utf32Codepoint { .. }] => {
462+
[Opt::Utf32Codepoint { .. }]
463+
| [Opt::Utf32Codepoint { .. }, Opt::Big { .. }]
464+
| [Opt::Big { .. }, Opt::Utf32Codepoint { .. }] => {
465+
self.tracker.codepoint_utf32_bit_array_segment_used = true;
466+
let is_big = "true".to_doc();
467+
Ok(docvec!["codepointToUtf32(", value, ", ", is_big, ")"])
468+
}
469+
470+
[Opt::Utf32Codepoint { .. }, Opt::Little { .. }]
471+
| [Opt::Little { .. }, Opt::Utf32Codepoint { .. }] => {
433472
self.tracker.codepoint_utf32_bit_array_segment_used = true;
434-
Ok(docvec!["codepointToUtf32(", value, ")"])
473+
let is_big = "false".to_doc();
474+
Ok(docvec!["codepointToUtf32(", value, ", ", is_big, ")"])
435475
}
436476

437477
// Bit arrays

compiler-core/src/strings.rs

Lines changed: 19 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
use ecow::EcoString;
22
use itertools::Itertools;
33

4+
use crate::ast::Endianness;
5+
46
/// Converts any escape sequences from the given string to their correct
57
/// bytewise UTF-8 representation and returns the resulting string.
68
pub fn convert_string_escape_chars(str: &EcoString) -> EcoString {
@@ -112,31 +114,42 @@ pub fn to_upper_camel_case(string: &str) -> EcoString {
112114
}
113115

114116
/// Converts a string into its UTF-16 representation in bytes
115-
pub fn string_to_utf16_bytes(string: &str) -> Vec<u8> {
117+
pub fn string_to_utf16_bytes(string: &str, endianness: Endianness) -> Vec<u8> {
116118
let mut bytes = Vec::with_capacity(string.len() * 2);
117119

118120
for character in string.chars() {
119121
let mut character_buffer = [0, 0];
120122

121123
_ = character.encode_utf16(&mut character_buffer);
122124

123-
bytes.extend(character_buffer[0].to_le_bytes());
125+
let first_two_bytes = match endianness {
126+
Endianness::Big => character_buffer[0].to_be_bytes(),
127+
Endianness::Little => character_buffer[0].to_le_bytes(),
128+
};
129+
bytes.extend(first_two_bytes);
124130

125131
if character_buffer[1] != 0 {
126-
bytes.extend(character_buffer[1].to_le_bytes());
132+
let next_two_bytes = match endianness {
133+
Endianness::Big => character_buffer[1].to_be_bytes(),
134+
Endianness::Little => character_buffer[1].to_le_bytes(),
135+
};
136+
bytes.extend(next_two_bytes);
127137
}
128138
}
129139

130140
bytes
131141
}
132142

133143
/// Converts a string into its UTF-32 representation in bytes
134-
pub fn string_to_utf32_bytes(string: &str) -> Vec<u8> {
144+
pub fn string_to_utf32_bytes(string: &str, endianness: Endianness) -> Vec<u8> {
135145
let mut bytes = Vec::with_capacity(string.len() * 4);
136146

137147
for character in string.chars() {
138-
let u32 = character as u32;
139-
bytes.extend(u32.to_le_bytes());
148+
let character_bytes = match endianness {
149+
Endianness::Big => (character as u32).to_be_bytes(),
150+
Endianness::Little => (character as u32).to_le_bytes(),
151+
};
152+
bytes.extend(character_bytes);
140153
}
141154

142155
bytes

compiler-core/templates/prelude.mjs

Lines changed: 16 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1362,14 +1362,15 @@ export function codepointBits(codepoint) {
13621362
* Returns the UTF-16 bytes for a string.
13631363
*
13641364
* @param {string} string
1365+
* @param {boolean} isBigEndian
13651366
* @returns {Uint8Array}
13661367
*/
1367-
export function stringToUtf16(string) {
1368+
export function stringToUtf16(string, isBigEndian) {
13681369
const buffer = new ArrayBuffer(string.length * 2);
1369-
const bufferView = new Uint16Array(buffer);
1370+
const bufferView = new DataView(buffer);
13701371

13711372
for (let i = 0; i < string.length; i++) {
1372-
bufferView[i] = string.charCodeAt(i);
1373+
bufferView.setUint16(i * 2, string.charCodeAt(i), !isBigEndian);
13731374
}
13741375

13751376
return new Uint8Array(buffer);
@@ -1381,10 +1382,11 @@ export function stringToUtf16(string) {
13811382
* Returns the UTF-16 bytes for a single UTF codepoint.
13821383
*
13831384
* @param {UtfCodepoint} codepoint
1385+
* @param {boolean} isBigEndian
13841386
* @returns {Uint8Array}
13851387
*/
1386-
export function codePointToUtf16(codepoint) {
1387-
return stringToUtf16(String.fromCodePoint(codepoint.value));
1388+
export function codePointToUtf16(codepoint, isBigEndian) {
1389+
return stringToUtf16(String.fromCodePoint(codepoint.value), isBigEndian);
13881390
}
13891391

13901392
/**
@@ -1393,20 +1395,21 @@ export function codePointToUtf16(codepoint) {
13931395
* Returns the UTF-32 bytes for a string.
13941396
*
13951397
* @param {string} string
1398+
* @param {boolean} isBigEndian
13961399
* @returns {Uint8Array}
13971400
*/
1398-
export function stringToUtf32(string) {
1401+
export function stringToUtf32(string, isBigEndian) {
13991402
const buffer = new ArrayBuffer(string.length * 4);
1400-
const bufferView = new Uint32Array(buffer);
1403+
const bufferView = new DataView(buffer);
14011404
let length = 0;
14021405

14031406
for (let i = 0; i < string.length; i++) {
1404-
const codePoint = string.codePointAt(i);
1407+
const codepoint = string.codePointAt(i);
14051408

1406-
bufferView[length] = codePoint;
1409+
bufferView.setUint32(length * 4, codepoint, !isBigEndian)
14071410
length++;
14081411

1409-
if (codePoint > 0xFFFF) {
1412+
if (codepoint > 0xFFFF) {
14101413
i++;
14111414
}
14121415
}
@@ -1420,10 +1423,11 @@ export function stringToUtf32(string) {
14201423
* Returns the UTF-32 bytes for a single UTF codepoint.
14211424
*
14221425
* @param {UtfCodepoint} codepoint
1426+
* @param {boolean} isBigEndian
14231427
* @returns {Uint8Array}
14241428
*/
1425-
export function codePointToUtf32(codepoint) {
1426-
return stringToUtf32(String.fromCodePoint(codepoint.value));
1429+
export function codePointToUtf32(codepoint, isBigEndian) {
1430+
return stringToUtf32(String.fromCodePoint(codepoint.value), isBigEndian);
14271431
}
14281432

14291433
export class Result extends CustomType {

0 commit comments

Comments
 (0)