Skip to content

Commit 6ce00b5

Browse files
authored
fix(parser): use native UTF-16 byte order (#41)
1 parent 9cedb49 commit 6ce00b5

File tree

2 files changed

+18
-5
lines changed

2 files changed

+18
-5
lines changed

src/main/java/io/github/treesitter/jtreesitter/InputEncoding.java

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package io.github.treesitter.jtreesitter;
22

3+
import java.nio.ByteOrder;
34
import java.nio.charset.Charset;
45
import java.nio.charset.StandardCharsets;
56
import org.jspecify.annotations.NonNull;
@@ -9,7 +10,7 @@ public enum InputEncoding {
910
/** UTF-8 encoding. */
1011
UTF_8(StandardCharsets.UTF_8),
1112
/** UTF-16 encoding in the platform's native byte order. */
12-
UTF_16(StandardCharsets.UTF_16BE);
13+
UTF_16(ByteOrder.nativeOrder() == ByteOrder.BIG_ENDIAN ? StandardCharsets.UTF_16BE : StandardCharsets.UTF_16LE);
1314

1415
private final @NonNull Charset charset;
1516

@@ -24,12 +25,14 @@ Charset charset() {
2425
/**
2526
* Convert a standard {@linkplain Charset} to an {@linkplain InputEncoding}.
2627
*
27-
* @param charset one of {@link StandardCharsets#UTF_8} or {@link StandardCharsets#UTF_16BE}
28+
* @param charset either {@link StandardCharsets#UTF_8} or {@link StandardCharsets#UTF_16}; {@link StandardCharsets#UTF_16LE UTF_16LE} and {@link StandardCharsets#UTF_16BE UTF_16BE} are accepted as well, but the resulting encoding always uses the native byte order
2829
* @throws IllegalArgumentException If the character set is invalid.
2930
*/
3031
static @NonNull InputEncoding valueOf(@NonNull Charset charset) throws IllegalArgumentException {
3132
if (charset.equals(StandardCharsets.UTF_8)) return InputEncoding.UTF_8;
32-
if (charset.equals(StandardCharsets.UTF_16BE)) return InputEncoding.UTF_16;
33+
if (charset.equals(StandardCharsets.UTF_16BE)
34+
|| charset.equals(StandardCharsets.UTF_16LE)
35+
|| charset.equals(StandardCharsets.UTF_16)) return InputEncoding.UTF_16;
3336
throw new IllegalArgumentException("Invalid character set: %s".formatted(charset));
3437
}
3538
}

src/test/java/io/github/treesitter/jtreesitter/ParserTest.java

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,11 @@ void setIncludedRanges() {
7878
void parseUtf8() {
7979
parser.setLanguage(language);
8080
try (var tree = parser.parse("class Foo {}").orElseThrow()) {
81-
assertEquals(12, tree.getRootNode().getEndByte());
81+
var rootNode = tree.getRootNode();
82+
83+
assertEquals(12, rootNode.getEndByte());
84+
assertFalse(rootNode.isError());
85+
assertEquals("(program (class_declaration name: (identifier) body: (class_body)))", rootNode.toSexp());
8286
}
8387
}
8488

@@ -87,7 +91,13 @@ void parseUtf8() {
8791
void parseUtf16() {
8892
parser.setLanguage(language);
8993
try (var tree = parser.parse("var java = \"💩\";", InputEncoding.UTF_16).orElseThrow()) {
90-
assertEquals(32, tree.getRootNode().getEndByte());
94+
var rootNode = tree.getRootNode();
95+
96+
assertEquals(32, rootNode.getEndByte());
97+
assertFalse(rootNode.isError());
98+
assertEquals(
99+
"(program (local_variable_declaration type: (type_identifier) declarator: (variable_declarator name: (identifier) value: (string_literal (string_fragment)))))",
100+
rootNode.toSexp());
91101
}
92102
}
93103

0 commit comments

Comments
 (0)