Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package io.github.treesitter.jtreesitter;

import java.nio.ByteOrder;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import org.jspecify.annotations.NonNull;
Expand All @@ -9,7 +10,7 @@ public enum InputEncoding {
/** UTF-8 encoding. */
UTF_8(StandardCharsets.UTF_8),
/** UTF-16 encoding. */
UTF_16(StandardCharsets.UTF_16BE);
UTF_16(ByteOrder.nativeOrder() == ByteOrder.BIG_ENDIAN ? StandardCharsets.UTF_16BE : StandardCharsets.UTF_16LE);

private final @NonNull Charset charset;

Expand All @@ -24,12 +25,14 @@ Charset charset() {
/**
 * Convert a standard {@linkplain Charset} to an {@linkplain InputEncoding}.
 *
 * @param charset one of {@link StandardCharsets#UTF_8} or {@link StandardCharsets#UTF_16}
 *     ({@link StandardCharsets#UTF_16LE UTF_16LE} and {@link StandardCharsets#UTF_16BE UTF_16BE}
 *     will work too, but native byte order will be used)
 * @return the {@linkplain InputEncoding} that corresponds to the given character set
 * @throws IllegalArgumentException If the character set is invalid.
 */
static @NonNull InputEncoding valueOf(@NonNull Charset charset) throws IllegalArgumentException {
    if (charset.equals(StandardCharsets.UTF_8)) return InputEncoding.UTF_8;
    // Every UTF-16 flavour maps to the single UTF_16 constant; the requested byte order is not preserved.
    if (charset.equals(StandardCharsets.UTF_16BE)
            || charset.equals(StandardCharsets.UTF_16LE)
            || charset.equals(StandardCharsets.UTF_16)) return InputEncoding.UTF_16;
    throw new IllegalArgumentException("Invalid character set: %s".formatted(charset));
}
}
14 changes: 12 additions & 2 deletions src/test/java/io/github/treesitter/jtreesitter/ParserTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,11 @@ void setIncludedRanges() {
void parseUtf8() {
    parser.setLanguage(language);
    try (var tree = parser.parse("class Foo {}").orElseThrow()) {
        var rootNode = tree.getRootNode();

        // The source is 12 ASCII characters, so the root node is expected to end at byte 12.
        assertEquals(12, rootNode.getEndByte());
        // Check the parse result itself, not only its length.
        assertFalse(rootNode.isError());
        assertEquals("(program (class_declaration name: (identifier) body: (class_body)))", rootNode.toSexp());
    }
}

Expand All @@ -87,7 +91,13 @@ void parseUtf8() {
void parseUtf16() {
    parser.setLanguage(language);
    try (var tree = parser.parse("var java = \"💩\";", InputEncoding.UTF_16).orElseThrow()) {
        var rootNode = tree.getRootNode();

        // 16 UTF-16 code units (the emoji is a surrogate pair) at 2 bytes each gives 32 bytes.
        assertEquals(32, rootNode.getEndByte());
        // An error root or a wrong tree shape here would indicate the input bytes were misread.
        assertFalse(rootNode.isError());
        assertEquals(
                "(program (local_variable_declaration type: (type_identifier) declarator: (variable_declarator name: (identifier) value: (string_literal (string_fragment)))))",
                rootNode.toSexp());
    }
}

Expand Down
Loading