Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,11 @@
<artifactId>junit-jupiter-api</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-params</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
Expand Down
18 changes: 14 additions & 4 deletions src/main/java/io/github/treesitter/jtreesitter/InputEncoding.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
package io.github.treesitter.jtreesitter;

import static io.github.treesitter.jtreesitter.internal.TreeSitter.TSInputEncodingUTF16BE;
import static io.github.treesitter.jtreesitter.internal.TreeSitter.TSInputEncodingUTF16LE;
import static io.github.treesitter.jtreesitter.internal.TreeSitter.TSInputEncodingUTF8;

import java.nio.ByteOrder;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
Expand All @@ -8,30 +12,36 @@
/** The encoding of source code. */
public enum InputEncoding {
/** UTF-8 encoding. */
UTF_8(StandardCharsets.UTF_8),
UTF_8(StandardCharsets.UTF_8, TSInputEncodingUTF8()),
/**
* UTF-16 little endian encoding.
*
* @since 0.25.0
*/
UTF_16LE(StandardCharsets.UTF_16LE),
UTF_16LE(StandardCharsets.UTF_16LE, TSInputEncodingUTF16LE()),
/**
* UTF-16 big endian encoding.
*
* @since 0.25.0
*/
UTF_16BE(StandardCharsets.UTF_16BE);
UTF_16BE(StandardCharsets.UTF_16BE, TSInputEncodingUTF16BE());

private final @NonNull Charset charset;
private final int tsInputEncoding;

InputEncoding(@NonNull Charset charset) {
InputEncoding(@NonNull Charset charset, int tsInputEncoding) {
this.charset = charset;
this.tsInputEncoding = tsInputEncoding;
}

/**
 * Returns the Java {@link Charset} paired with this encoding constant.
 *
 * @return the charset passed to the enum constructor for this constant
 */
Charset charset() {
    return this.charset;
}

/**
 * Returns the native encoding value associated with this constant.
 *
 * <p>The value is the one supplied to the enum constructor (obtained from the
 * generated {@code TSInputEncoding*} bindings), so callers do not have to rely
 * on {@code ordinal()} happening to match the native numbering.
 *
 * @return the integer encoding value to hand to the native parser API
 */
int tsInputEncoding() {
    return this.tsInputEncoding;
}

private static final boolean IS_BIG_ENDIAN = ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN);

/**
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/io/github/treesitter/jtreesitter/Parser.java
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ public Optional<Tree> parse(String source, InputEncoding encoding, @Nullable Tre
var bytes = source.getBytes(encoding.charset());
var string = alloc.allocateFrom(C_CHAR, bytes);
var old = oldTree == null ? MemorySegment.NULL : oldTree.segment();
var tree = ts_parser_parse_string_encoding(self, old, string, bytes.length, encoding.ordinal());
var tree = ts_parser_parse_string_encoding(self, old, string, bytes.length, encoding.tsInputEncoding());
if (tree.equals(MemorySegment.NULL)) return Optional.empty();
return Optional.of(new Tree(tree, language, source, encoding.charset()));
}
Expand Down Expand Up @@ -303,7 +303,7 @@ public Optional<Tree> parse(

var input = TSInput.allocate(arena);
TSInput.payload(input, MemorySegment.NULL);
TSInput.encoding(input, encoding.ordinal());
TSInput.encoding(input, encoding.tsInputEncoding());
// NOTE: can't use _ because of palantir/palantir-java-format#934
var read = TSInput.read.allocate(
(payload, index, point, bytes) -> {
Expand Down
17 changes: 17 additions & 0 deletions src/test/java/io/github/treesitter/jtreesitter/ParserTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import java.util.List;
import java.util.concurrent.*;
import org.junit.jupiter.api.*;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.EnumSource;

class ParserTest {
private static Language language;
Expand Down Expand Up @@ -86,6 +88,21 @@ void parseUtf16() {
}
}

/**
 * Parses one snippet once per {@link InputEncoding} constant and verifies that
 * the tree and its root node both report the original text unchanged.
 */
@ParameterizedTest
@EnumSource(InputEncoding.class)
@DisplayName("parse(encoding)")
void parseEncoding(InputEncoding encoding) {
    // The literal contains non-ASCII characters, including one outside the BMP,
    // so each encoding has to handle multi-unit sequences.
    var source = "var text = \"☕fi𝄞\";";
    parser.setLanguage(language);

    try (var tree = parser.parse(source, encoding).orElseThrow()) {
        var root = tree.getRootNode();
        assertFalse(root.isError());

        // Text must round-trip through both the tree and its root node.
        assertEquals(source, tree.getText());
        assertEquals(source, root.getText());
    }
}

@Test
@DisplayName("parse(logger)")
void parseLogger() {
Expand Down