Skip to content

Commit 0c3aaa7

Browse files
committed
feat(inputencoding): use C enum values
This is more robust than implicitly relying on the Java and C enums having the same values.
1 parent 875bfd2 commit 0c3aaa7

File tree

4 files changed

+38
-6
lines changed

4 files changed

+38
-6
lines changed

pom.xml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,11 @@
6363
<artifactId>junit-jupiter-api</artifactId>
6464
<scope>test</scope>
6565
</dependency>
66+
<dependency>
67+
<groupId>org.junit.jupiter</groupId>
68+
<artifactId>junit-jupiter-params</artifactId>
69+
<scope>test</scope>
70+
</dependency>
6671
</dependencies>
6772
<build>
6873
<plugins>

src/main/java/io/github/treesitter/jtreesitter/InputEncoding.java

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
package io.github.treesitter.jtreesitter;
22

3+
import static io.github.treesitter.jtreesitter.internal.TreeSitter.TSInputEncodingUTF16BE;
4+
import static io.github.treesitter.jtreesitter.internal.TreeSitter.TSInputEncodingUTF16LE;
5+
import static io.github.treesitter.jtreesitter.internal.TreeSitter.TSInputEncodingUTF8;
6+
37
import java.nio.ByteOrder;
48
import java.nio.charset.Charset;
59
import java.nio.charset.StandardCharsets;
@@ -8,30 +12,36 @@
812
/** The encoding of source code. */
913
public enum InputEncoding {
1014
/** UTF-8 encoding. */
11-
UTF_8(StandardCharsets.UTF_8),
15+
UTF_8(StandardCharsets.UTF_8, TSInputEncodingUTF8()),
1216
/**
1317
* UTF-16 little endian encoding.
1418
*
1519
* @since 0.25.0
1620
*/
17-
UTF_16LE(StandardCharsets.UTF_16LE),
21+
UTF_16LE(StandardCharsets.UTF_16LE, TSInputEncodingUTF16LE()),
1822
/**
1923
* UTF-16 big endian encoding.
2024
*
2125
* @since 0.25.0
2226
*/
23-
UTF_16BE(StandardCharsets.UTF_16BE);
27+
UTF_16BE(StandardCharsets.UTF_16BE, TSInputEncodingUTF16BE());
2428

2529
private final @NonNull Charset charset;
30+
private final int tsInputEncoding;
2631

27-
InputEncoding(@NonNull Charset charset) {
32+
InputEncoding(@NonNull Charset charset, int tsInputEncoding) {
2833
this.charset = charset;
34+
this.tsInputEncoding = tsInputEncoding;
2935
}
3036

3137
Charset charset() {
3238
return charset;
3339
}
3440

41+
int tsInputEncoding() {
42+
return tsInputEncoding;
43+
}
44+
3545
private static final boolean IS_BIG_ENDIAN = ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN);
3646

3747
/**

src/main/java/io/github/treesitter/jtreesitter/Parser.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,7 @@ public Optional<Tree> parse(String source, InputEncoding encoding, @Nullable Tre
254254
var bytes = source.getBytes(encoding.charset());
255255
var string = alloc.allocateFrom(C_CHAR, bytes);
256256
var old = oldTree == null ? MemorySegment.NULL : oldTree.segment();
257-
var tree = ts_parser_parse_string_encoding(self, old, string, bytes.length, encoding.ordinal());
257+
var tree = ts_parser_parse_string_encoding(self, old, string, bytes.length, encoding.tsInputEncoding());
258258
if (tree.equals(MemorySegment.NULL)) return Optional.empty();
259259
return Optional.of(new Tree(tree, language, source, encoding.charset()));
260260
}
@@ -303,7 +303,7 @@ public Optional<Tree> parse(
303303

304304
var input = TSInput.allocate(arena);
305305
TSInput.payload(input, MemorySegment.NULL);
306-
TSInput.encoding(input, encoding.ordinal());
306+
TSInput.encoding(input, encoding.tsInputEncoding());
307307
// NOTE: can't use _ because of palantir/palantir-java-format#934
308308
var read = TSInput.read.allocate(
309309
(payload, index, point, bytes) -> {

src/test/java/io/github/treesitter/jtreesitter/ParserTest.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
import java.util.List;
99
import java.util.concurrent.*;
1010
import org.junit.jupiter.api.*;
11+
import org.junit.jupiter.params.ParameterizedTest;
12+
import org.junit.jupiter.params.provider.EnumSource;
1113

1214
class ParserTest {
1315
private static Language language;
@@ -86,6 +88,21 @@ void parseUtf16() {
8688
}
8789
}
8890

91+
@ParameterizedTest
92+
@EnumSource(InputEncoding.class)
93+
@DisplayName("parse(encoding)")
94+
void parseEncoding(InputEncoding encoding) {
95+
parser.setLanguage(language);
96+
var source = "var text = \"☕fi𝄞\";";
97+
try (var tree = parser.parse(source, encoding).orElseThrow()) {
98+
var rootNode = tree.getRootNode();
99+
100+
assertFalse(rootNode.isError());
101+
assertEquals(source, tree.getText());
102+
assertEquals(source, rootNode.getText());
103+
}
104+
}
105+
89106
@Test
90107
@DisplayName("parse(logger)")
91108
void parseLogger() {

0 commit comments

Comments
 (0)