Skip to content

Commit c48b4e1

Browse files
feat!: update API to 0.25
1 parent feeff25 commit c48b4e1

File tree

12 files changed

+315
-55
lines changed

12 files changed

+315
-55
lines changed

scripts/jextract.ps1

Lines changed: 17 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2,35 +2,44 @@ $package = 'io.github.treesitter.jtreesitter.internal'
22
$output = "$($args[1])/generated-sources/jextract"
33
$lib = "$($args[0])/core/lib"
44

5-
& jextract.bat `
5+
& jextract.ps1 `
66
--include-struct TSInput `
77
--include-struct TSInputEdit `
88
--include-struct TSLogger `
99
--include-struct TSNode `
10+
--include-struct TSParseOptions `
11+
--include-struct TSParseState `
1012
--include-struct TSPoint `
1113
--include-struct TSQueryCapture `
14+
--include-struct TSQueryCursorOptions `
15+
--include-struct TSQueryCursorState `
1216
--include-struct TSQueryMatch `
1317
--include-struct TSQueryPredicateStep `
1418
--include-struct TSQueryPredicateStepType `
19+
--include-struct TSLanguageMetadata `
1520
--include-struct TSRange `
1621
--include-struct TSTreeCursor `
1722
--include-function free `
1823
--include-function malloc `
1924
--include-function calloc `
2025
--include-function realloc `
2126
--include-function ts_set_allocator `
27+
--include-function ts_language_abi_version `
2228
--include-function ts_language_copy `
2329
--include-function ts_language_delete `
2430
--include-function ts_language_field_count `
2531
--include-function ts_language_field_id_for_name `
2632
--include-function ts_language_field_name_for_id `
33+
--include-function ts_language_metadata `
34+
--include-function ts_language_name `
2735
--include-function ts_language_next_state `
2836
--include-function ts_language_state_count `
37+
--include-function ts_language_subtypes `
38+
--include-function ts_language_supertypes `
2939
--include-function ts_language_symbol_count `
3040
--include-function ts_language_symbol_for_name `
3141
--include-function ts_language_symbol_name `
3242
--include-function ts_language_symbol_type `
33-
--include-function ts_language_version `
3443
--include-function ts_lookahead_iterator_current_symbol `
3544
--include-function ts_lookahead_iterator_current_symbol_name `
3645
--include-function ts_lookahead_iterator_delete `
@@ -90,6 +99,7 @@ $lib = "$($args[0])/core/lib"
9099
--include-function ts_parser_parse `
91100
--include-function ts_parser_parse_string `
92101
--include-function ts_parser_parse_string_encoding `
102+
--include-function ts_parser_parse_with_options `
93103
--include-function ts_parser_print_dot_graphs `
94104
--include-function ts_parser_reset `
95105
--include-function ts_parser_set_cancellation_flag `
@@ -104,6 +114,7 @@ $lib = "$($args[0])/core/lib"
104114
--include-function ts_query_cursor_delete `
105115
--include-function ts_query_cursor_did_exceed_match_limit `
106116
--include-function ts_query_cursor_exec `
117+
--include-function ts_query_cursor_exec_with_options `
107118
--include-function ts_query_cursor_match_limit `
108119
--include-function ts_query_cursor_new `
109120
--include-function ts_query_cursor_next_capture `
@@ -157,7 +168,9 @@ $lib = "$($args[0])/core/lib"
157168
--include-function ts_tree_root_node_with_offset `
158169
--include-constant TREE_SITTER_LANGUAGE_VERSION `
159170
--include-constant TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION `
160-
--include-constant TSInputEncodingUTF16 `
171+
--include-constant TSInputEncodingCustom `
172+
--include-constant TSInputEncodingUTF16BE `
173+
--include-constant TSInputEncodingUTF16LE `
161174
--include-constant TSInputEncodingUTF8 `
162175
--include-constant TSLogTypeLex `
163176
--include-constant TSLogTypeParse `
@@ -180,6 +193,7 @@ $lib = "$($args[0])/core/lib"
180193
--include-constant TSSymbolTypeAuxiliary `
181194
--include-constant TSSymbolTypeRegular `
182195
--include-constant TSSymbolTypeSupertype `
196+
--include-typedef DecodeFunction `
183197
--header-class-name TreeSitter `
184198
--output $output `
185199
-t $package `

scripts/jextract.sh

Lines changed: 16 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9,30 +9,39 @@ exec jextract \
99
--include-struct TSInputEdit \
1010
--include-struct TSLogger \
1111
--include-struct TSNode \
12+
--include-struct TSParseOptions \
13+
--include-struct TSParseState \
1214
--include-struct TSPoint \
1315
--include-struct TSQueryCapture \
16+
--include-struct TSQueryCursorOptions \
17+
--include-struct TSQueryCursorState \
1418
--include-struct TSQueryMatch \
1519
--include-struct TSQueryPredicateStep \
1620
--include-struct TSQueryPredicateStepType \
21+
--include-struct TSLanguageMetadata \
1722
--include-struct TSRange \
1823
--include-struct TSTreeCursor \
1924
--include-function free \
2025
--include-function malloc \
2126
--include-function calloc \
2227
--include-function realloc \
2328
--include-function ts_set_allocator \
29+
--include-function ts_language_abi_version \
2430
--include-function ts_language_copy \
2531
--include-function ts_language_delete \
2632
--include-function ts_language_field_count \
2733
--include-function ts_language_field_id_for_name \
2834
--include-function ts_language_field_name_for_id \
35+
--include-function ts_language_metadata \
36+
--include-function ts_language_name \
2937
--include-function ts_language_next_state \
3038
--include-function ts_language_state_count \
39+
--include-function ts_language_subtypes \
40+
--include-function ts_language_supertypes \
3141
--include-function ts_language_symbol_count \
3242
--include-function ts_language_symbol_for_name \
3343
--include-function ts_language_symbol_name \
3444
--include-function ts_language_symbol_type \
35-
--include-function ts_language_version \
3645
--include-function ts_lookahead_iterator_current_symbol \
3746
--include-function ts_lookahead_iterator_current_symbol_name \
3847
--include-function ts_lookahead_iterator_delete \
@@ -92,6 +101,7 @@ exec jextract \
92101
--include-function ts_parser_parse \
93102
--include-function ts_parser_parse_string \
94103
--include-function ts_parser_parse_string_encoding \
104+
--include-function ts_parser_parse_with_options \
95105
--include-function ts_parser_print_dot_graphs \
96106
--include-function ts_parser_reset \
97107
--include-function ts_parser_set_cancellation_flag \
@@ -106,6 +116,7 @@ exec jextract \
106116
--include-function ts_query_cursor_delete \
107117
--include-function ts_query_cursor_did_exceed_match_limit \
108118
--include-function ts_query_cursor_exec \
119+
--include-function ts_query_cursor_exec_with_options \
109120
--include-function ts_query_cursor_match_limit \
110121
--include-function ts_query_cursor_new \
111122
--include-function ts_query_cursor_next_capture \
@@ -159,7 +170,9 @@ exec jextract \
159170
--include-function ts_tree_root_node_with_offset \
160171
--include-constant TREE_SITTER_LANGUAGE_VERSION \
161172
--include-constant TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION \
162-
--include-constant TSInputEncodingUTF16 \
173+
--include-constant TSInputEncodingCustom \
174+
--include-constant TSInputEncodingUTF16BE \
175+
--include-constant TSInputEncodingUTF16LE \
163176
--include-constant TSInputEncodingUTF8 \
164177
--include-constant TSLogTypeLex \
165178
--include-constant TSLogTypeParse \
@@ -182,6 +195,7 @@ exec jextract \
182195
--include-constant TSSymbolTypeAuxiliary \
183196
--include-constant TSSymbolTypeRegular \
184197
--include-constant TSSymbolTypeSupertype \
198+
--include-typedef DecodeFunction \
185199
--header-class-name TreeSitter \
186200
--output "$output" \
187201
-t "$package" \

src/main/java/io/github/treesitter/jtreesitter/InputEncoding.java

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,18 @@
99
public enum InputEncoding {
1010
/** UTF-8 encoding. */
1111
UTF_8(StandardCharsets.UTF_8),
12-
/** UTF-16 encoding. */
13-
UTF_16(ByteOrder.nativeOrder() == ByteOrder.BIG_ENDIAN ? StandardCharsets.UTF_16BE : StandardCharsets.UTF_16LE);
12+
/**
13+
* UTF-16 little endian encoding.
14+
*
15+
* @since 0.25.0
16+
*/
17+
UTF_16LE(StandardCharsets.UTF_16LE),
18+
/**
19+
* UTF-16 big endian encoding.
20+
*
21+
* @since 0.25.0
22+
*/
23+
UTF_16BE(StandardCharsets.UTF_16BE);
1424

1525
private final @NonNull Charset charset;
1626

@@ -22,17 +32,23 @@ Charset charset() {
2232
return charset;
2333
}
2434

35+
private static final boolean IS_BIG_ENDIAN = ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN);
36+
2537
/**
2638
* Convert a standard {@linkplain Charset} to an {@linkplain InputEncoding}.
2739
*
28-
* @param charset one of {@link StandardCharsets#UTF_8} or {@link StandardCharsets#UTF_16} ({@link StandardCharsets#UTF_16LE UTF_16LE} and {@link StandardCharsets#UTF_16BE UTF_16BE} will work too, but native byte order will be used)
40+
* @param charset one of {@link StandardCharsets#UTF_8}, {@link StandardCharsets#UTF_16BE},
41+
* {@link StandardCharsets#UTF_16LE}, or {@link StandardCharsets#UTF_16} (native byte order).
2942
* @throws IllegalArgumentException If the character set is invalid.
3043
*/
44+
@SuppressWarnings("SameParameterValue")
3145
static @NonNull InputEncoding valueOf(@NonNull Charset charset) throws IllegalArgumentException {
3246
if (charset.equals(StandardCharsets.UTF_8)) return InputEncoding.UTF_8;
33-
if (charset.equals(StandardCharsets.UTF_16BE)
34-
|| charset.equals(StandardCharsets.UTF_16LE)
35-
|| charset.equals(StandardCharsets.UTF_16)) return InputEncoding.UTF_16;
47+
if (charset.equals(StandardCharsets.UTF_16BE)) return InputEncoding.UTF_16BE;
48+
if (charset.equals(StandardCharsets.UTF_16LE)) return InputEncoding.UTF_16LE;
49+
if (charset.equals(StandardCharsets.UTF_16)) {
50+
return IS_BIG_ENDIAN ? InputEncoding.UTF_16BE : InputEncoding.UTF_16LE;
51+
}
3652
throw new IllegalArgumentException("Invalid character set: %s".formatted(charset));
3753
}
3854
}

src/main/java/io/github/treesitter/jtreesitter/Language.java

Lines changed: 71 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import static io.github.treesitter.jtreesitter.internal.TreeSitter.*;
44

5+
import io.github.treesitter.jtreesitter.internal.TSLanguageMetadata;
56
import java.lang.foreign.*;
67
import org.jspecify.annotations.NullMarked;
78
import org.jspecify.annotations.Nullable;
@@ -40,7 +41,7 @@ public final class Language implements Cloneable {
4041
*/
4142
public Language(MemorySegment self) throws IllegalArgumentException {
4243
this.self = self.asReadOnly();
43-
version = ts_language_version(this.self);
44+
version = ts_language_abi_version(this.self);
4445
if (version < MIN_COMPATIBLE_LANGUAGE_VERSION || version > LANGUAGE_VERSION) {
4546
throw new IllegalArgumentException(String.format(
4647
"Incompatible language version %d. Must be between %d and %d.",
@@ -87,13 +88,51 @@ MemorySegment segment() {
8788
/**
8889
* Get the ABI version number for this language.
8990
*
90-
* <p>When a language is generated by the Tree-sitter CLI, it is assigned
91-
* an ABI version number that corresponds to the current CLI version.
91+
* <p>This version number is used to ensure that languages
92+
* were generated by a compatible version of Tree-sitter.
93+
*
94+
* @since 0.25.0
95+
*/
96+
public @Unsigned int getAbiVersion() {
97+
return version;
98+
}
99+
100+
/**
101+
* Get the ABI version number for this language.
102+
*
103+
* @deprecated Use {@link #getAbiVersion} instead.
92104
*/
105+
@Deprecated(since = "0.25.0", forRemoval = true)
93106
public @Unsigned int getVersion() {
94107
return version;
95108
}
96109

110+
/** Get the name of this language, if available. */
111+
public @Nullable String getName() {
112+
var name = ts_language_name(self);
113+
return name.equals(MemorySegment.NULL) ? null : name.getString(0);
114+
}
115+
116+
/**
117+
* Get the metadata for this language, if available.
118+
*
119+
* @apiNote This information is generated by the Tree-sitter
120+
* CLI and relies on the language author providing the correct
121+
* metadata in the language's {@code tree-sitter.json} file.
122+
*
123+
* @since 0.25.0
124+
*/
125+
public @Nullable LanguageMetadata getMetadata() {
126+
var metadata = ts_language_metadata(self);
127+
if (metadata.equals(MemorySegment.NULL)) return null;
128+
129+
short major = TSLanguageMetadata.major_version(metadata);
130+
short minor = TSLanguageMetadata.minor_version(metadata);
131+
short patch = TSLanguageMetadata.patch_version(metadata);
132+
var version = new LanguageMetadata.Version(major, minor, patch);
133+
return new LanguageMetadata(version);
134+
}
135+
97136
/** Get the number of distinct node types in this language. */
98137
public @Unsigned int getSymbolCount() {
99138
return ts_language_symbol_count(self);
@@ -109,6 +148,35 @@ MemorySegment segment() {
109148
return ts_language_field_count(self);
110149
}
111150

151+
/**
152+
* Get all supertype symbols for the language.
153+
*
154+
* @since 0.25.0
155+
*/
156+
public @Unsigned short[] getSupertypes() {
157+
try (var alloc = Arena.ofConfined()) {
158+
var length = alloc.allocate(C_INT.byteSize(), C_INT.byteAlignment());
159+
var supertypes = ts_language_supertypes(self, length);
160+
var isEmpty = length.get(C_INT, 0) == 0;
161+
return isEmpty ? new short[0] : supertypes.toArray(C_SHORT);
162+
}
163+
}
164+
165+
/**
166+
* Get all symbols for a given supertype symbol.
167+
*
168+
* @since 0.25.0
169+
* @see #getSupertypes()
170+
*/
171+
public @Unsigned short[] getSubtypes(@Unsigned short supertype) {
172+
try (var alloc = Arena.ofConfined()) {
173+
var length = alloc.allocate(C_INT.byteSize(), C_INT.byteAlignment());
174+
var subtypes = ts_language_subtypes(self, supertype, length);
175+
var isEmpty = length.get(C_INT, 0) == 0;
176+
return isEmpty ? new short[0] : subtypes.toArray(C_SHORT);
177+
}
178+
}
179+
112180
/** Get the node type for the given numerical ID. */
113181
public @Nullable String getSymbolName(@Unsigned short symbol) {
114182
var name = ts_language_symbol_name(self, symbol);
src/main/java/io/github/treesitter/jtreesitter/LanguageMetadata.java

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
package io.github.treesitter.jtreesitter;
2+
3+
import org.jspecify.annotations.NullMarked;
4+
5+
/**
6+
* The metadata associated with a {@linkplain Language}.
7+
*
8+
* @since 0.25.0
9+
*/
10+
@NullMarked
11+
public record LanguageMetadata(Version version) {
12+
/**
13+
* The <a href="https://semver.org/">Semantic Version</a> of the {@linkplain Language}.
14+
*
15+
* <p>This version information may be used to signal if a given parser
16+
* is incompatible with existing queries when upgrading between versions.
17+
*
18+
* @since 0.25.0
19+
*/
20+
public record Version(@Unsigned short major, @Unsigned short minor, @Unsigned short patch) {
21+
@Override
22+
public String toString() {
23+
return "%d.%d.%d".formatted(major, minor, patch);
24+
}
25+
}
26+
}

src/main/java/io/github/treesitter/jtreesitter/NativeLibraryLookup.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
* by listing their fully qualified class name in a resource file named
1010
* {@code META-INF/services/io.github.treesitter.jtreesitter.NativeLibraryLookup}.
1111
*
12+
* @since 0.25.0
1213
* @see java.util.ServiceLoader
1314
*/
1415
@FunctionalInterface
@@ -17,6 +18,7 @@ public interface NativeLibraryLookup {
1718
* Get the {@link SymbolLookup} to be used for the tree-sitter native library.
1819
*
1920
* @param arena The arena that will manage the native memory.
21+
* @since 0.25.0
2022
*/
2123
SymbolLookup get(Arena arena);
2224
}

src/main/java/io/github/treesitter/jtreesitter/Node.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -434,7 +434,7 @@ public void edit(InputEdit edit) {
434434
children = null;
435435
}
436436

437-
/** Create a new tree cursor starting from this node. */
437+
/** Create a new {@linkplain TreeCursor tree cursor} starting from this node. */
438438
public TreeCursor walk() {
439439
return new TreeCursor(this, tree);
440440
}

src/main/java/io/github/treesitter/jtreesitter/ParseCallback.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,7 @@ public interface ParseCallback extends BiFunction<Integer, Point, String> {
1212
*
1313
* @param offset the current byte offset
1414
* @param point the current point
15-
* @return A chunk of text or {@code null}
16-
* to indicate the end of the document.
15+
* @return A chunk of text or {@code null} to indicate the end of the document.
1716
*/
1817
@Override
1918
@Nullable

0 commit comments

Comments
 (0)