Skip to content

Commit b8e0e0a

Browse files
fix(node): use source encoding in getText()
1 parent 5c560d3 commit b8e0e0a

File tree

3 files changed

+21 -11 lines changed

src/main/java/io/github/treesitter/jtreesitter/Node.java

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@ public final class Node {
2121
private final Tree tree;
2222
private @Nullable List<Node> children;
2323
private final Arena arena = Arena.ofAuto();
24+
private boolean wasEdited = false;
2425

2526
Node(MemorySegment self, Tree tree) {
2627
this.self = self;
@@ -409,10 +410,7 @@ public Optional<Node> getChildWithDescendant(Node descendant) {
409410

410411
/** Get the source code of the node, if available. */
411412
public @Nullable String getText() {
412-
var text = tree.getText();
413-
if (text == null) return null;
414-
var endByte = Math.min(getEndByte(), text.length());
415-
return text.substring(getStartByte(), endByte);
413+
return !wasEdited ? tree.getRegion(getStartByte(), getEndByte()) : null;
416414
}
417415

418416
/**
@@ -426,6 +424,7 @@ public Optional<Node> getChildWithDescendant(Node descendant) {
426424
*/
427425
public void edit(InputEdit edit) {
428426
ts_node_edit(self, edit.into(arena));
427+
wasEdited = true;
429428
children = null;
430429
}
431430

src/main/java/io/github/treesitter/jtreesitter/Parser.java

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -244,7 +244,7 @@ public Optional<Tree> parse(String source, InputEncoding encoding, @Nullable Tre
244244
var old = oldTree == null ? MemorySegment.NULL : oldTree.segment();
245245
var tree = ts_parser_parse_string_encoding(self, old, string, bytes.length, encoding.ordinal());
246246
if (tree.equals(MemorySegment.NULL)) return Optional.empty();
247-
return Optional.of(new Tree(tree, language, source));
247+
return Optional.of(new Tree(tree, language, source, encoding.charset()));
248248
}
249249
}
250250

@@ -299,7 +299,7 @@ public Optional<Tree> parse(ParseCallback callback, InputEncoding encoding, @Nul
299299
var old = oldTree == null ? MemorySegment.NULL : oldTree.segment();
300300
var tree = ts_parser_parse(self, old, input);
301301
if (tree.equals(MemorySegment.NULL)) return Optional.empty();
302-
return Optional.of(new Tree(tree, language, null));
302+
return Optional.of(new Tree(tree, language, null, null));
303303
}
304304

305305
/**

src/main/java/io/github/treesitter/jtreesitter/Tree.java

Lines changed: 16 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
import io.github.treesitter.jtreesitter.internal.TSRange;
66
import io.github.treesitter.jtreesitter.internal.TreeSitter;
77
import java.lang.foreign.*;
8+
import java.nio.charset.Charset;
89
import java.util.ArrayList;
910
import java.util.Collections;
1011
import java.util.List;
@@ -15,16 +16,18 @@
1516
@NullMarked
1617
public final class Tree implements AutoCloseable, Cloneable {
1718
private final MemorySegment self;
18-
private @Nullable String source;
19+
private byte[] source;
20+
private @Nullable Charset charset;
1921
private final Arena arena;
2022
private final Language language;
2123
private @Nullable List<Range> includedRanges;
2224

23-
Tree(MemorySegment self, Language language, @Nullable String source) {
25+
Tree(MemorySegment self, Language language, @Nullable String source, @Nullable Charset charset) {
2426
arena = Arena.ofShared();
2527
this.self = self.reinterpret(arena, TreeSitter::ts_tree_delete);
2628
this.language = language;
27-
this.source = source;
29+
this.source = source != null && charset != null ? source.getBytes(charset) : new byte[] {};
30+
this.charset = charset;
2831
}
2932

3033
private Tree(Tree tree) {
@@ -33,21 +36,28 @@ private Tree(Tree tree) {
3336
self = copy.reinterpret(arena, TreeSitter::ts_tree_delete);
3437
language = tree.language;
3538
source = tree.source;
39+
charset = tree.charset;
3640
includedRanges = tree.includedRanges;
3741
}
3842

3943
MemorySegment segment() {
4044
return self;
4145
}
4246

47+
@Nullable
48+
String getRegion(@Unsigned int start, @Unsigned int end) {
49+
var length = Math.min(end, source.length) - start;
50+
return charset != null ? new String(source, start, length, charset) : null;
51+
}
52+
4353
/** Get the language that was used to parse the syntax tree. */
4454
public Language getLanguage() {
4555
return language;
4656
}
4757

4858
/** Get the source code of the syntax tree, if available. */
4959
public @Nullable String getText() {
50-
return source;
60+
return charset != null ? new String(source, charset) : null;
5161
}
5262

5363
/** Get the root node of the syntax tree. */
@@ -122,7 +132,8 @@ public void edit(InputEdit edit) {
122132
try (var alloc = Arena.ofConfined()) {
123133
ts_tree_edit(self, edit.into(alloc));
124134
} finally {
125-
source = null;
135+
source = new byte[] {};
136+
charset = null;
126137
}
127138
}
128139

0 commit comments

Comments
 (0)