diff --git a/src/main/java/io/github/treesitter/jtreesitter/Node.java b/src/main/java/io/github/treesitter/jtreesitter/Node.java index 6bc655b..9b2ae54 100644 --- a/src/main/java/io/github/treesitter/jtreesitter/Node.java +++ b/src/main/java/io/github/treesitter/jtreesitter/Node.java @@ -21,6 +21,7 @@ public final class Node { private final Tree tree; private @Nullable List children; private final Arena arena = Arena.ofAuto(); + private boolean wasEdited = false; Node(MemorySegment self, Tree tree) { this.self = self; @@ -409,10 +410,7 @@ public Optional getChildWithDescendant(Node descendant) { /** Get the source code of the node, if available. */ public @Nullable String getText() { - var text = tree.getText(); - if (text == null) return null; - var endByte = Math.min(getEndByte(), text.length()); - return text.substring(getStartByte(), endByte); + return !wasEdited ? tree.getRegion(getStartByte(), getEndByte()) : null; } /** @@ -426,6 +424,7 @@ public Optional getChildWithDescendant(Node descendant) { */ public void edit(InputEdit edit) { ts_node_edit(self, edit.into(arena)); + wasEdited = true; children = null; } diff --git a/src/main/java/io/github/treesitter/jtreesitter/Parser.java b/src/main/java/io/github/treesitter/jtreesitter/Parser.java index bbdaa53..a9a467f 100644 --- a/src/main/java/io/github/treesitter/jtreesitter/Parser.java +++ b/src/main/java/io/github/treesitter/jtreesitter/Parser.java @@ -244,7 +244,7 @@ public Optional parse(String source, InputEncoding encoding, @Nullable Tre var old = oldTree == null ? MemorySegment.NULL : oldTree.segment(); var tree = ts_parser_parse_string_encoding(self, old, string, bytes.length, encoding.ordinal()); if (tree.equals(MemorySegment.NULL)) return Optional.empty(); - return Optional.of(new Tree(tree, language, source)); + return Optional.of(new Tree(tree, language, source, encoding.charset())); } } @@ -299,7 +299,7 @@ public Optional parse(ParseCallback callback, InputEncoding encoding, @Nul var old = oldTree == null ? MemorySegment.NULL : oldTree.segment(); var tree = ts_parser_parse(self, old, input); if (tree.equals(MemorySegment.NULL)) return Optional.empty(); - return Optional.of(new Tree(tree, language, null)); + return Optional.of(new Tree(tree, language, null, null)); } /** diff --git a/src/main/java/io/github/treesitter/jtreesitter/Tree.java b/src/main/java/io/github/treesitter/jtreesitter/Tree.java index 0e18c6d..7040726 100644 --- a/src/main/java/io/github/treesitter/jtreesitter/Tree.java +++ b/src/main/java/io/github/treesitter/jtreesitter/Tree.java @@ -5,6 +5,7 @@ import io.github.treesitter.jtreesitter.internal.TSRange; import io.github.treesitter.jtreesitter.internal.TreeSitter; import java.lang.foreign.*; +import java.nio.charset.Charset; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -15,16 +16,18 @@ @NullMarked public final class Tree implements AutoCloseable, Cloneable { private final MemorySegment self; - private @Nullable String source; + private byte[] source; + private @Nullable Charset charset; private final Arena arena; private final Language language; private @Nullable List includedRanges; - Tree(MemorySegment self, Language language, @Nullable String source) { + Tree(MemorySegment self, Language language, @Nullable String source, @Nullable Charset charset) { arena = Arena.ofShared(); this.self = self.reinterpret(arena, TreeSitter::ts_tree_delete); this.language = language; - this.source = source; + this.source = source != null && charset != null ? source.getBytes(charset) : new byte[0]; + this.charset = charset; } private Tree(Tree tree) { @@ -33,6 +36,7 @@ private Tree(Tree tree) { self = copy.reinterpret(arena, TreeSitter::ts_tree_delete); language = tree.language; source = tree.source; + charset = tree.charset; includedRanges = tree.includedRanges; } @@ -40,6 +44,12 @@ MemorySegment segment() { return self; } + @Nullable + String getRegion(@Unsigned int start, @Unsigned int end) { + var length = Math.min(end, source.length) - start; + return charset != null ? new String(source, start, length, charset) : null; + } + /** Get the language that was used to parse the syntax tree. */ public Language getLanguage() { return language; @@ -47,7 +57,7 @@ public Language getLanguage() { /** Get the source code of the syntax tree, if available. */ public @Nullable String getText() { - return source; + return charset != null ? new String(source, charset) : null; } /** Get the root node of the syntax tree. */ @@ -122,7 +132,8 @@ public void edit(InputEdit edit) { try (var alloc = Arena.ofConfined()) { ts_tree_edit(self, edit.into(alloc)); } finally { - source = null; + source = new byte[0]; + charset = null; } } diff --git a/src/test/java/io/github/treesitter/jtreesitter/NodeTest.java b/src/test/java/io/github/treesitter/jtreesitter/NodeTest.java index b1a2ffc..5d8b1e4 100644 --- a/src/test/java/io/github/treesitter/jtreesitter/NodeTest.java +++ b/src/test/java/io/github/treesitter/jtreesitter/NodeTest.java @@ -13,7 +13,7 @@ class NodeTest { static void beforeAll() { var language = new Language(TreeSitterJava.language()); try (var parser = new Parser(language)) { - tree = parser.parse("class Foo {}").orElseThrow(); + tree = parser.parse("class Foo {} // uni©ode").orElseThrow(); node = tree.getRootNode(); } } @@ -100,13 +100,13 @@ void getStartByte() { @Test void getEndByte() { - assertEquals(12, node.getEndByte()); + assertEquals(24, node.getEndByte()); } @Test void getRange() { - Point startPoint = new Point(0, 0), endPoint = new Point(0, 12); - assertEquals(new Range(startPoint, endPoint, 0, 12), node.getRange()); + Point startPoint = new Point(0, 0), endPoint = new Point(0, 24); + assertEquals(new Range(startPoint, endPoint, 0, 24), node.getRange()); } @Test @@ -116,22 +116,22 @@ void getStartPoint() { @Test void getEndPoint() { - assertEquals(new Point(0, 12), node.getEndPoint()); + assertEquals(new Point(0, 24), node.getEndPoint()); } @Test void getChildCount() { - assertEquals(1, node.getChildCount()); + assertEquals(2, node.getChildCount()); } @Test void getNamedChildCount() { - assertEquals(1, node.getNamedChildCount()); + assertEquals(2, node.getNamedChildCount()); } @Test void getDescendantCount() { - assertEquals(7, node.getDescendantCount()); + assertEquals(8, node.getDescendantCount()); } @Test @@ -273,8 +273,8 @@ void getChildWithDescendant() { @Test void getText() { - var child = node.getDescendant(6, 9).orElseThrow(); - assertEquals("Foo", child.getText()); + var child = node.getChild(1).orElseThrow(); + assertEquals("// uni©ode", child.getText()); } @Test @@ -298,7 +298,7 @@ void walk() { @Test void toSexp() { - var sexp = "(program (class_declaration name: (identifier) body: (class_body)))"; + var sexp = "(program (class_declaration name: (identifier) body: (class_body)) (line_comment))"; assertEquals(sexp, node.toSexp()); }