Skip to content

Commit b8d1c2a

Browse files
authored
Merge pull request #26 from ubolonton/type-revamp
Type Revamp
2 parents a558c3e + 236e38e commit b8d1c2a

File tree

13 files changed

+222
-163
lines changed

13 files changed

+222
-163
lines changed

CHANGELOG.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Changelog
2+
All notable changes to this project will be documented in this file.
3+
4+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
5+
6+
## [Unreleased]
7+
- Changed all APIs to use Emacs's 1-based byte positions instead of 0-based byte offsets.
8+
- Changed all APIs to use Emacs's 1-based line numbers instead of 0-based row numbering.
9+
- Changed representation of tree-sitter point from 2-element vector to cons cell.
10+
- Changed representation of query match/capture from 2-element vector to cons cell.
11+
12+
## [0.2.0] - 2020-02-02
13+
- Upgraded `tree-sitter` to 0.6.0.
14+
- Added `tree-sitter-cli`.
15+
- Added `tree-sitter-langs` (utilities to download pre-compiled modules and grammars).
16+
17+
## [0.1.0] - 2020-01-27
18+
Initial release
19+
20+
[Unreleased]: https://github.com/ubolonton/emacs-tree-sitter/compare/0.2.0...HEAD
21+
[0.2.0]: https://github.com/ubolonton/emacs-tree-sitter/compare/0.1.0...0.2.0

Cargo.lock

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ name = "tree_sitter_dyn"
1111
crate-type = ["cdylib"]
1212

1313
[dependencies]
14-
emacs = "0.12.1"
14+
emacs = "0.12.3"
1515
libloading = "0.5.1"
1616
tree-sitter = "0.6.0"
1717

README.md

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,11 +74,17 @@ If you want to hack on `emacs-tree-sitter` itself, see the section [Setup for De
7474
### Types
7575
7676
- `language`, `parser`, `tree`, `node`, `cursor`, `query`: corresponding tree-sitter types, embedded in `user-ptr` objects.
77-
- `point`: a vector in the form of `[row column]`, where `row` and `column` are zero-based. This is different from Emacs's concept of "point". Also note that `column` counts bytes, unlike the current built-in function `current-column`.
78-
- `range`: a vector in the form of `[start-point end-point]`.
77+
- `point`: a pair of `(LINE-NUMBER . BYTE-COLUMN)`.
78+
+ `LINE-NUMBER` is the absolute line number returned by `line-number-at-pos`, counting from 1.
79+
+ `BYTE-COLUMN` counts from 0, like `current-column`. However, unlike that function, it counts bytes, instead of displayed glyphs.
80+
- `range`: a vector in the form of `[START-BYTEPOS END-BYTEPOS START-POINT END-POINT]`.
7981
8082
These types are understood only by this package. They are not recognized by `type-of`, but have corresponding type-checking predicates, which are useful for debugging: `ts-language-p`, `ts-tree-p`, `ts-node-p`...
8183
84+
For consistency with Emacs's conventions, this binding differs from tree-sitter's C/Rust APIs in the following ways:
85+
- It uses 1-based byte position, not 0-based byte offset.
86+
- It uses 1-based line number, not 0-based row.
87+
8288
### Functions
8389
8490
- Language:

src/cursor.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -53,12 +53,12 @@ fn current_field_name(cursor: Value) -> Result<Value> {
5353
}
5454

5555
macro_rules! defun_cursor_walks {
56-
($($(#[$meta:meta])* $($lisp_name:literal)? fn $name:ident $( ( $( $param:ident: $itype:ty ),* ) )? -> $type:ty)*) => {
56+
($($(#[$meta:meta])* $($lisp_name:literal)? fn $name:ident $( ( $( $param:ident $($into:ident)? : $itype:ty ),* ) )? -> $type:ty)*) => {
5757
$(
5858
$(#[$meta])*
5959
#[defun$((name = $lisp_name))?]
6060
fn $name(cursor: &mut RCursor, $( $( $param: $itype ),* )? ) -> Result<$type> {
61-
Ok(cursor.borrow_mut().$name( $( $( $param ),* )? ))
61+
Ok(cursor.borrow_mut().$name( $( $( $param $(.$into())? ),* )? ))
6262
}
6363
)*
6464
};
@@ -77,9 +77,9 @@ defun_cursor_walks! {
7777
/// Return t if CURSOR successfully moved, nil if there was no next sibling node.
7878
fn goto_next_sibling -> bool
7979

80-
/// Move CURSOR to the first child that extends beyond the given byte offset.
80+
/// Move CURSOR to the first child that extends beyond the given BYTEPOS.
8181
/// Return the index of the child node if one was found, nil otherwise.
82-
"goto-first-child-for-byte" fn goto_first_child_for_byte(index: usize) -> Option<usize>
82+
"goto-first-child-for-byte" fn goto_first_child_for_byte(bytepos into: BytePos) -> Option<usize>
8383
}
8484

8585
/// Re-initialize CURSOR to start at a different NODE.

src/node.rs

Lines changed: 32 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -69,19 +69,19 @@ defun_node_props! {
6969

7070
// Position ------------------------------------------------------------------------------------
7171

72-
/// Return NODE's start byte.
73-
"node-start-byte" fn start_byte -> usize
72+
/// Return NODE's start byte position.
73+
"node-start-byte" fn start_byte -> BytePos; into
7474

75-
/// Return NODE's start point, as a `[ROW COLUMN]' vector.
75+
/// Return NODE's start point, in the form of (LINE-NUMBER . BYTE-COLUMN).
7676
"node-start-point" fn start_position -> Point; into
7777

78-
/// Return NODE's end byte.
79-
"node-end-byte" fn end_byte -> usize
78+
/// Return NODE's end byte position.
79+
"node-end-byte" fn end_byte -> BytePos; into
8080

81-
/// Return NODE's end point, as a `[ROW COLUMN]' vector.
81+
/// Return NODE's end point, in the form of (LINE-NUMBER . BYTE-COLUMN).
8282
"node-end-point" fn end_position -> Point; into
8383

84-
/// Return NODE's `[START-BYTE END-BYTE START-POINT END-POINT]'.
84+
/// Return a vector of NODE's [START-BYTEPOS END-BYTEPOS START-POINT END-POINT].
8585
"node-range" fn range -> Range; into
8686

8787
// Counting child nodes ------------------------------------------------------------------------
@@ -116,10 +116,10 @@ defun_node_navs! {
116116

117117
// Child ---------------------------------------------------------------------------------------
118118

119-
/// Return NODE's child at the given zero-based index.
119+
/// Return NODE's child at the given 0-based index.
120120
"get-nth-child" fn child(i: usize)
121121

122-
/// Return NODE's named child at the given zero-based index.
122+
/// Return NODE's named child at the given 0-based index.
123123
"get-nth-named-child" fn named_child(i: usize)
124124

125125
/// Return NODE's child with the given FIELD-NAME.
@@ -144,16 +144,18 @@ defun_node_navs! {
144144

145145
// Descendant ----------------------------------------------------------------------------------
146146

147-
/// Return the smallest node within NODE that spans the given range of bytes.
148-
"get-descendant-for-byte-range" fn descendant_for_byte_range(start: usize, end: usize)
147+
/// Return the smallest node within NODE that spans the given range of byte
148+
/// positions.
149+
"get-descendant-for-byte-range" fn descendant_for_byte_range(start into: BytePos, end into: BytePos)
149150

150-
/// Return the smallest node within NODE that spans the given range of points.
151+
/// Return the smallest node within NODE that spans the given point range.
151152
"get-descendant-for-point-range" fn descendant_for_point_range(start into: Point, end into: Point)
152153

153-
/// Return the smallest named node within NODE that spans the given range of bytes.
154-
"get-named-descendant-for-byte-range" fn named_descendant_for_byte_range(start: usize, end: usize)
154+
/// Return the smallest named node within NODE that spans the given range of byte
155+
/// positions.
156+
"get-named-descendant-for-byte-range" fn named_descendant_for_byte_range(start into: BytePos, end into: BytePos)
155157

156-
/// Return the smallest named node within NODE that spans the given range of points.
158+
/// Return the smallest named node within NODE that spans the given point range.
157159
"get-named-descendant-for-point-range" fn named_descendant_for_point_range(start into: Point, end into: Point)
158160
}
159161

@@ -164,26 +166,33 @@ defun_node_props! {
164166

165167
/// Edit NODE to keep it in sync with source code that has been edited.
166168
///
169+
/// You must describe the edit both in terms of byte positions and in terms of
170+
/// (LINE-NUMBER . BYTE-COLUMN) coordinates.
171+
///
172+
/// LINE-NUMBER should be the number returned by `line-number-at-pos', which counts
173+
/// from 1.
174+
///
175+
/// BYTE-COLUMN should count from 0, like Emacs's `current-column'. However, unlike
176+
/// that function, it should count bytes, instead of displayed glyphs.
177+
///
167178
/// This function is only rarely needed. When you edit a syntax tree, all of the
168179
/// nodes that you retrieve from the tree afterward will already reflect the edit.
169180
/// You only need to use this function when you have a node that you want to keep
170181
/// and continue to use after an edit.
171-
///
172-
/// Note that indexing must be zero-based.
173182
#[defun]
174183
fn edit_node(
175184
node: &mut RNode,
176-
start_byte: usize,
177-
old_end_byte: usize,
178-
new_end_byte: usize,
185+
start_bytepos: BytePos,
186+
old_end_bytepos: BytePos,
187+
new_end_bytepos: BytePos,
179188
start_point: Point,
180189
old_end_point: Point,
181190
new_end_point: Point,
182191
) -> Result<()> {
183192
let edit = InputEdit {
184-
start_byte,
185-
old_end_byte,
186-
new_end_byte,
193+
start_byte: start_bytepos.into(),
194+
old_end_byte: old_end_bytepos.into(),
195+
new_end_byte: new_end_bytepos.into(),
187196
start_position: start_point.into(),
188197
old_end_position: old_end_point.into(),
189198
new_end_position: new_end_point.into(),

src/parser.rs

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use emacs::{defun, Result, Value, Vector};
22
use emacs::failure;
33

4-
use tree_sitter::{Parser, Point, Tree};
4+
use tree_sitter::{Parser, Tree};
55

66
use crate::types::*;
77

@@ -34,18 +34,22 @@ fn language(parser: &Parser) -> Result<Option<Language>> {
3434

3535
/// Parse source code generated by INPUT-FUNCTION with PARSER, returning a tree.
3636
///
37-
/// INPUT-FUNCTION should take 3 parameters: (BYTE-OFFSET ROW COLUMN), and return a
38-
/// fragment of the source code, starting from the position identified by either
39-
/// BYTE-OFFSET or [ROW COLUMN].
37+
/// INPUT-FUNCTION should take 3 parameters: (BYTEPOS LINE-NUMBER BYTE-COLUMN), and
38+
/// return a fragment of the source code, starting from the position identified by
39+
/// either BYTEPOS or (LINE-NUMBER . BYTE-COLUMN).
40+
///
41+
/// BYTEPOS is Emacs's 1-based byte position.
42+
///
43+
/// LINE-NUMBER is the number returned by `line-number-at-pos', which counts from 1.
44+
///
45+
/// BYTE-COLUMN counts from 0, like Emacs's `current-column'. However, unlike that
46+
/// function, it counts bytes, instead of displayed glyphs.
4047
///
4148
/// If you have already parsed an earlier version of this document, and it has since
4249
/// been edited, pass the previously parsed OLD-TREE so that its unchanged parts can
4350
/// be reused. This will save time and memory. For this to work correctly, you must
4451
/// have already edited it using `ts-edit-tree' function in a way that exactly
4552
/// matches the source code changes.
46-
///
47-
/// Note that indexing is assumed to be zero-based, while Emacs normally uses
48-
/// one-based indexing for accessing buffer content.
4953
#[defun]
5054
fn parse(parser: &mut Parser, input_function: Value, old_tree: Option<&Shared<Tree>>) -> Result<Shared<Tree>> {
5155
let old_tree = match old_tree {
@@ -60,8 +64,10 @@ fn parse(parser: &mut Parser, input_function: Value, old_tree: Option<&Shared<Tr
6064
// unwinding across FFI boundary during a panic is UB (future Rust versions will abort).
6165
// See https://github.com/rust-lang/rust/issues/52652.
6266
let mut input_error = None;
63-
let input = &mut |byte: usize, position: Point| -> String {
64-
input_function.call((byte, position.row, position.column))
67+
let input = &mut |byte: usize, point: tree_sitter::Point| -> String {
68+
let bytepos: BytePos = byte.into();
69+
let point: Point = point.into();
70+
input_function.call((bytepos, point.line_number(), point.byte_column()))
6571
.and_then(|v| v.into_rust())
6672
.unwrap_or_else(|e| {
6773
input_error = Some(e);

src/query.rs

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -28,20 +28,20 @@ fn _make_query(language: Language, source: String) -> Result<Query> {
2828
}
2929

3030
macro_rules! defun_query_methods {
31-
($($(#[$meta:meta])* $($lisp_name:literal)? fn $name:ident $( ( $( $param:ident : $type:ty ),* ) )? -> $rtype:ty )*) => {
31+
($($(#[$meta:meta])* $($lisp_name:literal)? fn $name:ident $( ( $( $param:ident : $type:ty ),* ) )? -> $rtype:ty $(; $into:ident)? )*) => {
3232
$(
3333
#[defun$((name = $lisp_name))?]
3434
$(#[$meta])*
3535
fn $name(query: &Query, $( $( $param : $type ),* )? ) -> Result<$rtype> {
36-
Ok(query.$name( $( $( $param ),* )? ))
36+
Ok(query.$name( $( $( $param ),* )? )$(.$into())?)
3737
}
3838
)*
3939
};
4040
}
4141

4242
defun_query_methods! {
43-
/// Return the byte offset where the NTH pattern starts in QUERY's source.
44-
"query-start-byte-for-pattern" fn start_byte_for_pattern(nth: usize) -> usize
43+
/// Return the byte position where the NTH pattern starts in QUERY's source.
44+
"query-start-byte-for-pattern" fn start_byte_for_pattern(nth: usize) -> BytePos; into
4545

4646
/// Return the number of patterns in QUERY.
4747
"query-count-patterns" fn pattern_count -> usize
@@ -120,13 +120,13 @@ fn _query_cursor_matches<'e>(
120120
for (ci, c) in m.captures.iter().enumerate() {
121121
let captured_node = node.map(|_| c.node);
122122
let capture = if index_only.is_some() {
123-
env.vector((c.index, captured_node))?
123+
env.cons(c.index, captured_node)?
124124
} else {
125-
env.vector((&capture_names[c.index as usize], captured_node))?
125+
env.cons(&capture_names[c.index as usize], captured_node)?
126126
};
127127
captures.set(ci, capture)?;
128128
}
129-
let _match = env.vector((m.pattern_index, captures))?;
129+
let _match = env.cons(m.pattern_index, captures)?;
130130
vec.push(_match);
131131
}
132132
vec_to_vector(env, vec)
@@ -156,23 +156,26 @@ fn _query_cursor_captures<'e>(
156156
let c = m.captures[capture_index];
157157
let captured_node = node.map(|_| c.node);
158158
let capture = if index_only.is_some() {
159-
env.vector((c.index, captured_node))?
159+
env.cons(c.index, captured_node)?
160160
} else {
161-
env.vector((&capture_names[c.index as usize], captured_node))?
161+
env.cons(&capture_names[c.index as usize], captured_node)?
162162
};
163163
vec.push(capture);
164164
}
165165
vec_to_vector(env, vec)
166166
}
167167

168-
/// Limit CURSOR's query executions to the byte range [BEG END].
168+
/// Limit CURSOR's query executions to the range of byte positions, from BEG to END.
169169
#[defun]
170-
fn set_byte_range(cursor: &mut QueryCursor, beg: usize, end: usize) -> Result<()> {
171-
cursor.set_byte_range(beg, end);
170+
fn set_byte_range(cursor: &mut QueryCursor, beg: BytePos, end: BytePos) -> Result<()> {
171+
cursor.set_byte_range(beg.into(), end.into());
172172
Ok(())
173173
}
174174

175-
/// Limit CURSOR's query executions to the point range [BEG END].
175+
/// Limit CURSOR's query executions to the point range, from BEG to END.
176+
///
177+
/// A "point" in this context is a (LINE-NUMBER . BYTE-COLUMN) pair. See `ts-parse'
178+
/// for a more detailed explanation.
176179
#[defun]
177180
fn set_point_range(cursor: &mut QueryCursor, beg: Point, end: Point) -> Result<()> {
178181
cursor.set_point_range(beg.into(), end.into());

src/tree.rs

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -24,22 +24,28 @@ fn root_node(tree: Borrowed<Tree>) -> Result<RNode> {
2424

2525
/// Edit the syntax TREE to keep it in sync with source code that has been edited.
2626
///
27-
/// You must describe the edit both in terms of byte offsets and in terms of
28-
/// `[ROW COLUMN]' coordinates, using zero-based indexing.
27+
/// You must describe the edit both in terms of byte positions and in terms of
28+
/// (LINE-NUMBER . BYTE-COLUMN) coordinates.
29+
///
30+
/// LINE-NUMBER should be the number returned by `line-number-at-pos', which counts
31+
/// from 1.
32+
///
33+
/// BYTE-COLUMN should count from 0, like Emacs's `current-column'. However, unlike
34+
/// that function, it should count bytes, instead of displayed glyphs.
2935
#[defun]
3036
fn edit_tree(
3137
tree: Borrowed<Tree>,
32-
start_byte: usize,
33-
old_end_byte: usize,
34-
new_end_byte: usize,
38+
start_bytepos: BytePos,
39+
old_end_bytepos: BytePos,
40+
new_end_bytepos: BytePos,
3541
start_point: Point,
3642
old_end_point: Point,
3743
new_end_point: Point,
3844
) -> Result<()> {
3945
let edit = InputEdit {
40-
start_byte,
41-
old_end_byte,
42-
new_end_byte,
46+
start_byte: start_bytepos.into(),
47+
old_end_byte: old_end_bytepos.into(),
48+
new_end_byte: new_end_bytepos.into(),
4349
start_position: start_point.into(),
4450
old_end_position: old_end_point.into(),
4551
new_end_position: new_end_point.into(),
@@ -48,8 +54,6 @@ fn edit_tree(
4854
Ok(())
4955
}
5056

51-
// TODO: walk_with_properties
52-
5357
/// Compare an edited OLD-TREE to NEW-TREE, both representing the same document.
5458
///
5559
/// This function returns a vector of ranges whose syntactic structure has changed.

0 commit comments

Comments
 (0)