Skip to content

Commit 1149375

Browse files
authored
Add asterisk (*) support for zero-or-more matching in concrete syntax (#735)
## Summary

Added support for `:[var*]` pattern matching in concrete syntax templates, allowing zero-or-more node matching similar to regex quantifiers.

## Changes

- Added `MatchMode` enum to replace boolean parameters for a cleaner API
- Implemented `ZeroPlus` mode in `handle_template_variable_matching`
- Added unit tests for zero-or-more matching behavior

## Implementation Details

The asterisk implementation first attempts to match zero nodes, then falls back to one-or-more matching if the zero match fails. This ensures `:[var*]` behaves as expected:

- `class Example { }` matches `class :[name] { :[body*] }` with an empty body
- `class Example { int x = 1; }` matches the same pattern with the body containing the field

As before, concrete syntax matching remains lazy; otherwise there would be a state space explosion.
1 parent 8adc5db commit 1149375

File tree

2 files changed

+102
-11
lines changed

2 files changed

+102
-11
lines changed

src/models/concrete_syntax.rs

Lines changed: 78 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
limitations under the License.
1212
*/
1313

14+
use crate::models::matches::Point;
1415
use crate::models::matches::Range;
1516

1617
use regex::Regex;
@@ -22,10 +23,22 @@ use tree_sitter_traversal::Cursor;
2223
use crate::models::capture_group_patterns::ConcreteSyntax;
2324
use crate::models::matches::Match;
2425

26+
/// Represents the different matching modes for template variables
27+
#[derive(Clone, Copy, PartialEq, Eq)]
28+
enum MatchMode {
29+
/// Match exactly one node :[var]
30+
Single,
31+
/// Match one or more nodes :[var+]
32+
OnePlus,
33+
/// Match zero or more nodes :[var*]
34+
ZeroPlus,
35+
}
36+
2537
// Precompile the regex outside the function
2638
lazy_static! {
2739
static ref RE_VAR: Regex = Regex::new(r"^:\[(?P<var_name>\w+)\]").unwrap();
2840
static ref RE_VAR_PLUS: Regex = Regex::new(r"^:\[(?P<var_name>\w+)\+\]").unwrap();
41+
static ref RE_VAR_ASTERISK: Regex = Regex::new(r"^:\[(?P<var_name>\w+)\*\]").unwrap();
2942
}
3043

3144
// Struct to avoid dealing with lifetimes
@@ -200,10 +213,34 @@ pub(crate) fn get_matches_for_subsequence_of_nodes(
200213

201214
if let Some(caps) = RE_VAR_PLUS.captures(match_template) {
202215
// If template starts with a template variable
203-
handle_template_variable_matching(cursor, source_code, top_node, caps, match_template, true)
216+
handle_template_variable_matching(
217+
cursor,
218+
source_code,
219+
top_node,
220+
caps,
221+
match_template,
222+
MatchMode::OnePlus,
223+
)
204224
} else if let Some(caps) = RE_VAR.captures(match_template) {
205225
// If template starts with a template variable
206-
handle_template_variable_matching(cursor, source_code, top_node, caps, match_template, false)
226+
handle_template_variable_matching(
227+
cursor,
228+
source_code,
229+
top_node,
230+
caps,
231+
match_template,
232+
MatchMode::Single,
233+
)
234+
} else if let Some(caps) = RE_VAR_ASTERISK.captures(match_template) {
235+
// If template starts with a template variable with asterisk (zero or more)
236+
handle_template_variable_matching(
237+
cursor,
238+
source_code,
239+
top_node,
240+
caps,
241+
match_template,
242+
MatchMode::ZeroPlus,
243+
)
207244
} else if node.child_count() == 0 {
208245
// If the current node is a leaf
209246
return handle_leaf_node(cursor, source_code, match_template, top_node);
@@ -214,17 +251,18 @@ pub(crate) fn get_matches_for_subsequence_of_nodes(
214251
}
215252
}
216253

217-
/// This function does the template variable matching against entire tree nodes.function
218-
/// Keep in my mind that it will only attempt to match the template variables against nodes
219-
/// at either the current level of the traversal, or it's children. It can also operate on
220-
/// single node templates [args], and multiple nodes templates :[args+].
221-
222-
/// For successful matches, it returns the assignment of each template varaible against a
223-
/// particular range. The Option<usize> indicates whether a match was succesfull, and keeps
254+
/// This function does the template variable matching against entire tree nodes.
255+
/// It handles different matching modes:
256+
/// - Single: Match exactly one node :[var]
257+
/// - OnePlus: Match one or more nodes :[var+]
258+
/// - ZeroPlus: Match zero or more nodes :[var*]
259+
///
260+
/// For successful matches, it returns the assignment of each template variable against a
261+
/// particular range. The Option<usize> indicates whether a match was successful, and keeps
224262
/// track of the last sibling node that was matched (with respect to the match_sequential_siblings function)
225263
fn handle_template_variable_matching(
226264
cursor: &mut TreeCursor, source_code: &[u8], top_node: &Node, caps: regex::Captures,
227-
match_template: &str, one_plus: bool,
265+
match_template: &str, mode: MatchMode,
228266
) -> (HashMap<String, CapturedNode>, Option<usize>) {
229267
let var_name = &caps["var_name"];
230268
let cs_adv_len = caps[0].len();
@@ -235,6 +273,35 @@ fn handle_template_variable_matching(
235273
.to_string(),
236274
);
237275

276+
// For zero_plus patterns, first try to match with zero nodes
277+
if mode == MatchMode::ZeroPlus {
278+
let mut tmp_cursor = cursor.clone();
279+
if let (mut recursive_matches, Some(last_matched_node_idx)) =
280+
get_matches_for_subsequence_of_nodes(
281+
&mut tmp_cursor,
282+
source_code,
283+
&cs_advanced,
284+
true, // nodes_left_to_match
285+
top_node,
286+
)
287+
{
288+
// Successfully matched with zero nodes
289+
recursive_matches.insert(
290+
var_name.to_string(),
291+
CapturedNode {
292+
range: Range {
293+
start_byte: 0,
294+
end_byte: 0,
295+
start_point: Point { row: 0, column: 0 },
296+
end_point: Point { row: 0, column: 0 },
297+
},
298+
text: String::new(),
299+
},
300+
);
301+
return (recursive_matches, Some(last_matched_node_idx));
302+
}
303+
}
304+
238305
// Matching :[var] against a sequence of nodes [first_node, ... last_node]
239306
loop {
240307
let first_node = cursor.node();
@@ -303,7 +370,7 @@ fn handle_template_variable_matching(
303370
should_match = find_next_sibling_or_ancestor_sibling(&mut next_node_cursor);
304371
}
305372

306-
if !one_plus {
373+
if mode == MatchMode::Single {
307374
break;
308375
}
309376
}

src/models/unit_tests/concrete_syntax_test.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,3 +180,27 @@ fn test_match_anything() {
180180
JAVA,
181181
);
182182
}
183+
184+
#[test]
185+
fn test_asterisk_zero_or_more() {
186+
// Test asterisk (zero or more) matching
187+
run_test(
188+
"class Example { }",
189+
"class :[name] { :[body*] }",
190+
2,
191+
vec![vec![("name", "Example"), ("body", "")]],
192+
JAVA,
193+
);
194+
}
195+
196+
#[test]
197+
fn test_asterisk_one_or_more() {
198+
// Test asterisk (zero or more) matching with actual content
199+
run_test(
200+
"import java.util.ArrayList; class Example { int x = 1; int y = 2; }",
201+
"class :[name] { :[body*] }",
202+
1,
203+
vec![vec![("name", "Example"), ("body", "int x = 1; int y = 2;")]],
204+
JAVA,
205+
);
206+
}

0 commit comments

Comments
 (0)