Skip to content

Commit 43abbb4

Browse files
authored
Change concrete syntax grammar to allow escaping both @ and :[ (#777)
With our current version of concrete syntax, we can't match `@` or `:[` as literals. I changed the grammar to allow escaping these characters, and I've also added tests to illustrate it.
1 parent 8129e68 commit 43abbb4

File tree

8 files changed

+80
-25
lines changed

8 files changed

+80
-25
lines changed

.github/workflows/build.yml

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,9 @@ jobs:
1515
uses: actions/checkout@v4
1616

1717
- name: Install stable toolchain
18-
uses: actions-rs/toolchain@v1
18+
uses: dtolnay/rust-toolchain@stable
1919
with:
20-
profile: minimal
21-
toolchain: stable
22-
override: true
23-
components: rustfmt
20+
components: rustfmt, clippy
2421

2522
- name: Set up Python 3.9
2623
uses: actions/setup-python@v5

crates/concrete-syntax/src/concrete_syntax.pest

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,9 @@ capture = { (":[" ~ identifier ~ capture_mode? ~ "]") | "@"~identifier } // FIXM
1010
capture_mode = { "+" | "*" | "?"}
1111
identifier = { (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_")* }
1212

13-
// Literal text - single word/token without whitespace
14-
literal_text = { (!( ":[" | "|>" | "@" ) ~ ANY)+ }
13+
// Literal text - single word/token without whitespace, with escape support
14+
literal_text = { (escaped_char | (!( ":[" | "|>" | "@" ) ~ ANY))+ }
15+
escaped_char = { "\\" ~ ("@" | ":" | "\\") }
1516
WHITESPACE = _{ (" " | "\t" | "\r" | "\n")+ }
1617

1718
// Where constraints (extensible for future constraint types)

crates/concrete-syntax/src/models/concrete_syntax/interpreter.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -353,8 +353,8 @@ fn match_at_all_tree_levels(
353353
///
354354
/// 1) capture = text(node1...node2)
355355
/// 2) match_cs_pattern(
356-
/// remaining_elements, // CS elements still to match
357-
/// /* should_match */ true if node3 exists
356+
/// remaining_elements, // CS elements still to match
357+
/// /* should_match */ true if node3 exists
358358
/// ) starting at node3, node4, ...
359359
fn try_match_node_range(
360360
ctx: &mut MatchingContext<'_>, var_name: &str, constraints: &[CsConstraint],

crates/concrete-syntax/src/models/concrete_syntax/parser.rs

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ pub enum CaptureMode {
7070
Optional, // :[var?]
7171
}
7272

73-
/// Decode \" \\ \n \t \/ … inside a string literal.
73+
/// Decode \" \\ \n \t \/ \@ \: … inside a string literal.
7474
fn unescape(src: &str) -> String {
7575
let mut out = String::with_capacity(src.len());
7676
let mut chars = src.chars();
@@ -82,6 +82,8 @@ fn unescape(src: &str) -> String {
8282
Some('n') => out.push('\n'),
8383
Some('t') => out.push('\t'),
8484
Some('/') => out.push('/'),
85+
Some('@') => out.push('@'),
86+
Some(':') => out.push(':'),
8587
Some(other) => {
8688
out.push('\\');
8789
out.push(other);
@@ -383,7 +385,8 @@ impl ConcreteSyntax {
383385
literal_text => {
384386
// Split the literal text on whitespace, similar to Python's .split()
385387
let text = pair.as_str();
386-
Ok(Self::parse_literal_tokens(text))
388+
let unescaped_text = unescape(text);
389+
Ok(Self::parse_literal_tokens(&unescaped_text))
387390
}
388391
delimited_literal => {
389392
// Same as literal_text but with escape handling for \/
@@ -416,7 +419,3 @@ impl ConcreteSyntax {
416419
})
417420
}
418421
}
419-
420-
#[cfg(test)]
421-
#[path = "unit_tests/parser_test.rs"]
422-
mod parser_test;

crates/concrete-syntax/src/models/concrete_syntax/resolver.rs

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,3 @@ impl ConcreteSyntax {
182182
})
183183
}
184184
}
185-
186-
#[cfg(test)]
187-
#[path = "unit_tests/resolver_test.rs"]
188-
mod resolver_test;

crates/concrete-syntax/src/models/concrete_syntax/tree_sitter_adapter.rs

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,11 @@ pub type Node<'a> = NativeNode<'a>;
134134
#[cfg(feature = "native")]
135135
pub type TreeCursor<'a> = NativeCursor<'a>;
136136

137+
#[cfg(not(feature = "native"))]
138+
pub type Node<'a> = WasmNodeWrapper;
139+
#[cfg(not(feature = "native"))]
140+
pub type TreeCursor<'a> = WasmCursorWrapper;
141+
137142
// Re-export raw tree-sitter types with different names to avoid confusion
138143
#[cfg(feature = "native")]
139144
pub use tree_sitter::{Node as RawNode, TreeCursor as RawTreeCursor};
@@ -287,12 +292,6 @@ impl SyntaxCursor for WasmCursorWrapper {
287292
}
288293
}
289294

290-
// WASM type aliases
291-
#[cfg(feature = "wasm")]
292-
pub type Node<'a> = WasmNodeWrapper;
293-
#[cfg(feature = "wasm")]
294-
pub type TreeCursor<'a> = WasmCursorWrapper;
295-
296295
/// Adapter functions for working with tree-sitter types directly
297296
/// These provide a bridge between the trait-based and direct tree-sitter APIs
298297
#[cfg(feature = "native")]

crates/concrete-syntax/src/models/concrete_syntax/unit_tests/interpreter_test.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -542,3 +542,25 @@ fn test_contains_constraint_bug_minimal() {
542542
GO,
543543
);
544544
}
545+
546+
#[test]
547+
fn test_escaped_at_symbol() {
548+
run_test(
549+
"var x = @something;",
550+
"var :[name] = \\@something;",
551+
1,
552+
vec![vec![("name", "x")]],
553+
GO,
554+
);
555+
}
556+
557+
#[test]
558+
fn test_escaped_colon_symbol() {
559+
run_test(
560+
"var x = :something;",
561+
"var :[name] = \\:something;",
562+
1,
563+
vec![vec![("name", "x")]],
564+
GO,
565+
);
566+
}

crates/concrete-syntax/src/models/concrete_syntax/unit_tests/parser_test.rs

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -501,4 +501,45 @@ mod tests {
501501
.unwrap_err()
502502
.contains("'root contains' is not supported"));
503503
}
504+
505+
#[test]
506+
fn test_parse_escaped_at_symbol() {
507+
let input = "var :[name] = \\@something;";
508+
let result = ConcreteSyntax::parse(input).unwrap();
509+
let elements = result.pattern.sequence;
510+
511+
// Should have: "var", capture "name", "=", "@something;"
512+
assert!(elements.len() >= 4);
513+
514+
// Find the literal with @something
515+
let at_literal = elements
516+
.iter()
517+
.find(|e| matches!(e, CsElement::Literal(text) if text.contains("@")));
518+
519+
match at_literal {
520+
Some(CsElement::Literal(text)) => {
521+
assert_eq!(text, "@something;");
522+
}
523+
_ => panic!("Expected literal with @something, got: {elements:#?}"),
524+
}
525+
}
526+
527+
#[test]
528+
fn test_parse_escaped_colon_symbol() {
529+
let input = "var :[name] = \\:[something]";
530+
let result = ConcreteSyntax::parse(input).unwrap();
531+
let elements = result.pattern.sequence;
532+
533+
// Find the literal with :[something]
534+
let colon_literal = elements
535+
.iter()
536+
.find(|e| matches!(e, CsElement::Literal(text) if text.contains(":")));
537+
538+
match colon_literal {
539+
Some(CsElement::Literal(text)) => {
540+
assert_eq!(text, ":[something]");
541+
}
542+
_ => panic!("Expected literal with :something, got: {elements:#?}"),
543+
}
544+
}
504545
}

0 commit comments

Comments
 (0)