Skip to content

Commit 5dc1c4f

Browse files
authored
Merge pull request #832 from posit-dev/feature/sync-tree-sitter-r
Sync with tree-sitter-r
2 parents f4dd8ed + dbb0867 commit 5dc1c4f

16 files changed

+113
-153
lines changed

Cargo.lock

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/ark/Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ stdext = { path = "../stdext" }
4747
tokio = { version = "1.26.0", features = ["full"] }
4848
tower-lsp = "0.19.0"
4949
tree-sitter = "0.23.0"
50-
tree-sitter-r = { git = "https://github.com/r-lib/tree-sitter-r", rev = "c094bd57652f8a08edc31d79a31222268fe798ee" }
50+
tree-sitter-r = { git = "https://github.com/r-lib/tree-sitter-r", rev = "95aff097aa927a66bb357f715b58cde821be8867" }
5151
uuid = "1.3.0"
5252
url = "2.4.1"
5353
walkdir = "2"
@@ -69,7 +69,7 @@ stdext = { path = "../stdext", features = ["testing"] }
6969
tempfile = "3.13.0"
7070

7171
[build-dependencies]
72-
cc = "1.0"
72+
cc = "1.1.22"
7373
chrono = "0.4.23"
7474
embed-resource = "2.5.0"
7575

crates/ark/src/lsp/diagnostics.rs

Lines changed: 4 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -690,80 +690,11 @@ fn recurse_parenthesized_expression(
690690
context: &mut DiagnosticContext,
691691
diagnostics: &mut Vec<Diagnostic>,
692692
) -> Result<()> {
693-
let mut n = 0;
694-
let mut cursor = node.walk();
695-
696-
for child in node.children_by_field_name("body", &mut cursor) {
697-
recurse(child, context, diagnostics)?;
698-
n = n + 1;
699-
}
700-
701-
if n > 1 {
702-
// The tree-sitter grammar allows multiple `body` statements, but we warn
703-
// the user about this as it is not allowed by the R parser.
704-
let range = node.range();
705-
let range = convert_tree_sitter_range_to_lsp_range(context.contents, range);
706-
let message = format!("Expected at most 1 statement within parentheses, found {n}.");
707-
let diagnostic = Diagnostic::new_simple(range, message);
708-
diagnostics.push(diagnostic);
709-
}
710-
711-
().ok()
712-
}
713-
714-
// TODO: This should be a syntax check, as the grammar should not allow
715-
// two `Argument` nodes side by side
716-
fn check_call_like_next_sibling(
717-
child: Node,
718-
parent_type: &NodeType,
719-
context: &mut DiagnosticContext,
720-
diagnostics: &mut Vec<Diagnostic>,
721-
) -> Result<()> {
722-
let Some(next) = child.next_sibling() else {
723-
return ().ok();
724-
};
725-
726-
let close = match parent_type {
727-
NodeType::Call => ")",
728-
NodeType::Subset => "]",
729-
NodeType::Subset2 => "]]",
730-
_ => bail!("Parent must be a call, subset, or subset2 node."),
731-
};
732-
733-
let ok = match next.node_type() {
734-
NodeType::Comma => true,
735-
NodeType::Anonymous(kind) if kind.as_str() == close => true,
736-
NodeType::Comment => true,
737-
// Should be handled elsewhere
738-
NodeType::Error => true,
739-
_ => false,
740-
};
741-
742-
if ok {
743-
return ().ok();
744-
}
745-
746-
// Children can be arbitrarily large, so report the issue between the end of `child`
747-
// and the start of `next` (it's not really the child's fault anyways,
748-
// it's the fault of the thing right after it).
749-
let start_byte = child.end_byte();
750-
let start_point = child.end_position();
751-
let end_byte = next.start_byte();
752-
let end_point = next.start_position();
753-
754-
let range = Range {
755-
start_byte,
756-
start_point,
757-
end_byte,
758-
end_point,
693+
let Some(body) = node.child_by_field_name("body") else {
694+
// Would be unexpected, grammar requires exactly 1 `body`
695+
return Ok(());
759696
};
760-
761-
let range = convert_tree_sitter_range_to_lsp_range(context.contents, range);
762-
let message = "Expected ',' between expressions.";
763-
let diagnostic = Diagnostic::new_simple(range, message.into());
764-
diagnostics.push(diagnostic);
765-
766-
().ok()
697+
recurse(body, context, diagnostics)
767698
}
768699

769700
/// Default recursion for arguments of a call-like node
@@ -792,8 +723,6 @@ fn recurse_call_like_arguments_default(
792723
// every function behaves like `list()`, which is our default model of
793724
// strict evaluation.
794725
with_in_call_like_arguments(context, |context| {
795-
let call_type = node.node_type();
796-
797726
let Some(arguments) = node.child_by_field_name("arguments") else {
798727
return Ok(());
799728
};
@@ -803,9 +732,6 @@ fn recurse_call_like_arguments_default(
803732
let children = arguments.children_by_field_name("argument", &mut cursor);
804733

805734
for child in children {
806-
// Warn if the next sibling is neither a comma nor the correct closing delimiter
807-
check_call_like_next_sibling(child, &call_type, context, diagnostics)?;
808-
809735
// Recurse into `value`s
810736
if let Some(value) = child.child_by_field_name("value") {
811737
recurse(value, context, diagnostics)?;
@@ -1111,23 +1037,6 @@ foo
11111037
})
11121038
}
11131039

1114-
#[test]
1115-
fn test_expression_after_call_argument() {
1116-
r_task(|| {
1117-
let text = "match(1, 2 3)";
1118-
let document = Document::new(text, None);
1119-
1120-
let diagnostics = generate_diagnostics(document, DEFAULT_STATE.clone());
1121-
assert_eq!(diagnostics.len(), 1);
1122-
1123-
// Diagnostic highlights between the `2` and `3`
1124-
let diagnostic = diagnostics.get(0).unwrap();
1125-
insta::assert_snapshot!(diagnostic.message);
1126-
assert_eq!(diagnostic.range.start, Position::new(0, 10));
1127-
assert_eq!(diagnostic.range.end, Position::new(0, 11));
1128-
})
1129-
}
1130-
11311040
#[test]
11321041
fn test_no_diagnostic_for_dot_dot_i() {
11331042
r_task(|| {

crates/ark/src/lsp/diagnostics_syntax.rs

Lines changed: 80 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -73,39 +73,17 @@ fn recurse_children(
7373
}
7474

7575
fn syntax_diagnostic(node: Node, context: &DiagnosticContext) -> anyhow::Result<Diagnostic> {
76-
if let Some(diagnostic) = syntax_diagnostic_missing_open(node, context)? {
77-
return Ok(diagnostic);
78-
}
76+
// We used to try and analyze the `ERROR` structure of the tree to provide "precise"
77+
// error messages, but this is extremely prone to false positives due to the fact that
78+
// tree-sitter reports a "generic" error range, and it's up to us to try and infer the
79+
// problem based on the surrounding nodes, which tree-sitter often "recovers"
80+
// incorrectly. It's better to wait for us to have a custom R pratt parser, which
81+
// would be able to report precise error locations/messages, because it "knows" why a
82+
// parse error occurred.
7983

8084
Ok(syntax_diagnostic_default(node, context))
8185
}
8286

83-
// Use a heuristic that if we see a syntax error and it just contains a `)`, `}`, or `]`,
84-
// then it is probably a case of missing a matching open token.
85-
fn syntax_diagnostic_missing_open(
86-
node: Node,
87-
context: &DiagnosticContext,
88-
) -> anyhow::Result<Option<Diagnostic>> {
89-
let text = context.contents.node_slice(&node)?;
90-
91-
let open_token = if text == ")" {
92-
"("
93-
} else if text == "}" {
94-
"{"
95-
} else if text == "]" {
96-
"["
97-
} else {
98-
// Not an unmatched closing token
99-
return Ok(None);
100-
};
101-
102-
let range = node.range();
103-
104-
Ok(Some(new_missing_open_diagnostic(
105-
open_token, range, context,
106-
)))
107-
}
108-
10987
fn syntax_diagnostic_default(node: Node, context: &DiagnosticContext) -> Diagnostic {
11088
let range = node.range();
11189
let row_span = range.end_point.row - range.start_point.row;
@@ -309,15 +287,6 @@ fn diagnose_missing_close(
309287
Ok(())
310288
}
311289

312-
fn new_missing_open_diagnostic(
313-
open_token: &str,
314-
range: Range,
315-
context: &DiagnosticContext,
316-
) -> Diagnostic {
317-
let message = format!("Unmatched closing delimiter. Missing an opening '{open_token}'.");
318-
new_syntax_diagnostic(message, range, context)
319-
}
320-
321290
fn new_missing_close_diagnostic(
322291
close_token: &str,
323292
range: Range,
@@ -436,8 +405,17 @@ identity(1)
436405
let diagnostic = diagnostics.get(0).unwrap();
437406
insta::assert_snapshot!(diagnostic.message);
438407

408+
// tree-sitter grammar knows R doesn't allow empty `()` without a body
439409
let diagnostics = text_diagnostics("()");
440-
assert!(diagnostics.is_empty());
410+
assert_eq!(diagnostics.len(), 1);
411+
let diagnostic = diagnostics.get(0).unwrap();
412+
insta::assert_snapshot!(diagnostic.message);
413+
414+
// tree-sitter grammar knows R doesn't allow >1 expressions in `()`
415+
let diagnostics = text_diagnostics("(1+2\n1+2)");
416+
assert_eq!(diagnostics.len(), 1);
417+
let diagnostic = diagnostics.get(0).unwrap();
418+
insta::assert_snapshot!(diagnostic.message);
441419

442420
let diagnostics = text_diagnostics("( 1 + 2 )");
443421
assert!(diagnostics.is_empty());
@@ -526,4 +504,67 @@ function(x {
526504
assert_eq!(diagnostic.range.start, Position::new(3, 0));
527505
assert_eq!(diagnostic.range.end, Position::new(3, 1));
528506
}
507+
508+
#[test]
509+
fn test_repeated_call_arguments_without_delimiter() {
510+
let text = "match(1, 2 3)";
511+
512+
let diagnostics = text_diagnostics(text);
513+
assert_eq!(diagnostics.len(), 1);
514+
515+
// Diagnostic highlights the `2`
516+
let diagnostic = diagnostics.get(0).unwrap();
517+
insta::assert_snapshot!(diagnostic.message);
518+
assert_eq!(diagnostic.range.start, Position::new(0, 9));
519+
assert_eq!(diagnostic.range.end, Position::new(0, 10));
520+
}
521+
522+
#[test]
523+
fn test_no_syntax_diagnostic_on_dots_and_dot_dot_i() {
524+
let text = "x$...";
525+
let diagnostics = text_diagnostics(text);
526+
assert!(diagnostics.is_empty());
527+
528+
let text = "x$..1";
529+
let diagnostics = text_diagnostics(text);
530+
assert!(diagnostics.is_empty());
531+
532+
let text = "x::...";
533+
let diagnostics = text_diagnostics(text);
534+
assert!(diagnostics.is_empty());
535+
}
536+
537+
#[test]
538+
fn test_no_syntax_diagnostic_on_raw_strings() {
539+
// https://github.com/r-lib/tree-sitter-r/issues/162
540+
let text = r#"
541+
r"-()-)-"
542+
r"--()-")--"
543+
"#;
544+
let diagnostics = text_diagnostics(text);
545+
assert!(diagnostics.is_empty());
546+
}
547+
548+
#[test]
549+
fn test_no_syntax_diagnostic_on_null_argument_name() {
550+
// https://github.com/r-lib/tree-sitter-r/issues/164
551+
let text = r#"
552+
switch(
553+
typeof(x),
554+
NULL = ,
555+
)
556+
"#;
557+
let diagnostics = text_diagnostics(text);
558+
assert!(diagnostics.is_empty());
559+
}
560+
561+
#[test]
562+
fn test_no_syntax_diagnostic_on_binary_exponent() {
563+
// https://github.com/r-lib/tree-sitter-r/issues/159
564+
let text = r#"
565+
0x0p-123
566+
"#;
567+
let diagnostics = text_diagnostics(text);
568+
assert!(diagnostics.is_empty());
569+
}
529570
}

crates/ark/src/lsp/snapshots/ark__lsp__diagnostics__tests__expression_after_call_argument.snap

Lines changed: 0 additions & 5 deletions
This file was deleted.

crates/ark/src/lsp/snapshots/ark__lsp__diagnostics__tests__mixed_syntax_and_semantic_diagnostics.snap

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22
source: crates/ark/src/lsp/diagnostics.rs
33
expression: diagnostic.message
44
---
5-
Unmatched closing delimiter. Missing an opening '{'.
5+
Syntax error

crates/ark/src/lsp/snapshots/ark__lsp__diagnostics_syntax__tests__error_precision.snap

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22
source: crates/ark/src/lsp/diagnostics_syntax.rs
33
expression: diagnostic.message
44
---
5-
Unmatched closing delimiter. Missing an opening '('.
5+
Syntax error
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
source: crates/ark/src/lsp/diagnostics_syntax.rs
3+
expression: diagnostic.message
4+
---
5+
Syntax error

crates/ark/src/lsp/snapshots/ark__lsp__diagnostics_syntax__tests__unmatched_binary_operator.snap

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22
source: crates/ark/src/lsp/diagnostics_syntax.rs
33
expression: diagnostic.message
44
---
5-
Unmatched closing delimiter. Missing an opening '{'.
5+
Syntax error

crates/ark/src/lsp/snapshots/ark__lsp__diagnostics_syntax__tests__unmatched_closing_token-2.snap

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22
source: crates/ark/src/lsp/diagnostics_syntax.rs
33
expression: diagnostic.message
44
---
5-
Unmatched closing delimiter. Missing an opening '('.
5+
Syntax error

0 commit comments

Comments
 (0)