Skip to content

Commit c5a3681

Browse files
Validate query capture names (#2147)
Show an error message when an invalid capture name is used. Fixes #1433. ## Checklist - [x] I have added [tests](https://www.cursorless.org/docs/contributing/test-case-recorder/) - [-] I have updated the [docs](https://github.com/cursorless-dev/cursorless/tree/main/docs) and [cheatsheet](https://github.com/cursorless-dev/cursorless/tree/main/cursorless-talon/src/cheatsheet) - [-] I have not broken the cheatsheet --------- Co-authored-by: Pokey Rule <[email protected]>
1 parent 3aff2da commit c5a3681

File tree

10 files changed

+295
-55
lines changed

10 files changed

+295
-55
lines changed

packages/cursorless-engine/src/languages/LanguageDefinition.ts

Lines changed: 33 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,15 @@ import {
44
SimpleScopeType,
55
showError,
66
} from "@cursorless/common";
7-
import { dirname, join } from "path";
7+
import { basename, dirname, join } from "path";
88
import { TreeSitterScopeHandler } from "../processTargets/modifiers/scopeHandlers";
99
import { TreeSitterTextFragmentScopeHandler } from "../processTargets/modifiers/scopeHandlers/TreeSitterScopeHandler/TreeSitterTextFragmentScopeHandler";
1010
import { ScopeHandler } from "../processTargets/modifiers/scopeHandlers/scopeHandler.types";
1111
import { ide } from "../singletons/ide.singleton";
1212
import { TreeSitter } from "../typings/TreeSitter";
1313
import { matchAll } from "../util/regex";
1414
import { TreeSitterQuery } from "./TreeSitterQuery";
15+
import { validateQueryCaptures } from "./TreeSitterQuery/validateQueryCaptures";
1516
import { TEXT_FRAGMENT_CAPTURE_NAME } from "./captureNames";
1617

1718
/**
@@ -105,6 +106,8 @@ async function readQueryFileAndImports(
105106
[languageQueryPath]: null,
106107
};
107108

109+
const doValidation = ide().runMode !== "production";
110+
108111
// Keep reading imports until we've read all the imports. Every time we
109112
// encounter an import in a query file, we add it to the map with a value
110113
// of null, so that it will be read on the next iteration
@@ -114,6 +117,8 @@ async function readQueryFileAndImports(
114117
continue;
115118
}
116119

120+
const fileName = basename(queryPath);
121+
117122
let rawQuery = await fileSystem.readBundledFile(queryPath);
118123

119124
if (rawQuery == null) {
@@ -137,6 +142,10 @@ async function readQueryFileAndImports(
137142
rawQuery = "";
138143
}
139144

145+
if (doValidation) {
146+
validateQueryCaptures(fileName, rawQuery);
147+
}
148+
140149
rawQueryStrings[queryPath] = rawQuery;
141150
matchAll(
142151
rawQuery,
@@ -150,18 +159,9 @@ async function readQueryFileAndImports(
150159
/^[^\S\r\n]*;;?[^\S\r\n]*(?:import|include)[^\S\r\n]+['"]?([\w|/.]+)['"]?[^\S\r\n]*$/gm,
151160
(match) => {
152161
const relativeImportPath = match[1];
153-
const canonicalSyntax = `;; import ${relativeImportPath}`;
154-
155-
if (match[0] !== canonicalSyntax) {
156-
showError(
157-
ide().messages,
158-
"LanguageDefinition.readQueryFileAndImports.malformedImport",
159-
`Malformed import statement in ${queryPath}: "${match[0]}". Import statements must be of the form "${canonicalSyntax}"`,
160-
);
161-
162-
if (ide().runMode === "test") {
163-
throw new Error("Invalid import statement");
164-
}
162+
163+
if (doValidation) {
164+
validateImportSyntax(fileName, relativeImportPath, match[0]);
165165
}
166166

167167
const importQueryPath = join(dirname(queryPath), relativeImportPath);
@@ -174,3 +174,23 @@ async function readQueryFileAndImports(
174174

175175
return Object.values(rawQueryStrings).join("\n");
176176
}
177+
178+
/**
 * Checks that an import statement found in a query file is written in the one
 * canonical form, `;; import <path>`.
 *
 * When the statement differs from the canonical form, an error is shown via
 * the IDE's messages, and, when the IDE run mode is `"test"`, an exception is
 * thrown as well.
 *
 * @param file Name of the query file, used in the error message
 * @param relativeImportPath The import path extracted from the statement
 * @param actual The import statement exactly as it appears in the query file
 * @throws Error if the statement is malformed and the run mode is `"test"`
 */
function validateImportSyntax(
  file: string,
  relativeImportPath: string,
  actual: string,
) {
  const expected = `;; import ${relativeImportPath}`;

  // Nothing to report when the statement is already canonical
  if (actual === expected) {
    return;
  }

  const explanation = `Malformed import statement in ${file}: "${actual}". Import statements must be of the form "${expected}"`;

  showError(
    ide().messages,
    "LanguageDefinition.readQueryFileAndImports.malformedImport",
    explanation,
  );

  const isTestRun = ide().runMode === "test";

  if (isTestRun) {
    throw new Error("Invalid import statement");
  }
}
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
import { FakeIDE } from "@cursorless/common";
2+
import assert from "assert";
3+
import { injectIde } from "../../singletons/ide.singleton";
4+
import { validateQueryCaptures } from "./validateQueryCaptures";
5+
6+
/** Builds a test case whose query content is expected to pass validation. */
const okCase = (name: string, content: string) => ({
  name,
  isOk: true,
  content,
});

/** Builds a test case whose query content is expected to fail validation. */
const errorCase = (name: string, content: string) => ({
  name,
  isOk: false,
  content,
});

// Each case is a snippet of query file content together with whether
// validation of its capture names is expected to succeed.
const testCases: { name: string; isOk: boolean; content: string }[] = [
  okCase("Scope captures", "(if_statement) @statement @ifStatement @comment"),
  okCase(
    "Relationships",
    "(if_statement) @statement.domain @statement.interior @_.removal",
  ),
  okCase(
    "Position captures",
    "(if_statement) @statement.startOf @statement.leading.startOf @statement.trailing.endOf",
  ),
  okCase(
    "Range captures",
    "(if_statement) @statement.start @statement.start.endOf @statement.removal.start @statement.interior.start.endOf",
  ),
  okCase("Dummy capture", "(if_statement) @_foo"),
  errorCase(
    "No range dummy relationships",
    "(if_statement) @_foo.start @_foo.startOf",
  ),
  okCase("Text fragment", "(comment) @textFragment"),
  okCase(
    "Iteration",
    "(document) @statement.iteration @statement.iteration.domain",
  ),
  okCase("Unknown capture in comment", ";; (if_statement) @unknown"),
  errorCase("Unknown capture", "(if_statement) @unknown"),
  errorCase("Unknown relationship", "(if_statement) @statement.unknown"),
  errorCase("Single @", "(if_statement) @"),
  errorCase("Single wildcard", "(if_statement) @_"),
  errorCase("Wildcard start", "(if_statement) @_.start"),
  errorCase("Leading start", "(if_statement) @statement.leading.start"),
  errorCase("Text fragment removal", "(comment) @textFragment.removal"),
];
90+
91+
// Registers one test per entry of `testCases`: entries marked `isOk` must
// validate without throwing, all others must throw.
suite("validateQueryCaptures", function () {
  suiteSetup(() => {
    // A fake IDE is injected before the tests run, because validation reads
    // the `ide()` singleton.
    injectIde(new FakeIDE());
  });

  testCases.forEach(({ name, isOk, content }) => {
    const title = `${isOk ? "OK" : "Error"}: ${name}`;

    test(title, () => {
      // The test case name doubles as the file name passed to the validator
      const validate = () => validateQueryCaptures(name, content);

      if (!isOk) {
        assert.throws(validate);
        return;
      }

      assert.doesNotThrow(validate);
    });
  });
});
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
import { showError, simpleScopeTypeTypes } from "@cursorless/common";
2+
import { ide } from "../../singletons/ide.singleton";
3+
4+
// Placeholder capture name. It is not allowed by itself, only together with a
// relationship, eg `@_.domain`.
const wildcard = "_";
// Capture name that is accepted in addition to the simple scope types
const textFragment = "textFragment";
const captureNames = [wildcard, ...simpleScopeTypeTypes];

const positionRelationships = ["prefix", "leading", "trailing"];
const positionSuffixes = ["startOf", "endOf"];

const rangeRelationships = [
  "domain",
  "removal",
  "interior",
  "iteration",
  "iteration.domain",
];
const rangeSuffixes = [
  "start",
  "end",
  "start.startOf",
  "start.endOf",
  "end.startOf",
  "end.endOf",
];

/**
 * Every capture name (without the leading `@`) that may appear in a query
 * file. Dummy captures of the form `@_foo` are not listed here; they are
 * recognised separately by {@link isDummyCapture}.
 */
const allowedCaptures = new Set<string>();

allowedCaptures.add(textFragment);

for (const suffix of rangeSuffixes) {
  allowedCaptures.add(`${textFragment}.${suffix}`);
}

for (const captureName of captureNames) {
  // Wildcard is not allowed by itself without a relationship
  if (captureName !== wildcard) {
    // eg: statement
    allowedCaptures.add(captureName);

    // eg: statement.start | statement.start.endOf
    for (const suffix of rangeSuffixes) {
      allowedCaptures.add(`${captureName}.${suffix}`);
    }

    // eg: statement.startOf
    for (const suffix of positionSuffixes) {
      allowedCaptures.add(`${captureName}.${suffix}`);
    }
  }

  for (const relationship of positionRelationships) {
    // eg: statement.leading
    allowedCaptures.add(`${captureName}.${relationship}`);

    for (const suffix of positionSuffixes) {
      // eg: statement.leading.endOf
      allowedCaptures.add(`${captureName}.${relationship}.${suffix}`);
    }
  }

  for (const relationship of rangeRelationships) {
    // eg: statement.domain
    allowedCaptures.add(`${captureName}.${relationship}`);

    for (const suffix of rangeSuffixes) {
      // eg: statement.domain.start | statement.domain.start.endOf
      allowedCaptures.add(`${captureName}.${relationship}.${suffix}`);
    }
  }
}

// A single character that may be part of a capture name: a word character or
// a dot
const captureNameCharacter = /[\w.]/;

/**
 * Returns the name (without the leading `@`) of every capture on one line of a
 * query file, in order of appearance.
 *
 * - Text inside a double-quoted string is skipped, so an `@` in a string is
 *   not treated as a capture.
 * - Everything from the first `;` outside of a string to the end of the line
 *   is treated as a comment and skipped.
 * - An `@` that is not followed by any name character yields an empty name,
 *   so that the caller can report it as invalid.
 *
 * @param line A single line of a query file, without its line terminator
 * @returns The capture names found on the line; empty if there are none
 */
function findCaptureNames(line: string): string[] {
  const names: string[] = [];
  let isInString = false;

  for (let index = 0; index < line.length; index++) {
    const character = line.charAt(index);

    if (isInString) {
      if (character === "\\") {
        // Skip the escaped character so that `\"` does not end the string
        index++;
      } else if (character === '"') {
        isInString = false;
      }
      continue;
    }

    if (character === '"') {
      isInString = true;
    } else if (character === ";") {
      // The rest of the line is a comment
      break;
    } else if (character === "@") {
      let end = index + 1;

      while (end < line.length && captureNameCharacter.test(line.charAt(end))) {
        end++;
      }

      names.push(line.slice(index + 1, end));

      // Resume scanning right after the capture name
      index = end - 1;
    }
  }

  return names;
}

/**
 * Returns `true` for dummy captures such as `_foo`, which are allowed so that
 * they can be referred to in query predicates. A dummy capture starts with
 * `_`, is longer than the bare wildcard, and has no relationship or suffix
 * (ie it contains no dot).
 */
function isDummyCapture(captureName: string): boolean {
  return (
    captureName.length > 1 &&
    !captureName.includes(".") &&
    captureName.startsWith("_")
  );
}

/**
 * Validates the name of every capture used in a query file.
 *
 * All captures on every line are checked, not just the last one on the line.
 * Captures inside comments and `@` characters inside strings are ignored.
 *
 * If one or more invalid capture names are found, a single error listing all
 * of them (one per line, in the form `file(line) invalid capture '@name'.`) is
 * shown via the IDE's messages. When the IDE run mode is `"test"`, an
 * exception with the same message is thrown as well.
 *
 * @param file Name of the query file, used in the error message
 * @param rawQuery The full text of the query file
 * @throws Error if an invalid capture is found and the run mode is `"test"`
 */
export function validateQueryCaptures(file: string, rawQuery: string): void {
  const errors: string[] = [];

  rawQuery.split("\n").forEach((line, lineIndex) => {
    for (const captureName of findCaptureNames(line)) {
      if (isDummyCapture(captureName) || allowedCaptures.has(captureName)) {
        continue;
      }

      // Line numbers in the error message are one-based
      errors.push(
        `${file}(${lineIndex + 1}) invalid capture '@${captureName}'.`,
      );
    }
  });

  if (errors.length === 0) {
    return;
  }

  const message = errors.join("\n");

  showError(
    ide().messages,
    "validateQueryCaptures.invalidCaptureName",
    message,
  );

  if (ide().runMode === "test") {
    throw new Error(message);
  }
}

packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/languages/scm/clearEveryEntry4.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ initialState:
1313
documentContents: |-
1414
(
1515
(_
16-
(_) @dummy
16+
(_) @_dummy
1717
(capture) @name @_.domain.end
1818
) @_.domain.start
1919
)

queries/go.scm

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -246,8 +246,8 @@
246246
(
247247
(if_statement
248248
consequence: (block) @branch.end.endOf
249-
) @_if @branch.start.startOf
250-
(#not-parent-type? @_if if_statement)
249+
) @branch.start.startOf
250+
(#not-parent-type? @branch.start.startOf if_statement)
251251
(#insertion-delimiter! @branch.start.startOf " ")
252252
)
253253

@@ -271,6 +271,6 @@
271271

272272
;; iteration scope is always the outermost if statement
273273
(
274-
(if_statement) @_if @branch.iteration
275-
(#not-parent-type? @_if if_statement)
274+
(if_statement) @branch.iteration
275+
(#not-parent-type? @branch.iteration if_statement)
276276
)

queries/javascript.core.scm

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,11 @@
2323
(export_statement
2424
(_
2525
name: (_) @name
26-
) @dummy
26+
) @_dummy
2727

2828
;; We have a special case for this one. Note we don't need to list the other
2929
;; special cases from above because they can't be exported
30-
(#not-type? @dummy variable_declarator)
30+
(#not-type? @_dummy variable_declarator)
3131
) @_.domain
3232

3333
;; Special cases for `(let | const | var) foo = ...;` because the full statement
@@ -266,8 +266,8 @@
266266
.
267267
value: (_)? @value
268268
) @_.domain
269-
) @dummy
270-
(#has-multiple-children-of-type? @dummy variable_declarator)
269+
) @_dummy
270+
(#has-multiple-children-of-type? @_dummy variable_declarator)
271271
)
272272

273273
(expression_statement
@@ -696,9 +696,9 @@
696696
(_) @argumentOrParameter
697697
.
698698
(_)? @_.trailing.startOf
699-
) @dummy
699+
) @_dummy
700700
(#not-type? @argumentOrParameter "comment")
701-
(#single-or-multi-line-delimiter! @argumentOrParameter @dummy ", " ",\n")
701+
(#single-or-multi-line-delimiter! @argumentOrParameter @_dummy ", " ",\n")
702702
)
703703

704704
;;!! foo("bar")
@@ -710,9 +710,9 @@
710710
(_) @argumentOrParameter
711711
.
712712
(_)? @_.trailing.startOf
713-
) @dummy
713+
) @_dummy
714714
(#not-type? @argumentOrParameter "comment")
715-
(#single-or-multi-line-delimiter! @argumentOrParameter @dummy ", " ",\n")
715+
(#single-or-multi-line-delimiter! @argumentOrParameter @_dummy ", " ",\n")
716716
)
717717

718718
(_

0 commit comments

Comments
 (0)