Skip to content

Commit 7750284

Browse files
Fix text fragment extractor fallbacks (#1863)
The fallback logic for determining whether to use text-based surrounding pairs or parse-tree-based was broken for text fragment extractors based on next-gen scope handlers (ie using the new `@textFragment` tag in a query file). This breakage meant that in a string like `"""hello"""` in Python, the surrounding pair finder fell back to text-based, which resulted in it thinking `"hello"` was a string, rather than `"""hello"""`, as would happen if it fell back to parse-tree-based This PR fixes the issue by unifying the fallback logic between legacy and next-gen text fragment extractors, so that they don't fall out of sync again See also #1812 (comment); arguably that will make this PR irrelevant, but until then, it's better to have string not be broken when we migrate a language to use `@textFragment` Note that the tests in this PR don't actually test the code yet, because Python is still using legacy text fragment extractors. The tests will start biting when this PR is merged into main and then merged into #1862 - This PR is required by #1862 ## Checklist - [ ] I have added [tests](https://www.cursorless.org/docs/contributing/test-case-recorder/) - [ ] I have updated the [docs](https://github.com/cursorless-dev/cursorless/tree/main/docs) and [cheatsheet](https://github.com/cursorless-dev/cursorless/tree/main/cursorless-talon/src/cheatsheet) - [ ] I have not broken the cheatsheet --------- Co-authored-by: Andreas Arvidsson <[email protected]>
1 parent 376f9f5 commit 7750284

File tree

9 files changed

+199
-55
lines changed

9 files changed

+199
-55
lines changed

packages/cursorless-engine/src/languages/getTextFragmentExtractor.ts

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { Range, UnsupportedLanguageError } from "@cursorless/common";
1+
import { Range } from "@cursorless/common";
22
import type { SyntaxNode } from "web-tree-sitter";
33
import { SelectionWithEditor } from "../typings/Types";
44
import { notSupported } from "../util/nodeMatchers";
@@ -104,14 +104,8 @@ function constructHackedStringTextFragmentExtractor(
104104
*/
105105
export default function getTextFragmentExtractor(
106106
languageId: string,
107-
): TextFragmentExtractor {
108-
const extractor = textFragmentExtractors[languageId as LegacyLanguageId];
109-
110-
if (extractor == null) {
111-
throw new UnsupportedLanguageError(languageId);
112-
}
113-
114-
return extractor;
107+
): TextFragmentExtractor | null {
108+
return textFragmentExtractors[languageId as LegacyLanguageId];
115109
}
116110

117111
// NB: For now when we want use the entire file as a text fragment we just

packages/cursorless-engine/src/processTargets/modifiers/surroundingPair/index.ts

Lines changed: 35 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,7 @@ import {
55
} from "@cursorless/common";
66
import type { SyntaxNode } from "web-tree-sitter";
77
import { LanguageDefinitions } from "../../../languages/LanguageDefinitions";
8-
import getTextFragmentExtractor, {
9-
TextFragmentExtractor,
10-
} from "../../../languages/getTextFragmentExtractor";
8+
import getTextFragmentExtractor from "../../../languages/getTextFragmentExtractor";
119
import { Target } from "../../../typings/target.types";
1210
import { SurroundingPairTarget } from "../../targets";
1311
import { getContainingScopeTarget } from "../getContainingScopeTarget";
@@ -73,40 +71,6 @@ function processSurroundingPairCore(
7371
] ?? [scopeType.delimiter];
7472

7573
let node: SyntaxNode | null;
76-
let textFragmentExtractor: TextFragmentExtractor;
77-
78-
const textFragmentScopeHandler =
79-
languageDefinition?.getTextFragmentScopeHandler();
80-
81-
if (textFragmentScopeHandler != null) {
82-
const containingScope = getContainingScopeTarget(
83-
target,
84-
textFragmentScopeHandler,
85-
0,
86-
);
87-
88-
if (containingScope != null) {
89-
const surroundingRange = findSurroundingPairTextBased(
90-
editor,
91-
range,
92-
containingScope[0].contentRange,
93-
delimiters,
94-
scopeType,
95-
);
96-
if (surroundingRange != null) {
97-
// Found the pair within this text fragment or comment, e.g. "(abc)"
98-
return surroundingRange;
99-
}
100-
// Search in the rest of the file, to find e.g. ("abc")
101-
return findSurroundingPairTextBased(
102-
editor,
103-
range,
104-
null,
105-
delimiters,
106-
scopeType,
107-
);
108-
}
109-
}
11074

11175
try {
11276
node = languageDefinitions.getNodeAtLocation(document, range);
@@ -122,8 +86,6 @@ function processSurroundingPairCore(
12286
scopeType,
12387
);
12488
}
125-
126-
textFragmentExtractor = getTextFragmentExtractor(document.languageId);
12789
} catch (err) {
12890
if ((err as Error).name === "UnsupportedLanguageError") {
12991
// If we're in a language where we don't have a parse tree we use the text
@@ -140,14 +102,41 @@ function processSurroundingPairCore(
140102
}
141103
}
142104

143-
// If we have a parse tree but we are in a string node or in a comment node,
144-
// then we use the text-based algorithm
145-
const selectionWithEditor = {
146-
editor,
147-
selection: new Selection(range.start, range.end),
148-
};
149-
const textFragmentRange = textFragmentExtractor(node, selectionWithEditor);
105+
const textFragmentRange = (() => {
106+
// First try to use the text fragment scope handler if it exists
107+
const textFragmentScopeHandler =
108+
languageDefinition?.getTextFragmentScopeHandler();
109+
110+
if (textFragmentScopeHandler != null) {
111+
const containingScope = getContainingScopeTarget(
112+
target,
113+
textFragmentScopeHandler,
114+
0,
115+
);
116+
117+
return containingScope?.[0].contentRange;
118+
}
119+
120+
// Then try to use the legacy text fragment extractor if it exists
121+
const textFragmentExtractor = getTextFragmentExtractor(document.languageId);
122+
123+
if (textFragmentExtractor == null) {
124+
// If the text fragment extractor doesn't exist, or if it explicitly is
125+
// set to `null`, then we just use text-based algorithm on entire document
126+
return document.range;
127+
}
128+
129+
const selectionWithEditor = {
130+
editor,
131+
selection: new Selection(range.start, range.end),
132+
};
133+
134+
return textFragmentExtractor(node, selectionWithEditor);
135+
})();
136+
150137
if (textFragmentRange != null) {
138+
// If we have a parse tree but we are in a string node or in a comment node,
139+
// then we use the text-based algorithm
151140
const surroundingRange = findSurroundingPairTextBased(
152141
editor,
153142
range,
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
languageId: python
2+
command:
3+
version: 6
4+
spokenForm: change string
5+
action:
6+
name: clearAndSetSelection
7+
target:
8+
type: primitive
9+
modifiers:
10+
- type: containingScope
11+
scopeType: {type: surroundingPair, delimiter: string}
12+
usePrePhraseSnapshot: true
13+
initialState:
14+
documentContents: "\"\"\"hello\"\"\""
15+
selections:
16+
- anchor: {line: 0, character: 3}
17+
active: {line: 0, character: 3}
18+
marks: {}
19+
finalState:
20+
documentContents: ""
21+
selections:
22+
- anchor: {line: 0, character: 0}
23+
active: {line: 0, character: 0}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
languageId: python
2+
command:
3+
version: 6
4+
spokenForm: change string
5+
action:
6+
name: clearAndSetSelection
7+
target:
8+
type: primitive
9+
modifiers:
10+
- type: containingScope
11+
scopeType: {type: surroundingPair, delimiter: string}
12+
usePrePhraseSnapshot: true
13+
initialState:
14+
documentContents: "\"\"\"aaa\"\"\""
15+
selections:
16+
- anchor: {line: 0, character: 0}
17+
active: {line: 0, character: 0}
18+
marks: {}
19+
finalState:
20+
documentContents: ""
21+
selections:
22+
- anchor: {line: 0, character: 0}
23+
active: {line: 0, character: 0}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
languageId: python
2+
command:
3+
version: 6
4+
spokenForm: change string
5+
action:
6+
name: clearAndSetSelection
7+
target:
8+
type: primitive
9+
modifiers:
10+
- type: containingScope
11+
scopeType: {type: surroundingPair, delimiter: string}
12+
usePrePhraseSnapshot: false
13+
initialState:
14+
documentContents: "\"\"\"aaa\"\"\""
15+
selections:
16+
- anchor: {line: 0, character: 9}
17+
active: {line: 0, character: 9}
18+
marks: {}
19+
finalState:
20+
documentContents: ""
21+
selections:
22+
- anchor: {line: 0, character: 0}
23+
active: {line: 0, character: 0}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
languageId: python
2+
command:
3+
version: 6
4+
spokenForm: change string
5+
action:
6+
name: clearAndSetSelection
7+
target:
8+
type: primitive
9+
modifiers:
10+
- type: containingScope
11+
scopeType: {type: surroundingPair, delimiter: string}
12+
usePrePhraseSnapshot: false
13+
initialState:
14+
documentContents: f"""aaa"""
15+
selections:
16+
- anchor: {line: 0, character: 6}
17+
active: {line: 0, character: 6}
18+
marks: {}
19+
finalState:
20+
documentContents: ""
21+
selections:
22+
- anchor: {line: 0, character: 0}
23+
active: {line: 0, character: 0}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
languageId: python
2+
command:
3+
version: 6
4+
spokenForm: change string
5+
action:
6+
name: clearAndSetSelection
7+
target:
8+
type: primitive
9+
modifiers:
10+
- type: containingScope
11+
scopeType: {type: surroundingPair, delimiter: string}
12+
usePrePhraseSnapshot: false
13+
initialState:
14+
documentContents: "'''aaa'''"
15+
selections:
16+
- anchor: {line: 0, character: 3}
17+
active: {line: 0, character: 3}
18+
marks: {}
19+
finalState:
20+
documentContents: ""
21+
selections:
22+
- anchor: {line: 0, character: 0}
23+
active: {line: 0, character: 0}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
languageId: python
2+
command:
3+
version: 6
4+
spokenForm: change string
5+
action:
6+
name: clearAndSetSelection
7+
target:
8+
type: primitive
9+
modifiers:
10+
- type: containingScope
11+
scopeType: {type: surroundingPair, delimiter: string}
12+
usePrePhraseSnapshot: false
13+
initialState:
14+
documentContents: "'aaa'"
15+
selections:
16+
- anchor: {line: 0, character: 0}
17+
active: {line: 0, character: 0}
18+
marks: {}
19+
finalState:
20+
documentContents: ""
21+
selections:
22+
- anchor: {line: 0, character: 0}
23+
active: {line: 0, character: 0}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
languageId: python
2+
command:
3+
version: 6
4+
spokenForm: change string
5+
action:
6+
name: clearAndSetSelection
7+
target:
8+
type: primitive
9+
modifiers:
10+
- type: containingScope
11+
scopeType: {type: surroundingPair, delimiter: string}
12+
usePrePhraseSnapshot: false
13+
initialState:
14+
documentContents: "'aaa'"
15+
selections:
16+
- anchor: {line: 0, character: 1}
17+
active: {line: 0, character: 1}
18+
marks: {}
19+
finalState:
20+
documentContents: ""
21+
selections:
22+
- anchor: {line: 0, character: 0}
23+
active: {line: 0, character: 0}

0 commit comments

Comments
 (0)