Fix text fragment extractor fallbacks (#1863)

pokey · AndreasArvidsson · web-flow · commit 77502849ce85 · 2023-09-08T21:19:24.000Z
The fallback logic for determining whether to use text-based surrounding pairs or parse-tree-based was broken for text fragment extractors based on next-gen scope handlers (i.e. using the new `@textFragment` tag in a query file). This breakage meant that in a string like `"""hello"""` in Python, the surrounding pair finder fell back to text-based, which resulted in it thinking `"hello"` was a string, rather than `"""hello"""`, as would happen if it fell back to parse-tree-based. This PR fixes the issue by unifying the fallback logic between legacy and next-gen text fragment extractors, so that they don't fall out of sync again. See also #1812 (comment); arguably that will make this PR irrelevant, but until then, it's better to have strings not be broken when we migrate a language to use `@textFragment`. Note that the tests in this PR don't actually test the code yet, because Python is still using legacy text fragment extractors. The tests will start biting when this PR is merged into main and then merged into #1862. - This PR is required by #1862 ## Checklist - [ ] I have added [tests](https://www.cursorless.org/docs/contributing/test-case-recorder/) - [ ] I have updated the [docs](https://github.com/cursorless-dev/cursorless/tree/main/docs) and [cheatsheet](https://github.com/cursorless-dev/cursorless/tree/main/cursorless-talon/src/cheatsheet) - [ ] I have not broken the cheatsheet --------- Co-authored-by: Andreas Arvidsson <andreas.arvidsson87@gmail.com>
diff --git a/packages/cursorless-engine/src/languages/getTextFragmentExtractor.ts b/packages/cursorless-engine/src/languages/getTextFragmentExtractor.ts
@@ -1,4 +1,4 @@
-import { Range, UnsupportedLanguageError } from "@cursorless/common";
+import { Range } from "@cursorless/common";
 import type { SyntaxNode } from "web-tree-sitter";
 import { SelectionWithEditor } from "../typings/Types";
 import { notSupported } from "../util/nodeMatchers";
@@ -104,14 +104,8 @@ function constructHackedStringTextFragmentExtractor(
  */
 export default function getTextFragmentExtractor(
   languageId: string,
-): TextFragmentExtractor {
-  const extractor = textFragmentExtractors[languageId as LegacyLanguageId];
-
-  if (extractor == null) {
-    throw new UnsupportedLanguageError(languageId);
-  }
-
-  return extractor;
+): TextFragmentExtractor | null {
+  return textFragmentExtractors[languageId as LegacyLanguageId];
 }
 
 // NB: For now when we want use the entire file as a text fragment we just
diff --git a/packages/cursorless-engine/src/processTargets/modifiers/surroundingPair/index.ts b/packages/cursorless-engine/src/processTargets/modifiers/surroundingPair/index.ts
@@ -5,9 +5,7 @@ import {
 } from "@cursorless/common";
 import type { SyntaxNode } from "web-tree-sitter";
 import { LanguageDefinitions } from "../../../languages/LanguageDefinitions";
-import getTextFragmentExtractor, {
-  TextFragmentExtractor,
-} from "../../../languages/getTextFragmentExtractor";
+import getTextFragmentExtractor from "../../../languages/getTextFragmentExtractor";
 import { Target } from "../../../typings/target.types";
 import { SurroundingPairTarget } from "../../targets";
 import { getContainingScopeTarget } from "../getContainingScopeTarget";
@@ -73,40 +71,6 @@ function processSurroundingPairCore(
   ] ?? [scopeType.delimiter];
 
   let node: SyntaxNode | null;
-  let textFragmentExtractor: TextFragmentExtractor;
-
-  const textFragmentScopeHandler =
-    languageDefinition?.getTextFragmentScopeHandler();
-
-  if (textFragmentScopeHandler != null) {
-    const containingScope = getContainingScopeTarget(
-      target,
-      textFragmentScopeHandler,
-      0,
-    );
-
-    if (containingScope != null) {
-      const surroundingRange = findSurroundingPairTextBased(
-        editor,
-        range,
-        containingScope[0].contentRange,
-        delimiters,
-        scopeType,
-      );
-      if (surroundingRange != null) {
-        // Found the pair within this text fragment or comment, e.g. "(abc)"
-        return surroundingRange;
-      }
-      // Search in the rest of the file, to find e.g. ("abc")
-      return findSurroundingPairTextBased(
-        editor,
-        range,
-        null,
-        delimiters,
-        scopeType,
-      );
-    }
-  }
 
   try {
     node = languageDefinitions.getNodeAtLocation(document, range);
@@ -122,8 +86,6 @@ function processSurroundingPairCore(
         scopeType,
       );
     }
-
-    textFragmentExtractor = getTextFragmentExtractor(document.languageId);
   } catch (err) {
     if ((err as Error).name === "UnsupportedLanguageError") {
       // If we're in a language where we don't have a parse tree we use the text
@@ -140,14 +102,41 @@ function processSurroundingPairCore(
     }
   }
 
-  // If we have a parse tree but we are in a string node or in a comment node,
-  // then we use the text-based algorithm
-  const selectionWithEditor = {
-    editor,
-    selection: new Selection(range.start, range.end),
-  };
-  const textFragmentRange = textFragmentExtractor(node, selectionWithEditor);
+  const textFragmentRange = (() => {
+    // First try to use the text fragment scope handler if it exists
+    const textFragmentScopeHandler =
+      languageDefinition?.getTextFragmentScopeHandler();
+
+    if (textFragmentScopeHandler != null) {
+      const containingScope = getContainingScopeTarget(
+        target,
+        textFragmentScopeHandler,
+        0,
+      );
+
+      return containingScope?.[0].contentRange;
+    }
+
+    // Then try to use the legacy text fragment extractor if it exists
+    const textFragmentExtractor = getTextFragmentExtractor(document.languageId);
+
+    if (textFragmentExtractor == null) {
+      // If the text fragment extractor doesn't exist, or if it explicitly is
+      // set to `null`, then we just use text-based algorithm on entire document
+      return document.range;
+    }
+
+    const selectionWithEditor = {
+      editor,
+      selection: new Selection(range.start, range.end),
+    };
+
+    return textFragmentExtractor(node, selectionWithEditor);
+  })();
+
   if (textFragmentRange != null) {
+    // If we have a parse tree but we are in a string node or in a comment node,
+    // then we use the text-based algorithm
     const surroundingRange = findSurroundingPairTextBased(
       editor,
       range,
diff --git a/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/surroundingPair/parseTree/python/changeString.yml b/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/surroundingPair/parseTree/python/changeString.yml
@@ -0,0 +1,23 @@
+languageId: python
+command:
+  version: 6
+  spokenForm: change string
+  action:
+    name: clearAndSetSelection
+    target:
+      type: primitive
+      modifiers:
+        - type: containingScope
+          scopeType: {type: surroundingPair, delimiter: string}
+  usePrePhraseSnapshot: true
+initialState:
+  documentContents: "\"\"\"hello\"\"\""
+  selections:
+    - anchor: {line: 0, character: 3}
+      active: {line: 0, character: 3}
+  marks: {}
+finalState:
+  documentContents: ""
+  selections:
+    - anchor: {line: 0, character: 0}
+      active: {line: 0, character: 0}
diff --git a/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/surroundingPair/parseTree/python/changeString3.yml b/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/surroundingPair/parseTree/python/changeString3.yml
@@ -0,0 +1,23 @@
+languageId: python
+command:
+  version: 6
+  spokenForm: change string
+  action:
+    name: clearAndSetSelection
+    target:
+      type: primitive
+      modifiers:
+        - type: containingScope
+          scopeType: {type: surroundingPair, delimiter: string}
+  usePrePhraseSnapshot: true
+initialState:
+  documentContents: "\"\"\"aaa\"\"\""
+  selections:
+    - anchor: {line: 0, character: 0}
+      active: {line: 0, character: 0}
+  marks: {}
+finalState:
+  documentContents: ""
+  selections:
+    - anchor: {line: 0, character: 0}
+      active: {line: 0, character: 0}
diff --git a/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/surroundingPair/parseTree/python/changeString4.yml b/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/surroundingPair/parseTree/python/changeString4.yml
@@ -0,0 +1,23 @@
+languageId: python
+command:
+  version: 6
+  spokenForm: change string
+  action:
+    name: clearAndSetSelection
+    target:
+      type: primitive
+      modifiers:
+        - type: containingScope
+          scopeType: {type: surroundingPair, delimiter: string}
+  usePrePhraseSnapshot: false
+initialState:
+  documentContents: "\"\"\"aaa\"\"\""
+  selections:
+    - anchor: {line: 0, character: 9}
+      active: {line: 0, character: 9}
+  marks: {}
+finalState:
+  documentContents: ""
+  selections:
+    - anchor: {line: 0, character: 0}
+      active: {line: 0, character: 0}
diff --git a/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/surroundingPair/parseTree/python/changeString5.yml b/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/surroundingPair/parseTree/python/changeString5.yml
@@ -0,0 +1,23 @@
+languageId: python
+command:
+  version: 6
+  spokenForm: change string
+  action:
+    name: clearAndSetSelection
+    target:
+      type: primitive
+      modifiers:
+        - type: containingScope
+          scopeType: {type: surroundingPair, delimiter: string}
+  usePrePhraseSnapshot: false
+initialState:
+  documentContents: f"""aaa"""
+  selections:
+    - anchor: {line: 0, character: 6}
+      active: {line: 0, character: 6}
+  marks: {}
+finalState:
+  documentContents: ""
+  selections:
+    - anchor: {line: 0, character: 0}
+      active: {line: 0, character: 0}
diff --git a/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/surroundingPair/parseTree/python/changeString6.yml b/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/surroundingPair/parseTree/python/changeString6.yml
@@ -0,0 +1,23 @@
+languageId: python
+command:
+  version: 6
+  spokenForm: change string
+  action:
+    name: clearAndSetSelection
+    target:
+      type: primitive
+      modifiers:
+        - type: containingScope
+          scopeType: {type: surroundingPair, delimiter: string}
+  usePrePhraseSnapshot: false
+initialState:
+  documentContents: "'''aaa'''"
+  selections:
+    - anchor: {line: 0, character: 3}
+      active: {line: 0, character: 3}
+  marks: {}
+finalState:
+  documentContents: ""
+  selections:
+    - anchor: {line: 0, character: 0}
+      active: {line: 0, character: 0}
diff --git a/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/surroundingPair/parseTree/python/changeString7.yml b/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/surroundingPair/parseTree/python/changeString7.yml
@@ -0,0 +1,23 @@
+languageId: python
+command:
+  version: 6
+  spokenForm: change string
+  action:
+    name: clearAndSetSelection
+    target:
+      type: primitive
+      modifiers:
+        - type: containingScope
+          scopeType: {type: surroundingPair, delimiter: string}
+  usePrePhraseSnapshot: false
+initialState:
+  documentContents: "'aaa'"
+  selections:
+    - anchor: {line: 0, character: 0}
+      active: {line: 0, character: 0}
+  marks: {}
+finalState:
+  documentContents: ""
+  selections:
+    - anchor: {line: 0, character: 0}
+      active: {line: 0, character: 0}
diff --git a/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/surroundingPair/parseTree/python/changeString8.yml b/packages/cursorless-vscode-e2e/src/suite/fixtures/recorded/surroundingPair/parseTree/python/changeString8.yml
@@ -0,0 +1,23 @@
+languageId: python
+command:
+  version: 6
+  spokenForm: change string
+  action:
+    name: clearAndSetSelection
+    target:
+      type: primitive
+      modifiers:
+        - type: containingScope
+          scopeType: {type: surroundingPair, delimiter: string}
+  usePrePhraseSnapshot: false
+initialState:
+  documentContents: "'aaa'"
+  selections:
+    - anchor: {line: 0, character: 1}
+      active: {line: 0, character: 1}
+  marks: {}
+finalState:
+  documentContents: ""
+  selections:
+    - anchor: {line: 0, character: 0}
+      active: {line: 0, character: 0}