Skip to content

Commit b7c2148

Browse files
Break sentence scope on [.!?] (#1747)
sbd would sometimes not break sentences at lines ending with [.!?] when `newline_boundaries: false` is used.

## Checklist

- [x] I have added [tests](https://www.cursorless.org/docs/contributing/test-case-recorder/)
- [/] I have updated the [docs](https://github.com/cursorless-dev/cursorless/tree/main/docs) and [cheatsheet](https://github.com/cursorless-dev/cursorless/tree/main/cursorless-talon/src/cheatsheet)
- [/] I have not broken the cheatsheet

---------

Co-authored-by: Pokey Rule <[email protected]>
1 parent 278f188 commit b7c2148

File tree

3 files changed

+63
-9
lines changed

3 files changed

+63
-9
lines changed

packages/cursorless-engine/src/processTargets/modifiers/scopeHandlers/SentenceScopeHandler/SentenceSegmenter.ts

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,19 @@
11
import * as sbd from "sbd";
22
import { MatchedText, matchRegex, testRegex } from "../../../../util/regex";
33

4-
// A sentence starts with a letter
5-
const leadingOffsetRegex = /\p{L}/u;
6-
// A line with no letters is invalid and breaks sentences
7-
const invalidLineRegex = /(\n[^\p{L}]*\n)/gu;
4+
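// A sentence starts at the beginning of the first run of non-whitespace characters that contains a letter, so symbols directly attached before the letter are kept while leading whitespace is excluded.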
5+
const leadingOffsetRegex = /\S*\p{L}/u;
6+
/**
7+
* This regex is used to split the text that comes back from sbd. Anything
8+
* matching this regex will be discarded from the returned sentence, and split
9+
* the sentence in two if it occurs in the middle of a sentence. The regex
10+
* matches either of the following:
11+
* 1. An entire line containing no letters.
12+
* 2. If a line ends in `[.!?]`, possibly followed by whitespace, we split on
13+
* that. This is a workaround for #1753. FIXME: Remove this second term once
14+
* #1753 is fixed.
15+
*/
16+
const skipPartRegex = /(\r?\n[^\p{L}]*\r?\n)|(?<=[.!?])(\s*\r?\n)/gu;
817

918
const options: sbd.Options = {
1019
["newline_boundaries"]: false,
@@ -17,10 +26,10 @@ export default class SentenceSegmenter {
1726
let index = 0;
1827

1928
for (const sentence of sentences) {
20-
const parts = sentence.split(invalidLineRegex);
29+
const parts = sentence.split(skipPartRegex).filter((p) => p != null);
2130

2231
for (const part of parts) {
23-
if (!isInvalidLine(part)) {
32+
if (!skipPart(part)) {
2433
const segment = createSegment(part, index);
2534
if (segment != null) {
2635
yield segment;
@@ -56,6 +65,6 @@ function createSegment(
5665
};
5766
}
5867

59-
function isInvalidLine(text: string): boolean {
60-
return testRegex(invalidLineRegex, text);
68+
function skipPart(text: string): boolean {
69+
return testRegex(skipPartRegex, text);
6170
}

packages/cursorless-engine/src/test/fixtures/sentenceSegmeter.fixture.ts

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,14 @@ export const sentenceSegmenterFixture: Fixture[] = [
2626
},
2727
{
2828
input: "1Foo",
29-
expectedOutput: ["Foo"],
29+
expectedOutput: ["1Foo"],
3030
},
3131
{
3232
input: "_foo",
33+
expectedOutput: ["_foo"],
34+
},
35+
{
36+
input: "* foo",
3337
expectedOutput: ["foo"],
3438
},
3539
{
@@ -40,6 +44,18 @@ export const sentenceSegmenterFixture: Fixture[] = [
4044
input: "* Foo \nbar. *",
4145
expectedOutput: ["Foo \nbar."],
4246
},
47+
{
48+
input: "Foo bar. Baz bongo.",
49+
expectedOutput: ["Foo bar.", "Baz bongo."],
50+
},
51+
{
52+
input: "Foo bar. \nBaz bongo.",
53+
expectedOutput: ["Foo bar.", "Baz bongo."],
54+
},
55+
{
56+
input: "Foo Bar. \nBaz bongo.",
57+
expectedOutput: ["Foo Bar.", "Baz bongo."],
58+
},
4359
{
4460
input: "Foo \n*\nbar",
4561
expectedOutput: ["Foo", "bar"],
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
languageId: markdown
2+
command:
3+
version: 6
4+
spokenForm: change every sentence
5+
action:
6+
name: clearAndSetSelection
7+
target:
8+
type: primitive
9+
modifiers:
10+
- type: everyScope
11+
scopeType: {type: sentence}
12+
usePrePhraseSnapshot: true
13+
initialState:
14+
documentContents: |-
15+
1. **Custom subtitles** - User customizable subtitles for Talon.
16+
- [Subtitles and notifications](./core/on_phrase/subtitles_and_notifications)
17+
selections:
18+
- anchor: {line: 0, character: 0}
19+
active: {line: 0, character: 0}
20+
marks: {}
21+
finalState:
22+
documentContents: |-
23+
1.
24+
-
25+
selections:
26+
- anchor: {line: 0, character: 3}
27+
active: {line: 0, character: 3}
28+
- anchor: {line: 1, character: 6}
29+
active: {line: 1, character: 6}

0 commit comments

Comments
 (0)