Skip to content

Commit 0385b48

Browse files
committed
avoid allocating hats to the first letter of a token
We could get much fancier than this, but after running with this change for a day it appears to help somewhat, and it is nice and simple. I propose that we declare that it fixes #1658, at least for now.
1 parent 342a771 commit 0385b48

File tree

4 files changed

+40
-8
lines changed

4 files changed

+40
-8
lines changed

packages/cursorless-engine/src/tokenGraphemeSplitter/tokenGraphemeSplitter.ts

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -119,12 +119,26 @@ export class TokenGraphemeSplitter {
119119
* @param token The token to split
120120
* @returns A list of normalised graphemes in {@link token}
121121
*/
122-
getTokenGraphemes = (token: string): Grapheme[] =>
123-
matchAll<Grapheme>(token, GRAPHEME_SPLIT_REGEX, (match) => ({
124-
text: this.normalizeGrapheme(match[0]),
125-
tokenStartOffset: match.index!,
126-
tokenEndOffset: match.index! + match[0].length,
127-
}));
122+
getTokenGraphemes = (token: string): Grapheme[] => {
123+
const graphemes = matchAll<Grapheme>(
124+
token,
125+
GRAPHEME_SPLIT_REGEX,
126+
(match) => ({
127+
text: this.normalizeGrapheme(match[0]),
128+
tokenStartOffset: match.index!,
129+
tokenEndOffset: match.index! + match[0].length,
130+
isFirstLetterGrapheme: false,
131+
}),
132+
);
133+
// iterate through the graphemes, marking the first letter
134+
for (const grapheme of graphemes) {
135+
if (grapheme.text.match(/[a-z]/)) {
136+
grapheme.isFirstLetterGrapheme = true;
137+
break;
138+
}
139+
}
140+
return graphemes;
141+
};
128142

129143
/**
130144
* Normalizes the grapheme {@link rawGraphemeText} based on user
@@ -201,4 +215,12 @@ export interface Grapheme {
201215

202216
/** The end offset of the grapheme within its containing token */
203217
tokenEndOffset: number;
218+
219+
/**
220+
* Whether this grapheme is the first grapheme in the token whose normalized text is a letter (matches `[a-z]`).
221+
* See https://github.com/cursorless-dev/cursorless/issues/1658
222+
* TODO: Consider instead whether this grapheme is the beginning of a word inside the text.
223+
* This is more complicated, because the definition of a word can vary by language.
224+
*/
225+
isFirstLetterGrapheme: boolean;
204226
}

packages/cursorless-engine/src/util/allocateHats/HatMetrics.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,12 @@ export type HatMetric = (hat: HatCandidate) => number;
1414
*/
1515
export const negativePenalty: HatMetric = ({ penalty }) => -penalty;
1616

17+
/**
18+
* @returns A metric that penalizes graphemes that are the first letter of a token
19+
*/
20+
export const avoidFirstLetter: HatMetric = ({ grapheme }) =>
21+
grapheme.isFirstLetterGrapheme ? -1 : 0;
22+
1723
/**
1824
* @param hatOldTokenRanks A map from a hat candidate (grapheme+style combination) to the score of the
1925
* token that used the given hat in the previous hat allocation.

packages/cursorless-engine/src/util/allocateHats/allocateHats.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ function getTokenRemainingHatCandidates(
151151
}
152152

153153
/**
154-
* @param token The token that recevied the hat
154+
* @param token The token that received the hat
155155
* @param chosenHat The hat we chose for the token
156156
* @returns An object indicating the hat assigned to the token, along with the
157157
* range of the grapheme upon which it sits

packages/cursorless-engine/src/util/allocateHats/chooseTokenHat.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import { HatStability, TokenHat } from "@cursorless/common";
22
import { HatCandidate } from "./allocateHats";
33
import { RankingContext } from "./getHatRankingContext";
44
import {
5+
avoidFirstLetter,
56
hatOldTokenRank,
67
isOldTokenHat,
78
minimumTokenRankContainingGrapheme,
@@ -71,7 +72,10 @@ export function chooseTokenHat(
7172
// 4. Narrow to the hats with the lowest penalty
7273
negativePenalty,
7374

74-
// 5. Prefer hats that sit on a grapheme that doesn't appear in any highly
75+
// 5. Avoid the first letter grapheme of the token if possible
76+
avoidFirstLetter,
77+
78+
// 6. Prefer hats that sit on a grapheme that doesn't appear in any highly
7579
// ranked token
7680
minimumTokenRankContainingGrapheme(tokenRank, graphemeTokenRanks),
7781
])!;

0 commit comments

Comments
 (0)