Skip to content

Commit 7e5d78c

Browse files
committed
refactor(web): adds SearchSpace interface
This new interface is being added in preparation for efficient multi-tokenization correction-search. SearchPath has been modified to implement it, and a new type (SearchCluster) will be added in the near future as an additional implementing type. Build-bot: skip build:web Test-bot: skip
1 parent cf96a0f commit 7e5d78c

File tree

5 files changed

+103
-34
lines changed

5 files changed

+103
-34
lines changed

web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -204,18 +204,7 @@ export class ContextToken {
204204
* received that can correspond to the current instance.
205205
*/
206206
get exampleInput(): string {
207-
/*
208-
* TODO: with clear limits (strict cost minimization?) / prior calculation
209-
* attempts, return the best _suggestion_ for this token. This is
210-
* especially relevant for epic/dict-breaker - we want to best model the token
211-
* as it would apply within the word-breaking algorithm.
212-
*
213-
* If not possible, find the best of the deepest search paths and append the
214-
* most likely keystroke data afterward.
215-
*/
216-
const transforms = this.searchSpace.inputSequence.map((dist) => dist[0].sample)
217-
const composite = transforms.reduce((accum, current) => buildMergedTransform(accum, current), {insert: '', deleteLeft: 0});
218-
return composite.insert;
207+
return this.searchSpace.bestExample.text;
219208
}
220209

221210
/**

web/src/engine/predictive-text/worker-thread/src/main/correction/distance-modeler.ts

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import { LexicalModelTypes } from '@keymanapp/common-types';
66
import { ClassicalDistanceCalculation } from './classical-calculation.js';
77
import { ExecutionTimer, STANDARD_TIME_BETWEEN_DEFERS } from './execution-timer.js';
88
import { QUEUE_NODE_COMPARATOR, SearchPath } from './search-path.js';
9+
import { PathResult } from './search-space.js';
910
import { subsetByChar, subsetByInterval, mergeSubset, TransformSubset } from '../transform-subsets.js';
1011

1112
import Distribution = LexicalModelTypes.Distribution;
@@ -599,23 +600,6 @@ export class SearchResult {
599600
}
600601
}
601602

602-
type NullPath = {
603-
type: 'none'
604-
}
605-
606-
type IntermediateSearchPath = {
607-
type: 'intermediate',
608-
cost: number
609-
}
610-
611-
type CompleteSearchPath = {
612-
type: 'complete',
613-
cost: number,
614-
finalNode: SearchNode
615-
}
616-
617-
export type PathResult = NullPath | IntermediateSearchPath | CompleteSearchPath;
618-
619603
// Current best guesstimate of how compositor will retrieve ideal corrections.
620604
export async function *getBestMatches(searchSpace: SearchPath, timer: ExecutionTimer): AsyncGenerator<SearchResult> {
621605
let currentReturns: {[resultKey: string]: SearchNode} = {};

web/src/engine/predictive-text/worker-thread/src/main/correction/search-path.ts

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,11 @@
88
* engine.
99
*/
1010

11-
import { QueueComparator as Comparator, PriorityQueue } from '@keymanapp/web-utils';
11+
import { QueueComparator as Comparator, KMWString, PriorityQueue } from '@keymanapp/web-utils';
1212
import { LexicalModelTypes } from '@keymanapp/common-types';
1313

14-
import { EDIT_DISTANCE_COST_SCALE, PathResult, SearchNode, SearchResult } from './distance-modeler.js';
14+
import { EDIT_DISTANCE_COST_SCALE, SearchNode, SearchResult } from './distance-modeler.js';
15+
import { PathResult, SearchSpace } from './search-space.js';
1516

1617
import Distribution = LexicalModelTypes.Distribution;
1718
import LexicalModel = LexicalModelTypes.LexicalModel;
@@ -25,7 +26,7 @@ export const QUEUE_NODE_COMPARATOR: Comparator<SearchNode> = function(arg1, arg2
2526

2627
// The set of search spaces corresponding to the same 'context' for search.
2728
// Whenever a wordbreak boundary is crossed, a new instance should be made.
28-
export class SearchPath {
29+
export class SearchPath implements SearchSpace {
2930
private selectionQueue: PriorityQueue<SearchNode> = new PriorityQueue(QUEUE_NODE_COMPARATOR);
3031
private inputs: Distribution<Transform>;
3132

@@ -110,6 +111,20 @@ export class SearchPath {
110111
}
111112
}
112113

114+
public get inputCount(): number {
115+
return (this.parentPath?.inputCount ?? 0) + (this.inputs ? 1 : 0);
116+
}
117+
118+
public get bestExample(): {text: string, p: number} {
119+
const bestPrefix = this.parentPath?.bestExample ?? { text: '', p: 1 };
120+
const bestLocalInput = this.inputs?.reduce((max, curr) => max.p < curr.p ? curr : max) ?? { sample: { insert: '', deleteLeft: 0 }, p: 1};
121+
122+
return {
123+
text: KMWString.substring(bestPrefix.text, 0, KMWString.length(bestPrefix.text) - bestLocalInput.sample.deleteLeft) + bestLocalInput.sample.insert,
124+
p: bestPrefix.p * bestLocalInput.p
125+
}
126+
}
127+
113128
increaseMaxEditDistance() {
114129
this.parentPath.increaseMaxEditDistance();
115130

@@ -304,7 +319,7 @@ export class SearchPath {
304319
};
305320
}
306321

307-
public previousResults(): SearchResult[] {
322+
public get previousResults(): SearchResult[] {
308323
return Object.values(this.returnedValues).map(v => new SearchResult(v));
309324
}
310325
}
web/src/engine/predictive-text/worker-thread/src/main/correction/search-space.ts

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
/*
2+
* Keyman is copyright (C) SIL Global. MIT License.
3+
*
4+
* Created by jahorton on 2025-10-09
5+
*
6+
* This file defines the predictive-text engine's SearchSpace interface, which is used to
7+
* manage the search-space(s) for text corrections within the engine.
8+
*/
9+
10+
import { SearchNode, SearchResult } from "./distance-modeler.js";
11+
12+
export let SPACE_ID_SEED = 0;
13+
14+
export function generateSpaceSeed(): number {
15+
return SPACE_ID_SEED++;
16+
}
17+
18+
type NullPath = {
19+
type: 'none'
20+
}
21+
22+
type IntermediateSearchPath = {
23+
type: 'intermediate',
24+
cost: number
25+
}
26+
27+
type CompleteSearchPath = {
28+
type: 'complete',
29+
cost: number,
30+
finalNode: SearchNode
31+
}
32+
33+
export type PathResult = NullPath | IntermediateSearchPath | CompleteSearchPath;
34+
35+
/**
36+
* Represents all or a portion of the dynamically-generated graph used to search
37+
* for predictive-text corrections.
38+
*/
39+
export interface SearchSpace {
40+
/**
41+
* Retrieves the lowest-cost / lowest-distance edge from the batcher's search
42+
* area, checks its validity as a correction to the input text, and reports on
43+
* what sort of result the edge's destination node represents.
44+
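* @returns A `PathResult` tagged 'none', 'intermediate', or 'complete'; the latter two carry a `cost`, and 'complete' also carries the `finalNode`.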
45+
*/
46+
handleNextNode(): PathResult;
47+
48+
/**
49+
* Reports the cost of the lowest-cost / lowest-distance edge held within the
50+
* batcher's search area.
51+
* @returns
52+
*/
53+
readonly currentCost: number;
54+
55+
/**
56+
* Returns the set of previously-processed results under this batcher's domain.
57+
*/
58+
readonly previousResults: SearchResult[];
59+
60+
/**
61+
* When true, this indicates that the currently-represented portion of context
62+
* has fat-finger data available, which itself indicates that the user has
63+
* corrections enabled.
64+
*/
65+
readonly correctionsEnabled: boolean;
66+
67+
/**
68+
* Reports the total number of input keystrokes represented by this
69+
* graph/subgraph.
70+
*
71+
* (Their fat-finger alternates, when provided, do not influence this count -
72+
* they're associated with the original keystroke that affected the context.)
73+
*/
74+
readonly inputCount: number;
75+
76+
/**
77+
* Determines the best example text representable by this batcher's portion of
78+
* the correction-search graph and its paths.
79+
*/
80+
readonly bestExample: { text: string, p: number };
81+
}

web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -532,7 +532,7 @@ export async function correctAndEnumerate(
532532
* Worst-case, it's possible to temporarily add normalization if a code deep-dive
533533
* is needed in the future.
534534
*/
535-
if(searchSpace.inputSequence.length <= 1) {
535+
if(searchSpace.inputCount <= 1) {
536536
/* Suppose a key distribution: most likely with p=0.5, second-most with 0.4 - a pretty
537537
* ambiguous case that would only arise very near the center of the boundary between two keys.
538538
* Raising (0.5/0.4)^16 ~= 35.53. (At time of writing, SINGLE_CHAR_KEY_PROB_EXPONENT = 16.)

0 commit comments

Comments
 (0)