Skip to content

Commit 3a56e23

Browse files
committed
refactor(web): generalize SearchQuotientSpur parent requirements
As an upcoming goal is to introduce a new SearchQuotientNode type that will assist with context-caching across multiple tokenizations, it is wise to generalize SearchQuotientSpur and the functions utilizing it to accept any SearchQuotientNode-implementing type as its parent. Build-bot: skip build:web Test-bot: skip
1 parent 445cca2 commit 3a56e23

File tree

9 files changed

+525
-172
lines changed

9 files changed

+525
-172
lines changed

web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import { LexicalModelTypes } from '@keymanapp/common-types';
1212
import { deepCopy, KMWString } from "@keymanapp/web-utils";
1313

1414
import { SearchQuotientSpur } from "./search-quotient-spur.js";
15+
import { SearchQuotientNode } from "./search-quotient-node.js";
1516
import { TokenSplitMap } from "./context-tokenization.js";
1617

1718
import Distribution = LexicalModelTypes.Distribution;
@@ -58,10 +59,10 @@ export class ContextToken {
5859
* Contains all relevant correction-search data for use in generating
5960
* corrections for this ContextToken instance.
6061
*/
61-
public get searchModule(): SearchQuotientSpur {
62+
public get searchModule(): SearchQuotientNode {
6263
return this._searchModule;
6364
}
64-
private _searchModule: SearchQuotientSpur;
65+
private _searchModule: SearchQuotientNode;
6566

6667
isPartial: boolean;
6768

web/src/engine/predictive-text/worker-thread/src/main/correction/distance-modeler.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ type RealizedInput = ProbabilityMass<Transform>[]; // NOT Distribution - they'r
2222
export const EDIT_DISTANCE_COST_SCALE = 5;
2323
export const MIN_KEYSTROKE_PROBABILITY = 0.0001;
2424

25+
export const DEFAULT_ALLOTTED_CORRECTION_TIME_INTERVAL = 33; // in milliseconds.
26+
2527
export type TraversableToken<TUnit> = {
2628
key: TUnit,
2729
traversal: LexiconTraversal

web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,12 @@ export interface SearchQuotientNode {
4949
*/
5050
readonly spaceId: number;
5151

52+
/**
53+
* Notes the SearchQuotientNode(s) whose correction-search paths are extended
54+
* by this SearchQuotientNode.
55+
*/
56+
readonly parents: SearchQuotientNode[];
57+
5258
/**
5359
* Retrieves the lowest-cost / lowest-distance edge from the batcher's search
5460
* area, checks its validity as a correction to the input text, and reports on
@@ -57,6 +63,25 @@ export interface SearchQuotientNode {
5763
*/
5864
handleNextNode(): PathResult;
5965

66+
/**
67+
* Denotes whether or not the represented search space includes paths built from
68+
* the specified set of keystroke input distributions. The distribution count
69+
* should match .inputCount - no omissions or extras are permitted.
70+
*
71+
* Designed explicitly for use in unit testing; it's not super-efficient, so
72+
* avoid live use.
73+
*
74+
* @param keystrokeDistributions
75+
* @internal
76+
*/
77+
hasInputs(keystrokeDistributions: Distribution<Transform>[]): boolean;
78+
79+
/**
80+
* Increases the editing range that will be considered for determining
81+
* correction distances.
82+
*/
83+
increaseMaxEditDistance(): void;
84+
6085
/**
6186
* Reports the cost of the lowest-cost / lowest-distance edge held within the
6287
* batcher's search area.

web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts

Lines changed: 95 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,6 @@ import Distribution = LexicalModelTypes.Distribution;
1818
import LexicalModel = LexicalModelTypes.LexicalModel;
1919
import Transform = LexicalModelTypes.Transform;
2020

21-
export const DEFAULT_ALLOTTED_CORRECTION_TIME_INTERVAL = 33; // in milliseconds.
22-
2321
export const QUEUE_NODE_COMPARATOR: Comparator<SearchNode> = function(arg1, arg2) {
2422
return arg1.currentCost - arg2.currentCost;
2523
}
@@ -30,9 +28,11 @@ export class SearchQuotientSpur implements SearchQuotientNode {
3028
private selectionQueue: PriorityQueue<SearchNode> = new PriorityQueue(QUEUE_NODE_COMPARATOR);
3129
readonly inputs?: Distribution<Readonly<Transform>>;
3230

33-
private parentPath: SearchQuotientSpur;
31+
private parentNode: SearchQuotientNode;
3432
readonly spaceId: number;
3533

34+
readonly inputCount: number;
35+
3636
/**
3737
* Marks all results that have already been returned from this instance of SearchPath.
3838
* Should be deleted and cleared if any paths consider this one as a parent.
@@ -52,47 +52,96 @@ export class SearchQuotientSpur implements SearchQuotientNode {
5252
* @param model
5353
*/
5454
constructor(model: LexicalModel);
55-
constructor(space: SearchQuotientSpur, inputs: Distribution<Transform>, bestProbFromSet: number);
56-
constructor(arg1: LexicalModel | SearchQuotientSpur, inputs?: Distribution<Transform>, bestProbFromSet?: number) {
55+
constructor(space: SearchQuotientNode, inputs: Distribution<Transform>, bestProbFromSet: number);
56+
constructor(arg1: LexicalModel | SearchQuotientNode, inputs?: Distribution<Transform>, bestProbFromSet?: number) {
5757
this.spaceId = generateSpaceSeed();
5858

5959
if(arg1 instanceof SearchQuotientSpur) {
60-
const parentNode = arg1 as SearchQuotientSpur;
60+
const parentNode = arg1 as SearchQuotientNode;
6161
const logTierCost = -Math.log(bestProbFromSet);
6262

6363
this.inputs = inputs;
64+
this.inputCount = parentNode.inputCount + 1;
6465
this.lowestPossibleSingleCost = parentNode.lowestPossibleSingleCost + logTierCost;
65-
this.parentPath = parentNode;
66+
this.parentNode = parentNode;
6667

67-
this.addEdgesForNodes(parentNode.previousResults.map(v => v.node));
68+
this.addEdgesForNodes(parentNode.previousResults.map(r => r.node));
6869

6970
return;
7071
}
7172

7273
const model = arg1 as LexicalModel;
7374
this.selectionQueue.enqueue(new SearchNode(model.traverseFromRoot(), this.spaceId, t => model.toKey(t)));
7475
this.lowestPossibleSingleCost = 0;
76+
this.inputCount = 0;
7577
}
7678

7779
/**
7880
* Retrieves the sequences of inputs that led to this SearchPath.
7981
*/
8082
public get inputSequence(): Distribution<Transform>[] {
81-
if(this.parentPath) {
82-
return [...this.parentPath.inputSequence, this.inputs];
83+
if(this.parentNode) {
84+
return [...this.parentNode.inputSequence, this.inputs];
8385
} else if(this.inputs) {
8486
return [this.inputs];
8587
} else {
8688
return [];
8789
}
8890
}
8991

90-
public get inputCount(): number {
91-
return (this.parentPath?.inputCount ?? 0) + (this.inputs ? 1 : 0);
92+
public hasInputs(keystrokeDistributions: Distribution<Transform>[]): boolean {
93+
if(this.inputCount == 0) {
94+
return keystrokeDistributions.length == 0;
95+
} else if(keystrokeDistributions.length != this.inputCount) {
96+
return false;
97+
}
98+
99+
const tailInput = [...keystrokeDistributions[keystrokeDistributions.length - 1]];
100+
keystrokeDistributions = keystrokeDistributions.slice(0, keystrokeDistributions.length - 1);
101+
const localInput = this.lastInput;
102+
103+
const parentHasInput = () => !!this.parents.find(p => p.hasInputs(keystrokeDistributions));
104+
105+
// Actual reference match? Easy mode.
106+
if(localInput == tailInput) {
107+
return parentHasInput();
108+
} else if(localInput.length != tailInput.length) {
109+
return false;
110+
} else {
111+
for(let entry of tailInput) {
112+
const matchIndex = localInput.findIndex((x) => {
113+
const s1 = x.sample;
114+
const s2 = entry.sample;
115+
// Check for equal reference first before the other checks; it makes a nice shortcut.
116+
if(x == entry) {
117+
return true;
118+
} if(x.p == entry.p && s1.deleteLeft == s2.deleteLeft
119+
&& s1.id == s2.id && ((s1.deleteRight ?? 0) == (s2.deleteRight ?? 0)) && s1.insert == s2.insert
120+
) {
121+
return true;
122+
}
123+
return false;
124+
});
125+
126+
if(matchIndex == -1) {
127+
return false;
128+
} else {
129+
tailInput.splice(matchIndex, 1);
130+
}
131+
}
132+
133+
return parentHasInput();
134+
}
135+
}
136+
137+
public get lastInput(): Distribution<Readonly<Transform>> {
138+
// Shallow-copies the array to prevent external modification; the Transforms
139+
// are marked Readonly to prevent their modification as well.
140+
return [...this.inputs];
92141
}
93142

94143
public get bestExample(): {text: string, p: number} {
95-
const bestPrefix = this.parentPath?.bestExample ?? { text: '', p: 1 };
144+
const bestPrefix = this.parentNode?.bestExample ?? { text: '', p: 1 };
96145
const bestLocalInput = this.inputs?.reduce((max, curr) => max.p < curr.p ? curr : max) ?? { sample: { insert: '', deleteLeft: 0 }, p: 1};
97146

98147
return {
@@ -101,8 +150,13 @@ export class SearchQuotientSpur implements SearchQuotientNode {
101150
}
102151
}
103152

153+
get parents() {
154+
// The SearchPath class may only have a single parent.
155+
return this.parentNode ? [this.parentNode] : [];
156+
}
157+
104158
increaseMaxEditDistance() {
105-
this.parentPath.increaseMaxEditDistance();
159+
this.parentNode.increaseMaxEditDistance();
106160

107161
// By extracting the entries from the priority queue and increasing distance outside of it as a batch job,
108162
// we get an O(N) implementation, rather than the O(N log N) that would result from maintaining the original queue.
@@ -117,11 +171,11 @@ export class SearchQuotientSpur implements SearchQuotientNode {
117171
get correctionsEnabled(): boolean {
118172
// When corrections are disabled, the Web engine will only provide individual Transforms
119173
// for an input, not a distribution. No distributions means we shouldn't do corrections.
120-
return this.parentPath?.correctionsEnabled || this.inputs?.length > 1;
174+
return this.parentNode?.correctionsEnabled || this.inputs?.length > 1;
121175
}
122176

123177
public get currentCost(): number {
124-
const parentCost = this.parentPath?.currentCost ?? Number.POSITIVE_INFINITY;
178+
const parentCost = this.parentNode?.currentCost ?? Number.POSITIVE_INFINITY;
125179
const localCost = this.selectionQueue.peek()?.currentCost ?? Number.POSITIVE_INFINITY;
126180

127181
return Math.min(localCost, parentCost);
@@ -156,7 +210,7 @@ export class SearchQuotientSpur implements SearchQuotientNode {
156210
* @returns
157211
*/
158212
public handleNextNode(): PathResult {
159-
const parentCost = this.parentPath?.currentCost ?? Number.POSITIVE_INFINITY;
213+
const parentCost = this.parentNode?.currentCost ?? Number.POSITIVE_INFINITY;
160214
const localCost = this.selectionQueue.peek()?.currentCost ?? Number.POSITIVE_INFINITY;
161215

162216
if(parentCost <= localCost) {
@@ -166,7 +220,7 @@ export class SearchQuotientSpur implements SearchQuotientNode {
166220
};
167221
}
168222

169-
const result = this.parentPath.handleNextNode();
223+
const result = this.parentNode.handleNextNode();
170224

171225
if(result.type == 'complete') {
172226
this.addEdgesForNodes([result.finalNode]);
@@ -178,9 +232,10 @@ export class SearchQuotientSpur implements SearchQuotientNode {
178232
} as PathResult
179233
}
180234

235+
// will have equal .spaceId.
181236
let currentNode = this.selectionQueue.dequeue();
182237

183-
let unmatchedResult: PathResult = {
238+
let unmatchedResult = {
184239
type: 'intermediate',
185240
cost: currentNode.currentCost
186241
}
@@ -191,7 +246,7 @@ export class SearchQuotientSpur implements SearchQuotientNode {
191246
// Note: .knownCost is not scaled, while its contribution to .currentCost _is_ scaled.
192247
let substitutionsOnly = false;
193248
if(currentNode.editCount > 2) {
194-
return unmatchedResult;
249+
return unmatchedResult as PathResult;
195250
} else if(currentNode.editCount == 2) {
196251
substitutionsOnly = true;
197252
}
@@ -200,18 +255,16 @@ export class SearchQuotientSpur implements SearchQuotientNode {
200255
// Allows a little 'wiggle room' + 2 "hard" edits.
201256
// Can be important if needed characters don't actually exist on the keyboard
202257
// ... or even just not the then-current layer of the keyboard.
203-
//
204-
// TODO: still consider the lowest-cost individual edges for THIS specific criterion.
205258
if(currentNode.currentCost > this.lowestPossibleSingleCost + 2.5 * EDIT_DISTANCE_COST_SCALE) {
206-
return unmatchedResult;
259+
return unmatchedResult as PathResult;
207260
}
208261

209262
// Stage 2: process subset further OR build remaining edges
210263

211264
if(currentNode.hasPartialInput) {
212265
// Re-use the current queue; the number of total inputs considered still holds.
213266
this.selectionQueue.enqueueAll(currentNode.processSubsetEdge());
214-
return unmatchedResult;
267+
return unmatchedResult as PathResult;
215268
}
216269

217270
// OK, we fully crossed a graph edge and have landed on a transition point;
@@ -223,19 +276,26 @@ export class SearchQuotientSpur implements SearchQuotientNode {
223276
this.selectionQueue.enqueueAll(insertionEdges);
224277
}
225278

226-
if((this.returnedValues[currentNode.resultKey]?.currentCost ?? Number.POSITIVE_INFINITY) > currentNode.currentCost) {
227-
this.returnedValues[currentNode.resultKey] = currentNode;
228-
} else {
229-
// Not a better cost, so reject it and move on to the next potential result.
230-
return this.handleNextNode();
279+
if(currentNode.spaceId == this.spaceId) {
280+
if(this.returnedValues) {
281+
if((this.returnedValues[currentNode.resultKey]?.currentCost ?? Number.POSITIVE_INFINITY) > currentNode.currentCost) {
282+
this.returnedValues[currentNode.resultKey] = currentNode;
283+
} else {
284+
// Not a better cost, so reject it and move on to the next potential result.
285+
return this.handleNextNode();
286+
}
287+
}
288+
289+
return {
290+
type: 'complete',
291+
cost: currentNode.currentCost,
292+
finalNode: currentNode,
293+
spaceId: this.spaceId
294+
};
231295
}
232296

233-
return {
234-
type: 'complete',
235-
cost: currentNode.currentCost,
236-
finalNode: currentNode,
237-
spaceId: this.spaceId
238-
};
297+
// If we've somehow fully exhausted all search options, indicate that none remain.
298+
return unmatchedResult as PathResult;
239299
}
240300

241301
public get previousResults(): SearchResult[] {

web/src/engine/predictive-text/worker-thread/src/main/model-compositor.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ import { applySuggestionCasing, correctAndEnumerate, dedupeSuggestions, finalize
77
import { detectCurrentCasing, determineModelTokenizer, determineModelWordbreaker, determinePunctuationFromModel } from './model-helpers.js';
88

99
import { ContextTracker } from './correction/context-tracker.js';
10-
import { DEFAULT_ALLOTTED_CORRECTION_TIME_INTERVAL } from './correction/search-quotient-spur.js';
10+
import { DEFAULT_ALLOTTED_CORRECTION_TIME_INTERVAL } from './correction/distance-modeler.js';
1111

1212
import CasingForm = LexicalModelTypes.CasingForm;
1313
import Configuration = LexicalModelTypes.Configuration;

0 commit comments

Comments
 (0)