Skip to content

Commit 86fc4ef

Browse files
committed
change(web): remove duplicate correction-search path filtering
Upon investigation into the code being removed, this mostly triggered whenever an empty transform appeared in the input - during a context reset, at initialization, or when a deadkey is typed. (Deadkeys aren't sent to the pred-text engine.) This is generally not a common case within the engine, and there exists other filtering that helps prevent duplicating search results. Suppose a token that begins tagged with an empty transform. When the search performs an 'insertion' to better lookup words down a lexical path, the 'insertion' is identical whether before or after an empty transform. Similarly, insertions after a deletion appear no different than substitutions (or the same insertion before the deletion, then the deletion itself). The code likely didn't gain us much, as it likely carried some performance overhead - it built a large object that required memory and lookup time that involved constructing and processing strings for hashing. Build-bot: skip build:web Test-bot: skip
1 parent 43f5498 commit 86fc4ef

File tree

4 files changed

+158
-73
lines changed

4 files changed

+158
-73
lines changed

web/src/engine/predictive-text/worker-thread/src/main/correction/distance-modeler.ts

Lines changed: 33 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import Distribution = LexicalModelTypes.Distribution;
1313
import LexiconTraversal = LexicalModelTypes.LexiconTraversal;
1414
import ProbabilityMass = LexicalModelTypes.ProbabilityMass;
1515
import Transform = LexicalModelTypes.Transform;
16+
import TransformUtils from '#./transformUtils.js';
1617

1718
type RealizedInput = ProbabilityMass<Transform>[]; // NOT Distribution - they're masses from separate distributions.
1819

@@ -136,9 +137,15 @@ export class SearchNode {
136137
*/
137138
readonly spaceId: number;
138139

140+
/**
141+
* Notes if the most recent edit processed in the node's represented search path
142+
* was not a deletion. Root nodes are considered to meet this condition.
143+
*/
144+
readonly nonDeletion: boolean;
145+
139146
constructor(rootTraversal: LexiconTraversal, spaceId: number, toKey?: (arg0: string) => string);
140-
constructor(node: SearchNode, spaceId?: number);
141-
constructor(param1: LexiconTraversal | SearchNode, spaceId?: number, toKey?: ((arg0: string) => string)) {
147+
constructor(node: SearchNode, spaceId?: number, isSubstitution?: boolean);
148+
constructor(param1: LexiconTraversal | SearchNode, spaceId?: number, param2?: boolean | ((arg0: string) => string)) {
142149
if(param1 instanceof SearchNode) {
143150
const priorNode = param1;
144151

@@ -149,6 +156,8 @@ export class SearchNode {
149156
this.priorInput = priorNode.priorInput.slice(0);
150157
this.matchedTraversals = priorNode.matchedTraversals.slice();
151158

159+
this.nonDeletion = param2 as boolean;
160+
152161
// Do NOT copy over _inputCost; this is a helper-constructor for methods
153162
// building new nodes... which will have a different cost.
154163
delete this._inputCost;
@@ -160,8 +169,10 @@ export class SearchNode {
160169
this.calculation = new ClassicalDistanceCalculation();
161170
this.matchedTraversals = [param1];
162171
this.priorInput = [];
172+
const toKey = param2 as ((arg0: string) => string);
163173
this.toKey = toKey || (x => x);
164174
this.spaceId = spaceId;
175+
this.nonDeletion = true;
165176
}
166177
}
167178

@@ -250,12 +261,28 @@ export class SearchNode {
250261
throw new Error("Invalid state: will not take new input while still processing Transform subset");
251262
}
252263

264+
// Do not create insertion nodes after an empty transform; only before.
265+
// "Before" and "after" are identical for empty transforms - why duplicate?
266+
//
267+
// Also, do not create insertion nodes directly after deletions; that's
268+
// essentially a substitution. We'll have an equivalent edge already built
269+
// with higher-accuracy modeling.
270+
//
271+
// Following previous insertions is fine, as is following a proper
272+
// match/substitution.
273+
if(this.priorInput.length > 0) {
274+
const priorInput = this.priorInput[this.priorInput.length - 1].sample;
275+
if(TransformUtils.isEmpty(priorInput) || !this.nonDeletion) {
276+
return [];
277+
}
278+
}
279+
253280
let edges: SearchNode[] = [];
254281

255282
for(let lexicalChild of this.currentTraversal.children()) {
256283
let childCalc = this.calculation.addMatchChar(lexicalChild.char);
257284

258-
let searchChild = new SearchNode(this);
285+
let searchChild = new SearchNode(this, this.spaceId, true);
259286
searchChild.calculation = childCalc;
260287
searchChild.priorInput = this.priorInput;
261288
searchChild.matchedTraversals.push(lexicalChild.traversal());
@@ -367,7 +394,7 @@ export class SearchNode {
367394

368395
keySet.add(char);
369396

370-
const node = new SearchNode(this);
397+
const node = new SearchNode(this, this.spaceId, this.nonDeletion);
371398
node.calculation = calculation;
372399
node.partialEdge.subsetSubindex++;
373400
// Append the newly-processed char to the subset's `insert` string.
@@ -415,7 +442,7 @@ export class SearchNode {
415442
// `insert` string.
416443
childSubset.insert += SENTINEL_CODE_UNIT;
417444

418-
const node = new SearchNode(this);
445+
const node = new SearchNode(this, this.spaceId, this.nonDeletion);
419446
node.calculation = childCalc;
420447
node.matchedTraversals.push(childTraversal);
421448
node.partialEdge.subsetSubindex++;
@@ -463,7 +490,7 @@ export class SearchNode {
463490
continue;
464491
}
465492

466-
const node = new SearchNode(this, edgeId);
493+
const node = new SearchNode(this, edgeId, isSubstitution);
467494
node.calculation = edgeCalc;
468495
node.partialEdge = {
469496
doSubsetMatching: isSubstitution,
@@ -513,31 +540,6 @@ export class SearchNode {
513540
return this.setupSubsetProcessing(dist, true, edgeId);
514541
}
515542

516-
/**
517-
* A key uniquely identifying identical search path nodes. Replacement of a keystroke's
518-
* text in a manner that results in identical path to a different keystroke should result
519-
* in both path nodes sharing the same pathKey value.
520-
*
521-
* This mostly matters after re-use of a SearchSpace when a token is extended; children of
522-
* completed paths are possible, and so children can be re-built in such a case.
523-
*/
524-
get pathKey(): string {
525-
// Note: deleteLefts apply before inserts, so order them that way.
526-
let inputString = this.priorInput.map((value) => '-' + value.sample.deleteLeft + '+' + value.sample.insert).join('');
527-
const partialEdge = this.partialEdge;
528-
// Make sure the subset progress contributes to the key!
529-
if(partialEdge) {
530-
const subset = partialEdge.transformSubset;
531-
// We need a copy of at least one insert string in the subset here. Without that, all subsets
532-
// at the same level of the search, against the same match, look identical - not cool!
533-
inputString += `-${subset.entries[0].sample.deleteLeft}+<${subset.key},${partialEdge.subsetSubindex},${subset.entries[0].sample.insert}>`;
534-
}
535-
let matchString = this.calculation.matchSequence.join('');
536-
537-
// TODO: might should also track diagonalWidth.
538-
return inputString + ' @@ ' + matchString;
539-
}
540-
541543
/**
542544
* A key uniquely identifying identical match sequences. Reaching the same net result
543545
* by different paths should result in identical `resultKey` values.

web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -52,16 +52,6 @@ export class SearchQuotientSpur implements SearchQuotientNode {
5252
*/
5353
public returnedValues?: {[resultKey: string]: SearchNode} = {}; // TODO: make it private again!
5454

55-
/**
56-
* Acts as a Map that prevents duplicating a correction-search path if reached
57-
* more than once.
58-
*/
59-
protected get processedEdgeSet(): {[pathKey: string]: boolean} {
60-
return this._processedEdgeSet;
61-
}
62-
63-
private _processedEdgeSet?: {[pathKey: string]: boolean} = {};
64-
6555
/**
6656
* Provides a heuristic for the base cost at each depth if the best
6757
* individual input were taken at that level.
@@ -245,7 +235,7 @@ export class SearchQuotientSpur implements SearchQuotientNode {
245235

246236
const result = this.parentPath.handleNextNode();
247237

248-
if(result.type == 'complete' && !this.processedEdgeSet[result.finalNode.pathKey]) {
238+
if(result.type == 'complete') {
249239
this.addEdgesForNodes(result.finalNode);
250240
}
251241

@@ -262,14 +252,6 @@ export class SearchQuotientSpur implements SearchQuotientNode {
262252
cost: currentNode.currentCost
263253
}
264254

265-
// Have we already processed a matching edge? If so, skip it.
266-
// We already know the previous edge is of lower cost.
267-
if(this.processedEdgeSet[currentNode.pathKey]) {
268-
return unmatchedResult;
269-
} else {
270-
this.processedEdgeSet[currentNode.pathKey] = true;
271-
}
272-
273255
// Stage 1: filter out nodes/edges we want to prune
274256

275257
// Forbid a raw edit-distance of greater than 2.

web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,13 @@ import { defaultWordbreaker, WordBreakProperty } from '@keymanapp/models-wordbre
55

66
import TransformUtils from './transformUtils.js';
77
import { determineModelTokenizer, determineModelWordbreaker, determinePunctuationFromModel } from './model-helpers.js';
8-
import { ContextTracker } from './correction/context-tracker.js';
98
import { ContextState, determineContextSlideTransform } from './correction/context-state.js';
9+
import { ContextTracker } from './correction/context-tracker.js';
10+
import { ContextTransition } from './correction/context-transition.js';
1011
import { ExecutionTimer } from './correction/execution-timer.js';
1112
import ModelCompositor from './model-compositor.js';
13+
import { getBestMatches } from './correction/distance-modeler.js';
14+
import { SearchQuotientSpur } from './correction/search-quotient-spur.js';
1215

1316
const searchForProperty = defaultWordbreaker.searchForProperty;
1417

@@ -23,7 +26,6 @@ import Reversion = LexicalModelTypes.Reversion;
2326
import Suggestion = LexicalModelTypes.Suggestion;
2427
import SuggestionTag = LexicalModelTypes.SuggestionTag;
2528
import Transform = LexicalModelTypes.Transform;
26-
import { ContextTransition, getBestMatches, SearchPath } from './test-index.js';
2729

2830
/*
2931
* The functions in this file exist to provide unit-testable stateless components for the
@@ -494,7 +496,7 @@ export async function correctAndEnumerate(
494496
let rawPredictions: CorrectionPredictionTuple[] = [];
495497
let bestCorrectionCost: number;
496498
const correctionPredictionMap: Record<string, Distribution<Suggestion>> = {};
497-
for await(const match of getBestMatches(searchSpaces[0] as SearchPath, timer)) {
499+
for await(const match of getBestMatches(searchSpaces[0] as SearchQuotientSpur, timer)) {
498500
// Corrections obtained: now to predict from them!
499501
const correction = match.matchString;
500502
const searchSpace = searchSpaces.find(s => s.spaceId == match.spaceId);

web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/distance-modeler.tests.ts

Lines changed: 119 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -337,31 +337,130 @@ describe('Correction Distance Modeler', () => {
337337
});
338338
});
339339

340-
// Consider adding more, deeper?
341-
it('builds insertion edges based on lexicon, from root', () => {
342-
const rootTraversal = testModel.traverseFromRoot();
343-
assert.isNotEmpty(rootTraversal);
340+
describe('buildInsertionEdges()', () => {
341+
it('builds insertion edges from (empty) root', () => {
342+
const rootTraversal = testModel.traverseFromRoot();
343+
assert.isNotEmpty(rootTraversal);
344344

345-
const rootSeed = SEARCH_EDGE_SEED++;
346-
const rootNode = new correction.SearchNode(rootTraversal, rootSeed);
347-
assert.equal(rootNode.calculation.getHeuristicFinalCost(), 0);
345+
const rootSeed = SEARCH_EDGE_SEED++;
346+
const rootNode = new correction.SearchNode(rootTraversal, rootSeed);
347+
assert.equal(rootNode.calculation.getHeuristicFinalCost(), 0);
348348

349-
const edges = rootNode.buildInsertionEdges();
350-
assert.isAbove(edges.length, 0);
349+
const edges = rootNode.buildInsertionEdges();
350+
assert.isAbove(edges.length, 0);
351351

352-
let expectedChildCount = 0;
353-
for(const child of rootTraversal.children()) {
354-
expectedChildCount++;
352+
let expectedChildCount = 0;
353+
for(const child of rootTraversal.children()) {
354+
expectedChildCount++;
355355

356-
const childEdge = edges.filter(value => value.calculation.lastMatchEntry == child.char)[0];
357-
assert.isOk(childEdge);
358-
assert.isEmpty(childEdge.priorInput);
359-
assert.isEmpty(childEdge.calculation.inputSequence);
360-
assert.isAbove(childEdge.currentCost, 0);
361-
assert.equal(childEdge.spaceId, rootSeed);
362-
}
356+
const childEdge = edges.filter(value => value.calculation.lastMatchEntry == child.char)[0];
357+
assert.isOk(childEdge);
358+
assert.isEmpty(childEdge.priorInput);
359+
assert.isEmpty(childEdge.calculation.inputSequence);
360+
assert.isAbove(childEdge.currentCost, 0);
361+
assert.equal(childEdge.spaceId, rootSeed);
362+
}
363363

364-
assert.equal(edges.length, expectedChildCount);
364+
assert.equal(edges.length, expectedChildCount);
365+
});
366+
367+
it('builds insertion edges directly after prior insertion', () => {
368+
const rootTraversal = testModel.traverseFromRoot();
369+
assert.isNotEmpty(rootTraversal);
370+
371+
const rootSeed = SEARCH_EDGE_SEED++;
372+
const rootNode = new correction.SearchNode(rootTraversal, rootSeed);
373+
assert.equal(rootNode.calculation.getHeuristicFinalCost(), 0);
374+
375+
const edges = rootNode.buildInsertionEdges();
376+
assert.isAbove(edges.length, 0);
377+
378+
const firstChild = edges[0];
379+
const childEdges = firstChild.buildInsertionEdges();
380+
381+
let expectedChildCount = 0;
382+
for(const child of firstChild.currentTraversal.children()) {
383+
expectedChildCount++;
384+
385+
const childEdge = edges.filter(value => value.calculation.lastMatchEntry == child.char)[0];
386+
assert.isOk(childEdge);
387+
assert.isEmpty(childEdge.priorInput);
388+
assert.isEmpty(childEdge.calculation.inputSequence);
389+
assert.isAbove(childEdge.currentCost, 0);
390+
assert.equal(childEdge.spaceId, rootSeed);
391+
}
392+
393+
assert.equal(childEdges.length, expectedChildCount);
394+
});
395+
396+
it('does not build insertion edges after a deletion', () => {
397+
const rootTraversal = testModel.traverseFromRoot();
398+
assert.isNotEmpty(rootTraversal);
399+
400+
const rootSeed = SEARCH_EDGE_SEED++;
401+
const rootNode = new correction.SearchNode(rootTraversal, rootSeed);
402+
assert.equal(rootNode.calculation.getHeuristicFinalCost(), 0);
403+
404+
const edges = rootNode.buildDeletionEdges([{
405+
sample: { insert: 'd', deleteLeft: 0 },
406+
p: 1
407+
}], SEARCH_EDGE_SEED++).flatMap(n => n.processSubsetEdge());
408+
assert.isAbove(edges.length, 0);
409+
const firstChild = edges[0];
410+
411+
const insertEdgesAfterDelete = firstChild.buildInsertionEdges();
412+
assert.equal(insertEdgesAfterDelete.length, 0);
413+
});
414+
415+
it('does not build insertion edges after receiving an empty transform as input', () => {
416+
const rootTraversal = testModel.traverseFromRoot();
417+
assert.isNotEmpty(rootTraversal);
418+
419+
const rootSeed = SEARCH_EDGE_SEED++;
420+
const rootNode = new correction.SearchNode(rootTraversal, rootSeed);
421+
assert.equal(rootNode.calculation.getHeuristicFinalCost(), 0);
422+
423+
const edges = rootNode.buildSubstitutionEdges([{
424+
// intentionally empty!
425+
sample: { insert: '', deleteLeft: 0 },
426+
p: 1
427+
}], SEARCH_EDGE_SEED++).flatMap(n => n.processSubsetEdge());
428+
assert.isAbove(edges.length, 0);
429+
const firstChild = edges[0];
430+
const insertsAfterEmpty = firstChild.buildInsertionEdges();
431+
432+
assert.equal(insertsAfterEmpty.length, 0);
433+
});
434+
435+
it('builds insertion edges after a substitution', () => {
436+
const rootTraversal = testModel.traverseFromRoot();
437+
assert.isNotEmpty(rootTraversal);
438+
439+
const rootSeed = SEARCH_EDGE_SEED++;
440+
const rootNode = new correction.SearchNode(rootTraversal, rootSeed);
441+
assert.equal(rootNode.calculation.getHeuristicFinalCost(), 0);
442+
443+
const subSeed = SEARCH_EDGE_SEED++;
444+
const edges = rootNode.buildSubstitutionEdges([{
445+
sample: { insert: 't', deleteLeft: 0 },
446+
p: 1
447+
}], subSeed).flatMap(n => n.processSubsetEdge());
448+
assert.isAbove(edges.length, 0);
449+
const firstChild = edges[0];
450+
const insertsAfterInput = firstChild.buildInsertionEdges();
451+
452+
let expectedChildCount = 0;
453+
for(const child of firstChild.currentTraversal.children()) {
454+
expectedChildCount++;
455+
456+
const childEdge = edges.filter(value => value.calculation.lastMatchEntry == child.char)[0];
457+
assert.isOk(childEdge);
458+
assert.equal(childEdge.priorInput.length, 1);
459+
assert.equal(childEdge.spaceId, subSeed);
460+
}
461+
462+
assert.equal(insertsAfterInput.length, expectedChildCount);
463+
});
365464
});
366465

367466
describe('buildDeletionEdges @ token root', () => {

0 commit comments

Comments
 (0)