Skip to content

Commit 3bfe361

Browse files
committed
refactor(web): remove lower-level SearchSpaceTier + associated heuristic
This change simplifies the mechanics of each SearchSpace by dropping the complexity of managing an intermediate tier. This does get rid of a mild boost that assists the correction-search process in prioritizing nearly-completed correction paths, but we should rework that heuristic anyway - especially in light of upcoming changes and potential complications once we start supporting correction from alternate tokenizations. Build-bot: skip build:web Test-bot: skip
1 parent cf5b2d9 commit 3bfe361

File tree

2 files changed

+40
-145
lines changed

2 files changed

+40
-145
lines changed

web/src/engine/predictive-text/worker-thread/src/main/correction/distance-modeler.ts

Lines changed: 40 additions & 143 deletions
Original file line numberDiff line numberDiff line change
@@ -544,44 +544,6 @@ export class SearchNode {
544544
}
545545
}
546546

547-
/**
548-
* Categorizes Nodes by how many input Transforms (edges) deep they are within the search tree.
549-
*/
550-
class SearchSpaceTier {
551-
correctionQueue: PriorityQueue<SearchNode>;
552-
processed: SearchNode[] = [];
553-
554-
/**
555-
* Indicates the depth searched, in terms of number of inputs, by this tier of the search space.
556-
*/
557-
index: number;
558-
559-
constructor(instance: SearchSpaceTier);
560-
constructor(index: number, initialEdges?: SearchNode[]);
561-
constructor(arg1: number | SearchSpaceTier, initialEdges?: SearchNode[]) {
562-
if(typeof arg1 == 'number') {
563-
this.index = arg1;
564-
this.correctionQueue = new PriorityQueue<SearchNode>(QUEUE_NODE_COMPARATOR, initialEdges);
565-
return;
566-
} else {
567-
this.index = arg1.index;
568-
this.processed = [].concat(arg1.processed);
569-
this.correctionQueue = new PriorityQueue(arg1.correctionQueue);
570-
}
571-
}
572-
573-
increaseMaxEditDistance() {
574-
// By extracting the entries from the priority queue and increasing distance outside of it as a batch job,
575-
// we get an O(N) implementation, rather than the O(N log N) that would result from maintaining the original queue.
576-
let entries = this.correctionQueue.toArray();
577-
578-
entries.forEach(function(edge) { edge.calculation = edge.calculation.increaseMaxDistance(); });
579-
580-
// Since we just modified the stored instances, and the costs may have shifted, we need to re-heapify.
581-
this.correctionQueue = new PriorityQueue<SearchNode>(QUEUE_NODE_COMPARATOR, entries);
582-
}
583-
}
584-
585547
export class SearchResult {
586548
private resultNode: SearchNode;
587549

@@ -655,17 +617,14 @@ type PathResult = NullPath | IntermediateSearchPath | CompleteSearchPath;
655617
// The set of search spaces corresponding to the same 'context' for search.
656618
// Whenever a wordbreak boundary is crossed, a new instance should be made.
657619
export class SearchSpace {
658-
private QUEUE_SPACE_COMPARATOR: Comparator<SearchSpaceTier>;
659-
660620
// p = 1 / (e^4) = 0.01831563888. This still exceeds many neighboring keys!
661621
// p = 1 / (e^5) = 0.00673794699. Strikes a good balance.
662622
// Should easily give priority to neighboring keys before edit-distance kicks in (when keys are a bit ambiguous)
663623
static readonly EDIT_DISTANCE_COST_SCALE = 5;
664624
static readonly MIN_KEYSTROKE_PROBABILITY = 0.0001;
665625
static readonly DEFAULT_ALLOTTED_CORRECTION_TIME_INTERVAL = 33; // in milliseconds.
666626

667-
private tierOrdering: SearchSpaceTier[] = [];
668-
private selectionQueue: PriorityQueue<SearchSpaceTier>;
627+
private selectionQueue: PriorityQueue<SearchNode>;
669628
private _inputSequence: Distribution<Transform>[] = [];
670629
private minInputCost: number[] = [];
671630
private rootNode: SearchNode;
@@ -705,10 +664,6 @@ export class SearchSpace {
705664
*/
706665
constructor(model: LexicalModel);
707666
constructor(arg1: SearchSpace|LexicalModel) {
708-
// Constructs the priority-queue comparator-closure needed for determining which
709-
// tier should be searched next.
710-
this.buildQueueSpaceComparator();
711-
712667
if(arg1 instanceof SearchSpace) {
713668
this._inputSequence = [].concat(arg1._inputSequence);
714669
this.minInputCost = [].concat(arg1.minInputCost);
@@ -718,8 +673,8 @@ export class SearchSpace {
718673
this.returnedValues = {...arg1.returnedValues};
719674
this.processedEdgeSet = {...arg1.processedEdgeSet};
720675

721-
this.tierOrdering = arg1.tierOrdering.map((tier) => new SearchSpaceTier(tier));
722-
this.selectionQueue = new PriorityQueue(this.QUEUE_SPACE_COMPARATOR, this.tierOrdering);
676+
this.selectionQueue = new PriorityQueue(QUEUE_NODE_COMPARATOR);
677+
this.selectionQueue.enqueueAll([...arg1.selectionQueue.toArray()]);
723678
return;
724679
}
725680

@@ -730,68 +685,11 @@ export class SearchSpace {
730685
throw new Error("The provided model does not implement the `traverseFromRoot` function, which is needed to support robust correction searching.");
731686
}
732687

733-
this.selectionQueue = new PriorityQueue<SearchSpaceTier>(this.QUEUE_SPACE_COMPARATOR);
688+
this.selectionQueue = new PriorityQueue<SearchNode>(QUEUE_NODE_COMPARATOR);
734689
this.rootNode = new SearchNode(model.traverseFromRoot(), model.toKey ? model.toKey.bind(model) : null);
690+
this.selectionQueue.enqueue(this.rootNode);
735691

736-
this.completedPaths = [this.rootNode];
737-
738-
// Adds a base level queue to handle initial insertions.
739-
// Start with _just_ the root node. Necessary for proper empty-token, empty-input handling!
740-
let baseTier = new SearchSpaceTier(0, [this.rootNode]);
741-
this.tierOrdering.push(baseTier);
742-
this.selectionQueue.enqueue(baseTier);
743-
}
744-
745-
private buildQueueSpaceComparator() {
746-
let searchSpace = this;
747-
748-
this.QUEUE_SPACE_COMPARATOR = function(space1, space2) {
749-
let node1 = space1.correctionQueue.peek();
750-
let node2 = space2.correctionQueue.peek();
751-
752-
let index1 = space1.index;
753-
let index2 = space2.index;
754-
755-
let sign = 1;
756-
757-
if(index2 < index1) {
758-
let temp = index2;
759-
index2 = index1;
760-
index1 = temp;
761-
762-
sign = -1;
763-
}
764-
765-
// Boost the cost of the lower tier by the minimum cost possible for the
766-
// missing inputs between them. In essence, compare the nodes as if the
767-
// lower tier had the most likely input appended for each such input
768-
// missing at the lower tier.
769-
//
770-
// A 100% admissible heuristic to favor a deeper search assuming no
771-
// deleteLefts follow as later inputs. The added cost is guaranteed if
772-
// the path is traversed further - even with subset use. A subset that
773-
// doesn't match the char instantly carries higher cost than this due to
774-
// the edit distance, even if the bin has max probability.
775-
//
776-
// Remember, tier index i's last used input was from input index i-1. As a
777-
// result, i is the first needed input index, with index2 - 1 the last
778-
// entry needed to match them.
779-
let tierMinCost: number = 0;
780-
for(let i=index1; i < index2; i++) {
781-
tierMinCost = tierMinCost + searchSpace.minInputCost[i];
782-
}
783-
784-
// Guards, just in case one of the search spaces ever has an empty node.
785-
if(node1 && node2) {
786-
// If node1 is lower-tier, node1 is the one in need of boosted cost.
787-
// `sign` flips it when node2 is lower tier.
788-
return node1.currentCost - node2.currentCost + sign * tierMinCost;
789-
} else if(node2) {
790-
return 1;
791-
} else {
792-
return -1;
793-
}
794-
}
692+
this.completedPaths = [];
795693
}
796694

797695
/**
@@ -802,7 +700,14 @@ export class SearchSpace {
802700
}
803701

804702
increaseMaxEditDistance() {
805-
this.tierOrdering.forEach(function(tier) { tier.increaseMaxEditDistance() });
703+
// By extracting the entries from the priority queue and increasing distance outside of it as a batch job,
704+
// we get an O(N) implementation, rather than the O(N log N) that would result from maintaining the original queue.
705+
const entries = this.selectionQueue.toArray();
706+
707+
entries.forEach(function(edge) { edge.calculation = edge.calculation.increaseMaxDistance(); });
708+
709+
// Since we just modified the stored instances, and the costs may have shifted, we need to re-heapify.
710+
this.selectionQueue = new PriorityQueue<SearchNode>(QUEUE_NODE_COMPARATOR, entries);
806711
}
807712

808713
get correctionsEnabled() {
@@ -845,10 +750,7 @@ export class SearchSpace {
845750
newlyAvailableEdges = newlyAvailableEdges.concat(batch);
846751
});
847752

848-
// Now that we've built the new edges, we can efficiently construct the new search tier.
849-
let tier = new SearchSpaceTier(this.tierOrdering.length, newlyAvailableEdges);
850-
this.tierOrdering.push(tier);
851-
this.selectionQueue.enqueue(tier);
753+
this.selectionQueue.enqueueAll(newlyAvailableEdges);
852754
}
853755

854756
// TODO: will want eventually for reversions and/or backspaces
@@ -864,12 +766,15 @@ export class SearchSpace {
864766
* @returns
865767
*/
866768
private hasNextMatchEntry(): boolean {
867-
let topQueue = this.selectionQueue.peek();
868-
if(topQueue) {
869-
return topQueue.correctionQueue.count > 0;
870-
} else {
871-
return false;
769+
return this.selectionQueue.count > 0 && this.selectionQueue.peek().currentCost < Number.POSITIVE_INFINITY;
770+
}
771+
772+
public getCurrentCost(): number {
773+
if(this.selectionQueue.count > 0) {
774+
return this.selectionQueue.peek().currentCost;
872775
}
776+
777+
return Number.POSITIVE_INFINITY;
873778
}
874779

875780
/**
@@ -883,8 +788,7 @@ export class SearchSpace {
883788
return { type: 'none' };
884789
}
885790

886-
let bestTier = this.selectionQueue.dequeue();
887-
let currentNode = bestTier.correctionQueue.dequeue();
791+
let currentNode = this.selectionQueue.dequeue();
888792

889793
let unmatchedResult: IntermediateSearchPath = {
890794
type: 'intermediate',
@@ -894,7 +798,6 @@ export class SearchSpace {
894798
// Have we already processed a matching edge? If so, skip it.
895799
// We already know the previous edge is of lower cost.
896800
if(this.processedEdgeSet[currentNode.pathKey]) {
897-
this.selectionQueue.enqueue(bestTier);
898801
return unmatchedResult;
899802
} else {
900803
this.processedEdgeSet[currentNode.pathKey] = true;
@@ -913,25 +816,21 @@ export class SearchSpace {
913816
substitutionsOnly = true;
914817
}
915818

916-
let tierMinCost = 0;
917-
for(let i = 0; i <= bestTier.index; i++) {
918-
tierMinCost += this.minInputCost[i];
919-
}
920-
921819
// Thresholds _any_ path, partially based on currently-traversed distance.
922820
// Allows a little 'wiggle room' + 2 "hard" edits.
923821
// Can be important if needed characters don't actually exist on the keyboard
924822
// ... or even just not the then-current layer of the keyboard.
925-
if(currentNode.currentCost > tierMinCost + 2.5 * SearchSpace.EDIT_DISTANCE_COST_SCALE) {
823+
//
824+
// TODO: still consider the lowest-cost individual edges for THIS specific criterion.
825+
if(currentNode.currentCost > /* tierMinCost */ + 2.5 * SearchSpace.EDIT_DISTANCE_COST_SCALE) {
926826
return unmatchedResult;
927827
}
928828

929829
// Stage 2: process subset further OR build remaining edges
930830

931831
if(currentNode.hasPartialInput) {
932832
// Re-use the current queue; the number of total inputs considered still holds.
933-
bestTier.correctionQueue.enqueueAll(currentNode.processSubsetEdge());
934-
this.selectionQueue.enqueue(bestTier);
833+
this.selectionQueue.enqueueAll(currentNode.processSubsetEdge());
935834
return unmatchedResult;
936835
}
937836

@@ -941,15 +840,19 @@ export class SearchSpace {
941840
// Always possible, as this does not require any new input.
942841
if(!substitutionsOnly) {
943842
let insertionEdges = currentNode.buildInsertionEdges();
944-
bestTier.correctionQueue.enqueueAll(insertionEdges);
843+
this.selectionQueue.enqueueAll(insertionEdges);
945844
}
946845

947-
if(bestTier.index == this.tierOrdering.length - 1) {
846+
if(currentNode.calculation.inputSequence.length == this.inputSequence.length) {
948847
// It was the final tier - store the node for future reference.
949848
this.completedPaths.push(currentNode);
950849

951-
// Since we don't modify any other tier, we may simply reinsert the removed tier.
952-
this.selectionQueue.enqueue(bestTier);
850+
if((this.returnedValues[currentNode.resultKey]?.currentCost ?? Number.POSITIVE_INFINITY) > currentNode.currentCost) {
851+
this.returnedValues[currentNode.resultKey] = currentNode;
852+
} else {
853+
// Not a better cost, so reject it and move on to the next potential result.
854+
return this.handleNextNode();
855+
}
953856

954857
return {
955858
type: 'complete',
@@ -958,27 +861,21 @@ export class SearchSpace {
958861
};
959862
} else {
960863
// Time to construct new edges for the next tier!
961-
let nextTier = this.tierOrdering[bestTier.index+1];
962-
963-
let inputIndex = nextTier.index;
864+
let inputIndex = currentNode.calculation.inputSequence.length;
964865

965866
let deletionEdges: SearchNode[] = [];
966867
if(!substitutionsOnly) {
967-
deletionEdges = currentNode.buildDeletionEdges(this._inputSequence[inputIndex-1]);
868+
deletionEdges = currentNode.buildDeletionEdges(this._inputSequence[inputIndex]);
968869
}
969-
const substitutionEdges = currentNode.buildSubstitutionEdges(this._inputSequence[inputIndex-1]);
870+
const substitutionEdges = currentNode.buildSubstitutionEdges(this._inputSequence[inputIndex]);
970871
let batch = deletionEdges.concat(substitutionEdges);
971872

972873
// Skip the queue for the first pass; there will ALWAYS be at least one pass,
973874
// and queue-enqueing does come with a cost - avoid unnecessary overhead here.
974875
batch = batch.flatMap(e => e.processSubsetEdge());
975876

976877
// Note: we're live-modifying the tier's cost here! The priority queue loses its guarantees as a result.
977-
nextTier.correctionQueue.enqueueAll(batch);
978-
979-
// So, we simply rebuild the selection queue.
980-
// Also re-adds `bestTier`, which we'd de-queued.
981-
this.selectionQueue = new PriorityQueue<SearchSpaceTier>(this.QUEUE_SPACE_COMPARATOR, this.tierOrdering);
878+
this.selectionQueue.enqueueAll(batch);
982879

983880
// We didn't reach an end-node, so we just end the iteration and continue the search.
984881
}

web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-tokenization.tests.ts

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -137,10 +137,8 @@ describe('ContextTokenization', function() {
137137
};
138138

139139
let baseTokenization = new ContextTokenization(tokens, transitionEdits, null /* dummy val */);
140-
141140
let cloned = new ContextTokenization(baseTokenization);
142141

143-
assert.notDeepEqual(cloned, baseTokenization);
144142
assert.deepEqual(cloned.tokens.map((token) => token.searchSpace.inputSequence),
145143
baseTokenization.tokens.map((token) => token.searchSpace.inputSequence));
146144

0 commit comments

Comments
 (0)