Commit 1df3a35
refactor(web): maintain correction-path cost thresholding
We now track the maximum individual probability of any one input in the full, source fat-finger distribution. For some tokenizations, the corresponding input will not be considered, but that should not affect thresholding behavior.

Build-bot: skip build:web
Test-bot: skip
1 parent 3bfe361 commit 1df3a35
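
The change is easiest to see end-to-end. Below is a minimal, self-contained TypeScript sketch of the mechanism described above, assembled only from the math visible in the diffs that follow; the helper names (ProbMass, bestProb, bestCostFloorByDepth, shouldPrune) are illustrative and do not exist in the Keyman sources.

// Illustrative sketch of the correction-path thresholding idea in this commit.
interface ProbMass { p: number; }

// Best individual probability across the FULL fat-finger distribution for one keystroke,
// taken before the distribution is split into per-tokenization subsets (see context-state.ts below).
function bestProb(distribution: ProbMass[]): number {
  return distribution.reduce((best, curr) => Math.max(best, curr.p), 0);
}

// Costs are -ln(p), so the "best possible" cost accumulates keystroke by keystroke;
// this mirrors lowestCostAtDepth in distance-modeler.ts below.
function bestCostFloorByDepth(bestProbs: number[]): number[] {
  const floors: number[] = [];
  let running = 0;
  for (const p of bestProbs) {
    running += -Math.log(p);
    floors.push(running);
  }
  return floors;
}

// A correction path at a given depth is abandoned once its cost exceeds that depth's floor
// plus a fixed margin (2.5 * EDIT_DISTANCE_COST_SCALE in the real code).
function shouldPrune(pathCost: number, floorAtDepth: number, margin: number): boolean {
  return pathCost > floorAtDepth + margin;
}

// Example: best probabilities of 0.6 and 0.8 give floors of roughly 0.51 and 0.73.
console.log(bestCostFloorByDepth([0.6, 0.8]));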

File tree

8 files changed: +126 −75 lines

web/src/engine/predictive-text/worker-thread/src/main/correction/context-state.ts

Lines changed: 5 additions & 2 deletions
@@ -192,7 +192,7 @@ export class ContextState {
    */
   analyzeTransition(
     context: Context,
-    transformDistribution?: Distribution<Transform>,
+    transformDistribution: Distribution<Transform>,
     // overrides checks for token substitution that can fail for large applied suggestions.
     isApplyingSuggestion?: boolean
   ): ContextTransition {
@@ -245,8 +245,11 @@
     // and then fold all resulting search spaces (on the final token) into one.
     const tokenizationAnalysis = trueInputSubset.pendingSet.get(baseTokenization);

+    // Determine the best probability from among ALL available inputs, before they're split
+    // into subsets.
+    const bestProb = transformDistribution.reduce((best, curr) => Math.max(best, curr.p), 0);
     // Should gain one per subsetBuilder.subsets entry.
-    const resultTokenization = baseTokenization.evaluateTransition(tokenizationAnalysis, lexicalModel, trueInput);
+    const resultTokenization = baseTokenization.evaluateTransition(tokenizationAnalysis, lexicalModel, trueInput, bestProb);

     // ------------
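
As a quick worked example of the reduce added above (the distribution values here are invented):

// Hypothetical fat-finger distribution; only the { p: number } shape matters for this step.
const transformDistribution = [{ p: 0.62 }, { p: 0.27 }, { p: 0.11 }];
const bestProb = transformDistribution.reduce((best, curr) => Math.max(best, curr.p), 0);
// bestProb === 0.62; the 0 seed also keeps the result defined if the distribution is empty.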

web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts

Lines changed: 7 additions & 4 deletions
@@ -24,6 +24,7 @@ import Transform = LexicalModelTypes.Transform;
 export interface TokenInputSource {
   trueTransform: Transform;
   inputStartIndex: number;
+  bestProbFromSet: number;
 }

 /**
@@ -129,9 +130,10 @@ export class ContextToken {
       rawTransformDistributions.forEach((entry) => {
         this._inputRange.push({
           trueTransform: entry[0].sample,
-          inputStartIndex: 0
+          inputStartIndex: 0,
+          bestProbFromSet: 1
         });
-        this.searchSpace.addInput(entry);
+        this.searchSpace.addInput(entry, 1);
       });
     }
   }
@@ -142,7 +144,7 @@
    */
   addInput(inputSource: TokenInputSource, distribution: Distribution<Transform>) {
     this._inputRange.push(inputSource);
-    this.searchSpace.addInput(distribution);
+    this.searchSpace.addInput(distribution, inputSource.bestProbFromSet);
   }

   /**
@@ -350,7 +352,8 @@
       backupToken = new ContextToken(constructingToken);
       constructingToken.addInput({
         trueTransform: priorSourceInput.trueTransform,
-        inputStartIndex: priorSourceInput.inputStartIndex + extraCharsAdded
+        inputStartIndex: priorSourceInput.inputStartIndex + extraCharsAdded,
+        bestProbFromSet: priorSourceInput.bestProbFromSet
       }, tailDistribution);

       const lenToCommit = lenBeforeLastApply + extraCharsAdded;
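
A note on the hard-coded 1 values above: a bestProbFromSet of 1 is the neutral default, since -Math.log(1) === 0 adds nothing to the per-depth cost floor. A tiny illustrative check (not part of the Keyman sources):

// bestProbFromSet: 1 leaves the depth floor unchanged; smaller values raise it.
const costForCertainInput = -Math.log(1);    // 0
const costForLikelyInput  = -Math.log(0.75); // ≈ 0.288
console.log(costForCertainInput, costForLikelyInput);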

web/src/engine/predictive-text/worker-thread/src/main/correction/context-tokenization.ts

Lines changed: 7 additions & 2 deletions
@@ -497,12 +497,17 @@ export class ContextTokenization {
    * @param lexicalModel The active lexical model
    * @param sourceInput The Transform associated with the keystroke triggering
    * the transition.
+   * @param bestProbFromSet The probability of the single most likely input
+   * transform in the overall transformDistribution associated with the
+   * keystroke triggering the transition. It need not be represented by the
+   * pendingTokenization to be built.
    * @returns
    */
   evaluateTransition(
     pendingTokenization: PendingTokenization,
     lexicalModel: LexicalModel,
-    sourceInput: Transform
+    sourceInput: Transform,
+    bestProbFromSet: number
   ): ContextTokenization {
     const { alignment: alignment, inputs } = pendingTokenization;
     const sliceIndex = alignment.edgeWindow.sliceIndex;
@@ -581,7 +586,7 @@
     if(affectedToken.inputRange.length == 0 && distribution[0].sample.deleteLeft != 0) {
       distribution = distribution.map((mass) => ({sample: { ...mass.sample, deleteLeft: 0 }, p: mass.p }));
     }
-    affectedToken.addInput({trueTransform: sourceInput, inputStartIndex: appliedLength}, distribution);
+    affectedToken.addInput({trueTransform: sourceInput, inputStartIndex: appliedLength, bestProbFromSet: bestProbFromSet}, distribution);
     appliedLength += KMWString.length(distribution[0].sample.insert);

     const tokenize = determineModelTokenizer(lexicalModel);
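
To make the new @param note concrete: the value is the maximum over the full keystroke distribution, so the subset feeding a particular tokenization may not even contain the transform that produced it. A hedged sketch, with an invented distribution and subset split:

// Full fat-finger distribution for one keystroke (probabilities invented for illustration).
const full = [
  { insert: 'a', p: 0.55 },
  { insert: ' ', p: 0.30 }, // suppose this one belongs to a different tokenization subset
  { insert: 's', p: 0.15 },
];
const bestProbFromSet = full.reduce((best, t) => Math.max(best, t.p), 0); // 0.55

// A subset holding only { insert: ' ', p: 0.30 } would still receive 0.55 as its threshold,
// which is the "need not be represented" case called out in the doc comment above.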

web/src/engine/predictive-text/worker-thread/src/main/correction/distance-modeler.ts

Lines changed: 16 additions & 3 deletions
@@ -652,6 +652,12 @@ export class SearchSpace {
    */
   private processedEdgeSet: {[pathKey: string]: boolean} = {};

+  /**
+   * Provides a heuristic for the base cost at each depth if the best
+   * individual input were taken at that level.
+   */
+  private lowestCostAtDepth: number[];
+
   /**
    * Clone constructor. Deep-copies its internal queues, but not search nodes.
    * @param instance
@@ -670,6 +676,7 @@
     this.rootNode = arg1.rootNode;
     // Re-use already-checked Nodes.
     this.completedPaths = [].concat(arg1.completedPaths);
+    this.lowestCostAtDepth = arg1.lowestCostAtDepth.slice();
     this.returnedValues = {...arg1.returnedValues};
     this.processedEdgeSet = {...arg1.processedEdgeSet};

@@ -688,6 +695,7 @@
     this.selectionQueue = new PriorityQueue<SearchNode>(QUEUE_NODE_COMPARATOR);
     this.rootNode = new SearchNode(model.traverseFromRoot(), model.toKey ? model.toKey.bind(model) : null);
     this.selectionQueue.enqueue(this.rootNode);
+    this.lowestCostAtDepth = [];

     this.completedPaths = [];
   }
@@ -722,8 +730,12 @@
    * @param inputDistribution The fat-finger distribution for the incoming keystroke (or
    * just the raw keystroke if corrections are disabled)
    */
-  addInput(inputDistribution: Distribution<Transform>) {
-    this._inputSequence.push(inputDistribution);
+  addInput(inputDistribution: Distribution<Transform>, bestProbFromSet: number) {
+    const input = inputDistribution;
+    this._inputSequence.push(input);
+    const lastDepthCost = this.lowestCostAtDepth[this.lowestCostAtDepth.length - 1] ?? 0;
+    const logTierCost = -Math.log(bestProbFromSet);
+    this.lowestCostAtDepth.push(lastDepthCost + logTierCost);

     // Assumes that `inputDistribution` is already sorted.
     this.minInputCost.push(-Math.log(inputDistribution[0].p));
@@ -822,7 +834,8 @@
     // ... or even just not the then-current layer of the keyboard.
     //
     // TODO: still consider the lowest-cost individual edges for THIS specific criterion.
-    if(currentNode.currentCost > /* tierMinCost */ + 2.5 * SearchSpace.EDIT_DISTANCE_COST_SCALE) {
+    const tierMinCost = this.lowestCostAtDepth[currentNode.priorInput.length-1];
+    if(currentNode.currentCost > tierMinCost + 2.5 * SearchSpace.EDIT_DISTANCE_COST_SCALE) {
       return unmatchedResult;
     }
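
The behavioral difference in the final hunk is easiest to see with numbers. The old check compared against a fixed 2.5 * SearchSpace.EDIT_DISTANCE_COST_SCALE (the tierMinCost term was commented out), while the new check adds the accumulated per-depth floor. A hedged sketch with invented values (EDIT_DISTANCE_COST_SCALE is taken as 1 purely for illustration):

const EDIT_DISTANCE_COST_SCALE = 1; // illustrative stand-in for the real constant
const margin = 2.5 * EDIT_DISTANCE_COST_SCALE;

// Best per-keystroke probabilities of 0.5 accumulate a floor of -ln(0.5) ≈ 0.69 per depth.
const lowestCostAtDepth = [0.69, 1.39, 2.08, 2.77];

// A path that matched the most likely input at every depth has cost equal to the floor.
const pathCost = lowestCostAtDepth[3];
const prunedBefore = pathCost > margin;                         // true: cut off at depth 4
const prunedAfter  = pathCost > lowestCostAtDepth[3] + margin;  // false: kept
console.log(prunedBefore, prunedAfter);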

web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-token.tests.ts

Lines changed: 52 additions & 34 deletions
@@ -128,22 +128,25 @@ describe('ContextToken', function() {

     token1.addInput({
       trueTransform: srcTransform,
-      inputStartIndex: 0
+      inputStartIndex: 0,
+      bestProbFromSet: 1
     }, [{sample: {insert: 'can', deleteLeft: 0, deleteRight: 0, id: 1}, p: 1}]);

     token2.addInput({
       trueTransform: srcTransform,
-      inputStartIndex: 3
+      inputStartIndex: 3,
+      bestProbFromSet: 1
     }, [{sample: {insert: "'", deleteLeft: 0, deleteRight: 0, id: 1}, p: 1}]);

     token3.addInput({
       trueTransform: srcTransform,
-      inputStartIndex: 4
+      inputStartIndex: 4,
+      bestProbFromSet: 1
     }, [{sample: {insert: 't', deleteLeft: 0, deleteRight: 0, id: 1}, p: 1}]);

     const merged = ContextToken.merge([token1, token2, token3], plainModel);
     assert.equal(merged.exampleInput, "can't");
-    assert.deepEqual(merged.inputRange, [ { trueTransform: srcTransform, inputStartIndex: 0 } ]);
+    assert.deepEqual(merged.inputRange, [ { trueTransform: srcTransform, inputStartIndex: 0, bestProbFromSet: 1 } ]);
     assert.deepEqual(merged.searchSpace.inputSequence, [[{sample: srcTransform, p: 1}]]);
   });

@@ -168,35 +171,41 @@ describe('ContextToken', function() {

     token1.addInput({
       trueTransform: srcTransform1,
-      inputStartIndex: 0
+      inputStartIndex: 0,
+      bestProbFromSet: 1
     }, [{sample: srcTransform1, p: 1}]);
     token1.addInput({
       trueTransform: srcTransform2,
-      inputStartIndex: 0
+      inputStartIndex: 0,
+      bestProbFromSet: 1
     }, [{sample: {insert: 's', deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}]);

     token2.addInput({
       trueTransform: srcTransform2,
-      inputStartIndex: 1
+      inputStartIndex: 1,
+      bestProbFromSet: 1
     }, [{sample: {insert: "and", deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}]);

     token3.addInput({
       trueTransform: srcTransform2,
-      inputStartIndex: 4
+      inputStartIndex: 4,
+      bestProbFromSet: 1
     }, [{sample: {insert: 's', deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}]);
     token3.addInput({
       trueTransform: srcTransform3,
-      inputStartIndex: 0
+      inputStartIndex: 0,
+      bestProbFromSet: 1
     }, [{sample: srcTransform3, p: 1}]);

     token4.addInput({
       trueTransform: srcTransform4,
-      inputStartIndex: 0
+      inputStartIndex: 0,
+      bestProbFromSet: 1
     }, [{sample: srcTransform4, p: 1}]);

     const merged = ContextToken.merge(tokensToMerge, plainModel);
     assert.equal(merged.exampleInput, "applesandsourgrapes");
-    assert.deepEqual(merged.inputRange, srcTransforms.map((t) => ({ trueTransform: t, inputStartIndex: 0 }) ));
+    assert.deepEqual(merged.inputRange, srcTransforms.map((t) => ({ trueTransform: t, inputStartIndex: 0, bestProbFromSet: 1 }) ));
     assert.deepEqual(merged.searchSpace.inputSequence, srcTransforms.map((t) => [{sample: t, p: 1}]));
   });

@@ -221,35 +230,41 @@ describe('ContextToken', function() {

     token1.addInput({
       trueTransform: srcTransform1,
-      inputStartIndex: 0
+      inputStartIndex: 0,
+      bestProbFromSet: 1
     }, [{sample: srcTransform1, p: 1}]);
     token1.addInput({
       trueTransform: srcTransform2,
-      inputStartIndex: 0
+      inputStartIndex: 0,
+      bestProbFromSet: 1
     }, [{sample: {insert: toMathematicalSMP('s'), deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}]);

     token2.addInput({
       trueTransform: srcTransform2,
-      inputStartIndex: 1
+      inputStartIndex: 1,
+      bestProbFromSet: 1
     }, [{sample: {insert: toMathematicalSMP("and"), deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}]);

     token3.addInput({
       trueTransform: srcTransform2,
-      inputStartIndex: 4
+      inputStartIndex: 4,
+      bestProbFromSet: 1
     }, [{sample: {insert: toMathematicalSMP('s'), deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}]);
     token3.addInput({
       trueTransform: srcTransform3,
-      inputStartIndex: 0
+      inputStartIndex: 0,
+      bestProbFromSet: 1
     }, [{sample: srcTransform3, p: 1}]);

     token4.addInput({
       trueTransform: srcTransform4,
-      inputStartIndex: 0
+      inputStartIndex: 0,
+      bestProbFromSet: 1
     }, [{sample: srcTransform4, p: 1}]);

     const merged = ContextToken.merge(tokensToMerge, plainModel);
     assert.equal(merged.exampleInput, toMathematicalSMP("applesandsourgrapes"));
-    assert.deepEqual(merged.inputRange, srcTransforms.map((t) => ({ trueTransform: t, inputStartIndex: 0 }) ));
+    assert.deepEqual(merged.inputRange, srcTransforms.map((t) => ({ trueTransform: t, inputStartIndex: 0, bestProbFromSet: 1 }) ));
     assert.deepEqual(merged.searchSpace.inputSequence, srcTransforms.map((t) => [{sample: t, p: 1}]));
   });
 });
@@ -278,7 +293,7 @@ describe('ContextToken', function() {

     const tokenToSplit = new ContextToken(plainModel);
     for(let i = 0; i < keystrokeDistributions.length; i++) {
-      tokenToSplit.addInput({trueTransform: keystrokeDistributions[i][0].sample, inputStartIndex: 0}, keystrokeDistributions[i]);
+      tokenToSplit.addInput({trueTransform: keystrokeDistributions[i][0].sample, inputStartIndex: 0, bestProbFromSet: .75}, keystrokeDistributions[i]);
     };

     assert.equal(tokenToSplit.sourceText, 'can\'');
@@ -316,7 +331,7 @@ describe('ContextToken', function() {

     const tokenToSplit = new ContextToken(plainModel);
     for(let i = 0; i < keystrokeDistributions.length; i++) {
-      tokenToSplit.addInput({trueTransform: keystrokeDistributions[i][0].sample, inputStartIndex: 0}, keystrokeDistributions[i]);
+      tokenToSplit.addInput({trueTransform: keystrokeDistributions[i][0].sample, inputStartIndex: 0, bestProbFromSet: 1}, keystrokeDistributions[i]);
     };

     assert.equal(tokenToSplit.sourceText, 'biglargetransform');
@@ -343,7 +358,9 @@ describe('ContextToken', function() {
         insert: 'biglargetransform',
         deleteLeft: 0,
         deleteRight: 0
-      }, inputStartIndex: i
+      },
+      inputStartIndex: i,
+      bestProbFromSet: 1
     })));
     assert.sameDeepOrderedMembers(resultsOfSplit.map(t => t.searchSpace.inputSequence[0]), splitTextArray.map(t => [{
       sample: { insert: t, deleteLeft: 0, deleteRight: 0 }, p: 1
@@ -365,7 +382,7 @@ describe('ContextToken', function() {

     const tokenToSplit = new ContextToken(plainModel);
     for(let i = 0; i < keystrokeDistributions.length; i++) {
-      tokenToSplit.addInput({trueTransform: keystrokeDistributions[i][0].sample, inputStartIndex: 0}, keystrokeDistributions[i]);
+      tokenToSplit.addInput({trueTransform: keystrokeDistributions[i][0].sample, inputStartIndex: 0, bestProbFromSet: 1}, keystrokeDistributions[i]);
     };

     assert.equal(tokenToSplit.exampleInput, 'largelongtransforms');
@@ -388,15 +405,15 @@ describe('ContextToken', function() {
     assert.equal(resultsOfSplit.length, 3);
     assert.sameOrderedMembers(resultsOfSplit.map(t => t.exampleInput), splitTextArray);
     assert.deepEqual(resultsOfSplit[0].inputRange, [
-      { trueTransform: keystrokeDistributions[0][0].sample, inputStartIndex: 0 },
-      { trueTransform: keystrokeDistributions[1][0].sample, inputStartIndex: 0 },
+      { trueTransform: keystrokeDistributions[0][0].sample, inputStartIndex: 0, bestProbFromSet: 1 },
+      { trueTransform: keystrokeDistributions[1][0].sample, inputStartIndex: 0, bestProbFromSet: 1 },
     ]);
     assert.deepEqual(resultsOfSplit[1].inputRange, [
-      { trueTransform: keystrokeDistributions[1][0].sample, inputStartIndex: 'arge'.length },
-      { trueTransform: keystrokeDistributions[2][0].sample, inputStartIndex: 0 },
+      { trueTransform: keystrokeDistributions[1][0].sample, inputStartIndex: 'arge'.length, bestProbFromSet: 1 },
+      { trueTransform: keystrokeDistributions[2][0].sample, inputStartIndex: 0, bestProbFromSet: 1 },
     ]);
     assert.deepEqual(resultsOfSplit[2].inputRange, [
-      { trueTransform: keystrokeDistributions[2][0].sample, inputStartIndex: 'ng'.length }
+      { trueTransform: keystrokeDistributions[2][0].sample, inputStartIndex: 'ng'.length, bestProbFromSet: 1 }
     ]);

     assert.deepEqual(resultsOfSplit[0].searchSpace.inputSequence, [
@@ -459,7 +476,7 @@ describe('ContextToken', function() {

     const tokenToSplit = new ContextToken(plainModel);
     for(let i = 0; i < keystrokeDistributions.length; i++) {
-      tokenToSplit.addInput({trueTransform: keystrokeDistributions[i][0].sample, inputStartIndex: 0}, keystrokeDistributions[i]);
+      tokenToSplit.addInput({trueTransform: keystrokeDistributions[i][0].sample, inputStartIndex: 0, bestProbFromSet: 1}, keystrokeDistributions[i]);
     };

     assert.equal(tokenToSplit.exampleInput, toMathematicalSMP('largelongtransforms'));
@@ -482,15 +499,15 @@ describe('ContextToken', function() {
     assert.equal(resultsOfSplit.length, 3);
     assert.sameOrderedMembers(resultsOfSplit.map(t => t.exampleInput), splitTextArray);
     assert.deepEqual(resultsOfSplit[0].inputRange, [
-      { trueTransform: keystrokeDistributions[0][0].sample, inputStartIndex: 0 },
-      { trueTransform: keystrokeDistributions[1][0].sample, inputStartIndex: 0 },
+      { trueTransform: keystrokeDistributions[0][0].sample, inputStartIndex: 0, bestProbFromSet: 1 },
+      { trueTransform: keystrokeDistributions[1][0].sample, inputStartIndex: 0, bestProbFromSet: 1 },
     ]);
     assert.deepEqual(resultsOfSplit[1].inputRange, [
-      { trueTransform: keystrokeDistributions[1][0].sample, inputStartIndex: 'arge'.length },
-      { trueTransform: keystrokeDistributions[2][0].sample, inputStartIndex: 0 },
+      { trueTransform: keystrokeDistributions[1][0].sample, inputStartIndex: 'arge'.length, bestProbFromSet: 1 },
+      { trueTransform: keystrokeDistributions[2][0].sample, inputStartIndex: 0, bestProbFromSet: 1 },
     ]);
     assert.deepEqual(resultsOfSplit[2].inputRange, [
-      { trueTransform: keystrokeDistributions[2][0].sample, inputStartIndex: 'ng'.length }
+      { trueTransform: keystrokeDistributions[2][0].sample, inputStartIndex: 'ng'.length, bestProbFromSet: 1 }
     ]);

     assert.deepEqual(resultsOfSplit[0].searchSpace.inputSequence, [
@@ -550,7 +567,8 @@ describe('preprocessInputSources', () => {

     const results = preprocessInputSources(transforms.map((t) => ({
       trueTransform: t,
-      inputStartIndex: 0
+      inputStartIndex: 0,
+      bestProbFromSet: 1
     })));

     assert.equal(results.length, transforms.length);
