Skip to content

Commit c02de14

Browse files
committed
Merge branch 'refactor/web/correction-heuristic-and-thresholding' into refactor/web/relocate-search-space
2 parents 4940caa + 574caeb commit c02de14

File tree

2 files changed

+8
-9
lines changed

2 files changed

+8
-9
lines changed

web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -124,16 +124,15 @@ export class ContextToken {
124124
rawText ||= '';
125125

126126
// Supports the old pathway for: updateWithBackspace(tokenText: string, transformId: number)
127-
const rawTransformDistributions: Distribution<Transform>[] = textToCharTransforms(rawText).map(function(transform) {
128-
return [{sample: transform, p: 1.0}];
129-
});
130-
rawTransformDistributions.forEach((entry) => {
127+
// Build a token that represents the current text with no ambiguity - probability at max (1.0)
128+
const BASE_PROBABILITY = 1;
129+
textToCharTransforms(rawText).forEach((transform) => {
131130
this._inputRange.push({
132-
trueTransform: entry[0].sample,
131+
trueTransform: transform,
133132
inputStartIndex: 0,
134-
bestProbFromSet: 1
133+
bestProbFromSet: BASE_PROBABILITY
135134
});
136-
this.searchSpace.addInput(entry, 1);
135+
this.searchSpace.addInput([{sample: transform, p: BASE_PROBABILITY}], 1);
137136
});
138137
}
139138
}

web/src/engine/predictive-text/worker-thread/src/main/correction/context-tokenization.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -499,7 +499,7 @@ export class ContextTokenization {
499499
* the transition.
500500
* @param bestProbFromSet The probability of the single most likely input
501501
* transform in the overall transformDistribution associated with the
502-
* keystroke triggering theh transition. It need not be represented by the
502+
* keystroke triggering the transition. It need not be represented by the
503503
* pendingTokenization to be built.
504504
* @returns
505505
*/
@@ -586,7 +586,7 @@ export class ContextTokenization {
586586
if(affectedToken.inputRange.length == 0 && distribution[0].sample.deleteLeft != 0) {
587587
distribution = distribution.map((mass) => ({sample: { ...mass.sample, deleteLeft: 0 }, p: mass.p }));
588588
}
589-
affectedToken.addInput({trueTransform: sourceInput, inputStartIndex: appliedLength, bestProbFromSet: bestProbFromSet}, distribution);
589+
affectedToken.addInput({trueTransform: sourceInput, inputStartIndex: appliedLength, bestProbFromSet}, distribution);
590590
appliedLength += KMWString.length(distribution[0].sample.insert);
591591

592592
const tokenize = determineModelTokenizer(lexicalModel);

0 commit comments

Comments (0)