diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-state.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-state.ts index 5bb62dabc0c..2be4fc8f980 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-state.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-state.ts @@ -14,10 +14,12 @@ import { applyTransform } from '@keymanapp/models-templates'; import { KMWString } from '@keymanapp/web-utils'; import { ContextToken } from './context-token.js'; -import { ContextTokenization } from './context-tokenization.js'; +import { ContextTokenization, determineTaillessTrueKeystroke } from './context-tokenization.js'; import { ContextTransition } from './context-transition.js'; import { determineModelTokenizer } from '../model-helpers.js'; -import { legacySubsetKeyer, TokenizationSubsetBuilder } from './tokenization-subsets.js'; +import { SearchCluster } from './search-cluster.js'; +import { SearchPath } from './search-path.js'; +import { precomputationSubsetKeyer, TokenizationSubsetBuilder } from './tokenization-subsets.js'; import TransformUtils from '../transformUtils.js'; import Context = LexicalModelTypes.Context; @@ -45,7 +47,7 @@ export class ContextState { /** - * Denotes the most likely tokenization for the represented Context. + * Denotes the most likely tokenizations for the represented Context. */ - tokenization: ContextTokenization; + tokenizations: ContextTokenization[]; /** * Denotes the keystroke-sourced Transform that was last applied to a @@ -118,13 +120,13 @@ - * @param tokenization Precomputed tokenization for the context, leveraging previous + * @param tokenizations Precomputed tokenizations for the context, leveraging previous * correction-search progress and results */ - constructor(context: Context, model: LexicalModel, tokenization?: ContextTokenization); - constructor(param1: Context | ContextState, model?: LexicalModel, tokenization?: ContextTokenization) { + constructor(context: Context, model: LexicalModel, tokenizations?: ContextTokenization[]); + constructor(param1: Context | ContextState, model?: LexicalModel, tokenizations?: ContextTokenization[]) { if(!(param1 instanceof ContextState)) { this.context = param1; this.model = model; - if(tokenization) { - this.tokenization = tokenization; + if(tokenizations) { + this.tokenizations = tokenizations; } else { this.initFromReset(); } @@ -133,7 +135,7 @@ Object.assign(this, stateToClone); this.inputTransforms = new Map(stateToClone.inputTransforms); - this.tokenization = new ContextTokenization(stateToClone.tokenization); + this.tokenizations = stateToClone.tokenizations.map(t => new ContextTokenization(t)); // A shallow copy of the array is fine, but we'd be best off // not aliasing the array itself. @@ -164,7 +166,7 @@ if(baseTokens.length == 0) { baseTokens.push(new ContextToken(this.model)); } - this.tokenization = new ContextTokenization(baseTokens); + this.tokenizations = [new ContextTokenization(baseTokens)]; this.inputTransforms = new Map(); } @@ -198,19 +200,25 @@ export class ContextState { appliedSuggestionId?: number ): ContextTransition { const lexicalModel = this.model; const trueInput = transformDistribution[0].sample; + + // Determine the best probability from among ALL available inputs, before they're split + // into subsets. + const bestProb = transformDistribution.reduce((best, cur) => best < cur.p ? 
cur.p : best, 0); const transition = new ContextTransition(this, this.appliedInput?.id); // From here on, we work toward the common-case - re-using old info when // context (and its tokenization) is changed by an input Transform. - - let trueInputSubsetKey: string; const slideUpdateTransform = determineContextSlideTransform(this.context, context); // Goal: allow multiple base tokenizations. - const startTokenizations = [this.tokenization]; - const startTokenizationsAfterSlide = startTokenizations.map(t => t.applyContextSlide(lexicalModel, slideUpdateTransform)); + const startTokenizations: Set<ContextTokenization> = new Set(); + const keyedTokenizations: Map<string, ContextTokenization> = new Map(); + this.tokenizations.forEach(t => { + const slidTokenization = t.applyContextSlide(lexicalModel, slideUpdateTransform); + startTokenizations.add(slidTokenization); + keyedTokenizations.set(t.clusteringKey, slidTokenization); + }); // Easy case - no net change to the tokenizations whatsoever; the actual request // aims to save-state the most recent results. @@ -220,38 +228,92 @@ // If the tokenizations match, clone the ContextState; we want to preserve a post-application // context separately from pre-application contexts for predictions based on empty roots. const state = new ContextState(this); - state.tokenization = startTokenizationsAfterSlide[0]; + state.tokenizations = [...startTokenizations.values()]; transition.finalize(state, transformDistribution); return transition; } - const subsetBuilder = new TokenizationSubsetBuilder(legacySubsetKeyer); - for(let baseTokenization of startTokenizationsAfterSlide) { - + const subsetBuilder = new TokenizationSubsetBuilder(precomputationSubsetKeyer); + for(let baseTokenization of startTokenizations.values()) { for(let mass of transformDistribution) { + // Handle the splits and merges early, here. const tokenizationAnalysis = baseTokenization.mapWhitespacedTokenization(lexicalModel, mass.sample); - subsetBuilder.addPrecomputation(baseTokenization, tokenizationAnalysis, mass.p); + const alignment = tokenizationAnalysis.alignment; + + // Pre-process any splits and merges; the result of these operations may + // have the same properties as other base tokenizations within the + // subset if compatible. + const needsRealignment = (alignment.merges.length > 0 || alignment.splits.length > 0 || alignment.unmappedEdits.length > 0); + const sourceTokenization = needsRealignment ? baseTokenization.realign(alignment) : baseTokenization; - if(mass.sample == trueInput) { - trueInputSubsetKey = subsetBuilder.keyer(tokenizationAnalysis); - } + subsetBuilder.addPrecomputation(sourceTokenization, tokenizationAnalysis, mass.p); } } - // And now to (partly) detransform from a multiple-tokenization paradigm. - const trueInputSubset = subsetBuilder.subsets.get(trueInputSubsetKey); - // Right now, we only have one base tokenization, so we just fetch it. - const baseTokenization = startTokenizationsAfterSlide[0]; - // For multiple tokenizations, we'd retrieve each, use the "most likely" one as base, - // and then fold all resulting search spaces (on the final token) into one. - const tokenizationAnalysis = trueInputSubset.transitionPaths.get(baseTokenization); + // For all target tokenizations - each transition subset... + const finalTokenizations = [...subsetBuilder.subsets.values()].map((subset) => { + // Iterate over all _source_ tokenizations and the changes used to transition them + // to that target tokenization. 
+ const transitionSets = [...subset.transitionPaths.entries()]; + const isolatedSubsetResults = transitionSets.map((precomp) => { + const rootTokenization = precomp[0]; - // Determine the best probability from among ALL available inputs, before they're split - // into subsets. - const bestProb = transformDistribution.reduce((best, curr) => Math.max(best, curr.p), 0); - // Should gain one per subsetBuilder.subsets entry. - const realignedTokenization = baseTokenization.realign(tokenizationAnalysis.alignment); - const resultTokenization = realignedTokenization.evaluateTransition(tokenizationAnalysis, trueInput.id, bestProb, appliedSuggestionId); + return rootTokenization.evaluateTransition(precomp[1], trueInput.id, bestProb, appliedSuggestionId); + }); + + // Super-easy case: there's only the one tokenization anyway. + if(isolatedSubsetResults.length == 1) { + return isolatedSubsetResults[0]; + } + + // Assumption: all produced "isolatedSubsetResults" should essentially be + // the same tokenization. That said, tail entries will likely not be + // perfect matches; we need to splice them together, without duplicates. + // We also cannot rely on tokens before the standard tail index having + // been unmodified; merges and splits may have been applied earlier in the + // sequence. + + const tokenCount = isolatedSubsetResults[0].tokens.length; + if(isolatedSubsetResults.find(sr => sr.tokens.length != tokenCount)) { + throw new Error("Assumption invalidated: incoming tokenization paths do not converge"); + } + + const finalizedTokenization: ContextToken[] = []; + for(let i = 0; i < tokenCount; i++) { + const spaceSet: Set<SearchPath> = new Set(); + let isWhitespace = true; + let isPartial = false; + + isolatedSubsetResults.map((sr) => sr.tokens[i]).forEach((token) => { + const searchSpace = token.searchSpace; + isWhitespace &&= token.isWhitespace; + isPartial ||= token.isPartial; + + if(searchSpace instanceof SearchPath) { + spaceSet.add(searchSpace); + } else if(searchSpace instanceof SearchCluster) { + searchSpace.parents.forEach(p => spaceSet.add(p)); + } else { + throw new Error("Cannot handle unknown SearchSpace type"); + } + }); + + const setVals = [...spaceSet.values()]; + const finalizedSpace = setVals.length > 1 ? new SearchCluster(setVals) : setVals[0]; + + const token = new ContextToken(finalizedSpace); + token.isWhitespace = isWhitespace; + token.isPartial = isPartial; + + finalizedTokenization.push(token); + } + + return new ContextTokenization( + finalizedTokenization, + transitionSets[0][1], + determineTaillessTrueKeystroke(transitionSets[0][1]) + ); + }); // ------------ @@ -261,17 +323,26 @@ // epic/dict-breaker: if ANY decently-likely tokenization satisfies this, we still // have a reasonable candidate for display of a delayed reversion. (Not 'all' - // 'any'.) - const tokens = resultTokenization.tokens; - const lastIndex = tokens.length - 1; - // Ignore a context-final empty '' token; the interesting one is what comes before. - const nonEmptyTail = !tokens[lastIndex].isEmptyToken ? tokens[lastIndex] : tokens[lastIndex - 1]; - const appliedSuggestionTransitionId = nonEmptyTail?.appliedTransitionId; const state = new ContextState(applyTransform(trueInput, context), lexicalModel); - state.tokenization = new ContextTokenization(resultTokenization.tokens, tokenizationAnalysis, resultTokenization.taillessTrueKeystroke); + // Set tokenizations from above. + // TODO: + // - sort by most .tail.searchSpace.bestExample.p? + // - threshold to the N most likely tokenizations? 
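A minimal sketch of the ranking the TODO above contemplates - assuming each tokenization's tail search space exposes a `bestExample` with probability `p`, as the TODO's own wording implies; `rankTokenizations` and `MAX_TOKENIZATIONS` are illustrative names, not part of this patch:

// Hedged sketch only - not part of the patch. Assumes `tail.searchSpace.bestExample.p`
// exists, per the TODO above; MAX_TOKENIZATIONS is an illustrative threshold.
const MAX_TOKENIZATIONS = 5;
function rankTokenizations(candidates: ContextTokenization[]): ContextTokenization[] {
  // Sort a copy descending by best-example probability, then keep only the N most likely.
  return [...candidates]
    .sort((a, b) => (b.tail.searchSpace.bestExample?.p ?? 0) - (a.tail.searchSpace.bestExample?.p ?? 0))
    .slice(0, MAX_TOKENIZATIONS);
}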
+ state.tokenizations = finalTokenizations; state.appliedInput = transformDistribution?.[0].sample; transition.finalize(state, transformDistribution); - transition.revertableTransitionId = appliedSuggestionTransitionId; + + // Maybe sort the tokenizations in some manner, first? + transition.revertableTransitionId = state.tokenizations.map((tokenization) => { + const tokens = tokenization.tokens; + const lastIndex = tokens.length - 1; + // Ignore a context-final empty '' token; the interesting one is what comes before. + const nonEmptyTail = !tokens[lastIndex].isEmptyToken ? tokens[lastIndex] : tokens[lastIndex - 1]; + return nonEmptyTail?.appliedTransitionId; + }).find((transitionId) => { + return transitionId !== undefined; + }); return transition; } } diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts index b5363757dba..15b75db77c6 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts @@ -10,7 +10,7 @@ import { LexicalModelTypes } from '@keymanapp/common-types'; import { SearchPath } from "./search-path.js"; -import { SearchSpace, PathInputProperties } from "./search-space.js"; +import { isSearchSpace, SearchSpace } from "./search-space.js"; import { TokenSplitMap } from "./context-tokenization.js"; import { generateSubsetId } from './tokenization-subsets.js'; @@ -69,7 +69,7 @@ export class ContextToken { * Constructs a new, empty instance for use with the specified LexicalModel. * @param model */ - constructor(model: LexicalModel); + constructor(model: SearchSpace | LexicalModel); /** * Constructs a new instance with pre-existing text for use with the specified LexicalModel. * @param model @@ -81,7 +81,7 @@ export class ContextToken { * @param baseToken */ constructor(baseToken: ContextToken); - constructor(param: ContextToken | LexicalModel, rawText?: string, isPartial?: boolean) { + constructor(param: ContextToken | SearchSpace | LexicalModel, rawText?: string, isPartial?: boolean) { if(param instanceof ContextToken) { const priorToken = param; Object.assign(this, priorToken); @@ -92,7 +92,7 @@ export class ContextToken { // we need to ensure that only fully-utilized keystrokes are considered. this._searchSpace = priorToken.searchSpace; } else { - const model = param; + const baseSpace = isSearchSpace(param) ? param as SearchSpace : new SearchPath(param as LexicalModel); // May be altered outside of the constructor. this.isWhitespace = false; @@ -105,7 +105,7 @@ export class ContextToken { return [{sample: transform, p: 1.0}]; }); - let searchSpace = new SearchPath(model); + let searchSpace = baseSpace; rawTransformDistributions.forEach((entry) => { searchSpace = new SearchPath(searchSpace, entry, { @@ -122,14 +122,6 @@ export class ContextToken { } } - /** - * Call this to record the original keystroke Transforms for the context range - * corresponding to this token. 
- */ - addInput(inputSource: PathInputProperties, distribution: Distribution) { - this._searchSpace = new SearchPath(this._searchSpace, distribution, inputSource); - } - get inputCount() { return this._searchSpace.inputCount; } diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tokenization.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tokenization.ts index a099c1296b4..2153b0f8d13 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tokenization.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tokenization.ts @@ -9,6 +9,7 @@ import { LexicalModelTypes } from '@keymanapp/common-types'; import { KMWString } from '@keymanapp/web-utils'; +import { SENTINEL_CODE_UNIT } from '@keymanapp/models-templates'; import { ContextToken } from './context-token.js'; import TransformUtils from '../transformUtils.js'; @@ -17,6 +18,7 @@ import { determineModelTokenizer } from '../model-helpers.js'; import { ExtendedEditOperation, SegmentableDistanceCalculation } from './segmentable-calculation.js'; import { TokenizationPath } from './tokenization-subsets.js'; import { PathInputProperties } from './search-space.js'; +import { SearchPath } from './search-path.js'; import LexicalModel = LexicalModelTypes.LexicalModel; import Transform = LexicalModelTypes.Transform; @@ -130,7 +132,7 @@ export class ContextTokenization { constructor(priorToClone: ContextTokenization); constructor(tokens: ContextToken[]); - constructor(tokens: ContextToken[], alignment: TokenizationPath, taillessTrueKeystroke: Transform); + constructor(tokens: ContextToken[], tokenizationPath: TokenizationPath, taillessTrueKeystroke: Transform); constructor( param1: ContextToken[] | ContextTokenization, tokenizationPath?: TokenizationPath, @@ -599,11 +601,6 @@ export class ContextTokenization { } affectedToken.isPartial = true; - if(appliedSuggestionId !== undefined) { - affectedToken.appliedTransitionId = appliedSuggestionId; - } else { - delete affectedToken.appliedTransitionId; - } // If we are completely replacing a token via delete left, erase the deleteLeft; // that part applied to a _previous_ token that no longer exists. @@ -625,11 +622,18 @@ export class ContextTokenization { inputSource.segment.end = appliedLength; } - affectedToken = new ContextToken(affectedToken); - affectedToken.addInput(inputSource, distribution); + const searchPath = new SearchPath(affectedToken.searchSpace, distribution, inputSource); // the token generally holds the current SearchSpace... at present. + affectedToken = new ContextToken(searchPath); + + if(appliedSuggestionId !== undefined) { + affectedToken.appliedTransitionId = appliedSuggestionId; + } else { + delete affectedToken.appliedTransitionId; + } const tokenize = determineModelTokenizer(lexicalModel); affectedToken.isWhitespace = tokenize({left: affectedToken.exampleInput, startOfBuffer: false, endOfBuffer: false}).left[0]?.isWhitespace ?? false; + // Do not re-use the previous token; the mutation may have unexpected // results (say, in unit-testing) tailTokenization[tokenIndex] = affectedToken; @@ -643,6 +647,11 @@ export class ContextTokenization { determineTaillessTrueKeystroke(tokenizationPath) ); } + + get clusteringKey(): string { + // Note: SENTINEL_CODE_UNIT is not leveraged by SearchPath.sourceRangeKey. 
+ return this.tokens.map(t => `${t.sourceRangeKey}L${t.searchSpace.codepointLength}`).join(SENTINEL_CODE_UNIT); + } } const appendText = (full: string, current: string) => full + current; diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-transition.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-transition.ts index 23251412aba..2307f590c65 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-transition.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-transition.ts @@ -139,6 +139,12 @@ transform: Transform, appliedTransitionId: number ) => { + // TODO: add NEW tokenization based on base tokenization + suggestion. + // Ensure it's the "most likely" in some sense. + // + // Issue: suggestions do not currently track their base spaceId - their source. + // Cannot reference-equality check due to inter-thread communication. + // How can we best remember the suggestion's original source tokenization? const state = baseState.analyzeTransition( baseState.context, [{sample: transform, p: 1}], diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-space.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-space.ts index 07d8c6e3eb6..efb47e316ff 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-space.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-space.ts @@ -15,6 +15,7 @@ import { SearchPath } from "./search-path.js"; import Distribution = LexicalModelTypes.Distribution; import LexicalModel = LexicalModelTypes.LexicalModel; import Transform = LexicalModelTypes.Transform; +import { SearchCluster } from "./search-cluster.js"; export let SPACE_ID_SEED = 0; @@ -95,6 +96,10 @@ subsetId: number; } +export function isSearchSpace(obj: unknown): obj is SearchSpace { + return obj instanceof SearchPath || obj instanceof SearchCluster; +} + /** * Represents all or a portion of the dynamically-generated graph used to search * for predictive-text corrections. diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/tokenization-subsets.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/tokenization-subsets.ts index ed1e5b48e7b..b991f252823 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/tokenization-subsets.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/tokenization-subsets.ts @@ -249,7 +249,7 @@ export function precomputationSubsetKeyer(tokenizationEdits: TokenizationTransitionEdits) { } } - return components.concat(editKeyer(tokenizationEdits)).join(SENTINEL_CODE_UNIT); + return components /*.concat(editKeyer(tokenizationEdits))*/ .join(SENTINEL_CODE_UNIT); } export class TokenizationSubsetBuilder { diff --git a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts index 94cbd09c2e2..93c2a44451f 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts @@ -299,8 +299,11 @@ if(inputIsEmpty) { // Directly build a simple empty transition that duplicates the last seen state. // This should also clear the preservation transform if it exists! 
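A brief usage note on the `isSearchSpace` guard added in search-space.ts above: written as a TypeScript type predicate, it narrows the `SearchSpace | LexicalModel` union without casts. A minimal sketch mirroring the branch in the ContextToken constructor; `buildBaseSpace` is illustrative only, not part of the patch:

// Illustrative only. With `obj is SearchSpace` as the guard's return type,
// TypeScript narrows `param` automatically in each branch.
function buildBaseSpace(param: SearchSpace | LexicalModel): SearchSpace {
  // true branch: param is a SearchSpace (SearchPath or SearchCluster);
  // false branch: param is a LexicalModel, which seeds a fresh SearchPath.
  return isSearchSpace(param) ? param : new SearchPath(param);
}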
- const tokenization = new ContextTokenization(contextTracker.latest.final.tokenization.tokens); - const priorState = new ContextState(context, transition.final.model, tokenization); + const priorState = new ContextState( + context, + transition.final.model, + contextTracker.latest.final.tokenizations.map(t => new ContextTokenization(t)) + ); transition = new ContextTransition(priorState, inputTransform.id); transition.finalize(priorState, transformDistribution); } else if( @@ -460,7 +463,7 @@ export async function correctAndEnumerate( // Ideally, the answer (in the future) will be no, but leaving it in right now may pose an issue. // The 'eventual' logic will be significantly more complex, though still manageable. - const tokenizations = [transition.final.tokenization]; + const tokenizations = transition.final.tokenizations; const searchSpaces = tokenizations.map(t => t.tail.searchSpace); // If corrections are not enabled, bypass the correction search aspect @@ -471,7 +474,7 @@ export async function correctAndEnumerate( if(!searchSpaces.find(s => s.correctionsEnabled)) { const wordbreak = determineModelWordbreaker(lexicalModel); // The one true tokenization: no corrections permitted. - const tokenization = transition.final.tokenization; + const tokenization = transition.final.tokenizations[0]; // No matter the prediction, once we know the root of the prediction, we'll always 'replace' the // same amount of text. We can handle this before the big 'prediction root' loop. diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-state.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-state.tests.ts index 584194a7a6a..e84b3abc8c8 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-state.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-state.tests.ts @@ -36,7 +36,8 @@ describe('ContextState', () => { assert.equal(state.context, context); assert.equal(state.model, plainModel); - assert.isOk(state.tokenization); + assert.isOk(state.tokenizations); + assert.equal(state.tokenizations.length, 1); assert.isUndefined(state.isManuallyApplied); assert.isNotOk(state.suggestions); assert.isNotOk(state.appliedSuggestionId); @@ -46,36 +47,39 @@ describe('ContextState', () => { it('creates one empty token for an empty context', () => { let context = { left: '', right: '', startOfBuffer: true, endOfBuffer: true }; let state = new ContextState(context, plainModel); - assert.isOk(state.tokenization); - assert.equal(state.tokenization.tokens.length, 1); - assert.equal(state.tokenization.tail.exampleInput, ''); + assert.isOk(state.tokenizations); + assert.equal(state.tokenizations.length, 1); + assert.equal(state.tokenizations[0].tokens.length, 1); + assert.equal(state.tokenizations[0].tail.exampleInput, ''); }); it('creates tokens for initial text (without ending whitespace)', () => { let context = { left: 'the quick brown fox', right: '', startOfBuffer: true, endOfBuffer: true }; let state = new ContextState(context, plainModel); - assert.isOk(state.tokenization); - assert.equal(state.tokenization.tokens.length, 7); - assert.deepEqual(state.tokenization.exampleInput, ['the', ' ', 'quick', ' ', 'brown', ' ', 'fox']); + assert.equal(state.tokenizations?.length, 1); + assert.isOk(state.tokenizations[0]); + assert.equal(state.tokenizations[0].tokens.length, 7); + assert.deepEqual(state.tokenizations[0].exampleInput, ['the', ' ', 'quick', ' ', 'brown', ' 
', 'fox']); let context2 = { left: "an apple a day keeps the doctor", startOfBuffer: true, endOfBuffer: true }; let rawTokens = ["an", " ", "apple", " ", "a", " ", "day", " ", "keeps", " ", "the", " ", "doctor"]; let state2 = new ContextState(context2, plainModel); - assert.deepEqual(state2.tokenization.tokens.map(token => token.exampleInput), rawTokens); + assert.deepEqual(state2.tokenizations[0].tokens.map(token => token.exampleInput), rawTokens); }); it('creates tokens for initial text (with extra empty token for ending whitespace)', () => { let context = { left: 'the quick brown fox ', right: '', startOfBuffer: true, endOfBuffer: true }; let state = new ContextState(context, plainModel); - assert.isOk(state.tokenization); - assert.equal(state.tokenization.tokens.length, 9); - assert.deepEqual(state.tokenization.exampleInput, ['the', ' ', 'quick', ' ', 'brown', ' ', 'fox', ' ', '']); + assert.equal(state.tokenizations?.length, 1); + assert.isOk(state.tokenizations[0]); + assert.equal(state.tokenizations[0].tokens.length, 9); + assert.deepEqual(state.tokenizations[0].exampleInput, ['the', ' ', 'quick', ' ', 'brown', ' ', 'fox', ' ', '']); let context2 = { left: "an apple a day keeps the doctor ", startOfBuffer: true, endOfBuffer: true }; let rawTokens = ["an", " ", "apple", " ", "a", " ", "day", " ", "keeps", " ", "the", " ", "doctor", " ", ""]; let state2 = new ContextState(context2, plainModel); - assert.deepEqual(state2.tokenization.tokens.map(token => token.exampleInput), rawTokens); + assert.deepEqual(state2.tokenizations[0].tokens.map(token => token.exampleInput), rawTokens); }); }); @@ -97,7 +101,7 @@ describe('ContextState', () => { let baseState = new ContextState(existingContext, plainModel); let newContextMatch = baseState.analyzeTransition(newContext, toWrapperDistribution(transform)); assert.isNotNull(newContextMatch?.final); - assert.deepEqual(newContextMatch.final.tokenization.tokens.map(token => token.exampleInput), rawTokens); + assert.deepEqual(newContextMatch.final.tokenizations[0].tokens.map(token => token.exampleInput), rawTokens); // // Phrased this way to facilitate TS type-inference; assert.isTrue() does // // NOT do this for us! @@ -124,7 +128,7 @@ describe('ContextState', () => { let baseState = new ContextState(existingContext, plainModel); let newContextMatch = baseState.analyzeTransition(newContext, toWrapperDistribution(transform)); assert.isNotNull(newContextMatch?.final); - assert.deepEqual(newContextMatch.final.tokenization.tokens.map(token => token.exampleInput), rawTokens); + assert.deepEqual(newContextMatch.final.tokenizations[0].tokens.map(token => token.exampleInput), rawTokens); // // Phrased this way to facilitate TS type-inference; assert.isTrue() does // // NOT do this for us! 
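One pattern worth noting before the remaining test updates: with `ContextToken.addInput` removed, the rewritten tests extend a token by wrapping its current search space in a new `SearchPath` and rebuilding the token around it. A hypothetical helper capturing that idiom - the name `extendToken` is illustrative, not part of the patch - could read:

// Illustrative only; mirrors the construction pattern repeated in the
// rewritten tests below.
function extendToken(
  token: ContextToken,
  distribution: Distribution,        // the keystroke distribution to record
  inputSource: PathInputProperties   // segment + probability metadata for the input
): ContextToken {
  // SearchPath layers immutably atop the token's existing space; a fresh
  // ContextToken is then built around the extended path.
  return new ContextToken(new SearchPath(token.searchSpace, distribution, inputSource));
}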
@@ -151,7 +155,7 @@ describe('ContextState', () => { let baseState = new ContextState(existingContext, plainModel); let newContextMatch = baseState.analyzeTransition(newContext, toWrapperDistribution(transform)); assert.isNotNull(newContextMatch?.final); - assert.deepEqual(newContextMatch.final.tokenization.tokens.map(token => token.exampleInput), rawTokens); + assert.deepEqual(newContextMatch.final.tokenizations[0].tokens.map(token => token.exampleInput), rawTokens); // if(!newContextMatch.final.tokenization.alignment.canAlign) { // assert.fail("context alignment failed"); @@ -176,7 +180,7 @@ describe('ContextState', () => { let baseState = new ContextState(existingContext, plainModel); let newContextMatch = baseState.analyzeTransition(newContext, toWrapperDistribution(transform)); assert.isNotNull(newContextMatch?.final); - assert.deepEqual(newContextMatch.final.tokenization.tokens.map(token => token.exampleInput), rawTokens); + assert.deepEqual(newContextMatch.final.tokenizations[0].tokens.map(token => token.exampleInput), rawTokens); // if(!newContextMatch.final.tokenization.alignment.canAlign) { // assert.fail("context alignment failed"); @@ -201,7 +205,7 @@ describe('ContextState', () => { let baseState = new ContextState(existingContext, plainModel); let newContextMatch = baseState.analyzeTransition(newContext, toWrapperDistribution(transform)); assert.isNotNull(newContextMatch?.final); - assert.deepEqual(newContextMatch.final.tokenization.tokens.map(token => token.exampleInput), rawTokens); + assert.deepEqual(newContextMatch.final.tokenizations[0].tokens.map(token => token.exampleInput), rawTokens); // if(!newContextMatch.final.tokenization.alignment.canAlign) { // assert.fail("context alignment failed"); @@ -223,7 +227,7 @@ describe('ContextState', () => { let baseState = new ContextState(existingContext, plainModel); let newContextMatch = baseState.analyzeTransition(existingContext, toWrapperDistribution(transform)); assert.isNotNull(newContextMatch?.final); - assert.deepEqual(newContextMatch.final.tokenization.tokens.map(token => token.exampleInput), rawTokens); + assert.deepEqual(newContextMatch.final.tokenizations[0].tokens.map(token => token.exampleInput), rawTokens); // if(!newContextMatch.final.tokenization.alignment.canAlign) { // assert.fail("context alignment failed"); @@ -246,18 +250,19 @@ describe('ContextState', () => { let baseState = new ContextState(existingContext, plainModel); let newContextMatch = baseState.analyzeTransition(existingContext, toWrapperDistribution(transform)); assert.isNotNull(newContextMatch?.final); - assert.deepEqual(newContextMatch.final.tokenization.tokens.map(token => token.exampleInput), rawTokens); + assert.deepEqual(newContextMatch.final.tokenizations[0].tokens.map(token => token.exampleInput), rawTokens); // We want to preserve the added whitespace when predicting a token that follows after it. 
- assert.deepEqual(newContextMatch.final.tokenization.taillessTrueKeystroke, { insert: ' ', deleteLeft: 0 }); + assert.deepEqual(newContextMatch.final.tokenizations[0].taillessTrueKeystroke, { insert: ' ', deleteLeft: 0 }); // The 'wordbreak' transform let state = newContextMatch?.final; // space transform - assert.equal(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchSpace.inputCount, 1); + const finalTokenization = state.tokenizations[0]; + assert.equal(finalTokenization.tokens[finalTokenization.tokens.length - 2].searchSpace.inputCount, 1); // empty transform - assert.equal(state.tokenization.tokens[state.tokenization.tokens.length - 1].searchSpace.inputCount, 1); - assert.isTrue(state.tokenization.tail.searchSpace instanceof SearchPath); - assert.deepEqual((state.tokenization.tail.searchSpace as SearchPath).lastInput, [{sample: { insert: '', deleteLeft: 0 }, p: 1}]); + assert.equal(finalTokenization.tokens[finalTokenization.tokens.length - 1].searchSpace.inputCount, 1); + assert.isTrue(finalTokenization.tail.searchSpace instanceof SearchPath); + assert.deepEqual((finalTokenization.tail.searchSpace as SearchPath).lastInput, [{sample: { insert: '', deleteLeft: 0 }, p: 1}]); }); it("properly matches and aligns when whitespace before final empty token is extended", function() { @@ -273,19 +278,19 @@ describe('ContextState', () => { let baseState = new ContextState(existingContext, plainModel); let newContextMatch = baseState.analyzeTransition(existingContext, toWrapperDistribution(transform)); assert.isNotNull(newContextMatch?.final); - assert.deepEqual(newContextMatch.final.tokenization.tokens.map(token => token.exampleInput), rawTokens); + const finalTokenization = newContextMatch.final.tokenizations[0]; + assert.deepEqual(finalTokenization.tokens.map(token => token.exampleInput), rawTokens); // We want to preserve the added whitespace when predicting a token that follows after it. - assert.deepEqual(newContextMatch.final.tokenization.taillessTrueKeystroke, { insert: ' ', deleteLeft: 0 }); + assert.deepEqual(finalTokenization.taillessTrueKeystroke, { insert: ' ', deleteLeft: 0 }); // The 'wordbreak' transform - let state = newContextMatch?.final; // Two whitespaces, one of which is new! 
- const preTail = state.tokenization.tokens[state.tokenization.tokens.length - 2]; + const preTail = finalTokenization.tokens[finalTokenization.tokens.length - 2]; assert.equal(preTail.searchSpace.inputCount, 2); assert.deepEqual((preTail.searchSpace.parents[0] as SearchPath).lastInput, [{sample: transform, p: 1}]); - assert.equal(state.tokenization.tail.searchSpace.inputCount, 1); - assert.isTrue(state.tokenization.tail.searchSpace instanceof SearchPath); - assert.deepEqual((state.tokenization.tail.searchSpace as SearchPath).lastInput, [{sample: { insert: '', deleteLeft: 0 }, p: 1}]); + assert.equal(finalTokenization.tail.searchSpace.inputCount, 1); + assert.isTrue(finalTokenization.tail.searchSpace instanceof SearchPath); + assert.deepEqual((finalTokenization.tail.searchSpace as SearchPath).lastInput, [{sample: { insert: '', deleteLeft: 0 }, p: 1}]); }); it("properly matches and aligns when a 'wordbreak' is removed via backspace", function() { @@ -301,7 +306,7 @@ describe('ContextState', () => { let baseState = new ContextState(existingContext, plainModel); let newContextMatch = baseState.analyzeTransition(existingContext, toWrapperDistribution(transform)); assert.isOk(newContextMatch?.final); - assert.deepEqual(newContextMatch?.final.tokenization.tokens.map(token => token.exampleInput), rawTokens); + assert.deepEqual(newContextMatch?.final.tokenizations[0].tokens.map(token => token.exampleInput), rawTokens); }); it("properly matches and aligns when an implied 'wordbreak' occurs (as when following \"'\")", function() { @@ -317,13 +322,14 @@ describe('ContextState', () => { let baseState = new ContextState(existingContext, plainModel); let newContextMatch = baseState.analyzeTransition(existingContext, toWrapperDistribution(transform)); assert.isNotNull(newContextMatch?.final); - assert.deepEqual(newContextMatch.final.tokenization.tokens.map(token => token.exampleInput), rawTokens); - assert.deepEqual(newContextMatch.final.tokenization.taillessTrueKeystroke, { insert: '', deleteLeft: 0 }); + assert.equal(newContextMatch.final.tokenizations.length, 1); + const finalTokenization = newContextMatch.final.tokenizations[0]; + assert.deepEqual(finalTokenization.tokens.map(token => token.exampleInput), rawTokens); + assert.deepEqual(finalTokenization.taillessTrueKeystroke, { insert: '', deleteLeft: 0 }); // The 'wordbreak' transform - let state = newContextMatch.final; - assert.equal(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchSpace.inputCount, 1); - assert.equal(state.tokenization.tokens[state.tokenization.tokens.length - 1].searchSpace.inputCount, 1); + assert.equal(finalTokenization.tokens[finalTokenization.tokens.length - 2].searchSpace.inputCount, 1); + assert.equal(finalTokenization.tokens[finalTokenization.tokens.length - 1].searchSpace.inputCount, 1); }) // Needs improved context-state management (due to 2x tokens) @@ -343,22 +349,17 @@ describe('ContextState', () => { let baseState = new ContextState(existingContext, plainModel); let newContextMatch = baseState.analyzeTransition(newContext, toWrapperDistribution(transform)); assert.isNotNull(newContextMatch?.final); - assert.deepEqual(newContextMatch.final.tokenization.tokens.map(token => token.exampleInput), rawTokens); + assert.equal(newContextMatch.final.tokenizations.length, 1); + const finalTokenization = newContextMatch.final.tokenizations[0]; + assert.deepEqual(finalTokenization.tokens.map(token => token.exampleInput), rawTokens); // We want to preserve the added whitespace when predicting a token that 
follows after it. - assert.deepEqual(newContextMatch.final.tokenization.taillessTrueKeystroke, { insert: ' ', deleteLeft: 0 }); + assert.deepEqual(finalTokenization.taillessTrueKeystroke, { insert: ' ', deleteLeft: 0 }); // The 'wordbreak' transform - let state = newContextMatch.final; - assert.equal(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchSpace.inputCount, 1); + assert.equal(finalTokenization.tokens[finalTokenization.tokens.length - 2].searchSpace.inputCount, 1); assert.equal( - state.tokenization.tokens[state.tokenization.tokens.length - 1].searchSpace.inputCount, 1 + finalTokenization.tokens[finalTokenization.tokens.length - 1].searchSpace.inputCount, 1 ); - - // if(!newContextMatch.final.tokenization.alignment.canAlign) { - // assert.fail("context alignment failed"); - // } - // assert.equal(newContextMatch.final.tokenization.alignment.leadTokenShift, -2); - // assert.equal(newContextMatch.final.tokenization.alignment.tailTokenShift, 2); }); it("properly matches and aligns when initial token is modified AND a 'wordbreak' is added'", function() { @@ -374,15 +375,16 @@ describe('ContextState', () => { let baseState = new ContextState(existingContext, plainModel); let newContextMatch = baseState.analyzeTransition(existingContext, [{sample: transform, p: 1}]); assert.isNotNull(newContextMatch?.final); - assert.deepEqual(newContextMatch.final.tokenization.tokens.map(token => token.exampleInput), rawTokens); + assert.equal(newContextMatch.final.tokenizations.length, 1); + const finalTokenization = newContextMatch.final.tokenizations[0]; + assert.deepEqual(finalTokenization.tokens.map(token => token.exampleInput), rawTokens); // We want to preserve all text preceding the new token when applying a suggestion. - assert.deepEqual(newContextMatch.final.tokenization.taillessTrueKeystroke, { insert: 'd ', deleteLeft: 0}); + assert.deepEqual(finalTokenization.taillessTrueKeystroke, { insert: 'd ', deleteLeft: 0}); // The 'wordbreak' transform - let state = newContextMatch.final; - assert.equal(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchSpace.inputCount, 1); + assert.equal(finalTokenization.tokens[finalTokenization.tokens.length - 2].searchSpace.inputCount, 1); assert.equal( - state.tokenization.tokens[state.tokenization.tokens.length - 1].searchSpace.inputCount, 1 + finalTokenization.tokens[finalTokenization.tokens.length - 1].searchSpace.inputCount, 1 ); }); @@ -399,14 +401,15 @@ describe('ContextState', () => { let baseState = new ContextState(existingContext, plainModel); let newContextMatch = baseState.analyzeTransition(existingContext, [{sample: transform, p: 1}]); assert.isNotNull(newContextMatch?.final); - assert.deepEqual(newContextMatch.final.tokenization.tokens.map(token => token.exampleInput), rawTokens); + assert.equal(newContextMatch.final.tokenizations.length, 1); + const finalTokenization = newContextMatch.final.tokenizations[0]; + assert.deepEqual(finalTokenization.tokens.map(token => token.exampleInput), rawTokens); // We want to preserve all text preceding the new token when applying a suggestion. 
- assert.deepEqual(newContextMatch.final.tokenization.taillessTrueKeystroke, { insert: 'tor ', deleteLeft: 0 }); + assert.deepEqual(finalTokenization.taillessTrueKeystroke, { insert: 'tor ', deleteLeft: 0 }); // The 'wordbreak' transform - let state = newContextMatch.final; - assert.equal(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchSpace.inputCount, 1); - assert.equal(state.tokenization.tokens[state.tokenization.tokens.length - 1].searchSpace.inputCount, 1); + assert.equal(finalTokenization.tokens[finalTokenization.tokens.length - 2].searchSpace.inputCount, 1); + assert.equal(finalTokenization.tokens[finalTokenization.tokens.length - 1].searchSpace.inputCount, 1); }); it.skip('handles case where tail token is split into three rather than two', function() { @@ -432,7 +435,7 @@ describe('ContextState', () => { let problemContextMatch = baseState.analyzeTransition({left: "text'", startOfBuffer: true, endOfBuffer: true}, [{sample: transform, p: 1}]); assert.isNotNull(problemContextMatch); - assert.deepEqual(problemContextMatch.final.tokenization.exampleInput, ['text', '\'', '"']); + assert.deepEqual(problemContextMatch.final.tokenizations[0].exampleInput, ['text', '\'', '"']); }); }); }); diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-token.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-token.tests.ts index c9338228e26..fcb2cbabab2 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-token.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-token.tests.ts @@ -121,36 +121,45 @@ describe('ContextToken', function() { const srcTransform = { insert: "can't", deleteLeft: 0, deleteRight: 0, id: 1 }; const srcSubsetId = generateSubsetId(); - const token1 = new ContextToken(plainModel); - const token2 = new ContextToken(plainModel); - const token3 = new ContextToken(plainModel); + let token1 = new ContextToken(plainModel); + let token2 = new ContextToken(plainModel); + let token3 = new ContextToken(plainModel); - token1.addInput({ - segment: { - transitionId: srcTransform.id, - start: 0 - }, - bestProbFromSet: 1, - subsetId: srcSubsetId - }, [{sample: {insert: 'can', deleteLeft: 0, deleteRight: 0, id: 1}, p: 1}]); - - token2.addInput({ - segment: { - transitionId: srcTransform.id, - start: 3 - }, - bestProbFromSet: 1, - subsetId: srcSubsetId - }, [{sample: {insert: "'", deleteLeft: 0, deleteRight: 0, id: 1}, p: 1}]); - - token3.addInput({ - segment: { - transitionId: srcTransform.id, - start: 4 - }, - bestProbFromSet: 1, - subsetId: srcSubsetId - }, [{sample: {insert: 't', deleteLeft: 0, deleteRight: 0, id: 1}, p: 1}]); + token1 = new ContextToken(new SearchPath( + token1.searchSpace, + [{sample: {insert: 'can', deleteLeft: 0, deleteRight: 0, id: 1}, p: 1}], { + segment: { + transitionId: srcTransform.id, + start: 0 + }, + bestProbFromSet: 1, + subsetId: srcSubsetId + } + )); + + token2 = new ContextToken(new SearchPath( + token2.searchSpace, + [{sample: {insert: "'", deleteLeft: 0, deleteRight: 0, id: 1}, p: 1}], { + segment: { + transitionId: srcTransform.id, + start: 3 + }, + bestProbFromSet: 1, + subsetId: srcSubsetId + } + )); + + token3 = new ContextToken(new SearchPath( + token3.searchSpace, + [{sample: {insert: 't', deleteLeft: 0, deleteRight: 0, id: 1}, p: 1}], { + segment: { + transitionId: srcTransform.id, + start: 4 + }, + bestProbFromSet: 1, + subsetId: srcSubsetId + } + )); const merged = 
ContextToken.merge([token1, token2, token3]); assert.equal(merged.exampleInput, "can't"); @@ -179,67 +188,85 @@ describe('ContextToken', function() { ]; // apples - const token1 = new ContextToken(plainModel); + let token1 = new ContextToken(plainModel); // and - const token2 = new ContextToken(plainModel); + let token2 = new ContextToken(plainModel); // sour - const token3 = new ContextToken(plainModel); + let token3 = new ContextToken(plainModel); // grapes - const token4 = new ContextToken(plainModel); - const tokensToMerge = [token1, token2, token3, token4] + let token4 = new ContextToken(plainModel); - token1.addInput({ - segment: { - transitionId: srcTransforms[0].id, - start: 0 - }, - bestProbFromSet: 1, - subsetId: srcSubsetIds[0] - }, [{sample: srcTransforms[0], p: 1}]); - token1.addInput({ - segment: { - transitionId: srcTransforms[1].id, - start: 0 - }, - bestProbFromSet: 1, - subsetId: srcSubsetIds[1] - }, [{sample: {insert: 's', deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}]); - - token2.addInput({ - segment: { - transitionId: srcTransforms[1].id, - start: 1 - }, - bestProbFromSet: 1, - subsetId: srcSubsetIds[1] - }, [{sample: {insert: "and", deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}]); - - token3.addInput({ - segment: { - transitionId: srcTransforms[1].id, - start: 4 - }, - bestProbFromSet: 1, - subsetId: srcSubsetIds[1] - }, [{sample: {insert: 's', deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}]); - token3.addInput({ - segment: { - transitionId: srcTransforms[2].id, - start: 0 - }, - bestProbFromSet: 1, - subsetId: srcSubsetIds[2] - }, [{sample: srcTransforms[2], p: 1}]); + token1 = new ContextToken(new SearchPath( + token1.searchSpace, + [{sample: srcTransforms[0], p: 1}], { + segment: { + transitionId: srcTransforms[0].id, + start: 0 + }, + bestProbFromSet: 1, + subsetId: srcSubsetIds[0] + } + )); + token1 = new ContextToken(new SearchPath( + token1.searchSpace, + [{sample: {insert: 's', deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}], { + segment: { + transitionId: srcTransforms[1].id, + start: 0 + }, + bestProbFromSet: 1, + subsetId: srcSubsetIds[1] + } + )); - token4.addInput({ - segment: { - transitionId: srcTransforms[3].id, - start: 0 - }, - bestProbFromSet: 1, - subsetId: srcSubsetIds[3] - }, [{sample: srcTransforms[3], p: 1}]); + token2 = new ContextToken(new SearchPath( + token2.searchSpace, + [{sample: {insert: "and", deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}], { + segment: { + transitionId: srcTransforms[1].id, + start: 1 + }, + bestProbFromSet: 1, + subsetId: srcSubsetIds[1] + } + )); + token3 = new ContextToken(new SearchPath( + token3.searchSpace, + [{sample: {insert: 's', deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}], { + segment: { + transitionId: srcTransforms[1].id, + start: 4 + }, + bestProbFromSet: 1, + subsetId: srcSubsetIds[1] + } + )); + token3 = new ContextToken(new SearchPath( + token3.searchSpace, + [{sample: srcTransforms[2], p: 1}], { + segment: { + transitionId: srcTransforms[2].id, + start: 0 + }, + bestProbFromSet: 1, + subsetId: srcSubsetIds[2] + } + )); + + token4 = new ContextToken(new SearchPath( + token4.searchSpace, + [{sample: srcTransforms[3], p: 1}], { + segment: { + transitionId: srcTransforms[3].id, + start: 0 + }, + bestProbFromSet: 1, + subsetId: srcSubsetIds[3] + } + )); + + const tokensToMerge = [token1, token2, token3, token4]; const merged = ContextToken.merge(tokensToMerge); assert.equal(merged.exampleInput, "applesandsourgrapes"); assert.deepEqual(merged.inputSegments, srcTransforms.map((t, i) => ({ @@ -267,68 +294,86 @@ 
describe('ContextToken', function() { generateSubsetId() ]; - // apples - const token1 = new ContextToken(plainModel); + // apples + let token1 = new ContextToken(plainModel); // and - const token2 = new ContextToken(plainModel); + let token2 = new ContextToken(plainModel); // sour - const token3 = new ContextToken(plainModel); + let token3 = new ContextToken(plainModel); // grapes - const token4 = new ContextToken(plainModel); - const tokensToMerge = [token1, token2, token3, token4] + let token4 = new ContextToken(plainModel); - token1.addInput({ - segment: { - transitionId: srcTransforms[0].id, - start: 0 - }, - bestProbFromSet: 1, - subsetId: srcSubsetIds[0] - }, [{sample: srcTransforms[0], p: 1}]); - token1.addInput({ - segment: { - transitionId: srcTransforms[1].id, - start: 0 - }, - bestProbFromSet: 1, - subsetId: srcSubsetIds[1] - }, [{sample: {insert: toMathematicalSMP('s'), deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}]); - - token2.addInput({ - segment: { - transitionId: srcTransforms[1].id, - start: 1 - }, - bestProbFromSet: 1, - subsetId: srcSubsetIds[1] - }, [{sample: {insert: toMathematicalSMP("and"), deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}]); - - token3.addInput({ - segment: { - transitionId: srcTransforms[1].id, - start: 4 - }, - bestProbFromSet: 1, - subsetId: srcSubsetIds[1] - }, [{sample: {insert: toMathematicalSMP('s'), deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}]); - token3.addInput({ - segment: { - transitionId: srcTransforms[2].id, - start: 0 - }, - bestProbFromSet: 1, - subsetId: srcSubsetIds[2] - }, [{sample: srcTransforms[2], p: 1}]); + token1 = new ContextToken(new SearchPath( + token1.searchSpace, + [{sample: srcTransforms[0], p: 1}], { + segment: { + transitionId: srcTransforms[0].id, + start: 0 + }, + bestProbFromSet: 1, + subsetId: srcSubsetIds[0] + } + )); + token1 = new ContextToken(new SearchPath( + token1.searchSpace, + [{sample: {insert: toMathematicalSMP('s'), deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}], { + segment: { + transitionId: srcTransforms[1].id, + start: 0 + }, + bestProbFromSet: 1, + subsetId: srcSubsetIds[1] + } + )); - token4.addInput({ - segment: { - transitionId: srcTransforms[3].id, - start: 0 - }, - bestProbFromSet: 1, - subsetId: srcSubsetIds[3] - }, [{sample: srcTransforms[3], p: 1}]); + token2 = new ContextToken(new SearchPath( + token2.searchSpace, + [{sample: {insert: toMathematicalSMP("and"), deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}], { + segment: { + transitionId: srcTransforms[1].id, + start: 1 + }, + bestProbFromSet: 1, + subsetId: srcSubsetIds[1] + } + )); + + token3 = new ContextToken(new SearchPath( + token3.searchSpace, + [{sample: {insert: toMathematicalSMP('s'), deleteLeft: 0, deleteRight: 0, id: 2}, p: 1}], { + segment: { + transitionId: srcTransforms[1].id, + start: 4 + }, + bestProbFromSet: 1, + subsetId: srcSubsetIds[1] + } + )); + token3 = new ContextToken(new SearchPath( + token3.searchSpace, + [{sample: srcTransforms[2], p: 1}], { + segment: { + transitionId: srcTransforms[2].id, + start: 0 + }, + bestProbFromSet: 1, + subsetId: srcSubsetIds[2] + } + )); + token4 = new ContextToken(new SearchPath( + token4.searchSpace, + [{sample: srcTransforms[3], p: 1}], { + segment: { + transitionId: srcTransforms[3].id, + start: 0 + }, + bestProbFromSet: 1, + subsetId: srcSubsetIds[3] + } + )); + + const tokensToMerge = [token1, token2, token3, token4]; const merged = ContextToken.merge(tokensToMerge); assert.equal(merged.exampleInput, toMathematicalSMP("applesandsourgrapes")); assert.deepEqual(merged.inputSegments, 
srcTransforms.map((t, i) => ({ @@ -363,15 +408,18 @@ describe('ContextToken', function() { ] ] - const tokenToSplit = new ContextToken(plainModel); + let tokenToSplit = new ContextToken(plainModel); for(let i = 0; i < keystrokeDistributions.length; i++) { - tokenToSplit.addInput({ - segment: { - transitionId: keystrokeDistributions[i][0].sample.id, - start: 0 - }, bestProbFromSet: .75, - subsetId: generateSubsetId() - }, keystrokeDistributions[i]); + tokenToSplit = new ContextToken(new SearchPath( + tokenToSplit.searchSpace, + keystrokeDistributions[i], { + segment: { + transitionId: keystrokeDistributions[i][0].sample.id, + start: 0 + }, bestProbFromSet: .75, + subsetId: generateSubsetId() + } + )); }; tokenToSplit.searchSpace.hasInputs(keystrokeDistributions); @@ -405,16 +453,19 @@ describe('ContextToken', function() { const splitTextArray = ['big', 'large', 'transform']; const subsetId = generateSubsetId(); - const tokenToSplit = new ContextToken(plainModel); + let tokenToSplit = new ContextToken(plainModel); for(let i = 0; i < keystrokeDistributions.length; i++) { - tokenToSplit.addInput({ - segment: { - transitionId: keystrokeDistributions[i][0].sample.id, - start: 0 - }, - bestProbFromSet: 1, - subsetId - }, keystrokeDistributions[i]); + tokenToSplit = new ContextToken(new SearchPath( + tokenToSplit.searchSpace, + keystrokeDistributions[i], { + segment: { + transitionId: keystrokeDistributions[i][0].sample.id, + start: 0 + }, + bestProbFromSet: 1, + subsetId + } + )); }; assert.isTrue(tokenToSplit.searchSpace.hasInputs(keystrokeDistributions)); @@ -474,16 +525,19 @@ describe('ContextToken', function() { generateSubsetId() ]; - const tokenToSplit = new ContextToken(plainModel); + let tokenToSplit = new ContextToken(plainModel); for(let i = 0; i < keystrokeDistributions.length; i++) { - tokenToSplit.addInput({ - segment: { - transitionId: keystrokeDistributions[i][0].sample.id, - start: 0 - }, - bestProbFromSet: 1, - subsetId: subsetIds[i] - }, keystrokeDistributions[i]); + tokenToSplit = new ContextToken(new SearchPath( + tokenToSplit.searchSpace, + keystrokeDistributions[i], { + segment: { + transitionId: keystrokeDistributions[i][0].sample.id, + start: 0 + }, + bestProbFromSet: 1, + subsetId: subsetIds[i] + } + )); }; assert.equal(tokenToSplit.exampleInput, 'largelongtransforms'); @@ -595,16 +649,19 @@ describe('ContextToken', function() { generateSubsetId() ]; - const tokenToSplit = new ContextToken(plainModel); + let tokenToSplit = new ContextToken(plainModel); for(let i = 0; i < keystrokeDistributions.length; i++) { - tokenToSplit.addInput({ - segment: { - transitionId: keystrokeDistributions[i][0].sample.id, - start: 0 - }, - bestProbFromSet: 1, - subsetId: subsetIds[i] - }, keystrokeDistributions[i]); + tokenToSplit = new ContextToken(new SearchPath( + tokenToSplit.searchSpace, + keystrokeDistributions[i], { + segment: { + transitionId: keystrokeDistributions[i][0].sample.id, + start: 0 + }, + bestProbFromSet: 1, + subsetId: subsetIds[i] + } + )); }; assert.equal(tokenToSplit.exampleInput, toMathematicalSMP('largelongtransforms')); diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-tokenization.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-tokenization.tests.ts index f12f5b01499..52ad44bd4de 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-tokenization.tests.ts +++ 
b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-tokenization.tests.ts @@ -35,14 +35,10 @@ function toTransformToken(text: string, transformId?: number) { let idSeed = transformId === undefined ? TOKEN_TRANSFORM_SEED++ : transformId; let isWhitespace = text == ' '; let token = new ContextToken(plainModel); - const textAsTransform = { insert: text, deleteLeft: 0, id: idSeed }; - token.addInput({ - segment: { - transitionId: textAsTransform.id, - start: 0 - }, bestProbFromSet: 1, - subsetId: generateSubsetId() - }, [ { sample: textAsTransform, p: 1 } ]); + const textAsDist = [{sample: { insert: text, deleteLeft: 0, id: idSeed }, p: 1}]; + const space = new SearchPath(token.searchSpace, textAsDist, { segment: { transitionId: idSeed, start: 0 }, bestProbFromSet: 1, subsetId: generateSubsetId() }); + + token = new ContextToken(space); token.isWhitespace = isWhitespace; return token; } @@ -155,14 +151,6 @@ describe('ContextTokenization', function() { baseTokenization.tokens.map((token) => token.searchSpace) ); - // The `.searchSpace` instances will not be deep-equal; there are class properties - // that hold functions with closures, configured at runtime. - - // @ts-ignore - TS2704 b/c deleting a readonly property. - baseTokenization.tokens.forEach((token) => delete token.searchSpace); - // @ts-ignore - TS2704 b/c deleting a readonly property. - cloned.tokens.forEach((token) => delete token.searchSpace); - assert.deepEqual(cloned, baseTokenization); }); }); diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-tracker.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-tracker.tests.ts index f27ebd72f7d..accbd5e8716 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-tracker.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-tracker.tests.ts @@ -76,10 +76,11 @@ describe('ContextTracker', function() { assert.equal(postContextMatch.final.appliedSuggestionId, baseSuggestion.id); // Penultimate token corresponds to whitespace, which does not have a 'raw' representation. 
- assert.equal(postContextMatch.final.tokenization.tokens[postContextMatch.final.tokenization.tokens.length - 2].exampleInput, ' '); + assert.equal(postContextMatch.final.tokenizations.length, 1); + assert.equal(postContextMatch.final.tokenizations[0].tokens[postContextMatch.final.tokenizations[0].tokens.length - 2].exampleInput, ' '); // Final token is empty (follows a wordbreak) - assert.equal(postContextMatch.final.tokenization.tail.exampleInput, ''); + assert.equal(postContextMatch.final.tokenizations[0].tail.exampleInput, ''); }); }); }); \ No newline at end of file diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-transition.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-transition.tests.ts index 6185069c7b3..5cb19d124c2 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-transition.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/context-transition.tests.ts @@ -25,9 +25,12 @@ var plainModel = new TrieModel(jsonFixture('models/tries/english-1000'), function assertClonedStateMatch(a: ContextState, b: ContextState) { assert.notEqual(a, b); - assert.notEqual(a.tokenization, b.tokenization); - assert.notSameOrderedMembers(a.tokenization.tokens, b.tokenization.tokens); - assert.sameOrderedMembers(a.tokenization.exampleInput, b.tokenization.exampleInput); + assert.notEqual(a.tokenizations, b.tokenizations); + assert.equal(a.tokenizations.length, b.tokenizations.length); + for(let i = 0; i < a.tokenizations.length; i++) { + assert.notSameOrderedMembers(a.tokenizations[i].tokens, b.tokenizations[i].tokens); + assert.sameOrderedMembers(a.tokenizations[i].exampleInput, b.tokenizations[i].exampleInput); + } assert.deepEqual(a.suggestions, b.suggestions); } @@ -50,7 +53,7 @@ describe('ContextTransition', () => { const transition = new ContextTransition(baseState, 1); assert.sameOrderedMembers( - transition.base.tokenization.tokens.map((t) => t.exampleInput), + transition.base.tokenizations[0].tokens.map((t) => t.exampleInput), ['hello', ' ', 'world', ' ', ''] ); assert.equal(transition.transitionId, 1); @@ -67,7 +70,7 @@ describe('ContextTransition', () => { const transition = new ContextTransition(baseState, 1); assert.sameOrderedMembers( - transition.base.tokenization.tokens.map((t) => t.exampleInput), + transition.base.tokenizations[0].tokens.map((t) => t.exampleInput), ['hello', ' ', 'world', ' ', ''] ); @@ -143,17 +146,19 @@ describe('ContextTransition', () => { assert.notEqual(appliedTransition.base, transition); assert.isOk(appliedTransition.appended); assert.notEqual(appliedTransition.appended, transition); - assert.sameOrderedMembers(appliedTransition.base.final.tokenization.exampleInput, [ + assert.equal(appliedTransition.base.final.tokenizations.length, 1); + assert.sameOrderedMembers(appliedTransition.base.final.tokenizations[0].exampleInput, [ 'hello', ' ', 'world' ]); - assert.sameOrderedMembers(appliedTransition.appended.final.tokenization.exampleInput, [ + assert.equal(appliedTransition.appended.final.tokenizations.length, 1); + assert.sameOrderedMembers(appliedTransition.appended.final.tokenizations[0].exampleInput, [ 'hello', ' ', 'world', ' ', '' ]); assert.equal(appliedTransition.base.final.appliedSuggestionId, suggestions[0].id); assert.equal(appliedTransition.appended.final.appliedSuggestionId, suggestions[0].id); // 3 long, only last token was edited. 
- appliedTransition.base.final.tokenization.tokens.forEach((token, index) => { + appliedTransition.base.final.tokenizations[0].tokens.forEach((token, index) => { if(index >= 2) { assert.equal(token.appliedTransitionId, suggestions[0].transformId); } else { @@ -161,7 +166,7 @@ describe('ContextTransition', () => { } }); - appliedTransition.appended.final.tokenization.tokens.forEach((token, index) => { + appliedTransition.appended.final.tokenizations[0].tokens.forEach((token, index) => { if(index >= 2) { assert.equal(token.appliedTransitionId, suggestions[0].transformId); } else { @@ -227,17 +232,19 @@ describe('ContextTransition', () => { assert.notEqual(appliedTransition.base, transition); assert.isOk(appliedTransition.appended); assert.notEqual(appliedTransition.appended, transition); - assert.sameOrderedMembers(appliedTransition.base.final.tokenization.exampleInput, [ + assert.equal(appliedTransition.base.final.tokenizations.length, 1); + assert.sameOrderedMembers(appliedTransition.base.final.tokenizations[0].exampleInput, [ 'hello', ' ', 'world', ' ', 'the' ]); - assert.sameOrderedMembers(appliedTransition.appended.final.tokenization.exampleInput, [ + assert.equal(appliedTransition.appended.final.tokenizations.length, 1); + assert.sameOrderedMembers(appliedTransition.appended.final.tokenizations[0].exampleInput, [ 'hello', ' ', 'world', ' ', 'the', ' ', '' ]); assert.equal(appliedTransition.base.final.appliedSuggestionId, suggestions[0].id); assert.equal(appliedTransition.appended.final.appliedSuggestionId, suggestions[0].id); // 3 long, only last token was edited. - appliedTransition.base.final.tokenization.tokens.forEach((token, index) => { + appliedTransition.base.final.tokenizations[0].tokens.forEach((token, index) => { if(index >= 3) { assert.equal(token.appliedTransitionId, suggestions[0].transformId); } else { @@ -245,7 +252,7 @@ describe('ContextTransition', () => { } }); - appliedTransition.appended.final.tokenization.tokens.forEach((token, index) => { + appliedTransition.appended.final.tokenizations[0].tokens.forEach((token, index) => { if(index >= 3) { assert.equal(token.appliedTransitionId, suggestions[0].transformId); } else { diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/tokenization-subsets.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/tokenization-subsets.tests.ts index 3882a76b440..07927448d57 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/tokenization-subsets.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/context/tokenization-subsets.tests.ts @@ -16,7 +16,7 @@ import { LexicalModelTypes } from '@keymanapp/common-types'; import { deepCopy } from '@keymanapp/web-utils'; import { jsonFixture } from '@keymanapp/common-test-resources/model-helpers.mjs'; -import { buildEdgeWindow, ContextToken, ContextTokenization, models, precomputationSubsetKeyer, TokenizationTransitionEdits, TokenizationSubsetBuilder, generateSubsetId } from '@keymanapp/lm-worker/test-index'; +import { buildEdgeWindow, ContextToken, ContextTokenization, models, precomputationSubsetKeyer, TokenizationTransitionEdits, TokenizationSubsetBuilder, SearchPath } from '@keymanapp/lm-worker/test-index'; import Distribution = LexicalModelTypes.Distribution; import Transform = LexicalModelTypes.Transform; @@ -172,16 +172,11 @@ describe('precomputationSubsetKeyer', function() { [...tokenization.tokens, (() => { const token = new ContextToken(plainModel, 'da'); // source 
text: 'date' - token.addInput({ - segment: { - transitionId: 13, - start: 0 - }, bestProbFromSet: 1, - subsetId: generateSubsetId() - }, [ + const dist = [ {sample: {insert: 'te', deleteLeft: 0, id: 13}, p: 1} - ]); - return token; + ]; + const space = new SearchPath(token.searchSpace, dist, dist[0]); + return new ContextToken(space); })()], { insert: 's', deleteLeft: 0, deleteRight: 0 }, false @@ -204,16 +199,11 @@ describe('precomputationSubsetKeyer', function() { [...tokenization.tokens, (() => { const token = new ContextToken(plainModel, 'da'); // source text: 'date' - token.addInput({ - segment: { - transitionId: 13, - start: 0 - }, bestProbFromSet: 1, - subsetId: generateSubsetId() - }, [ - {sample: {insert: 't', deleteLeft: 0}, p: 1} - ]); - return token; + const dist = [ + {sample: {insert: 't', deleteLeft: 0, id: 13}, p: 1} + ]; + const space = new SearchPath(token.searchSpace, dist, dist[0]); + return new ContextToken(space); })()], { insert: 'es', deleteLeft: 0, deleteRight: 0, id: 14 }, false @@ -248,17 +238,15 @@ describe('precomputationSubsetKeyer', function() { ...buildEdgeWindow( [...tokenization.tokens, (() => { const token = new ContextToken(plainModel, 'da'); - token.isPartial = true; // source text: 'dat' - token.addInput({ - segment: { - transitionId: 13, - start: 0 - }, bestProbFromSet: 1, - subsetId: generateSubsetId() - }, [{sample: {insert: 'ts', deleteLeft: 0, id: 13}, p: 1} - ]); - return token; + const dist = [ + {sample: {insert: 'ts', deleteLeft: 0, id: 13}, p: 1} + ]; + const space = new SearchPath(token.searchSpace, dist, dist[0]); + let token2 = new ContextToken(space); + token2.isPartial = true; + + return token2; })()], { insert: 'e', deleteLeft: 1, deleteRight: 0, id: 14 }, false @@ -280,18 +268,15 @@ describe('precomputationSubsetKeyer', function() { ...buildEdgeWindow( [...tokenization.tokens, (() => { const token = new ContextToken(plainModel, 'da'); - token.isPartial = true; // source text: 'dat' - token.addInput({ - segment: { - transitionId: 13, - start: 0 - }, bestProbFromSet: 1, - subsetId: generateSubsetId() - }, [ + const dist = [ {sample: {insert: 't', deleteLeft: 0, id: 13}, p: 1} - ]); - return token; + ]; + const space = new SearchPath(token.searchSpace, dist, dist[0]); + let token2 = new ContextToken(space); + token2.isPartial = true; + + return token2; })()], { insert: 'e', deleteLeft: 0, deleteRight: 0, id: 14 }, false @@ -738,27 +723,25 @@ describe('TokenizationSubsetBuilder', function() { const trueSourceTransform: Transform = { insert: 'é', deleteLeft: 1, id: 13 }; - const fourCharTailToken = new ContextToken(baseTokenization.tail); - fourCharTailToken.addInput({ - segment: { - transitionId: 13, - start: 0 - }, bestProbFromSet: 1, - subsetId: generateSubsetId() - }, [ - { sample: trueSourceTransform, p: .6 } - ]); - - const fiveCharTailToken = new ContextToken(baseTokenization.tail); - fiveCharTailToken.addInput({ - segment: { - transitionId: 13, - start: 0 - }, bestProbFromSet: 1, - subsetId: generateSubsetId() - }, [ + let fourCharTailToken = new ContextToken(baseTokenization.tail); + let fourCharTailDist = [{sample: trueSourceTransform, p: .6}]; + let fourCharTailSpace = new SearchPath( + fourCharTailToken.searchSpace, + fourCharTailDist, + fourCharTailDist[0] + ); + fourCharTailToken = new ContextToken(fourCharTailSpace); + + let fiveCharTailToken = new ContextToken(baseTokenization.tail); + let fiveCharTailDist = [ { sample: { insert: 's', deleteLeft: 0, id: 13 }, p: .4 } - ]); + ]; + let fiveCharTailSpace = new SearchPath( + 
fiveCharTailToken.searchSpace, + fiveCharTailDist, + fiveCharTailDist[0] + ); + fiveCharTailToken = new ContextToken(fiveCharTailSpace); const subsetBuilder = new TokenizationSubsetBuilder(); const fourCharTokenization = new ContextTokenization([...baseTokenization.tokens.slice(0, -1), fourCharTailToken]); @@ -787,27 +770,25 @@ describe('TokenizationSubsetBuilder', function() { const trueSourceTransform: Transform = { insert: 'é', deleteLeft: 1, id: 13 }; - const twoCharTailToken = new ContextToken(baseTokenization.tail); - twoCharTailToken.addInput({ - segment: { - transitionId: 13, - start: 0 - }, bestProbFromSet: .6, - subsetId: generateSubsetId() - }, [ - { sample: trueSourceTransform, p: .6 } - ]); - - const threeCharTailToken = new ContextToken(baseTokenization.tail); - threeCharTailToken.addInput({ - segment: { - transitionId: 13, - start: 0 - }, bestProbFromSet: .6, - subsetId: generateSubsetId() - }, [ - { sample: { insert: 'a', deleteLeft: 0, id: 13}, p: .4 } - ]); + let twoCharTailToken = new ContextToken(baseTokenization.tail); + let twoCharTailDist = [{sample: trueSourceTransform, p: .6}]; + let twoCharTailSpace = new SearchPath( + twoCharTailToken.searchSpace, + twoCharTailDist, + twoCharTailDist[0] + ); + twoCharTailToken = new ContextToken(twoCharTailSpace); + + let threeCharTailToken = new ContextToken(baseTokenization.tail); + let threeCharTailDist = [ + { sample: { insert: 'a', deleteLeft: 0, id: 13 }, p: .4 } + ]; + let threeCharTailSpace = new SearchPath( + threeCharTailToken.searchSpace, + threeCharTailDist, + threeCharTailDist[0] + ); + threeCharTailToken = new ContextToken(threeCharTailSpace); const subsetBuilder = new TokenizationSubsetBuilder(); const twoCharTokenization = new ContextTokenization([...baseTokenization.tokens.slice(0, -1), twoCharTailToken]); diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/determine-suggestion-alignment.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/determine-suggestion-alignment.tests.ts index be81e711610..d1341bb6af0 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/determine-suggestion-alignment.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/determine-suggestion-alignment.tests.ts @@ -48,7 +48,7 @@ describe('determineSuggestionAlignment', () => { transition.finalize(transition.base, [{sample: { insert: '', deleteLeft: 0 }, p: 1}]); // transition, model - const results = determineSuggestionAlignment(transition, transition.final.tokenization, plainCasedModel); + const results = determineSuggestionAlignment(transition, transition.final.tokenizations[0], plainCasedModel); assert.deepEqual(results.predictionContext, context); assert.equal(results.deleteLeft, "techn".length); @@ -65,7 +65,7 @@ describe('determineSuggestionAlignment', () => { const transition = baseState.analyzeTransition(context, [{sample: { insert: '', deleteLeft: 1 }, p: 1}]) // transition, model - const results = determineSuggestionAlignment(transition, transition.final.tokenization, plainCasedModel); + const results = determineSuggestionAlignment(transition, transition.final.tokenizations[0], plainCasedModel); assert.deepEqual(results.predictionContext, context); assert.equal(results.deleteLeft, "tech".length + 1 /* for the deleted whitespace */); @@ -82,7 +82,7 @@ describe('determineSuggestionAlignment', () => { const transition = baseState.analyzeTransition(context, 
[{sample: { insert: 'n', deleteLeft: 1 }, p: 1}]) // transition, model - const results = determineSuggestionAlignment(transition, transition.final.tokenization, plainCasedModel); + const results = determineSuggestionAlignment(transition, transition.final.tokenizations[0], plainCasedModel); assert.deepEqual(results.predictionContext, context); assert.equal(results.deleteLeft, "techn".length + 1 /* for the deleted whitespace */); diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/determine-suggestion-context-transition.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/determine-suggestion-context-transition.tests.ts index 1008699b3ac..98fbb964f79 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/determine-suggestion-context-transition.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/determine-suggestion-context-transition.tests.ts @@ -103,12 +103,13 @@ describe('determineContextTransition', () => { assert.isOk(transition); assert.equal(transition, tracker.latest); assert.isFalse(warningEmitterSpy.called); - assert.sameOrderedMembers(transition.final.tokenization.exampleInput, ['this', ' ', 'is', ' ', 'for', ' ', 'techn']); - assert.isOk(transition.final.tokenization.transitionEdits); + assert.equal(transition.final.tokenizations.length, 1); + assert.sameOrderedMembers(transition.final.tokenizations[0].exampleInput, ['this', ' ', 'is', ' ', 'for', ' ', 'techn']); + assert.isOk(transition.final.tokenizations[0].transitionEdits); assert.equal(transition.final.context.left, targetContext.left); assert.equal(transition.final.context.right ?? "", targetContext.right ?? ""); assert.sameDeepOrderedMembers(transition.inputDistribution, inputDistribution); - assert.isNotOk(transition.final.tokenization.taillessTrueKeystroke); + assert.isNotOk(transition.final.tokenizations[0].taillessTrueKeystroke); assert.equal(transition.transitionId, 1); } finally { warningEmitterSpy.restore(); @@ -225,8 +226,9 @@ describe('determineContextTransition', () => { assert.notEqual(extendingTransition, baseTransition); // These values support delayed reversions. - assert.equal(extendingTransition.final.tokenization.tokens[6].appliedTransitionId, pred_testing.transformId); - assert.equal(extendingTransition.final.tokenization.tokens[7].appliedTransitionId, pred_testing.transformId); + assert.equal(extendingTransition.final.tokenizations.length, 1); + assert.equal(extendingTransition.final.tokenizations[0].tokens[6].appliedTransitionId, pred_testing.transformId); + assert.equal(extendingTransition.final.tokenizations[0].tokens[7].appliedTransitionId, pred_testing.transformId); // We start a new token here, rather than continue (and/or replace) an old one; // this shouldn't be set here yet.
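
For readers tracing the refactor: the hunks above repeatedly replace `token.addInput(...)` with the same three-step construction — build a one-entry transform distribution, wrap it in a `SearchPath` rooted at the token's `searchSpace`, and construct a fresh `ContextToken` from that path. The sketch below (not part of the patch) factors out that pattern; the helper name `tokenFromDistribution` is hypothetical, while the `SearchPath(searchSpace, distribution, bestEntry)` and `ContextToken(path)` constructor shapes are inferred from the usage shown in these diffs.

import { LexicalModelTypes } from '@keymanapp/common-types';
import { ContextToken, SearchPath } from '@keymanapp/lm-worker/test-index';

import Distribution = LexicalModelTypes.Distribution;
import Transform = LexicalModelTypes.Transform;

/**
 * Builds a ContextToken whose input history is seeded from a single
 * keystroke distribution, mirroring the refactored test setup above.
 * (Hypothetical helper; assumes the first distribution entry is the
 * "true" input, matching how the tests pass `dist[0]` as the third
 * SearchPath argument.)
 */
function tokenFromDistribution(seed: ContextToken, dist: Distribution<Transform>): ContextToken {
  const path = new SearchPath(seed.searchSpace, dist, dist[0]);
  return new ContextToken(path);
}

// Example usage, echoing the 'da' + 'te' fixture in tokenization-subsets.tests.ts:
//   const dist = [{ sample: { insert: 'te', deleteLeft: 0, id: 13 }, p: 1 }];
//   const token = tokenFromDistribution(new ContextToken(plainModel, 'da'), dist);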