Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import { ExecutionTimer, STANDARD_TIME_BETWEEN_DEFERS } from './execution-timer.
import { QUEUE_NODE_COMPARATOR, SearchQuotientSpur } from './search-quotient-spur.js';
import { PathResult } from './search-quotient-node.js';
import { subsetByChar, subsetByInterval, mergeSubset, TransformSubset } from '../transform-subsets.js';
import TransformUtils from '../transformUtils.js';

import Distribution = LexicalModelTypes.Distribution;
import LexiconTraversal = LexicalModelTypes.LexiconTraversal;
Expand All @@ -33,6 +34,13 @@ enum TimedTaskTypes {
CORRECTING = 2
}

enum PathEdge {
ROOT = 'root',
INSERTION = 'insertion',
DELETION = 'deletion',
SUBSTITUTION = 'substitution'
}

/**
* This type models a partially-processed subset of Transforms to be processed
* as a batch due to sharing similar properties.
Expand Down Expand Up @@ -136,9 +144,15 @@ export class SearchNode {
*/
readonly spaceId: number;

/**
* Notes the edit operation used for the most recent edge in the node's
* represented search path.
*/
private readonly lastEdgeType: PathEdge;

constructor(rootTraversal: LexiconTraversal, spaceId: number, toKey?: (arg0: string) => string);
constructor(node: SearchNode, spaceId?: number);
constructor(param1: LexiconTraversal | SearchNode, spaceId?: number, toKey?: ((arg0: string) => string)) {
constructor(node: SearchNode, spaceId?: number, edgeType?: PathEdge);
constructor(param1: LexiconTraversal | SearchNode, spaceId?: number, param2?: PathEdge | ((arg0: string) => string)) {
if(param1 instanceof SearchNode) {
const priorNode = param1;

Expand All @@ -149,6 +163,8 @@ export class SearchNode {
this.priorInput = priorNode.priorInput.slice(0);
this.matchedTraversals = priorNode.matchedTraversals.slice();

this.lastEdgeType = param2 as PathEdge;

// Do NOT copy over _inputCost; this is a helper-constructor for methods
// building new nodes... which will have a different cost.
delete this._inputCost;
Expand All @@ -160,8 +176,10 @@ export class SearchNode {
this.calculation = new ClassicalDistanceCalculation();
this.matchedTraversals = [param1];
this.priorInput = [];
const toKey = param2 as ((arg0: string) => string);
this.toKey = toKey || (x => x);
this.spaceId = spaceId;
this.lastEdgeType = PathEdge.ROOT;
}
}

Expand Down Expand Up @@ -250,12 +268,28 @@ export class SearchNode {
throw new Error("Invalid state: will not take new input while still processing Transform subset");
}

// Do not create insertion nodes after an empty transform; only before.
// "Before" and "after" are identical for empty transforms - why duplicate?
//
// Also, do not create insertion nodes directly after deletions; that's
// essentially a substitution. We'll have an equivalent edge already built
// with higher-accuracy modeling.
//
// Following previous insertions is fine, as is following a proper
// match/substitution.
if(this.priorInput.length > 0) {
const priorInput = this.priorInput[this.priorInput.length - 1].sample;
if(TransformUtils.isEmpty(priorInput) || this.lastEdgeType == PathEdge.DELETION) {
return [];
}
}

let edges: SearchNode[] = [];

for(let lexicalChild of this.currentTraversal.children()) {
let childCalc = this.calculation.addMatchChar(lexicalChild.char);

let searchChild = new SearchNode(this);
let searchChild = new SearchNode(this, this.spaceId, PathEdge.INSERTION);
searchChild.calculation = childCalc;
searchChild.priorInput = this.priorInput;
searchChild.matchedTraversals.push(lexicalChild.traversal());
Expand Down Expand Up @@ -367,7 +401,7 @@ export class SearchNode {

keySet.add(char);

const node = new SearchNode(this);
const node = new SearchNode(this, this.spaceId, this.lastEdgeType);
node.calculation = calculation;
node.partialEdge.subsetSubindex++;
// Append the newly-processed char to the subset's `insert` string.
Expand Down Expand Up @@ -415,7 +449,7 @@ export class SearchNode {
// `insert` string.
childSubset.insert += SENTINEL_CODE_UNIT;

const node = new SearchNode(this);
const node = new SearchNode(this, this.spaceId, this.lastEdgeType);
node.calculation = childCalc;
node.matchedTraversals.push(childTraversal);
node.partialEdge.subsetSubindex++;
Expand Down Expand Up @@ -464,7 +498,7 @@ export class SearchNode {
continue;
}

const node = new SearchNode(this, edgeId);
const node = new SearchNode(this, edgeId, isSubstitution ? PathEdge.SUBSTITUTION : PathEdge.DELETION);
node.calculation = edgeCalc;
node.partialEdge = {
doSubsetMatching: isSubstitution,
Expand Down Expand Up @@ -514,31 +548,6 @@ export class SearchNode {
return this.setupSubsetProcessing(dist, true, edgeId);
}

/**
* A key uniquely identifying identical search path nodes. Replacement of a keystroke's
* text in a manner that results in identical path to a different keystroke should result
* in both path nodes sharing the same pathKey value.
*
* This mostly matters after re-use of a SearchSpace when a token is extended; children of
* completed paths are possible, and so children can be re-built in such a case.
*/
get pathKey(): string {
// Note: deleteLefts apply before inserts, so order them that way.
let inputString = this.priorInput.map((value) => '-' + value.sample.deleteLeft + '+' + value.sample.insert).join('');
const partialEdge = this.partialEdge;
// Make sure the subset progress contributes to the key!
if(partialEdge) {
const subset = partialEdge.transformSubset;
// We need a copy of at least one insert string in the subset here. Without that, all subsets
// at the same level of the search, against the same match, look identical - not cool!
inputString += `-${subset.entries[0].sample.deleteLeft}+<${subset.key},${partialEdge.subsetSubindex},${subset.entries[0].sample.insert}>`;
}
let matchString = this.calculation.matchSequence.join('');

// TODO: might should also track diagonalWidth.
return inputString + ' @@ ' + matchString;
}

/**
* A key uniquely identifying identical match sequences. Reaching the same net result
* by different paths should result in identical `resultKey` values.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,16 +52,6 @@ export class SearchQuotientSpur implements SearchQuotientNode {
*/
public returnedValues?: {[resultKey: string]: SearchNode} = {}; // TODO: make it private again!

/**
* Acts as a Map that prevents duplicating a correction-search path if reached
* more than once.
*/
protected get processedEdgeSet(): {[pathKey: string]: boolean} {
return this._processedEdgeSet;
}

private _processedEdgeSet?: {[pathKey: string]: boolean} = {};

/**
* Provides a heuristic for the base cost at each depth if the best
* individual input were taken at that level.
Expand Down Expand Up @@ -245,7 +235,7 @@ export class SearchQuotientSpur implements SearchQuotientNode {

const result = this.parentPath.handleNextNode();

if(result.type == 'complete' && !this.processedEdgeSet[result.finalNode.pathKey]) {
if(result.type == 'complete') {
this.addEdgesForNodes(result.finalNode);
}

Expand All @@ -262,14 +252,6 @@ export class SearchQuotientSpur implements SearchQuotientNode {
cost: currentNode.currentCost
}

// Have we already processed a matching edge? If so, skip it.
// We already know the previous edge is of lower cost.
if(this.processedEdgeSet[currentNode.pathKey]) {
return unmatchedResult;
} else {
this.processedEdgeSet[currentNode.pathKey] = true;
}

// Stage 1: filter out nodes/edges we want to prune

// Forbid a raw edit-distance of greater than 2.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -341,31 +341,130 @@ describe('Correction Distance Modeler', () => {
});
});

// Consider adding more, deeper?
it('builds insertion edges based on lexicon, from root', () => {
const rootTraversal = testModel.traverseFromRoot();
assert.isNotEmpty(rootTraversal);
describe('buildInsertionEdges()', () => {
it('builds insertion edges from (empty) root', () => {
const rootTraversal = testModel.traverseFromRoot();
assert.isNotEmpty(rootTraversal);

const rootSeed = SEARCH_EDGE_SEED++;
const rootNode = new correction.SearchNode(rootTraversal, rootSeed);
assert.equal(rootNode.calculation.getHeuristicFinalCost(), 0);
const rootSeed = SEARCH_EDGE_SEED++;
const rootNode = new correction.SearchNode(rootTraversal, rootSeed);
assert.equal(rootNode.calculation.getHeuristicFinalCost(), 0);

const edges = rootNode.buildInsertionEdges();
assert.isAbove(edges.length, 0);
const edges = rootNode.buildInsertionEdges();
assert.isAbove(edges.length, 0);

let expectedChildCount = 0;
for(const child of rootTraversal.children()) {
expectedChildCount++;
let expectedChildCount = 0;
for(const child of rootTraversal.children()) {
expectedChildCount++;

let childEdge = edges.filter(value => lastEntry(value.calculation.matchSequence) == child.char)[0];
assert.isOk(childEdge);
assert.isEmpty(childEdge.priorInput);
assert.isEmpty(childEdge.calculation.inputSequence);
assert.isAbove(childEdge.currentCost, 0);
assert.equal(childEdge.spaceId, rootSeed);
}
let childEdge = edges.filter(value => lastEntry(value.calculation.matchSequence) == child.char)[0];
assert.isOk(childEdge);
assert.isEmpty(childEdge.priorInput);
assert.isEmpty(childEdge.calculation.inputSequence);
assert.isAbove(childEdge.currentCost, 0);
assert.equal(childEdge.spaceId, rootSeed);
}

assert.equal(edges.length, expectedChildCount);
assert.equal(edges.length, expectedChildCount);
});

it('builds insertion edges directly after prior insertion', () => {
const rootTraversal = testModel.traverseFromRoot();
assert.isNotEmpty(rootTraversal);

const rootSeed = SEARCH_EDGE_SEED++;
const rootNode = new correction.SearchNode(rootTraversal, rootSeed);
assert.equal(rootNode.calculation.getHeuristicFinalCost(), 0);

const edges = rootNode.buildInsertionEdges();
assert.isAbove(edges.length, 0);

const firstChild = edges[0];
const childEdges = firstChild.buildInsertionEdges();

let expectedChildCount = 0;
for(const child of firstChild.currentTraversal.children()) {
expectedChildCount++;

let childEdge = edges.filter(value => lastEntry(value.calculation.matchSequence) == child.char)[0];
assert.isOk(childEdge);
assert.isEmpty(childEdge.priorInput);
assert.isEmpty(childEdge.calculation.inputSequence);
assert.isAbove(childEdge.currentCost, 0);
assert.equal(childEdge.spaceId, rootSeed);
}

assert.equal(childEdges.length, expectedChildCount);
});

it('does not build insertion edges after a deletion', () => {
const rootTraversal = testModel.traverseFromRoot();
assert.isNotEmpty(rootTraversal);

const rootSeed = SEARCH_EDGE_SEED++;
const rootNode = new correction.SearchNode(rootTraversal, rootSeed);
assert.equal(rootNode.calculation.getHeuristicFinalCost(), 0);

const edges = rootNode.buildDeletionEdges([{
sample: { insert: 'd', deleteLeft: 0 },
p: 1
}], SEARCH_EDGE_SEED++).flatMap(n => n.processSubsetEdge());
assert.isAbove(edges.length, 0);
const firstChild = edges[0];

const insertEdgesAfterDelete = firstChild.buildInsertionEdges();
assert.equal(insertEdgesAfterDelete.length, 0);
});

it('does not build insertion edges after receiving an empty transform as input', () => {
const rootTraversal = testModel.traverseFromRoot();
assert.isNotEmpty(rootTraversal);

const rootSeed = SEARCH_EDGE_SEED++;
const rootNode = new correction.SearchNode(rootTraversal, rootSeed);
assert.equal(rootNode.calculation.getHeuristicFinalCost(), 0);

const edges = rootNode.buildSubstitutionEdges([{
// intentionally empty!
sample: { insert: '', deleteLeft: 0 },
p: 1
}], SEARCH_EDGE_SEED++).flatMap(n => n.processSubsetEdge());
assert.isAbove(edges.length, 0);
const firstChild = edges[0];
const insertsAfterEmpty = firstChild.buildInsertionEdges();

assert.equal(insertsAfterEmpty.length, 0);
});

it('builds insertion edges after a substitution', () => {
const rootTraversal = testModel.traverseFromRoot();
assert.isNotEmpty(rootTraversal);

const rootSeed = SEARCH_EDGE_SEED++;
const rootNode = new correction.SearchNode(rootTraversal, rootSeed);
assert.equal(rootNode.calculation.getHeuristicFinalCost(), 0);

const subSeed = SEARCH_EDGE_SEED++;
const edges = rootNode.buildSubstitutionEdges([{
sample: { insert: 't', deleteLeft: 0 },
p: 1
}], subSeed).flatMap(n => n.processSubsetEdge());
assert.isAbove(edges.length, 0);
const firstChild = edges[0];
const insertsAfterInput = firstChild.buildInsertionEdges();

let expectedChildCount = 0;
for(const child of firstChild.currentTraversal.children()) {
expectedChildCount++;

let childEdge = edges.filter(value => lastEntry(value.calculation.matchSequence) == child.char)[0];
assert.isOk(childEdge);
assert.equal(childEdge.priorInput.length, 1);
assert.equal(childEdge.spaceId, subSeed);
}

assert.equal(insertsAfterInput.length, expectedChildCount);
});
});

describe('buildDeletionEdges @ token root', () => {
Expand Down