Skip to content

Commit 5fd4da0

Browse files
feat(*): drop usage of as helper from vectorOf and test
Co-authored-by: Rachna <[email protected]>
1 parent 59e259b commit 5fd4da0

File tree

2 files changed

+31
-5
lines changed

2 files changed

+31
-5
lines changed

src/wink-nlp.js

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -433,11 +433,21 @@ var nlp = function ( theModel, pipe, wordVectorsJSON = null ) {
433433
methods.its = itsHelpers;
434434
methods.as = asHelpers;
435435
// Vector of a token method.
436-
const dummyRDD = Object.create( null );
437-
dummyRDD.wordVectors = wordVectorsJSON;
438-
methods.vectorOf = function ( word ) {
439-
if ( typeof word !== 'string' ) throw Error( 'winkNLP: input word must be of type string.' );
440-
return asHelpers.vector( [ word ], dummyRDD );
436+
/**
 * Returns a copy of the vector stored for `word` in the word vectors that
 * were supplied to this winkNLP instance (`wordVectorsJSON`).
 *
 * The lookup key is the lower-cased `word`. When no vector is stored under
 * that key, a copy of `wordVectorsJSON.unkVector` is returned instead.
 *
 * @param {string} word whose vector is required.
 * @param {boolean} [safe=true] when truthy, only the first
 * `wordVectorsJSON.l2NormIndex + 1` elements are returned; otherwise the
 * entire stored array is returned.
 * @return {number[]} a fresh array, so callers can mutate it without
 * affecting the stored vectors.
 * @throws {Error} if word vectors were not supplied, or if `word` is not a
 * string.
 */
methods.vectorOf = function ( word, safe = true ) {
  // `wordVectorsJSON` defaults to `null`; fail with a clear message instead
  // of a TypeError raised while reading a property of `null`.
  if ( wordVectorsJSON === null || wordVectorsJSON === undefined ) {
    throw Error( 'winkNLP: word vectors are not loaded.' );
  }

  if ( typeof word !== 'string' ) {
    throw Error( 'winkNLP: input word must be of type string.' );
  }

  const vectors = wordVectorsJSON.vectors;
  const unkVector = wordVectorsJSON.unkVector;
  const sliceUpTo = wordVectorsJSON.l2NormIndex + 1;

  // Restrict the lookup to own properties: words such as "constructor" or
  // "__proto__" must not resolve to members inherited from Object.prototype.
  const key = word.toLowerCase();
  const stored = ( Object.prototype.hasOwnProperty.call( vectors, key ) ) ? vectors[ key ] : undefined;
  const tv = ( stored === undefined ) ? unkVector : stored;

  // If unsafe, return the entire array.
  return ( safe ) ? tv.slice( 0, sliceUpTo ) : tv.slice();
}; // vectorOf()
442452

443453
return methods;

test/wink-nlp-specs.js

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -630,4 +630,20 @@ describe( 'vectorOf method', function () {
630630
expect( myNLP.vectorOf( 'UNK$$$' ) ).to.deep.equal( zeroVector );
631631
expect( doc2.tokens().out( its.value, as.vector) ).to.deep.equal( zeroVector );
632632
} );
633+
634+
it( 'with safe=false and UNK$$$, array\'s length === 102 & last element === -1', function () {
635+
const zeroVector = new Array( 102 );
636+
zeroVector.fill( 0 );
637+
zeroVector[ 101 ] = -1;
638+
expect( myNLP.vectorOf( 'UNK$$$', false ) ).to.deep.equal( zeroVector );
639+
expect( myNLP.vectorOf( 'UNK$$$', false ).length ).to.deep.equal( 102 );
640+
} );
641+
642+
it( 'with safe=false and "the" word, array\'s length === 102 & last element === 0', function () {
643+
// because "the" is the first word in the word vectors, i.e. the most frequently used word!
644+
const theVector = myNLP.vectorOf( 'the' );
645+
theVector.push( 0 );
646+
expect( myNLP.vectorOf( 'the', false ) ).to.deep.equal( theVector );
647+
expect( myNLP.vectorOf( 'the', false ).length ).to.deep.equal( 102 );
648+
} );
633649
} );

0 commit comments

Comments
 (0)