Skip to content

Commit a8ec42b

Browse files
feat(*): add test cases for vector cosine similarity
1 parent 8db4ee2 commit a8ec42b

File tree

3 files changed

+33
-3
lines changed

3 files changed

+33
-3
lines changed

test/as-specs.js

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,15 @@ var winkNLP = require( '../src/wink-nlp.js' );
3838
var its = require( '../src/its.js' );
3939
var as = require( '../src/as.js' );
4040
var model = require( './test-model/model.js' );
41+
var wordVectors = require( './test-model/languages/cur/models/test-vectors.json' );
42+
var similarity = require( '../utilities/similarity.js' );
4143

4244
var expect = chai.expect;
4345
var describe = mocha.describe;
4446
var it = mocha.it;
4547

4648
describe( 'its functions for .out()', function () {
47-
var nlp = winkNLP( model );
49+
var nlp = winkNLP( model, null, wordVectors );
4850

4951
it( 'as.bow', function () {
5052
expect( nlp.readDoc( 'to be or not to be' ).tokens().out( its.value, as.bow ) ).to.deep.equal( { to: 2, be: 2, or: 1, not: 1 } );
@@ -73,9 +75,35 @@ describe( 'its functions for .out()', function () {
7375
} );
7476

7577
it( 'as.vector', function () {
76-
var doc = nlp.readDoc( 'the dog ran away' );
78+
const sent = 'the dog ran';
79+
const doc = nlp.readDoc( sent );
7780

81+
// Check exceptions when as.vector is combined with incompatible its helpers.
7882
expect( doc.tokens().out.bind( null, its.negationFlag, as.vector ) ).to.throw( 'winkNLP: as.vector is allowed only with its value or normal or lemma.' );
7983
expect( doc.tokens().filter( ( t ) => t.out().length > 0 ).out.bind( null, its.negationFlag, as.vector ) ).to.throw( 'winkNLP: as.vector is allowed only with its value or normal or lemma.' );
84+
85+
// Test actual behaviour of as.vector by averaging the vectors of tokens
86+
// and computing the l2Norm.
87+
const vDog = nlp.vectorOf( 'dog' );
88+
const vRan = nlp.vectorOf( 'ran' );
89+
const sentVector = doc.tokens().filter( (t) => ( t.out( its.type ) === 'word' && !t.out( its.stopWordFlag ) ) ).out(its.value, as.vector );
90+
91+
// Note: the last entry in the array is the `l2Norm`, which is why 1 is subtracted from the length.
92+
const rVector = new Array( vDog.length - 1 );
93+
rVector.fill( 0 );
94+
let ssr = 0;
95+
for ( let k = 0; k < rVector.length; k += 1 ) {
96+
rVector[ k ] = +( ( vDog[ k ] + vRan[ k ] ) / 2 ).toFixed( 8 );
97+
ssr += rVector[ k ] * rVector[ k ];
98+
}
99+
rVector.push( +Math.sqrt( ssr ).toFixed( 8 ) );
100+
expect( rVector ).to.deep.equal( sentVector );
101+
102+
// Also test the similarity here, so that both as.vector & similarity are checked together.
103+
const docWith2S = nlp.readDoc( 'The table was in the drawing room. The desk was in the study room.' );
104+
const s0Vector = docWith2S.sentences().itemAt( 0 ).tokens().filter( (t) => ( t.out( its.type ) === 'word' && !t.out( its.stopWordFlag ) ) ).out(its.value, as.vector );
105+
const s1Vector = docWith2S.sentences().itemAt( 1 ).tokens().filter( (t) => ( t.out( its.type ) === 'word' && !t.out( its.stopWordFlag ) ) ).out(its.value, as.vector );
106+
107+
expect( similarity.vector.cosine( s0Vector, s1Vector ) ).to.equal( 0.824834 );
80108
} );
81109
} );

test/similarity-specs.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,3 +137,5 @@ describe( 'set-oo normal behaviour', function () {
137137
} );
138138
} );
139139
} );
140+
141+
// vector-cosine behaviour is tested in as-specs.js.

utilities/similarity.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@ similarity.vector.cosine = function ( vectorA, vectorB ) {
199199
}
200200

201201
// Use `l2Norm` directly from each vector.
202-
return +( sumOfProducts / ( vectorA[ length ] * vectorB[ length ] ) ).toFixed( 4 );
202+
return +( sumOfProducts / ( vectorA[ length ] * vectorB[ length ] ) ).toFixed( 6 );
203203
}; // similarity.vector.cosine()
204204

205205
// Export similarity

0 commit comments

Comments
 (0)