@@ -193,6 +193,7 @@ as.vector = function ( tokens, rdd ) {
   const size = rdd.wordVectors.dimensions;
   const precision = rdd.wordVectors.precision;
   const vectors = rdd.wordVectors.vectors;
+  const l2NormIndex = rdd.wordVectors.l2NormIndex;

   // Set up a new initialized vector of `size`
   const v = new Array( size );
@@ -203,8 +204,11 @@ as.vector = function ( tokens, rdd ) {
   for ( let i = 0; i < tokens.length; i += 1 ) {
     // Extract token vector for the current token.
     const tv = vectors[ tokens[ i ].toLowerCase() ];
-    // Increment `numOfTokens` if the above operation was successful.
-    if ( tv !== undefined ) numOfTokens += 1;
+    // Increment `numOfTokens` if the above operation was successful
+    // AND the l2Norm is non-zero, because for UNK vectors it is set to 0.
+    // The latter applies to contextual vectors, where an all-zero vector
+    // is used for an UNK word.
+    if ( tv !== undefined && tv[ l2NormIndex ] !== 0 ) numOfTokens += 1;
     for ( let j = 0; j < size; j += 1 ) {
       // Keep summing; eventually it will be divided by `numOfTokens` to obtain the average.
       v[ j ] += ( tv === undefined ) ? 0 : tv[ j ];
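
For context, here is a minimal, self-contained sketch of the averaging logic this change guards, assuming a simplified `wordVectors` shape. The field names `dimensions`, `l2NormIndex` and `vectors` mirror the diff; the sample data and the `averageVector` helper are hypothetical, not the library's actual code.

// Assumed layout: each stored vector holds its values followed by its
// L2 norm at `l2NormIndex`; UNK entries are all-zero, so their norm is 0
// and they must not inflate the averaging denominator.
const wordVectors = {
  dimensions: 3,
  l2NormIndex: 3,                      // norm stored right after the 3 values (assumption)
  vectors: {
    rain: [ 0.1, 0.2, 0.2, 0.3 ],      // 3 values + l2Norm
    sun:  [ 0.4, 0.0, 0.3, 0.5 ],
    unk:  [ 0.0, 0.0, 0.0, 0.0 ]       // UNK: all-zero vector, l2Norm === 0
  }
};

const averageVector = ( tokens, wv ) => {
  const v = new Array( wv.dimensions ).fill( 0 );
  let numOfTokens = 0;
  for ( let i = 0; i < tokens.length; i += 1 ) {
    const tv = wv.vectors[ tokens[ i ].toLowerCase() ];
    // Count the token only if a vector was found AND its l2Norm is non-zero.
    if ( tv !== undefined && tv[ wv.l2NormIndex ] !== 0 ) numOfTokens += 1;
    for ( let j = 0; j < wv.dimensions; j += 1 ) {
      v[ j ] += ( tv === undefined ) ? 0 : tv[ j ];
    }
  }
  // Divide by the number of tokens that actually contributed.
  return ( numOfTokens === 0 ) ? v : v.map( ( x ) => x / numOfTokens );
};

console.log( averageVector( [ 'Rain', 'unk', 'sun' ], wordVectors ) );
// With the l2Norm guard the divisor is 2, not 3, so the UNK token
// does not drag the average toward zero.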