Skip to content

Commit cf14966

Browse files
feat(*): enable addons support for enhanced language model
closes #7 references #6
1 parent aeefa9d commit cf14966

File tree

13 files changed

+54
-39
lines changed

13 files changed

+54
-39
lines changed

src/allowed.js

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,8 @@ allowed.its4token = new Set( [
4848
its.abbrevFlag,
4949
its.suffix,
5050
its.type,
51-
its.value
51+
its.value,
52+
its.stem
5253
] );
5354

5455
allowed.its4tokens = allowed.its4token;
@@ -91,7 +92,8 @@ allowed.its4sentence = new Set( [
9192
its.span,
9293
its.markedUpText,
9394
its.negationFlag,
94-
its.sentiment
95+
its.sentiment,
96+
its.stem
9597
] );
9698

9799
allowed.its4document = allowed.its4sentence;

src/api/col-sentences-out.js

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,14 +38,14 @@ var itmSentenceOut = require( './itm-sentence-out.js' );
3838
* data type or data structure.
3939
* @param {object} rdd Raw Document Data-structure.
4040
* @param {function} itsf Desired `its` mapper.
41-
* @param {object} wordVectors The word vectors.
41+
* @param {object} addons The model's addons.
4242
* @return {*} Mapped sentences.
4343
* @private
4444
*/
45-
var colSentencesOut = function ( rdd, itsf, wordVectors ) {
45+
var colSentencesOut = function ( rdd, itsf, addons ) {
4646
var sents = [];
4747
for ( let i = 0; i < rdd.sentences.length; i += 1 ) {
48-
sents.push( itmSentenceOut( i, rdd, itsf, wordVectors ) );
48+
sents.push( itmSentenceOut( i, rdd, itsf, addons ) );
4949
}
5050
return sents;
5151
}; // colSentencesOut()

src/api/col-tokens-out.js

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -48,14 +48,14 @@ var psMask = constants.psMask;
4848
* @param {object} rdd Raw Document Data-structure.
4949
* @param {function} itsf Desired `its` mapper.
5050
* @param {function} asf Desired `as` reducer.
51-
* @param {object} wordVectors The word vectors.
51+
* @param {object} addons The model's addons.
5252
* @return {*} Map-reduced collection of tokens.
5353
* @private
5454
*/
55-
var colTokensOut = function ( start, end, rdd, itsf, asf, wordVectors ) {
55+
var colTokensOut = function ( start, end, rdd, itsf, asf, addons ) {
5656
// Vectors require completely different handling.
5757
if ( itsf === its.vector ) {
58-
return its.vector( start, end, rdd.tokens, wordVectors );
58+
return its.vector( start, end, rdd.tokens, addons );
5959
}
6060

6161
// Not a vector request, perform map-reduce.
@@ -65,11 +65,11 @@ var colTokensOut = function ( start, end, rdd, itsf, asf, wordVectors ) {
6565
// Note, `as.text/markedUpText` needs special attention to include preceding spaces.
6666
if ( asfn === as.text || asfn === as.markedUpText ) {
6767
for ( let i = start; i <= end; i += 1 ) {
68-
mappedTkns.push( ''.padEnd( rdd.tokens[ ( i * tkSize ) + 1 ] & psMask ), itsf( i, rdd.tokens, rdd.cache ) ); // eslint-disable-line no-bitwise
68+
mappedTkns.push( ''.padEnd( rdd.tokens[ ( i * tkSize ) + 1 ] & psMask ), itsf( i, rdd.tokens, rdd.cache, addons ) ); // eslint-disable-line no-bitwise
6969
}
7070
} else {
7171
for ( let i = start; i <= end; i += 1 ) {
72-
mappedTkns.push( itsfn( i, rdd.tokens, rdd.cache ) );
72+
mappedTkns.push( itsfn( i, rdd.tokens, rdd.cache, addons ) );
7373
}
7474
}
7575

src/api/itm-document-out.js

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,15 +41,15 @@ var colTokensOut = require( './col-tokens-out.js' );
4141
* data type or data structure.
4242
* @param {Object} rdd Raw Document Data-structure.
4343
* @param {function} itsf Desired `its` mapper.
44-
* @param {Object} wordVectors The word vectors.
44+
* @param {Object} addons The model's addons.
4545
* @return {*} Mapped value.
4646
* @private
4747
*/
48-
var itmDocumentOut = function ( rdd, itsf, wordVectors ) {
48+
var itmDocumentOut = function ( rdd, itsf, addons ) {
4949
var document = rdd.document;
5050
// Vectors require completely different handling.
5151
if ( itsf === its.vector ) {
52-
return its.vector( document, rdd, wordVectors );
52+
return its.vector( document, rdd, addons );
5353
}
5454

5555
var itsfn = ( itsf && allowed.its4document.has( itsf ) ) ? itsf : its.value;
@@ -67,7 +67,7 @@ var itmDocumentOut = function ( rdd, itsf, wordVectors ) {
6767
// returned the `value`. Refer to `its.markedUpText`.
6868
var asfn = ( itsfn === its.markedUpText ) ? as.markedUpText : as.text;
6969

70-
return colTokensOut( document[ 0 ], document[ 1 ], rdd, itsfn, asfn );
70+
return colTokensOut( document[ 0 ], document[ 1 ], rdd, itsfn, asfn, addons );
7171
}; // itmDocumentOut()
7272

7373
module.exports = itmDocumentOut;

src/api/itm-sentence-out.js

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,15 +42,15 @@ var colTokensOut = require( './col-tokens-out.js' );
4242
* @param {number} index The index of desired sentence.
4343
* @param {Object} rdd Raw Document Data-structure.
4444
* @param {function} itsf Desired `its` mapper.
45-
* @param {object} wordVectors The word vectors.
45+
* @param {object} addons The model's addons.
4646
* @return {*} Mapped value.
4747
* @private
4848
*/
49-
var itmSentenceOut = function ( index, rdd, itsf, wordVectors ) {
49+
var itmSentenceOut = function ( index, rdd, itsf, addons ) {
5050
var sentence = rdd.sentences[ index ];
5151
// Vectors require completely different handling.
5252
if ( itsf === its.vector ) {
53-
return its.vector( sentence, rdd, wordVectors );
53+
return its.vector( sentence, rdd, addons );
5454
}
5555

5656
var itsfn = ( itsf && allowed.its4sentence.has( itsf ) ) ? itsf : its.value;
@@ -68,7 +68,7 @@ var itmSentenceOut = function ( index, rdd, itsf, wordVectors ) {
6868
// returned the `value`. Refer to `its.markedUpText`.
6969
var asfn = ( itsfn === its.markedUpText ) ? as.markedUpText : as.text;
7070

71-
return colTokensOut( sentence[ 0 ], sentence[ 1 ], rdd, itsfn, asfn );
71+
return colTokensOut( sentence[ 0 ], sentence[ 1 ], rdd, itsfn, asfn, addons );
7272
}; // itmSentenceOut()
7373

7474
module.exports = itmSentenceOut;

src/api/itm-token-out.js

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,18 +40,18 @@ var allowed = require( '../allowed.js' );
4040
* @param {number} index The index of desired token.
4141
* @param {Object} rdd Raw Document Data-structure.
4242
* @param {function} itsf Desired `its` mapper.
43-
* @param {object} wordVectors The word vectors.
43+
* @param {object} addons The model's addons.
4444
* @return {*} Mapped value.
4545
* @private
4646
*/
47-
var itmTokenOut = function ( index, rdd, itsf, wordVectors ) {
47+
var itmTokenOut = function ( index, rdd, itsf, addons ) {
4848
// Vectors require completely different handling.
4949
if ( itsf === its.vector ) {
50-
return its.vector( index, rdd, wordVectors );
50+
return its.vector( index, rdd, addons );
5151
}
5252
// Not a vector request, map using `itsf`.
5353
var f = ( allowed.its4token.has( itsf ) ) ? itsf : its.value;
54-
return f( index, rdd.tokens, rdd.cache );
54+
return f( index, rdd.tokens, rdd.cache, addons );
5555
}; // itmTokenOut()
5656

5757
module.exports = itmTokenOut;

src/api/sel-tokens-out.js

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47,14 +47,14 @@ var psMask = constants.psMask;
4747
* @param {object} rdd Raw document data structure.
4848
* @param {function} itsf Desired `its` mapper.
4949
* @param {function} asf Desired `as` reducer.
50-
* @param {object} wordVectors The word vectors.
50+
* @param {object} addons The addons from the model.
5151
* @return {*} Reduced value.
5252
* @private
5353
*/
54-
var selTokensOut = function ( selTokens, rdd, itsf, asf, wordVectors ) {
54+
var selTokensOut = function ( selTokens, rdd, itsf, asf, addons ) {
5555
// Vectors require completely different handling.
5656
if ( itsf === its.vector ) {
57-
return its.vector( selTokens, rdd.tokens, wordVectors );
57+
return its.vector( selTokens, rdd.tokens, addons );
5858
}
5959

6060
// Not a vector request, perform map-reduce.
@@ -66,11 +66,11 @@ var selTokensOut = function ( selTokens, rdd, itsf, asf, wordVectors ) {
6666
// No `markedUpText` allowed here.
6767
if ( asfn === as.text ) {
6868
for ( let i = 0; i < selTokens.length; i += 1 ) {
69-
mappedTkns.push( ''.padEnd( rdd.tokens[ ( selTokens[ i ] * tkSize ) + 1 ] & psMask ), itsf( selTokens[ i ], rdd.tokens, rdd.cache ) ); // eslint-disable-line no-bitwise
69+
mappedTkns.push( ''.padEnd( rdd.tokens[ ( selTokens[ i ] * tkSize ) + 1 ] & psMask ), itsf( selTokens[ i ], rdd.tokens, rdd.cache, addons ) ); // eslint-disable-line no-bitwise
7070
}
7171
} else {
7272
for ( let i = 0; i < selTokens.length; i += 1 ) {
73-
mappedTkns.push( itsfn( selTokens[ i ], rdd.tokens, rdd.cache ) );
73+
mappedTkns.push( itsfn( selTokens[ i ], rdd.tokens, rdd.cache, addons ) );
7474
}
7575
}
7676

src/doc-v2.js

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -79,11 +79,11 @@ var printTokens = require( './api/print-tokens.js' );
7979
* developer APIs.
8080
*
8181
* @param {object} docData It encapsulates the document data.
82-
* @param {object} wordVectors Word vector, optional.
82+
* @param {object} addons The model's addons; may contain word vectors, a stemmer, etc.
8383
* @return {object} containing APIs.
8484
* @private
8585
*/
86-
var doc = function ( docData, wordVectors ) {
86+
var doc = function ( docData, addons ) {
8787
// Extract `cache` as it is frequently accessed.
8888
var cache = docData.cache;
8989

@@ -158,7 +158,7 @@ var doc = function ( docData, wordVectors ) {
158158
// Markup this token.
159159
api.markup = ( beginMarker, endMarker ) => markings.push( [ index, index, beginMarker, endMarker ] );
160160
// Output this token or its properties using mapper function — `f`.
161-
api.out = ( f ) => itmTokenOut( index, docData, f, wordVectors );
161+
api.out = ( f ) => itmTokenOut( index, docData, f, addons );
162162
// Access the parent sentence.
163163
api.parentSentence = () => getParentItem( index, sentences, itemSentence );
164164
// Index within the document.
@@ -188,7 +188,7 @@ var doc = function ( docData, wordVectors ) {
188188
api.length = () => ( selectedTokens.length );
189189
// Output this collection of selected tokens as reduced values or properties
190190
// using map/reduce functions — `f/g`.
191-
api.out = ( f, g ) => selTokensOut( selectedTokens, docData, f, g, wordVectors );
191+
api.out = ( f, g ) => selTokensOut( selectedTokens, docData, f, g, addons );
192192
return api;
193193
}; // colTokens()
194194

@@ -217,7 +217,7 @@ var doc = function ( docData, wordVectors ) {
217217
api.length = () => ( end - start + 1 );
218218
// Output this token collection as reduced values or properties using
219219
// map/reduce functions — `f/g`.
220-
api.out = ( f, g ) => colTokensOut( start, end, docData, f, g, wordVectors );
220+
api.out = ( f, g ) => colTokensOut( start, end, docData, f, g, addons );
221221

222222
return api;
223223
}
@@ -409,7 +409,7 @@ var doc = function ( docData, wordVectors ) {
409409
// Markup this sentence.
410410
api.markup = ( beginMarker, endMarker ) => markings.push( [ sentences[ index ][ 0 ], sentences[ index ][ 1 ], beginMarker, endMarker ] );
411411
// Output this sentence as text.
412-
api.out = ( f ) => itmSentenceOut( index, docData, f, wordVectors );
412+
api.out = ( f ) => itmSentenceOut( index, docData, f, addons );
413413
// Outputs the collection of entities, if any, contained in this sentence.
414414
api.entities = () => colSelectedEntities( containedEntities( entities, sentences[ index ][ 0 ], sentences[ index ][ 1 ] ) );
415415
// Outputs the collection of custom entities, if any, contained in this sentence.
@@ -438,7 +438,7 @@ var doc = function ( docData, wordVectors ) {
438438
// Length of this collection.
439439
api.length = () => ( sentences.length );
440440
// Output this collection of sentences as an array of strings.
441-
api.out = ( f ) => colSentencesOut( docData, f, wordVectors );
441+
api.out = ( f ) => colSentencesOut( docData, f, addons );
442442
return api;
443443
}; // colSentences()
444444

@@ -450,7 +450,7 @@ var doc = function ( docData, wordVectors ) {
450450
methods.customEntities = colCustomEntities;
451451
methods.isLexeme = isLexeme;
452452
methods.isOOV = cache.isOOV;
453-
methods.out = ( f ) => itmDocumentOut( docData, f, wordVectors );
453+
methods.out = ( f ) => itmDocumentOut( docData, f, addons );
454454
methods.sentences = colSentences;
455455
methods.tokens = colTokens( 0, docData.numOfTokens - 1 );
456456

src/its.js

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,10 @@ its.value = function ( index, tokens, cache ) {
111111
return cache.value( tokens[ index * tkSize ] );
112112
}; // value()
113113

114+
its.stem = function ( index, tokens, cache, addons ) {
115+
return addons.stem( cache.value( tokens[ index * tkSize ] ) );
116+
}; // stem()
117+
114118
its.vector = function ( ) {
115119
return ( new Array( 100 ).fill( 0 ) );
116120
}; // vector()

src/wink-nlp.js

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ var nlp = function ( theModel ) {
117117
* @returns {void} nothing.
118118
* @private
119119
*/
120-
var load = function ( ) {
120+
var load = function () {
121121
// Load language model.
122122
model = theModel.core();
123123
// With `intrinsicSize` captured, instantiate cache etc.
@@ -292,7 +292,7 @@ var nlp = function ( theModel ) {
292292
// }
293293

294294
// Now create the document!
295-
var doc = Doc( rdd, theModel.wordVectors ); // eslint-disable-line new-cap
295+
var doc = Doc( rdd, theModel.addons ); // eslint-disable-line new-cap
296296

297297
// All done — cleanup document's data.
298298
wrappedDocData.clean();
@@ -361,7 +361,7 @@ var nlp = function ( theModel ) {
361361
}
362362

363363
// Load the model.
364-
load( theModel );
364+
load();
365365
// Setup default configuration.
366366
// definePipeConfig();
367367
// Methods.

0 commit comments

Comments
 (0)