@@ -60,12 +60,13 @@ var tkSize = constants.tkSize;
6060 * @private
6161 *
6262 * @param {object } theModel language model.
63+ * @param {string[] } pipe of nlp annotations to be applied; defaults to all annotations.
6364 * @returns {object } containing a set of API methods for natural language processing.
6465 * @example
6566 * const nlp = require( 'wink-nlp' );
6667 * var myNLP = nlp();
6768*/
68- var nlp = function ( theModel ) {
69+ var nlp = function ( theModel , pipe ) {
6970
7071 var methods = Object . create ( null ) ;
7172 // Token Regex; compiled from `model`
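With the new pipe parameter, a caller can limit processing to a chosen subset of annotations instead of always running the full pipeline. A minimal usage sketch; the wink-eng-lite-web-model package name is an assumption, any compatible wink language model works:

const winkNLP = require( 'wink-nlp' );
// Assumed model package; substitute whichever wink language model is installed.
const model = require( 'wink-eng-lite-web-model' );
// Run only sentence boundary detection and PoS tagging on top of tokenization.
const nlp = winkNLP( model, [ 'sbd', 'pos' ] );
const doc = nlp.readDoc( 'Its quarterly results were great! Sales grew by 21%.' );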
@@ -108,6 +109,18 @@ var nlp = function ( theModel ) {
108109 // Used to instantiate the compiler.
109110 var cerMetaModel ;
110111
112+ // Annotation configuration: valid annotations and the currently active pipe.
113+ var validAnnotations = Object . create ( null ) ;
114+ validAnnotations . sbd = true ;
115+ validAnnotations . negation = true ;
116+ validAnnotations . sentiment = true ;
117+ validAnnotations . pos = true ;
118+ validAnnotations . ner = true ;
119+ validAnnotations . cer = true ;
120+ // Current pipe.
121+ var currPipe = Object . create ( null ) ;
122+ var onlyTokenization = true ;
123+
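validAnnotations acts as a whitelist while currPipe records which annotations the caller actually requested; onlyTokenization flips to false as soon as any annotation is enabled. An illustrative sketch of the resulting state, seen from the consumer side (the internal values noted in comments are assumptions derived from the pipe validation further below; the model package name is also an assumption):

const winkNLP = require( 'wink-nlp' );
const model = require( 'wink-eng-lite-web-model' );   // assumed model package

// Requesting a subset: currPipe becomes { sbd: true, pos: true }, onlyTokenization turns false.
const nlpSelective = winkNLP( model, [ 'sbd', 'pos' ] );

// An empty pipe keeps currPipe empty and onlyTokenization true, so only tokenization runs.
const nlpTokensOnly = winkNLP( model, [ ] );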
111124 // Private methods.
112125
113126 // ## load
@@ -229,63 +242,83 @@ var nlp = function ( theModel ) {
229242 // The structure is `[ start, end, negationFlag, sentimentScore ]`.
230243 rdd . document = [ 0 , ( rdd . numOfTokens - 1 ) , 0 , 0 ] ;
231244
232- // Map tokens for automata.
233- var tokens4Automata = mapRawTokens2UIdOfNormal ( rdd ) ;
234- // Sentence Boundary Detection.
235- // Set first `Pattern Swap (x)` as `null`.
236- var px = null ;
237- for ( let i = 0 ; i < sbdAutomata . length ; i += 1 ) {
238- sbdAutomata [ i ] . setPatternSwap ( px ) ;
239- // For SBD, all tokens are required to extract preceeding spaces.
240- px = sbdAutomata [ i ] . recognize ( tokens4Automata , sbdTransformers [ i ] , rdd . tokens ) ;
245+ // Map tokens for automata only if annotations beyond tokenization are to be performed.
246+ var tokens4Automata = ( onlyTokenization ) ? null : mapRawTokens2UIdOfNormal ( rdd ) ;
247+
248+ var px ;
249+ if ( currPipe . sbd ) {
250+ // Sentence Boundary Detection.
251+ // Set first `Pattern Swap (x)` as `null`.
252+ px = null ;
253+ for ( let i = 0 ; i < sbdAutomata . length ; i += 1 ) {
254+ sbdAutomata [ i ] . setPatternSwap ( px ) ;
256+ // For SBD, all tokens are required to extract preceding spaces.
256+ px = sbdAutomata [ i ] . recognize ( tokens4Automata , sbdTransformers [ i ] , rdd . tokens ) ;
257+ }
258+ // The structure of sentence is:<br/>
259+ // `[ start, end, negationFlag, sentimentScore ]`
260+ sbdSetter ( px , rdd ) ;
261+ // Compute number of sentences!
262+ rdd . numOfSentences = rdd . sentences . length ;
263+ } else {
264+ // Set up the default sentence as the entire document.
265+ rdd . numOfSentences = 1 ;
266+ rdd . sentences = [ [ 0 , ( rdd . numOfTokens - 1 ) , 0 , 0 ] ] ;
267+ }
268+
269+ if ( currPipe . ner ) {
270+ // Named entity detection.
271+ px = null ;
272+ for ( let i = 0 ; i < nerAutomata . length ; i += 1 ) {
273+ nerAutomata [ i ] . setPatternSwap ( px ) ;
274+ px = nerAutomata [ i ] . recognize ( tokens4Automata , nerTransformers [ i ] ) ;
275+ }
276+ // Entities are stored as an array of `[ start, end, entity type ]`.
277+ // There is no setter for entities as no transformation is needed.
278+ rdd . entities = px ;
279+ } else {
280+ rdd . entities = [ ] ;
241281 }
242- // The structure of sentence is:<br/>
243- // `[ start, end, negationFlag, sentimentScore ]`
244- sbdSetter ( px , rdd ) ;
245- // Compute number of sentences!
246- rdd . numOfSentences = rdd . sentences . length ;
247-
248- // Named entity detection.
249- px = null ;
250- for ( let i = 0 ; i < nerAutomata . length ; i += 1 ) {
251- nerAutomata [ i ] . setPatternSwap ( px ) ;
252- px = nerAutomata [ i ] . recognize ( tokens4Automata , nerTransformers [ i ] ) ;
282+
283+ if ( currPipe . negation ) {
284+ // Negation
285+ px = null ;
286+ px = negAutomata . recognize ( tokens4Automata ) ;
287+ negSetter ( px , rdd , constants , search ) ;
253288 }
254- // Entities — storted as array of `[ start, end, entity type ].`
255- // There are no setter for entities as no transformation is needed.
256- rdd . entities = px ;
257-
258- // Negation
259- px = null ;
260- px = negAutomata . recognize ( tokens4Automata ) ;
261- negSetter ( px , rdd , constants , search ) ;
262-
263- // Sentiment Analysis
264- px = null ;
265- px = saAutomata . recognize ( tokens4Automata ) ;
266- saSetter ( px , rdd , constants , locate ) ;
267-
268- // PoS Tagging
269- const posTags = mapRawTokens2UIdOfDefaultPOS ( rdd ) ;
270- px = null ;
271- for ( let i = 0 ; i < posAutomata . length ; i += 1 ) {
272- px = posAutomata [ i ] . recognize ( posTags , posTransformers [ 0 ] , rdd . tokens ) ;
273- posUpdater ( px , cache , posTags , tokens4Automata ) ;
289+
290+ if ( currPipe . sentiment ) {
291+ // Sentiment Analysis
292+ px = null ;
293+ px = saAutomata . recognize ( tokens4Automata ) ;
294+ saSetter ( px , rdd , constants , locate ) ;
274295 }
275- posSetter ( rdd , posTags , tkSize , constants . bits4lemma ) ;
276-
277- // Patterns
278- px = null ;
279- if ( cerAutomata !== undefined && cerLearnings > 0 ) {
280- cerConfig . rdd = rdd ;
281- cerConfig . preserve = cerPreserve ;
282- cerConfig . constants = constants ;
283- if ( cerConfig . useEntity ) cerAutomata . setPatternSwap ( rdd . entities ) ;
284- px = cerAutomata . recognize ( tokens4Automata , cerTransformer , cerConfig ) ;
296+
297+ if ( currPipe . pos ) {
298+ // PoS Tagging
299+ const posTags = mapRawTokens2UIdOfDefaultPOS ( rdd ) ;
300+ px = null ;
301+ for ( let i = 0 ; i < posAutomata . length ; i += 1 ) {
302+ px = posAutomata [ i ] . recognize ( posTags , posTransformers [ 0 ] , rdd . tokens ) ;
303+ posUpdater ( px , cache , posTags , tokens4Automata ) ;
304+ }
305+ posSetter ( rdd , posTags , tkSize , constants . bits4lemma ) ;
285306 }
286- // If there are no custom entities, then `px` will be `null`; in such a case
287- // set `customEntities` to an empty array.
288- rdd . customEntities = px || [ ] ;
307+
308+ if ( currPipe . cer ) {
309+ // Patterns
310+ px = null ;
311+ if ( cerAutomata !== undefined && cerLearnings > 0 ) {
312+ cerConfig . rdd = rdd ;
313+ cerConfig . preserve = cerPreserve ;
314+ cerConfig . constants = constants ;
315+ if ( cerConfig . useEntity ) cerAutomata . setPatternSwap ( rdd . entities ) ;
316+ px = cerAutomata . recognize ( tokens4Automata , cerTransformer , cerConfig ) ;
317+ }
318+ // If there are no custom entities, then `px` will be `null`; in such a case
319+ // set `customEntities` to an empty array.
320+ rdd . customEntities = px || [ ] ;
321+ } else rdd . customEntities = [ ] ;
289322
290323
291324 // Word Vector
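With every annotation now gated on currPipe, the fallbacks above (a single document-wide sentence, empty entities and empty custom entities) keep the document's API shape stable when a stage is skipped. A hedged sketch of the observable difference; the model package name is an assumption and the outputs in comments are illustrative, not taken from a test run:

const winkNLP = require( 'wink-nlp' );
const model = require( 'wink-eng-lite-web-model' );   // assumed model package
const text = 'Hi! We ship on Friday.';

const full = winkNLP( model ).readDoc( text );
const slim = winkNLP( model, [ 'negation', 'sentiment' ] ).readDoc( text );

full.sentences().out();   // [ 'Hi!', 'We ship on Friday.' ] since sbd ran.
slim.sentences().out();   // [ 'Hi! We ship on Friday.' ] i.e. the single default sentence.
slim.entities().out();    // [ ] because ner was skipped.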
@@ -362,6 +395,15 @@ var nlp = function ( theModel ) {
362395 throw Error ( 'wink-nlp: invalid model used.' ) ;
363396 }
364397
398+ const tempPipe = ( pipe === undefined ) ? Object . keys ( validAnnotations ) : pipe ;
399+ if ( helper . isArray ( tempPipe ) ) {
400+ tempPipe . forEach ( ( at ) => {
401+ if ( ! validAnnotations [ at ] ) throw Error ( `wink-nlp: invalid pipe annotation "${ at } " found.` ) ;
402+ currPipe [ at ] = true ;
403+ onlyTokenization = false ;
404+ } ) ;
405+ } else throw Error ( `wink-nlp: invalid pipe, it must be an array but found a "${ typeof pipe }" instead.` ) ;
406+
365407 // Load the model.
366408 load ( ) ;
367409 // Setup default configuration.
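The pipe is validated eagerly at instantiation, so a misspelt annotation name or a non-array value fails fast instead of surfacing later during document processing. A brief sketch with hypothetical bad inputs (model package name assumed as before):

const winkNLP = require( 'wink-nlp' );
const model = require( 'wink-eng-lite-web-model' );   // assumed model package

// Throws: wink-nlp: invalid pipe annotation "pos-tagging" found.
winkNLP( model, [ 'sbd', 'pos-tagging' ] );

// Throws: wink-nlp: invalid pipe, it must be an array but found a "string" instead.
winkNLP( model, 'sbd' );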