@@ -101,72 +101,92 @@ public AnnotatedText annotateText(String text, Object id, boolean sentiment, boo
101101 pipelines .get (PIPELINE .BASIC ).annotate (document );
102102 }
103103 List <CoreMap > sentences = document .get (CoreAnnotations .SentencesAnnotation .class );
104- final String background = backgroundSymbol ;
105104 final AtomicInteger sentenceSequence = new AtomicInteger (0 );
106105 sentences .stream ().map ((sentence ) -> {
107106 return sentence ;
108107 }).forEach ((sentence ) -> {
109108 String sentenceId = id + "_" + sentenceSequence .getAndIncrement ();
110109 final Sentence newSentence = new Sentence (sentence .toString (), store , sentenceId );
111- final AtomicReference <String > prevNe = new AtomicReference <>();
112- prevNe .set (background );
113- final AtomicReference <StringBuilder > sb = new AtomicReference <>();
114- sb .set (new StringBuilder ());
115- List <CoreLabel > tokens = sentence .get (CoreAnnotations .TokensAnnotation .class );
116- tokens .stream ()
117- .filter ((token ) -> (token != null ) && checkPuntuation (token .get (CoreAnnotations .LemmaAnnotation .class )))
118- .map ((token ) -> {
119- //
120- String currentNe = StringUtils .getNotNullString (token .get (CoreAnnotations .NamedEntityTagAnnotation .class ));
121- if (currentNe .equals (background ) && prevNe .get ().equals (background )) {
122- Tag tag = getTag (token );
123- if (tag != null ) {
124- newSentence .addTag (tag );
125- }
126- } else if (currentNe .equals (background ) && !prevNe .get ().equals (background )) {
127- Tag newTag = new Tag (sb .get ().toString ());
128- newTag .setNe (prevNe .get ());
129- newSentence .addTag (newTag );
130- sb .set (new StringBuilder ());
131- Tag tag = getTag (token );
132- if (tag != null ) {
133- newSentence .addTag (tag );
134- }
135- } else if (!currentNe .equals (prevNe .get ()) && !prevNe .get ().equals (background )) {
136- Tag newTag = new Tag (sb .get ().toString ());
137- newTag .setNe (prevNe .get ());
138- newSentence .addTag (newTag );
139- sb .set (new StringBuilder ());
140- sb .get ().append (StringUtils .getNotNullString (token .get (CoreAnnotations .OriginalTextAnnotation .class )));
141- } else if (!currentNe .equals (background ) && prevNe .get ().equals (background )) {
142- sb .get ().append (StringUtils .getNotNullString (token .get (CoreAnnotations .OriginalTextAnnotation .class )));
143- } else {
144- String before = StringUtils .getNotNullString (token .get (CoreAnnotations .BeforeAnnotation .class ));
145- String currentText = StringUtils .getNotNullString (token .get (CoreAnnotations .OriginalTextAnnotation .class ));
146- sb .get ().append (before );
147- sb .get ().append (currentText );
148- }
149- return currentNe ;
150- }).forEach ((currentNe ) -> {
151- prevNe .set (currentNe );
152- });
153-
154- if (sb .get ().length () > 0 ) {
155- Tag tag = new Tag (sb .get ().toString ());
156- tag .setNe (prevNe .get ());
157- newSentence .addTag (tag );
158- }
159-
110+ extractTokens (sentence , newSentence );
160111 if (sentiment ) {
161- int score = extractSentiment (sentence );
162- newSentence .setSentiment (score );
112+ extractSentiment (sentence , newSentence );
163113 }
164114 result .addSentence (newSentence );
165115
166116 });
167117 return result ;
168118 }
169119
120+ protected void extractSentiment (CoreMap sentence , final Sentence newSentence ) {
121+ int score = extractSentiment (sentence );
122+ newSentence .setSentiment (score );
123+ }
124+
125+ protected void extractTokens (CoreMap sentence , final Sentence newSentence ) {
126+ List <CoreLabel > tokens = sentence .get (CoreAnnotations .TokensAnnotation .class );
127+ final String background = backgroundSymbol ;
128+ // final AtomicReference<String> prevNe = new AtomicReference<>();
129+ // prevNe.set(background);
130+ // final AtomicReference<StringBuilder> sb = new AtomicReference<>();
131+ // sb.set(new StringBuilder());
132+ TokenHolder currToken = new TokenHolder ();
133+ currToken .setNe (background );
134+ tokens .stream ()
135+ .filter ((token ) -> (token != null ) && checkPuntuation (token .get (CoreAnnotations .LemmaAnnotation .class )))
136+ .map ((token ) -> {
137+ //
138+ String currentNe = StringUtils .getNotNullString (token .get (CoreAnnotations .NamedEntityTagAnnotation .class ));
139+ if (currentNe .equals (background ) && currToken .getNe ().equals (background )) {
140+ Tag tag = getTag (token );
141+ if (tag != null ) {
142+ newSentence .addTag (tag );
143+ newSentence .addOccurrence (token .beginPosition (), token .endPosition (), tag );
144+ }
145+ } else if (currentNe .equals (background ) && !currToken .getNe ().equals (background )) {
146+ Tag newTag = new Tag (currToken .getToken ());
147+ newTag .setNe (currToken .getNe ());
148+ newSentence .addTag (newTag );
149+ newSentence .addOccurrence (currToken .getBeginPosition (), currToken .getEndPosition (), newTag );
150+ currToken .reset ();
151+ Tag tag = getTag (token );
152+ if (tag != null ) {
153+ newSentence .addTag (tag );
154+ newSentence .addOccurrence (token .beginPosition (), token .endPosition (), tag );
155+ }
156+ } else if (!currentNe .equals (currToken .getNe ()) && !currToken .getNe ().equals (background )) {
157+ Tag tag = new Tag (currToken .getToken ());
158+ tag .setNe (currToken .getNe ());
159+ newSentence .addTag (tag );
160+ newSentence .addOccurrence (currToken .getBeginPosition (), currToken .getEndPosition (), tag );
161+ currToken .reset ();
162+ currToken .updateToken (StringUtils .getNotNullString (token .get (CoreAnnotations .OriginalTextAnnotation .class )));
163+ currToken .setBeginPosition (token .beginPosition ());
164+ currToken .setEndPosition (token .endPosition ());
165+ } else if (!currentNe .equals (background ) && currToken .getNe ().equals (background )) {
166+ currToken .updateToken (StringUtils .getNotNullString (token .get (CoreAnnotations .OriginalTextAnnotation .class )));
167+ currToken .setBeginPosition (token .beginPosition ());
168+ currToken .setEndPosition (token .endPosition ());
169+ } else {
170+ String before = StringUtils .getNotNullString (token .get (CoreAnnotations .BeforeAnnotation .class ));
171+ String currentText = StringUtils .getNotNullString (token .get (CoreAnnotations .OriginalTextAnnotation .class ));
172+ currToken .updateToken (before );
173+ currToken .updateToken (currentText );
174+ currToken .setBeginPosition (token .beginPosition ());
175+ currToken .setEndPosition (token .endPosition ());
176+ }
177+ return currentNe ;
178+ }).forEach ((currentNe ) -> {
179+ currToken .setNe (currentNe );
180+ });
181+
182+ if (currToken .getToken ().length () > 0 ) {
183+ Tag tag = new Tag (currToken .getToken ());
184+ tag .setNe (currToken .getNe ());
185+ newSentence .addTag (tag );
186+ newSentence .addOccurrence (currToken .getBeginPosition (), currToken .getEndPosition (), tag );
187+ }
188+ }
189+
170190 public AnnotatedText sentiment (AnnotatedText annotated ) {
171191 StanfordCoreNLP pipeline = pipelines .get (PIPELINE .SENTIMENT );
172192 annotated .getSentences ().parallelStream ().forEach ((item ) -> {
@@ -175,8 +195,7 @@ public AnnotatedText sentiment(AnnotatedText annotated) {
175195 List <CoreMap > sentences = document .get (CoreAnnotations .SentencesAnnotation .class );
176196 Optional <CoreMap > sentence = sentences .stream ().findFirst ();
177197 if (sentence != null && sentence .isPresent ()) {
178- int score = extractSentiment (sentence .get ());
179- item .setSentiment (score );
198+ extractSentiment (sentence .get (), item );
180199 }
181200 });
182201 return annotated ;
@@ -250,6 +269,61 @@ public boolean checkPuntuation(String value) {
250269 return !match .find ();
251270 }
252271
/**
 * Mutable accumulator for a multi-token entity: its concatenated text, its named-entity
 * label and the character span it covers.
 *
 * <p>Instances are not synchronized.
 */
class TokenHolder {

    /** Named-entity label; {@code null} until {@link #setNe(String)} is called. */
    private String ne;
    /** Text accumulated since the last {@link #reset()}. */
    private final StringBuilder sb = new StringBuilder();
    /** Begin offset of the accumulated text, or -1 when none has been recorded. */
    private int beginPosition;
    /** End offset of the accumulated text, or -1 when none has been recorded. */
    private int endPosition;

    /** Creates an empty holder with both positions set to -1. */
    public TokenHolder() {
        reset();
    }

    /** @return the current named-entity label, possibly {@code null} */
    public String getNe() {
        return ne;
    }

    /** @return the text accumulated since the last reset; empty if nothing was appended */
    public String getToken() {
        return sb.toString();
    }

    /** @return the first begin position recorded since the last reset, or -1 */
    public int getBeginPosition() {
        return beginPosition;
    }

    /** @return the most recently recorded end position since the last reset, or -1 */
    public int getEndPosition() {
        return endPosition;
    }

    /**
     * Replaces the named-entity label.
     *
     * @param ne the new label
     */
    public void setNe(String ne) {
        this.ne = ne;
    }

    /**
     * Appends text to the accumulated token.
     *
     * @param tknStr the text to append
     */
    public void updateToken(String tknStr) {
        this.sb.append(tknStr);
    }

    /**
     * Records the begin position only if none has been recorded since the last reset,
     * so the span keeps the offset of the first token appended.
     *
     * @param beginPosition the candidate begin offset
     */
    public void setBeginPosition(int beginPosition) {
        if (this.beginPosition < 0) {
            this.beginPosition = beginPosition;
        }
    }

    /**
     * Overwrites the end position, so the span tracks the last token appended.
     *
     * @param endPosition the new end offset
     */
    public void setEndPosition(int endPosition) {
        this.endPosition = endPosition;
    }

    /**
     * Clears the accumulated text and sets both positions back to -1. The named-entity
     * label is intentionally left untouched.
     */
    public final void reset() {
        sb.setLength(0);
        beginPosition = -1;
        endPosition = -1;
    }
}
326+
253327 static class PipelineBuilder {
254328
255329 private final Properties properties = new Properties ();
0 commit comments