1919import com .graphaware .nlp .domain .Phrase ;
2020import com .graphaware .nlp .domain .Sentence ;
2121import com .graphaware .nlp .domain .Tag ;
22+ import edu .stanford .nlp .hcoref .CorefCoreAnnotations ;
23+ import edu .stanford .nlp .hcoref .data .CorefChain ;
2224import edu .stanford .nlp .ling .CoreAnnotations ;
2325import edu .stanford .nlp .ling .CoreLabel ;
2426import edu .stanford .nlp .ling .Word ;
@@ -98,16 +100,16 @@ private void createCompletePipeline() {
98100 public AnnotatedText annotateText (String text , Object id , boolean sentiment , boolean store ) {
99101 StanfordCoreNLP pipeline ;
100102 if (sentiment ) {
101- pipeline = pipelines .get (PIPELINE .COMPLETE );
103+ pipeline = pipelines .get (PIPELINE .COMPLETE );
102104 } else {
103105 pipeline = pipelines .get (PIPELINE .BASIC );
104106 }
105107 return annotateText (text , id , pipeline , store );
106108 }
107-
108- public AnnotatedText annotateText (String text , Object id , StanfordCoreNLP pipeline , boolean store ) {
109+
110+ public AnnotatedText annotateText (String text , Object id , StanfordCoreNLP pipeline , boolean store ) {
109111 AnnotatedText result = new AnnotatedText (id );
110- Annotation document = new Annotation (text );
112+ Annotation document = new Annotation (text );
111113 pipeline .annotate (document );
112114 List <CoreMap > sentences = document .get (CoreAnnotations .SentencesAnnotation .class );
113115 final AtomicInteger sentenceSequence = new AtomicInteger (0 );
@@ -121,13 +123,15 @@ public AnnotatedText annotateText(String text, Object id, StanfordCoreNLP pipeli
121123 extractPhrases (sentence , newSentence );
122124 result .addSentence (newSentence );
123125 });
126+ extractRelationship (result , sentences , document );
124127 return result ;
125128 }
126129
127130 protected void extractPhrases (CoreMap sentence , Sentence newSentence ) {
128131 Tree tree = sentence .get (TreeCoreAnnotations .TreeAnnotation .class );
129- if (tree == null )
132+ if (tree == null ) {
130133 return ;
134+ }
131135 Set <PhraseHolder > extractedPhrases = inspectSubTree (tree );
132136 extractedPhrases .stream ().forEach ((holder ) -> {
133137 newSentence .addPhraseOccurrence (holder .getBeginPosition (), holder .getEndPosition (), new Phrase (holder .getPhrase ()));
@@ -199,6 +203,41 @@ protected void extractTokens(CoreMap sentence, final Sentence newSentence) {
199203 }
200204 }
201205
206+ private void extractRelationship (AnnotatedText annotatedText , List <CoreMap > sentences , Annotation document ) {
207+ Map <Integer , CorefChain > corefChains = document .get (CorefCoreAnnotations .CorefChainAnnotation .class );
208+ if (corefChains != null ) {
209+ for (CorefChain chain : corefChains .values ()) {
210+ CorefChain .CorefMention representative = chain .getRepresentativeMention ();
211+ int representativeSenteceNumber = representative .sentNum - 1 ;
212+ List <CoreLabel > representativeTokens = sentences .get (representativeSenteceNumber ).get (CoreAnnotations .TokensAnnotation .class );
213+ int beginPosition = representativeTokens .get (representative .startIndex - 1 ).beginPosition ();
214+ int endPosition = representativeTokens .get (representative .endIndex - 1 ).endPosition ();
215+ Phrase representativePhraseOccurrence = annotatedText .getSentences ().get (representativeSenteceNumber ).getPhraseOccurrence (beginPosition , endPosition );
216+ if (representativePhraseOccurrence == null ) {
217+ LOG .warn ("Representative Phrase not found: " + representative .mentionSpan );
218+ }
219+ for (CorefChain .CorefMention mention : chain .getMentionsInTextualOrder ()) {
220+ if (mention == representative ) {
221+ continue ;
222+ }
223+ int mentionSentenceNumber = mention .sentNum - 1 ;
224+
225+ List <CoreLabel > mentionTokens = sentences .get (mentionSentenceNumber ).get (CoreAnnotations .TokensAnnotation .class );
226+ int beginPositionMention = mentionTokens .get (mention .startIndex - 1 ).beginPosition ();
227+ int endPositionMention = mentionTokens .get (mention .endIndex - 1 ).endPosition ();
228+ Phrase mentionPhraseOccurrence = annotatedText .getSentences ().get (representativeSenteceNumber ).getPhraseOccurrence (beginPositionMention , endPositionMention );
229+ if (mentionPhraseOccurrence == null ) {
230+ LOG .warn ("Mention Phrase not found: " + mention .mentionSpan );
231+ }
232+ if (representativePhraseOccurrence != null
233+ && mentionPhraseOccurrence != null ) {
234+ mentionPhraseOccurrence .setReference (representativePhraseOccurrence );
235+ }
236+ }
237+ }
238+ }
239+ }
240+
202241 public AnnotatedText sentiment (AnnotatedText annotated ) {
203242 StanfordCoreNLP pipeline = pipelines .get (PIPELINE .SENTIMENT );
204243 annotated .getSentences ().parallelStream ().forEach ((item ) -> {
@@ -216,8 +255,9 @@ public AnnotatedText sentiment(AnnotatedText annotated) {
216255 private int extractSentiment (CoreMap sentence ) {
217256 Tree tree = sentence
218257 .get (SentimentCoreAnnotations .SentimentAnnotatedTree .class );
219- if (tree == null )
258+ if (tree == null ) {
220259 return Sentence .NO_SENTIMENT ;
260+ }
221261 int score = RNNCoreAnnotations .getPredictedClass (tree );
222262 return score ;
223263 }
0 commit comments