Skip to content
This repository was archived by the owner on May 6, 2021. It is now read-only.

Commit 1b0cb3b

Browse files
committed
added phrases and relationships, refactoring, bug fixes, class for profiling
1 parent 1fde6fe commit 1b0cb3b

File tree

13 files changed

+260
-76
lines changed

13 files changed

+260
-76
lines changed

src/main/java/com/graphaware/nlp/domain/AnnotatedText.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import static com.graphaware.nlp.domain.Relationships.CONTAINS_SENTENCE;
2020
import static com.graphaware.nlp.domain.Relationships.FIRST_SENTENCE;
2121
import static com.graphaware.nlp.domain.Relationships.NEXT_SENTENCE;
22+
import static com.graphaware.nlp.domain.Relationships.REFER_TO;
2223
import java.util.ArrayList;
2324
import java.util.HashMap;
2425
import java.util.List;
@@ -57,7 +58,8 @@ public Node storeOnGraph(GraphDatabaseService database) {
5758
annotatedTextNode.setProperty(Properties.NUM_TERMS, getTokens().size());
5859
final AtomicReference<Node> previousSentenceReference = new AtomicReference<>();
5960

60-
sentences.stream().map((sentence) -> sentence.storeOnGraph(database)).forEach((sentenceNode) -> {
61+
sentences.stream().forEach((sentence) -> {
62+
Node sentenceNode = sentence.storeOnGraph(database);
6163
annotatedTextNode.createRelationshipTo(sentenceNode, CONTAINS_SENTENCE);
6264
Node previousSentence = previousSentenceReference.get();
6365
if (previousSentence == null) {
@@ -66,6 +68,14 @@ public Node storeOnGraph(GraphDatabaseService database) {
6668
previousSentence.createRelationshipTo(sentenceNode, NEXT_SENTENCE);
6769
}
6870
previousSentenceReference.set(sentenceNode);
71+
List<Phrase> phraseOccurrences = sentence.getPhraseOccurrence();
72+
phraseOccurrences.stream().forEach((phrase) -> {
73+
if (phrase.getReference() != null) {
74+
Node phraseNode = phrase.getOrCreate(database);
75+
Node referredPhraseNode = phrase.getReference().getOrCreate(database);
76+
phraseNode.createRelationshipTo(referredPhraseNode, REFER_TO);
77+
}
78+
});
6979
});
7080
tmpAnnotatedNode = annotatedTextNode;
7181
} else {

src/main/java/com/graphaware/nlp/domain/Labels.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,5 +24,7 @@ public enum Labels implements Label {
2424
AnnotatedText,
2525
Sentence,
2626
Tag,
27-
Phrase
27+
Phrase,
28+
PhraseOccurrence,
29+
TagOccurrence
2830
}

src/main/java/com/graphaware/nlp/domain/Phrase.java

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,16 @@
1515
*/
1616
package com.graphaware.nlp.domain;
1717

18-
public class Phrase {
18+
import static com.graphaware.nlp.domain.Labels.Phrase;
19+
import static com.graphaware.nlp.domain.Properties.CONTENT_VALUE;
20+
import java.util.Objects;
21+
import org.neo4j.graphdb.GraphDatabaseService;
22+
import org.neo4j.graphdb.Node;
23+
24+
public class Phrase implements Persistable {
1925
private final String content;
2026
private Phrase reference;
27+
private Node phraseNode;
2128

2229
public Phrase(String content) {
2330
this.content = content.trim();
@@ -34,11 +41,37 @@ public boolean equals(Object o) {
3441
return this.content.equalsIgnoreCase(((Phrase)o).content);
3542
}
3643

44+
@Override
45+
public int hashCode() {
46+
int hash = 3;
47+
hash = 37 * hash + Objects.hashCode(this.content);
48+
return hash;
49+
}
50+
3751
/**
 * Returns the phrase that this phrase refers to.
 *
 * @return the referenced phrase; may be {@code null} (callers in this code base
 *         null-check the result before using it)
 */
public Phrase getReference() {
3852
return reference;
3953
}
4054

4155
/**
 * Sets the phrase that this phrase refers to.
 *
 * @param reference the phrase to record as this phrase's reference
 */
public void setReference(Phrase reference) {
4256
this.reference = reference;
4357
}
58+
59+
/**
 * Persists this phrase by delegating to {@link #getOrCreate(GraphDatabaseService)}
 * and remembering the resulting node in {@code phraseNode}.
 *
 * @param database the graph database passed through to {@code getOrCreate}
 * @return the node returned by {@code getOrCreate}
 */
@Override
60+
public Node storeOnGraph(GraphDatabaseService database) {
61+
phraseNode = getOrCreate(database);
62+
return phraseNode;
63+
}
64+
65+
public Node getOrCreate(GraphDatabaseService database) {
66+
if (phraseNode != null) {
67+
return phraseNode;
68+
}
69+
phraseNode = database.findNode(Phrase, CONTENT_VALUE, content);
70+
if (phraseNode != null) {
71+
return phraseNode;
72+
}
73+
phraseNode = database.createNode(Phrase);
74+
phraseNode.setProperty(CONTENT_VALUE, content);
75+
return phraseNode;
76+
}
4477
}

src/main/java/com/graphaware/nlp/domain/Properties.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,10 @@ public final class Properties {
2828
public static final String HASH = "hash";
2929
public static final String TEXT = "text";
3030
public static final String NUM_TERMS = "numTerms";
31+
public static final String SENTENCE_NUMBER = "sentenceNumber";
32+
public static final String START_POSITION = "startPosition";
33+
public static final String END_POSITION = "endPosition";
34+
public static final String CONTENT_VALUE = "value";
3135

3236
/**
3337
* Private constructor to prevent people from instantiating this class - it's not meant to be instantiated.

src/main/java/com/graphaware/nlp/domain/Relationships.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,5 +26,11 @@ public enum Relationships implements RelationshipType {
2626
IS_RELATED_TO,
2727
SIMILARITY_COSINE,
2828
FIRST_SENTENCE,
29-
NEXT_SENTENCE
29+
NEXT_SENTENCE,
30+
HAS_PHRASE,
31+
SENTENCE_TAG_OCCURRENCE,
32+
TAG_OCCURRENCE_TAG,
33+
SENTENCE_PHRASE_OCCURRENCE,
34+
PHRASE_OCCURRENCE_PHRASE,
35+
REFER_TO
3036
}

src/main/java/com/graphaware/nlp/domain/Sentence.java

Lines changed: 87 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,22 @@
1515
*/
1616
package com.graphaware.nlp.domain;
1717

18+
import static com.graphaware.nlp.domain.Labels.PhraseOccurrence;
1819
import static com.graphaware.nlp.domain.SentimentLabels.*;
1920
import static com.graphaware.nlp.domain.Labels.Sentence;
21+
import static com.graphaware.nlp.domain.Labels.TagOccurrence;
22+
import static com.graphaware.nlp.domain.Properties.END_POSITION;
2023
import static com.graphaware.nlp.domain.Properties.HASH;
2124
import static com.graphaware.nlp.domain.Properties.PROPERTY_ID;
25+
import static com.graphaware.nlp.domain.Properties.SENTENCE_NUMBER;
26+
import static com.graphaware.nlp.domain.Properties.START_POSITION;
2227
import static com.graphaware.nlp.domain.Properties.TEXT;
28+
import static com.graphaware.nlp.domain.Relationships.HAS_PHRASE;
2329
import static com.graphaware.nlp.domain.Relationships.HAS_TAG;
30+
import static com.graphaware.nlp.domain.Relationships.PHRASE_OCCURRENCE_PHRASE;
31+
import static com.graphaware.nlp.domain.Relationships.SENTENCE_PHRASE_OCCURRENCE;
32+
import static com.graphaware.nlp.domain.Relationships.SENTENCE_TAG_OCCURRENCE;
33+
import static com.graphaware.nlp.domain.Relationships.TAG_OCCURRENCE_TAG;
2434
import static com.graphaware.nlp.util.HashFunctions.MD5;
2535
import java.util.ArrayList;
2636
import java.util.Collection;
@@ -31,28 +41,32 @@
3141
import org.neo4j.graphdb.Node;
3242
import org.neo4j.graphdb.Relationship;
3343
import org.neo4j.graphdb.ResourceIterator;
44+
import org.neo4j.graphdb.Transaction;
3445

3546
public class Sentence implements Persistable {
3647

3748
public static final int NO_SENTIMENT = -1;
3849

3950
private final Map<String, Tag> tags;
40-
private Map<Integer, PartOfTextOccurrence<Tag>> tagOccurrences;
41-
private Map<Integer, Map<Integer, PartOfTextOccurrence<Phrase>>> phraseOccurrences;
51+
private Map<Integer, PartOfTextOccurrence<Tag>> tagOccurrences = new HashMap<>();
52+
private Map<Integer, Map<Integer, PartOfTextOccurrence<Phrase>>> phraseOccurrences = new HashMap<>();
4253

4354
private final String sentence;
4455
private int sentiment = NO_SENTIMENT;
4556

4657
private boolean store = false;
4758
private String id;
59+
private int sentenceNumber;
4860

49-
public Sentence(String sentence, boolean store, String id) {
61+
/**
 * Creates a sentence via {@link #Sentence(String, String)} and then records the
 * storage flag and the sentence number.
 *
 * @param sentence       the sentence text
 * @param store          when {@code true}, {@code storeOnGraph} also writes the text
 *                       to the node's {@code TEXT} property
 * @param id             the identifier written to the node's {@code PROPERTY_ID} property
 * @param sentenceNumber the value written to the node's {@code SENTENCE_NUMBER} property
 */
public Sentence(String sentence, boolean store, String id, int sentenceNumber) {
5062
this(sentence, id);
5163
this.store = store;
64+
this.sentenceNumber = sentenceNumber;
5265
}
5366

5467
/**
 * Creates a sentence with empty tag and tag-occurrence maps.
 *
 * <p>The {@code store} flag and {@code sentiment} keep their field defaults
 * ({@code false} and {@code NO_SENTIMENT}).
 *
 * @param sentence the sentence text
 * @param id       the identifier of this sentence
 */
public Sentence(String sentence, String id) {
5568
this.tags = new HashMap<>();
69+
this.tagOccurrences = new HashMap<>();
5670
this.sentence = sentence;
5771
this.id = id;
5872
}
@@ -61,11 +75,14 @@ public Collection<Tag> getTags() {
6175
return tags.values();
6276
}
6377

64-
public void addTag(Tag tag) {
78+
/**
 * Registers a tag on this sentence, keyed by the tag's lemma.
 *
 * <p>If a tag with the same lemma is already registered, that existing tag's
 * multiplicity is incremented and the existing instance is returned; otherwise
 * the given tag is stored under its lemma and returned.
 *
 * @param tag the tag to add
 * @return the tag instance held by this sentence for that lemma
 */
public Tag addTag(Tag tag) {
6579
if (tags.containsKey(tag.getLemma())) {
66-
tags.get(tag.getLemma()).incMultiplicity();
80+
Tag result = tags.get(tag.getLemma());
81+
result.incMultiplicity();
82+
return result;
6783
} else {
6884
tags.put(tag.getLemma(), tag);
85+
return tag;
6986
}
7087
}
7188

@@ -85,9 +102,6 @@ public void addTagOccurrence(int begin, int end, Tag tag) {
85102
if (begin < 0) {
86103
throw new RuntimeException("Begin cannot be negative (for tag: " + tag.getLemma() + ")");
87104
}
88-
if (tagOccurrences == null) {
89-
tagOccurrences = new HashMap<>();
90-
}
91105
//Will update end if already exist
92106
tagOccurrences.put(begin, new PartOfTextOccurrence<>(tag, begin, end));
93107
}
@@ -144,30 +158,78 @@ public Phrase getPhraseOccurrence(int begin, int end) {
144158
Map<Integer, PartOfTextOccurrence<Phrase>> occurrences = phraseOccurrences.get(begin);
145159

146160
if (occurrences != null && occurrences.containsKey(end)) {
147-
return occurrences.get(end).getElement();
161+
return occurrences.get(end).getElement();
148162
}
149163
return null;
150164
}
151165

166+
public List<Phrase> getPhraseOccurrence() {
167+
List<Phrase> result = new ArrayList<>();
168+
phraseOccurrences.values().stream().forEach((phraseList) -> {
169+
phraseList.values().stream().forEach((item) -> {
170+
result.add(item.getElement());
171+
});
172+
});
173+
174+
return result;
175+
176+
}
177+
152178
/**
 * Stores this sentence in the graph, or reuses the node that already exists for it.
 *
 * <p>An existing node is looked up with {@code checkIfExist(database, id)}. When
 * none is found, a transaction is opened and a {@code Sentence} node is created
 * with its hash (MD5 of the text), id and sentence number, plus the text itself
 * when {@code store} is set; tags and phrases are then stored, the sentiment
 * label is assigned and the transaction is marked successful. When a node is
 * found, only the sentiment label is assigned.
 *
 * @param database the graph database to read from and write to
 * @return the existing or newly created sentence node
 */
@Override
153179
public Node storeOnGraph(GraphDatabaseService database) {
154-
Node sequenceNode = checkIfExist(database, id);
155-
if (sequenceNode == null) {
156-
Node newSentenceNode = database.createNode(Sentence);
157-
newSentenceNode.setProperty(HASH, MD5(sentence));
158-
newSentenceNode.setProperty(PROPERTY_ID, id);
159-
if (store) {
160-
newSentenceNode.setProperty(TEXT, sentence);
180+
Node sentenceNode = checkIfExist(database, id);
181+
if (sentenceNode == null) {
182+
try (Transaction tx = database.beginTx();) {
183+
Node newSentenceNode = database.createNode(Sentence);
184+
newSentenceNode.setProperty(HASH, MD5(sentence));
185+
newSentenceNode.setProperty(PROPERTY_ID, id);
186+
newSentenceNode.setProperty(SENTENCE_NUMBER, sentenceNumber);
187+
if (store) {
188+
newSentenceNode.setProperty(TEXT, sentence);
189+
}
190+
storeTags(database, newSentenceNode);
191+
storePhrases(database, newSentenceNode);
192+
sentenceNode = newSentenceNode;
193+
assignSentimentLabel(sentenceNode);
194+
tx.success();
161195
}
162-
tags.values().stream().forEach((tag) -> {
163-
Node tagNode = tag.storeOnGraph(database);
164-
Relationship hasTagRel = newSentenceNode.createRelationshipTo(tagNode, HAS_TAG);
165-
hasTagRel.setProperty("tf", tag.getMultiplicity());
196+
} else {
197+
assignSentimentLabel(sentenceNode);
198+
}
199+
return sentenceNode;
200+
}
201+
202+
private void storeTags(GraphDatabaseService database, Node newSentenceNode) {
203+
tags.values().stream().forEach((tag) -> {
204+
Node tagNode = tag.storeOnGraph(database);
205+
Relationship hasTagRel = newSentenceNode.createRelationshipTo(tagNode, HAS_TAG);
206+
hasTagRel.setProperty("tf", tag.getMultiplicity());
207+
});
208+
tagOccurrences.values().stream().forEach((tagOccurrenceAtPosition) -> {
209+
Node tagNode = tagOccurrenceAtPosition.getElement().getOrCreate(database);
210+
Node tagOccurrenceNode = database.createNode(TagOccurrence);
211+
tagOccurrenceNode.setProperty(START_POSITION, tagOccurrenceAtPosition.getSpan().first());
212+
tagOccurrenceNode.setProperty(END_POSITION, tagOccurrenceAtPosition.getSpan().second());
213+
newSentenceNode.createRelationshipTo(tagOccurrenceNode, SENTENCE_TAG_OCCURRENCE);
214+
tagOccurrenceNode.createRelationshipTo(tagNode, TAG_OCCURRENCE_TAG);
215+
});
216+
}
217+
218+
/**
 * Writes this sentence's phrase occurrences to the graph.
 *
 * <p>Does nothing when {@code phraseOccurrences} is {@code null}. Otherwise, for
 * every recorded occurrence: the phrase is stored and linked from the sentence
 * node with {@code HAS_PHRASE}; a {@code PhraseOccurrence} node carrying the
 * start and end of the occurrence's span is created, linked from the sentence
 * node with {@code SENTENCE_PHRASE_OCCURRENCE} and linked to the phrase node
 * with {@code PHRASE_OCCURRENCE_PHRASE}. A phrase occurring several times gets
 * one {@code HAS_PHRASE} relationship per occurrence.
 *
 * @param database        the graph database used to create nodes
 * @param newSentenceNode the sentence node the phrases and occurrences are attached to
 */
private void storePhrases(GraphDatabaseService database, Node newSentenceNode) {
219+
if (phraseOccurrences != null) {
220+
phraseOccurrences.values().stream().forEach((phraseOccurrencesAtPosition) -> {
221+
phraseOccurrencesAtPosition.values().stream().forEach((phraseOccurrence) -> {
222+
Node phraseNode = phraseOccurrence.getElement().storeOnGraph(database);
223+
newSentenceNode.createRelationshipTo(phraseNode, HAS_PHRASE);
224+
Node phraseOccurrenceNode = database.createNode(PhraseOccurrence);
225+
phraseOccurrenceNode.setProperty(START_POSITION, phraseOccurrence.getSpan().first());
226+
phraseOccurrenceNode.setProperty(END_POSITION, phraseOccurrence.getSpan().second());
227+
newSentenceNode.createRelationshipTo(phraseOccurrenceNode, SENTENCE_PHRASE_OCCURRENCE);
228+
phraseOccurrenceNode.createRelationshipTo(phraseNode, PHRASE_OCCURRENCE_PHRASE);
229+
//TODO: Add relationship with tags
230+
});
166231
});
167-
sequenceNode = newSentenceNode;
168232
}
169-
assignSentimentLabel(sequenceNode);
170-
return sequenceNode;
171233
}
172234

173235
private void assignSentimentLabel(Node sentenceNode) {
@@ -202,7 +264,8 @@ public static Sentence load(Node sentenceNode) {
202264
}
203265
String text = (String) sentenceNode.getProperty(TEXT);
204266
String id = (String) sentenceNode.getProperty(PROPERTY_ID);
205-
return new Sentence(text, true, id);
267+
Integer sentenceNumber = (Integer) sentenceNode.getProperty(SENTENCE_NUMBER);
268+
return new Sentence(text, true, id, sentenceNumber);
206269
}
207270

208271
private Node checkIfExist(GraphDatabaseService database, Object id) {

src/main/java/com/graphaware/nlp/domain/Tag.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
package com.graphaware.nlp.domain;
1818

1919
import static com.graphaware.nlp.domain.Labels.Tag;
20+
import static com.graphaware.nlp.domain.Properties.CONTENT_VALUE;
2021
import java.util.Collection;
2122
import java.util.HashMap;
2223
import java.util.HashSet;
@@ -89,13 +90,13 @@ public Node storeOnGraph(GraphDatabaseService database) {
8990
return tagNode;
9091
}
9192

92-
private Node getOrCreate(GraphDatabaseService database) {
93-
Node tagNode = database.findNode(Tag, "value", lemma);
93+
public Node getOrCreate(GraphDatabaseService database) {
94+
Node tagNode = database.findNode(Tag, CONTENT_VALUE, lemma);
9495
if (tagNode != null) {
9596
return tagNode;
9697
}
9798
tagNode = database.createNode(Tag);
98-
tagNode.setProperty("value", lemma);
99+
tagNode.setProperty(CONTENT_VALUE, lemma);
99100
if (ne != null) {
100101
tagNode.setProperty("ne", ne);
101102
}

src/main/java/com/graphaware/nlp/procedure/NLPProcedure.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ public class NLPProcedure {
6666
private static final String PARAMETER_NAME_LANG = "lang";
6767
private static final String PARAMETER_NAME_ADMITTED_RELATIONSHIPS = "admittedRelationships";
6868
private static final String PARAMETER_NAME_ID = "id";
69-
private static final String PARAMETER_NAME_SENTIMENT = "sentiment";
69+
private static final String PARAMETER_NAME_DEEP_LEVEL = "nlpDepth";
7070
private static final String PARAMETER_NAME_STORE_TEXT = "store";
7171
private static final String PARAMETER_NAME_INPUT_OUTPUT = "result";
7272
private static final String PARAMETER_NAME_SCORE = "score";
@@ -100,11 +100,11 @@ public RawIterator<Object[], ProcedureException> apply(Context ctx, Object[] inp
100100
return Iterators.asRawIterator(Collections.<Object[]>emptyIterator());
101101
}
102102
Object id = inputParams.get(PARAMETER_NAME_ID);
103-
boolean sentiment = (Boolean) inputParams.getOrDefault(PARAMETER_NAME_SENTIMENT, false);
103+
int level = ((Long) inputParams.getOrDefault(PARAMETER_NAME_DEEP_LEVEL, 0l)).intValue();
104104
boolean store = (Boolean) inputParams.getOrDefault(PARAMETER_NAME_STORE_TEXT, true);
105105
Node annotatedText = checkIfExist(id);
106106
if (annotatedText == null) {
107-
AnnotatedText annotateText = textProcessor.annotateText(text, id, sentiment, store);
107+
AnnotatedText annotateText = textProcessor.annotateText(text, id, level, store);
108108
annotatedText = annotateText.storeOnGraph(database);
109109
}
110110
return Iterators.asRawIterator(Collections.<Object[]>singleton(new Object[]{annotatedText}).iterator());
@@ -162,7 +162,7 @@ public RawIterator<Object[], ProcedureException> apply(CallableProcedure.Context
162162
if (filter == null) {
163163
throw new RuntimeException("A filter value needs to be provided");
164164
}
165-
AnnotatedText annotatedText = textProcessor.annotateText(text, 0, false, false);
165+
AnnotatedText annotatedText = textProcessor.annotateText(text, 0, 0, false);
166166
return Iterators.asRawIterator(Collections.<Object[]>singleton(new Object[]{annotatedText.filter(filter)}).iterator());
167167
}
168168
};
@@ -258,7 +258,7 @@ public CallableProcedure.BasicProcedure search() {
258258
@Override
259259
public RawIterator<Object[], ProcedureException> apply(CallableProcedure.Context ctx, Object[] input) throws ProcedureException {
260260
String text = (String) input[0];
261-
AnnotatedText annotateText = textProcessor.annotateText(text, 0, false, false);
261+
AnnotatedText annotateText = textProcessor.annotateText(text, 0, 0, false);
262262
List<String> tokens = annotateText.getTokens();
263263
Map<String, Object> params = new HashMap<>();
264264
params.put("tokens", tokens);

src/main/java/com/graphaware/nlp/processor/PipelineBuilder.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ private void checkForExistingAnnotators() {
3030
/**
 * Adds the {@code parse} and {@code sentiment} annotators to the pipeline being built.
 *
 * <p>Calls {@code checkForExistingAnnotators()} first and then appends
 * {@code "parse, sentiment"} to the annotator list.
 *
 * @return this builder, to allow call chaining
 */
public PipelineBuilder extractSentiment() {
3131
checkForExistingAnnotators();
3232
annotattors.append("parse, sentiment");
33+
// NOTE(review): disabled override of the parser model; presumably kept for experimenting with the shift-reduce parser - confirm before removing.
//properties.setProperty("parse.model", "edu/stanford/nlp/models/srparser/englishSR.ser.gz");
3334
return this;
3435
}
3536

0 commit comments

Comments
 (0)