Skip to content

Commit 385cf9e

Browse files
author
dlutz2
committed
Switching to Emory lemmatizer
1 parent 1122be0 commit 385cf9e

File tree

2 files changed

+16
-7
lines changed

2 files changed

+16
-7
lines changed

pom.xml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -109,11 +109,11 @@
109109
<version>1.7.14</version>
110110
</dependency>
111111

112-
<!-- Stanford NLP core, only word normalizer used -->
112+
<!-- Emory Lemmatizer -->
113113
<dependency>
114-
<groupId>edu.stanford.nlp</groupId>
115-
<artifactId>stanford-corenlp</artifactId>
116-
<version>3.6.0</version>
114+
<groupId>edu.emory.mathcs.nlp</groupId>
115+
<artifactId>nlp4j-morphology</artifactId>
116+
<version>1.1.2</version>
117117
</dependency>
118118

119119
<!-- Part of speech tagger, used in Lexer -->

src/org/opensextant/howler/utils/OWLUtils.java

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,12 +45,16 @@
4545
import org.slf4j.Logger;
4646
import org.slf4j.LoggerFactory;
4747

48-
import edu.stanford.nlp.process.Morphology;
48+
import edu.emory.mathcs.nlp.common.util.StringUtils;
49+
import edu.emory.mathcs.nlp.component.morph.MorphAnalyzer;
50+
import edu.emory.mathcs.nlp.component.morph.english.EnglishMorphAnalyzer;
4951

5052
public class OWLUtils {
5153

5254
static Map<String, Integer> numbers = new HashMap<String, Integer>();
53-
55+
56+
static MorphAnalyzer lemmatizer = new EnglishMorphAnalyzer();
57+
5458
static {
5559
numbers.put("one", 1);
5660
numbers.put("two", 2);
@@ -199,12 +203,17 @@ public static String normalize(String word, String pos, boolean lower) {
199203
return word;
200204
}
201205

206+
// don't change numbers or Fixed vocab
207+
if (pos.equals("CD") || pos.equals("FIXED")) {
208+
return word;
209+
}
210+
202211
// don't normalize verbs yet
203212
if (pos.startsWith("V")) {
204213
return word;
205214
}
206215

207-
return (Morphology.lemmaStatic(word, pos, lower));
216+
return lemmatizer.lemmatize(StringUtils.toSimplifiedForm(word, lower), pos);
208217
}
209218

210219
public static SubjectPredicateObject rewriteSPO(

0 commit comments

Comments
 (0)