Skip to content

Commit 26cac4f

Browse files
committed
OPENNLP-1757: Expose probs() method in thread-safe probabilistic ME classes (OpenNLP 2.x)
- adds Probabilistic marker interface to API
- adjusts classic ME-related thread-safe classes to implement the common probs() method via the Probabilistic interface
- declares non-common probs-like methods 'getSentenceProbabilities' and 'getTokenProbabilities' deprecated, as 'probs()' must now be implemented by all ME classes
- improves JavaDoc along the path
1 parent 1700b9f commit 26cac4f

25 files changed

+235
-88
lines changed

opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
import opennlp.tools.ml.BeamSearch;
2626
import opennlp.tools.ml.EventTrainer;
27+
import opennlp.tools.ml.Probabilistic;
2728
import opennlp.tools.ml.SequenceTrainer;
2829
import opennlp.tools.ml.TrainerFactory;
2930
import opennlp.tools.ml.TrainerFactory.TrainerType;
@@ -40,10 +41,13 @@
4041
import opennlp.tools.util.TrainingParameters;
4142

4243
/**
43-
* The class represents a maximum-entropy-based {@link Chunker}. This chunker can be used to
44+
* The class represents a maximum-entropy-based {@link Chunker}. A chunker can be used to
4445
* find flat structures based on sequence inputs such as noun phrases or named entities.
46+
*
47+
* @see Chunker
48+
* @see Probabilistic
4549
*/
46-
public class ChunkerME implements Chunker {
50+
public class ChunkerME implements Chunker, Probabilistic {
4751

4852
public static final int DEFAULT_BEAM_SIZE = 10;
4953

@@ -128,12 +132,13 @@ public void probs(double[] probs) {
128132
}
129133

130134
/**
131-
* Returns an array with the probabilities of the last decoded sequence. The
132-
* sequence was determined based on the previous call to {@link #chunk(String[], String[])}.
135+
* {@inheritDoc}
136+
* The sequence was determined based on the previous call to {@link #chunk(String[], String[])}.
133137
*
134138
* @return An array with the same number of probabilities as tokens were sent to
135139
* {@link ChunkerME#chunk(String[], String[])} when it was last called.
136140
*/
141+
@Override
137142
public double[] probs() {
138143
return bestSequence.getProbs();
139144
}

opennlp-tools/src/main/java/opennlp/tools/chunker/ThreadSafeChunkerME.java

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,27 +18,30 @@
1818
package opennlp.tools.chunker;
1919

2020
import opennlp.tools.commons.ThreadSafe;
21+
import opennlp.tools.ml.Probabilistic;
2122
import opennlp.tools.util.Sequence;
2223
import opennlp.tools.util.Span;
2324

2425
/**
2526
* A thread-safe version of the {@link ChunkerME}. Using it is completely transparent.
2627
* You can use it in a single-threaded context as well, it only incurs a minimal overhead.
27-
*
28-
* @implNote
28+
* <p>
29+
* <b>Note:</b><br>
2930
* This implementation uses a {@link ThreadLocal}. Although the implementation is
3031
* lightweight because the model is not duplicated, if you have many long-running threads,
3132
* you may run into memory problems.
3233
* <p>
3334
* Be careful when using this in a Jakarta EE application, for example.
3435
* </p>
35-
* The user is responsible for clearing the {@link ThreadLocal}.
36+
* The user is responsible for clearing the {@link ThreadLocal}
37+
* via calling {@link #close()}.
3638
*
3739
* @see Chunker
3840
* @see ChunkerME
41+
* @see Probabilistic
3942
*/
4043
@ThreadSafe
41-
public class ThreadSafeChunkerME implements Chunker, AutoCloseable {
44+
public class ThreadSafeChunkerME implements Chunker, Probabilistic, AutoCloseable {
4245

4346
private final ChunkerModel model;
4447

@@ -88,4 +91,8 @@ public void close() {
8891
threadLocal.remove();
8992
}
9093

94+
@Override
95+
public double[] probs() {
96+
return getChunker().probs();
97+
}
9198
}

opennlp-tools/src/main/java/opennlp/tools/langdetect/ThreadSafeLanguageDetectorME.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,15 +22,16 @@
2222
/**
2323
* A thread-safe version of the {@link LanguageDetectorME}. Using it is completely transparent.
2424
* You can use it in a single-threaded context as well, it only incurs a minimal overhead.
25-
*
26-
* @implNote
25+
* <p>
26+
* <b>Note:</b><br>
2727
* This implementation uses a {@link ThreadLocal}. Although the implementation is
2828
* lightweight because the model is not duplicated, if you have many long-running threads,
2929
* you may run into memory problems.
3030
* <p>
3131
* Be careful when using this in a Jakarta EE application, for example.
3232
* </p>
33-
* The user is responsible for clearing the {@link ThreadLocal}.
33+
* The user is responsible for clearing the {@link ThreadLocal}
34+
* via calling {@link #close()}.
3435
*
3536
* @see LanguageDetector
3637
* @see LanguageDetectorME

opennlp-tools/src/main/java/opennlp/tools/lemmatizer/LemmatizerME.java

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import opennlp.tools.ml.BeamSearch;
2828
import opennlp.tools.ml.EventModelSequenceTrainer;
2929
import opennlp.tools.ml.EventTrainer;
30+
import opennlp.tools.ml.Probabilistic;
3031
import opennlp.tools.ml.SequenceTrainer;
3132
import opennlp.tools.ml.TrainerFactory;
3233
import opennlp.tools.ml.TrainerFactory.TrainerType;
@@ -50,8 +51,10 @@
5051
* Towards a Machine-Learning Architecture for Lexical Functional Grammar Parsing.
5152
* </a> PhD dissertation, Dublin City University
5253
*
54+
* @see Lemmatizer
55+
* @see Probabilistic
5356
*/
54-
public class LemmatizerME implements Lemmatizer {
57+
public class LemmatizerME implements Lemmatizer, Probabilistic {
5558

5659
public static final int LEMMA_NUMBER = 29;
5760
public static final int DEFAULT_BEAM_SIZE = 3;
@@ -100,8 +103,7 @@ public String[] lemmatize(String[] toks, String[] tags) {
100103
}
101104

102105
@Override
103-
public List<List<String>> lemmatize(List<String> toks,
104-
List<String> tags) {
106+
public List<List<String>> lemmatize(List<String> toks, List<String> tags) {
105107
String[] tokens = toks.toArray(new String[0]);
106108
String[] posTags = tags.toArray(new String[0]);
107109
String[][] allLemmas = predictLemmas(LEMMA_NUMBER, tokens, posTags);
@@ -225,13 +227,15 @@ public void probs(double[] probs) {
225227
}
226228

227229
/**
228-
* Returns an array with the probabilities of the last decoded sequence.
230+
* {@inheritDoc}
231+
*
229232
* The sequence was determined based on the previous call to
230233
* {@link #lemmatize(String[], String[])}.
231234
*
232235
* @return An array with the same number of probabilities as tokens were sent to
233236
* {@link #lemmatize(String[], String[])} when it was last called.
234237
*/
238+
@Override
235239
public double[] probs() {
236240
return bestSequence.getProbs();
237241
}

opennlp-tools/src/main/java/opennlp/tools/lemmatizer/ThreadSafeLemmatizerME.java

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,25 +20,27 @@
2020
import java.util.List;
2121

2222
import opennlp.tools.commons.ThreadSafe;
23+
import opennlp.tools.ml.Probabilistic;
2324

2425
/**
2526
* A thread-safe version of the {@link LemmatizerME}. Using it is completely transparent.
2627
* You can use it in a single-threaded context as well, it only incurs a minimal overhead.
27-
*
28-
* @implNote
28+
* <p>
29+
* <b>Note:</b><br>
2930
* This implementation uses a {@link ThreadLocal}. Although the implementation is
3031
* lightweight because the model is not duplicated, if you have many long-running threads,
3132
* you may run into memory problems.
3233
* <p>
3334
* Be careful when using this in a Jakarta EE application, for example.
3435
* </p>
35-
* The user is responsible for clearing the {@link ThreadLocal}.
36+
* The user is responsible for clearing the {@link ThreadLocal}
37+
* via calling {@link #close()}.
3638
*
3739
* @see Lemmatizer
3840
* @see LemmatizerME
3941
*/
4042
@ThreadSafe
41-
public class ThreadSafeLemmatizerME implements Lemmatizer, AutoCloseable {
43+
public class ThreadSafeLemmatizerME implements Lemmatizer, Probabilistic, AutoCloseable {
4244

4345
private final LemmatizerModel model;
4446

@@ -73,6 +75,11 @@ public List<List<String>> lemmatize(List<String> toks, List<String> tags) {
7375
return getLemmatizer().lemmatize(toks, tags);
7476
}
7577

78+
@Override
79+
public double[] probs() {
80+
return getLemmatizer().probs();
81+
}
82+
7683
@Override
7784
public void close() {
7885
threadLocal.remove();
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package opennlp.tools.ml;
19+
20+
/**
21+
* A common interface for classes that can report the probabilities of their last decoded sequence.
22+
*/
23+
public interface Probabilistic {
24+
25+
/**
26+
* Retrieves the probabilities of the last decoded sequence.
27+
*
28+
* @return An array with the same number of probabilities as tokens were sent to
29+
* the computational method when it was last called.
30+
*/
31+
double[] probs();
32+
}

opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import opennlp.tools.ml.BeamSearch;
3131
import opennlp.tools.ml.EventModelSequenceTrainer;
3232
import opennlp.tools.ml.EventTrainer;
33+
import opennlp.tools.ml.Probabilistic;
3334
import opennlp.tools.ml.SequenceTrainer;
3435
import opennlp.tools.ml.TrainerFactory;
3536
import opennlp.tools.ml.TrainerFactory.TrainerType;
@@ -48,8 +49,11 @@
4849

4950
/**
5051
* A maximum-entropy-based {@link TokenNameFinder name finder} implementation.
52+
*
53+
* @see Probabilistic
54+
* @see TokenNameFinder
5155
*/
52-
public class NameFinderME implements TokenNameFinder {
56+
public class NameFinderME implements TokenNameFinder, Probabilistic {
5357

5458
private static final String[][] EMPTY = new String[0][0];
5559
public static final int DEFAULT_BEAM_SIZE = 3;
@@ -135,12 +139,14 @@ public void probs(double[] probs) {
135139
}
136140

137141
/**
138-
* Retrieves the probabilities of the last decoded sequence. The
139-
* sequence was determined based on the previous call to {@link #find(String[])}.
142+
* {@inheritDoc}
143+
*
144+
* The sequence was determined based on the previous call to {@link #find(String[])}.
140145
*
141146
* @return An array with the same number of probabilities as tokens were sent
142147
* to {@link #find(String[])} when it was last called.
143148
*/
149+
@Override
144150
public double[] probs() {
145151
return bestSequence.getProbs();
146152
}

opennlp-tools/src/main/java/opennlp/tools/namefind/ThreadSafeNameFinderME.java

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,26 +18,28 @@
1818
package opennlp.tools.namefind;
1919

2020
import opennlp.tools.commons.ThreadSafe;
21+
import opennlp.tools.ml.Probabilistic;
2122
import opennlp.tools.util.Span;
2223

2324
/**
2425
* A thread-safe version of {@link NameFinderME}. Using it is completely transparent.
2526
* You can use it in a single-threaded context as well, it only incurs a minimal overhead.
26-
*
27-
* @implNote
27+
* <p>
28+
* <b>Note:</b><br>
2829
* This implementation uses a {@link ThreadLocal}. Although the implementation is
2930
* lightweight because the model is not duplicated, if you have many long-running threads,
3031
* you may run into memory problems.
3132
* <p>
3233
* Be careful when using this in a Jakarta EE application, for example.
3334
* </p>
34-
* The user is responsible for clearing the {@link ThreadLocal}.
35+
* The user is responsible for clearing the {@link ThreadLocal} via calling {@link #close()}.
3536
*
3637
* @see NameFinderME
38+
* @see Probabilistic
3739
* @see TokenNameFinder
3840
*/
3941
@ThreadSafe
40-
public class ThreadSafeNameFinderME implements TokenNameFinder, AutoCloseable {
42+
public class ThreadSafeNameFinderME implements TokenNameFinder, Probabilistic, AutoCloseable {
4143

4244
private final TokenNameFinderModel model;
4345

@@ -68,6 +70,11 @@ public Span[] find(String[] tokens) {
6870
return getNameFinder().find(tokens);
6971
}
7072

73+
@Override
74+
public double[] probs() {
75+
return getNameFinder().probs();
76+
}
77+
7178
@Override
7279
public void clearAdaptiveData() {
7380
getNameFinder().clearAdaptiveData();

opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerME.java

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
import opennlp.tools.ml.BeamSearch;
3434
import opennlp.tools.ml.EventModelSequenceTrainer;
3535
import opennlp.tools.ml.EventTrainer;
36+
import opennlp.tools.ml.Probabilistic;
3637
import opennlp.tools.ml.SequenceTrainer;
3738
import opennlp.tools.ml.TrainerFactory;
3839
import opennlp.tools.ml.TrainerFactory.TrainerType;
@@ -59,8 +60,9 @@
5960
* @see POSModel
6061
* @see POSTagFormat
6162
* @see POSTagger
63+
* @see Probabilistic
6264
*/
63-
public class POSTaggerME implements POSTagger {
65+
public class POSTaggerME implements POSTagger, Probabilistic {
6466

6567
private static final Logger logger = LoggerFactory.getLogger(POSTaggerME.class);
6668

@@ -245,8 +247,14 @@ public void probs(double[] probs) {
245247
}
246248

247249
/**
248-
* @return An array with the probabilities for each tag of the last tagged sentence.
250+
* {@inheritDoc}
251+
*
252+
* The sequence was determined based on the previous call to {@link #tag(String[])}.
253+
*
254+
* @return An array with the same number of probabilities as tokens were sent
255+
* to {@link #tag(String[])} when it was last called.
249256
*/
257+
@Override
250258
public double[] probs() {
251259
return bestSequence.getProbs();
252260
}

opennlp-tools/src/main/java/opennlp/tools/postag/ThreadSafePOSTaggerME.java

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,28 +20,31 @@
2020
import java.io.IOException;
2121

2222
import opennlp.tools.commons.ThreadSafe;
23+
import opennlp.tools.ml.Probabilistic;
2324
import opennlp.tools.models.ModelType;
2425
import opennlp.tools.util.DownloadUtil;
2526
import opennlp.tools.util.Sequence;
2627

2728
/**
2829
* A thread-safe version of the {@link POSTaggerME}. Using it is completely transparent.
2930
* You can use it in a single-threaded context as well, it only incurs a minimal overhead.
30-
*
31-
* @implNote
31+
* <p>
32+
* <b>Note:</b><br>
3233
* This implementation uses a {@link ThreadLocal}. Although the implementation is
3334
* lightweight because the model is not duplicated, if you have many long-running threads,
3435
* you may run into memory problems.
3536
* <p>
3637
* Be careful when using this in a Jakarta EE application, for example.
3738
* </p>
38-
* The user is responsible for clearing the {@link ThreadLocal}.
39+
* The user is responsible for clearing the {@link ThreadLocal}
40+
* via calling {@link #close()}.
3941
*
4042
* @see POSTagger
4143
* @see POSTaggerME
44+
* @see Probabilistic
4245
*/
4346
@ThreadSafe
44-
public class ThreadSafePOSTaggerME implements POSTagger, AutoCloseable {
47+
public class ThreadSafePOSTaggerME implements POSTagger, Probabilistic, AutoCloseable {
4548

4649
private final POSModel model;
4750

@@ -122,6 +125,11 @@ public Sequence[] topKSequences(String[] sentence, Object[] additionaContext) {
122125
return getTagger().topKSequences(sentence, additionaContext);
123126
}
124127

128+
@Override
129+
public double[] probs() {
130+
return getTagger().probs();
131+
}
132+
125133
@Override
126134
public void close() {
127135
threadLocal.remove();

0 commit comments

Comments
 (0)