Skip to content

Commit 32fa977

Browse files
mawiesne, amensiko, rzo1
authored
OPENNLP-855: Add SentimentDetector to derive sentiment from text (#579)
* OPENNLP-855: Add SentimentDetector to derive sentiment from text - adapts existing Sentiment code to OpenNLP 3.x module structures - introduces SentimentDetector API interface - cleans up Sentiment Analysis implementation and adds tests - fixes broken sequence labeling code (find/predict2) from SentimentME and SentimentDetector; sentiment is a classification task, not sequence labeling - removes getSentimentModel() from SentimentModel (wrapped MaxentModel in unused BeamSearch) - adds toString/equals/hashCode to SentimentSample, removes unused id field - fixes SentimentSampleTypeFilter to actually filter by sentiment type - fixes SentimentDetailedFMeasureListener.asSpanArray() returning null - removes dead detailedFListener code in CLI tools - adds 44 unit tests covering all runtime sentiment classes - enhances dev manual in docs - fine-tunes test classes --------- Co-authored-by: amensiko <anastasija.mensikova@trincoll.edu> Co-authored-by: Richard Zowalla <rzo1@apache.org>
1 parent 9bfd807 commit 32fa977

34 files changed

+2871
-3
lines changed
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package opennlp.tools.sentiment;
19+
20+
/**
 * Describes an API for deriving the sentiment of a piece of text.
 * <p>
 * The text can be supplied either as a single string or as an array of tokens.
 */
public interface SentimentDetector {

  /**
   * Predicts the sentiment of the specified sentence.
   *
   * @param sentence The text to be analysed for its sentiment.
   * @return The predicted sentiment.
   */
  String predict(String sentence);

  /**
   * Predicts the sentiment of the specified tokens.
   *
   * @param tokens The tokens of the text to be analysed for its sentiment.
   * @return The predicted sentiment.
   */
  String predict(String[] tokens);
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package opennlp.tools.sentiment;
19+
20+
import opennlp.tools.util.eval.EvaluationMonitor;
21+
22+
/**
23+
* An sentiment specific {@link EvaluationMonitor} to be used by the evaluator.
24+
*
25+
* @see SentimentSample
26+
*/
27+
public interface SentimentEvaluationMonitor extends EvaluationMonitor<SentimentSample> {
28+
29+
}
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package opennlp.tools.sentiment;
19+
20+
import java.io.Serial;
21+
import java.util.List;
22+
import java.util.Objects;
23+
24+
import opennlp.tools.commons.Sample;
25+
26+
/**
27+
* Class for holding text used for sentiment analysis.
28+
*/
29+
public class SentimentSample implements Sample {
30+
31+
@Serial
32+
private static final long serialVersionUID = 2477213313738337539L;
33+
34+
private final String sentiment;
35+
private final List<String> sentence;
36+
private final boolean isClearAdaptiveData;
37+
38+
/**
39+
* Instantiates a {@link SentimentSample} object.
40+
*
41+
* @param sentiment
42+
* training sentiment
43+
* @param sentence
44+
* training sentence
45+
*/
46+
public SentimentSample(String sentiment, String[] sentence) {
47+
this(sentiment, sentence, true);
48+
}
49+
50+
public SentimentSample(String sentiment, String[] sentence,
51+
boolean clearAdaptiveData) {
52+
if (sentiment == null) {
53+
throw new IllegalArgumentException("sentiment must not be null");
54+
}
55+
if (sentence == null) {
56+
throw new IllegalArgumentException("sentence must not be null");
57+
}
58+
59+
this.sentiment = sentiment;
60+
this.sentence = List.of(sentence);
61+
this.isClearAdaptiveData = clearAdaptiveData;
62+
}
63+
64+
/**
65+
* @return Returns the sentiment.
66+
*/
67+
public String getSentiment() {
68+
return sentiment;
69+
}
70+
71+
/**
72+
* @return Returns the sentence.
73+
*/
74+
public String[] getSentence() {
75+
return sentence.toArray(new String[0]);
76+
}
77+
78+
/**
79+
* @return Returns the value of isClearAdaptiveData, {@code true} or {@code false}.
80+
*/
81+
public boolean isClearAdaptiveDataSet() {
82+
return isClearAdaptiveData;
83+
}
84+
85+
@Override
86+
public String toString() {
87+
return sentiment + " " + String.join(" ", sentence);
88+
}
89+
90+
@Override
91+
public boolean equals(Object obj) {
92+
if (this == obj) {
93+
return true;
94+
}
95+
if (obj == null || getClass() != obj.getClass()) {
96+
return false;
97+
}
98+
SentimentSample that = (SentimentSample) obj;
99+
return Objects.equals(sentiment, that.sentiment)
100+
&& Objects.equals(sentence, that.sentence);
101+
}
102+
103+
@Override
104+
public int hashCode() {
105+
return Objects.hash(sentiment, sentence);
106+
}
107+
108+
}

opennlp-core/opennlp-cli/src/main/java/opennlp/tools/cmdline/CLI.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,9 @@
7474
import opennlp.tools.cmdline.sentdetect.SentenceDetectorEvaluatorTool;
7575
import opennlp.tools.cmdline.sentdetect.SentenceDetectorTool;
7676
import opennlp.tools.cmdline.sentdetect.SentenceDetectorTrainerTool;
77+
import opennlp.tools.cmdline.sentiment.SentimentCrossValidatorTool;
78+
import opennlp.tools.cmdline.sentiment.SentimentEvaluatorTool;
79+
import opennlp.tools.cmdline.sentiment.SentimentTrainerTool;
7780
import opennlp.tools.cmdline.tokenizer.DictionaryDetokenizerTool;
7881
import opennlp.tools.cmdline.tokenizer.SimpleTokenizerTool;
7982
import opennlp.tools.cmdline.tokenizer.TokenizerConverterTool;
@@ -173,6 +176,11 @@ public final class CLI {
173176

174177
// Entity Linker
175178
tools.add(new EntityLinkerTool());
179+
180+
// Sentiment Analysis Parser
181+
tools.add(new SentimentTrainerTool());
182+
tools.add(new SentimentEvaluatorTool());
183+
tools.add(new SentimentCrossValidatorTool());
176184

177185
// Language Model
178186
tools.add(new NGramLanguageModelTool());
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package opennlp.tools.cmdline.sentiment;
19+
20+
import java.io.IOException;
21+
import java.util.LinkedList;
22+
import java.util.List;
23+
24+
import opennlp.tools.cmdline.AbstractCrossValidatorTool;
25+
import opennlp.tools.cmdline.CmdLineUtil;
26+
import opennlp.tools.cmdline.TerminateToolException;
27+
import opennlp.tools.cmdline.params.BasicTrainingParams;
28+
import opennlp.tools.cmdline.params.CVParams;
29+
import opennlp.tools.cmdline.sentiment.SentimentCrossValidatorTool.CVToolParams;
30+
import opennlp.tools.sentiment.SentimentCrossValidator;
31+
import opennlp.tools.sentiment.SentimentEvaluationMonitor;
32+
import opennlp.tools.sentiment.SentimentFactory;
33+
import opennlp.tools.sentiment.SentimentSample;
34+
import opennlp.tools.util.eval.EvaluationMonitor;
35+
import opennlp.tools.util.model.ModelUtil;
36+
37+
/**
38+
* Class for helping perform cross validation on the Sentiment Analysis Parser.
39+
*/
40+
public class SentimentCrossValidatorTool
41+
extends AbstractCrossValidatorTool<SentimentSample, CVToolParams> {
42+
43+
/**
44+
* Interface for parameters
45+
*/
46+
interface CVToolParams extends BasicTrainingParams, CVParams {
47+
48+
}
49+
50+
/**
51+
* Constructor
52+
*/
53+
public SentimentCrossValidatorTool() {
54+
super(SentimentSample.class, CVToolParams.class);
55+
}
56+
57+
/**
58+
* Returns the short description of the tool
59+
*
60+
* @return short description
61+
*/
62+
public String getShortDescription() {
63+
return "K-fold cross validator for the learnable Sentiment Analysis Parser";
64+
}
65+
66+
/**
67+
* Runs the tool
68+
*
69+
* @param format
70+
* the format to be used
71+
* @param args
72+
* the arguments
73+
*/
74+
public void run(String format, String[] args) {
75+
super.run(format, args);
76+
77+
mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), true);
78+
if (mlParams == null) {
79+
mlParams = ModelUtil.createDefaultTrainingParameters();
80+
}
81+
82+
List<EvaluationMonitor<SentimentSample>> listeners = new LinkedList<>();
83+
if (params.getMisclassified()) {
84+
listeners.add(new SentimentEvaluationErrorListener());
85+
}
86+
SentimentFactory sentimentFactory = new SentimentFactory();
87+
88+
SentimentCrossValidator validator;
89+
try {
90+
validator = new SentimentCrossValidator(params.getLang(), mlParams, sentimentFactory,
91+
listeners.toArray(new SentimentEvaluationMonitor[listeners.size()]));
92+
validator.evaluate(sampleStream, params.getFolds());
93+
} catch (IOException e) {
94+
throw new TerminateToolException(-1,
95+
"IO error while reading training data or indexing data: "
96+
+ e.getMessage(),
97+
e);
98+
} finally {
99+
try {
100+
sampleStream.close();
101+
} catch (IOException e) {
102+
// sorry that this can fail
103+
}
104+
}
105+
106+
System.out.println("done");
107+
108+
System.out.println();
109+
System.out.println(validator.getFMeasure());
110+
}
111+
112+
}
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package opennlp.tools.cmdline.sentiment;
19+
20+
import opennlp.tools.cmdline.DetailedFMeasureListener;
21+
import opennlp.tools.sentiment.SentimentEvaluationMonitor;
22+
import opennlp.tools.sentiment.SentimentSample;
23+
import opennlp.tools.util.Span;
24+
25+
/**
26+
* Class for creating a detailed F-Measure listener
27+
*/
28+
public class SentimentDetailedFMeasureListener
29+
extends DetailedFMeasureListener<SentimentSample>
30+
implements SentimentEvaluationMonitor {
31+
32+
/**
33+
* Returns the sentiment sample as a span array
34+
*
35+
* @param sample
36+
* the sentiment sample to be returned
37+
* @return span array of the sample
38+
*/
39+
@Override
40+
protected Span[] asSpanArray(SentimentSample sample) {
41+
return new Span[] { new Span(0, 0, sample.getSentiment()) };
42+
}
43+
}

0 commit comments

Comments
 (0)