Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
44 changes: 44 additions & 0 deletions opennlp-api/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
<?xml version="1.0" encoding="UTF-8"?>

<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->

<!-- Maven module descriptor for the opennlp-api artifact (packaged as a jar). -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<!-- Parent POM is org.apache.opennlp:opennlp. This module declares no groupId or version
of its own, so Maven takes both from the parent declaration below. -->
<parent>
<groupId>org.apache.opennlp</groupId>
<artifactId>opennlp</artifactId>
<version>3.0.0-SNAPSHOT</version>
</parent>

<artifactId>opennlp-api</artifactId>
<packaging>jar</packaging>
<name>Apache OpenNLP API</name>

<dependencies>
<!-- External dependencies -->
<!-- No version element is given for slf4j-api; presumably the version is managed
by the parent POM (TODO confirm against the parent's dependencyManagement). -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</dependency>
</dependencies>

</project>
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ public static <T> String createUsage(Class<T> argProxyInterface) {

/**
* Auxiliary record that holds information about an argument. This is used by the
* {@link GenerateManualTool}, which creates a Docbook for the CLI automatically.
* {@code GenerateManualTool}, which creates a Docbook for the CLI automatically.
*/
record Argument(String argument, String value, String description, boolean optional) {

Expand Down Expand Up @@ -331,10 +331,10 @@ public static String createUsage(Class<?>... argProxyInterfaces) {
}
}

if (usage.length() > 0)
if (!usage.isEmpty())
usage.setLength(usage.length() - 1);

if (details.length() > 0) {
if (!details.isEmpty()) {
details.setLength(details.length() - 1);
usage.append("\n\nArguments description:\n").append(details);
}
Expand Down Expand Up @@ -398,8 +398,8 @@ public static String validateArgumentsLoudly(String[] args, Class<?>... argProxy
for (Class<?> argProxyInterface : argProxyInterfaces) {
for (Method method : argProxyInterface.getMethods()) {
String paramName = methodNameToParameter(method.getName());
int paramIndex = CmdLineUtil.getParameterIndex(paramName, args);
String valueString = CmdLineUtil.getParameter(paramName, args);
int paramIndex = getParameterIndex(paramName, args);
String valueString = getParameter(paramName, args);
if (valueString == null) {
OptionalParameter optionalParam = method.getAnnotation(OptionalParameter.class);

Expand Down Expand Up @@ -456,7 +456,7 @@ public static <T> T parse(String[] args, Class<T> argProxyInterface) {
for (Method method : argProxyInterface.getMethods()) {

String parameterName = methodNameToParameter(method.getName());
String valueString = CmdLineUtil.getParameter(parameterName, args);
String valueString = getParameter(parameterName, args);

if (valueString == null) {
OptionalParameter optionalParam = method.getAnnotation(OptionalParameter.class);
Expand Down Expand Up @@ -503,10 +503,10 @@ public static <T> String[] filter(String[] args, Class<T> argProxyInterface) {
for (Method method : argProxyInterface.getMethods()) {

String parameterName = methodNameToParameter(method.getName());
int idx = CmdLineUtil.getParameterIndex(parameterName, args);
int idx = getParameterIndex(parameterName, args);
if (-1 < idx) {
parameters.add(parameterName);
String valueString = CmdLineUtil.getParameter(parameterName, args);
String valueString = getParameter(parameterName, args);
if (null != valueString) {
parameters.add(valueString);
}
Expand All @@ -515,4 +515,61 @@ public static <T> String[] filter(String[] args, Class<T> argProxyInterface) {

return parameters.toArray(new String[0]);
}

/**
 * Looks up the value that follows {@code param} in {@code args} and converts it to an integer.
 *
 * @param param parameter name
 * @param args arguments
 * @return the parsed value, or {@code null} if no value is available for the parameter
 *         or the value is not a valid integer
 */
private static Integer getIntParameter(String param, String[] args) {
  final String raw = getParameter(param, args);
  if (raw == null) {
    return null;
  }

  try {
    return Integer.parseInt(raw);
  } catch (NumberFormatException ignored) {
    // A value that cannot be parsed is reported the same way as a missing one.
    return null;
  }
}

/**
 * Returns the argument that directly follows {@code param} in {@code args}.
 *
 * @param param parameter name
 * @param args arguments
 * @return the argument following the parameter, or {@code null} if the parameter is not
 *         found or is the last argument
 */
private static String getParameter(String param, String[] args) {
  final int valueIndex = getParameterIndex(param, args) + 1;

  // valueIndex is 0 when the parameter was not found (index -1).
  if (valueIndex < 1 || valueIndex >= args.length) {
    return null;
  }

  return args[valueIndex];
}

/**
 * Finds the position of a parameter within the arguments. An argument only matches
 * if it starts with {@code -} and is equal to {@code param}.
 *
 * @param param parameter name
 * @param args arguments
 * @return the index of the first matching argument, or {@code -1} if there is none
 */
private static int getParameterIndex(String param, String[] args) {
  int position = 0;
  for (String candidate : args) {
    final boolean looksLikeOption = candidate.startsWith("-");
    if (looksLikeOption && candidate.equals(param)) {
      return position;
    }
    position++;
  }

  return -1;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,31 +19,32 @@

import java.util.Map;

import opennlp.tools.util.Parameters;
import opennlp.tools.util.TrainingConfiguration;
import opennlp.tools.util.TrainingParameters;

/**
* Represents a common base for training implementations.
*/
public interface Trainer {
public interface Trainer<P extends Parameters> {

/**
* Conducts the initialization of a {@link Trainer} via
* {@link TrainingParameters} and a {@link Map report map}.
* {@link Parameters} and a {@link Map report map}.
*
* @param trainParams The {@link TrainingParameters} to use.
* @param trainParams The {@link Parameters} to use.
* @param reportMap The {@link Map} instance used as report map.
*/
void init(TrainingParameters trainParams, Map<String, String> reportMap);
void init(P trainParams, Map<String, String> reportMap);

/**
* Conducts the initialization of a {@link Trainer} via
* {@link TrainingParameters}, {@link Map report map} and {@link TrainingConfiguration}
* {@link Parameters}, {@link Map report map} and {@link TrainingConfiguration}
*
* @param trainParams The {@link TrainingParameters} to use.
* @param trainParams The {@link Parameters} to use.
* @param reportMap The {@link Map} instance used as report map.
* @param config The {@link TrainingConfiguration} to use. If null, suitable defaults will be used.
* @param config The {@link TrainingConfiguration} to use.
* If {@code null}, suitable defaults will be used.
*/
void init(TrainingParameters trainParams, Map<String, String> reportMap, TrainingConfiguration config);
void init(P trainParams, Map<String, String> reportMap, TrainingConfiguration config);

}
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,11 @@ public interface EntityLinker<T extends Span> {

/**
* Initializes an {@link EntityLinker} and allows for passing properties
* through the {@link EntityLinkerFactory} into all impls dynamically.
* through an {@code EntityLinkerFactory} into all impls dynamically.
* <p>
* {@link EntityLinker} impls should initialize reusable objects
* used by the impl in this method. If this is done, any errors will be
* captured and thrown by the {@link EntityLinkerFactory}.
* captured and thrown by an {@code EntityLinkerFactory}.
*
* @param initializationData The {@link EntityLinkerProperties} that contains
* properties needed by the impl, as well as any
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@

/**
* Properties wrapper for {@link EntityLinker} implementations.
*
* @see EntityLinkerFactory
*/
public class EntityLinkerProperties {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

/**
* A language model can calculate the probability <i>p</i> (between 0 and 1) of a
* certain {@link opennlp.tools.util.StringList sequence of tokens}, given its underlying vocabulary.
* certain sequence of tokens, given its underlying vocabulary.
*/
public interface LanguageModel {

Expand Down
108 changes: 108 additions & 0 deletions opennlp-api/src/main/java/opennlp/tools/ml/AlgorithmType.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package opennlp.tools.ml;

/**
 * Enumerates the known algorithm types. Each constant carries an algorithm type name,
 * a model type name, and the fully qualified class names (as plain strings) of the
 * associated trainer, model reader and model writer.
 */
public enum AlgorithmType {

  MAXENT("MAXENT", "GIS",
      "opennlp.tools.ml.maxent.GISTrainer",
      "opennlp.tools.ml.maxent.io.GISModelReader",
      "opennlp.tools.ml.maxent.io.BinaryGISModelWriter"),
  MAXENT_QN("MAXENT_QN", "QN",
      "opennlp.tools.ml.maxent.quasinewton.QNTrainer",
      "opennlp.tools.ml.maxent.io.QNModelReader",
      "opennlp.tools.ml.maxent.io.BinaryQNModelWriter"),
  PERCEPTRON("PERCEPTRON", "Perceptron",
      "opennlp.tools.ml.perceptron.PerceptronTrainer",
      "opennlp.tools.ml.perceptron.PerceptronModelReader",
      "opennlp.tools.ml.perceptron.BinaryPerceptronModelWriter"),
  PERCEPTRON_SEQUENCE("PERCEPTRON_SEQUENCE", "Perceptron",
      "opennlp.tools.ml.perceptron.SimplePerceptronSequenceTrainer",
      "opennlp.tools.ml.perceptron.PerceptronModelReader",
      "opennlp.tools.ml.perceptron.BinaryPerceptronModelWriter"),
  NAIVE_BAYES("NAIVEBAYES", "NaiveBayes",
      "opennlp.tools.ml.naivebayes.NaiveBayesTrainer",
      "opennlp.tools.ml.naivebayes.NaiveBayesModelReader",
      "opennlp.tools.ml.naivebayes.BinaryNaiveBayesModelWriter");

  private final String algorithmType;
  private final String modelType;
  private final String trainerClazz;
  private final String readerClazz;
  private final String writerClazz;

  /**
   * @param type The algorithm type name.
   * @param ioType The model type name.
   * @param trainerClazz The class name of the trainer.
   * @param readerClazz The class name of the model reader.
   * @param writerClazz The class name of the model writer.
   */
  AlgorithmType(String type, String ioType,
                String trainerClazz, String readerClazz, String writerClazz) {
    this.algorithmType = type;
    this.modelType = ioType;
    this.trainerClazz = trainerClazz;
    this.readerClazz = readerClazz;
    this.writerClazz = writerClazz;
  }

  /**
   * @return The algorithm type name of this constant.
   */
  public String getAlgorithmType() {
    return this.algorithmType;
  }

  /**
   * @return The class name of the trainer.
   */
  public String getTrainerClazz() {
    return this.trainerClazz;
  }

  /**
   * @return The model type name of this constant.
   */
  public String getModelType() {
    return this.modelType;
  }

  /**
   * @return The class name of the model reader.
   */
  public String getReaderClazz() {
    return this.readerClazz;
  }

  /**
   * @return The class name of the model writer.
   */
  public String getWriterClazz() {
    return this.writerClazz;
  }

  /**
   * Resolves a constant by its algorithm type name.
   *
   * @param type The algorithm type name to look up; any value, including {@code null}, is accepted.
   * @return The {@link AlgorithmType} whose algorithm type name equals {@code type}.
   * @throws IllegalArgumentException if no constant has the given algorithm type name.
   */
  public static AlgorithmType fromAlgorithmType(String type) {
    for (AlgorithmType candidate : values()) {
      final String name = candidate.getAlgorithmType();
      if (name.equals(type)) {
        return candidate;
      }
    }
    throw new IllegalArgumentException("Unknown algorithm type: " + type);
  }

  /**
   * Resolves a constant by its model type name. Constants are checked in declaration
   * order and the first match wins; {@link #PERCEPTRON} and {@link #PERCEPTRON_SEQUENCE}
   * share the same model type name, so that name resolves to {@link #PERCEPTRON}.
   *
   * @param type The model type name to look up; any value, including {@code null}, is accepted.
   * @return The first {@link AlgorithmType} whose model type name equals {@code type}.
   * @throws IllegalArgumentException if no constant has the given model type name.
   */
  public static AlgorithmType fromModelType(String type) {
    for (AlgorithmType candidate : values()) {
      final String name = candidate.getModelType();
      if (name.equals(type)) {
        return candidate;
      }
    }
    throw new IllegalArgumentException("Unknown reader type: " + type);
  }

}
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,13 @@
import opennlp.tools.commons.Trainer;
import opennlp.tools.ml.model.MaxentModel;
import opennlp.tools.ml.model.SequenceStream;
import opennlp.tools.util.Parameters;

/**
* A specialized {@link Trainer} that is based on an 'EventModelSequence' approach.
* @param <T> The generic type of elements to process via a {@link SequenceStream}.
*/
public interface EventModelSequenceTrainer<T> extends Trainer {
public interface EventModelSequenceTrainer<T, P extends Parameters> extends Trainer<P> {

String SEQUENCE_VALUE = "EventModelSequence";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,12 @@
import opennlp.tools.ml.model.Event;
import opennlp.tools.ml.model.MaxentModel;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.Parameters;

/**
* A specialized {@link Trainer} that is based on an {@link Event} approach.
*/
public interface EventTrainer extends Trainer {
public interface EventTrainer<P extends Parameters> extends Trainer<P> {

String EVENT_VALUE = "Event";

Expand All @@ -50,5 +51,5 @@ public interface EventTrainer extends Trainer {
* @return The trained {@link MaxentModel}.
* @throws IOException Thrown if IO errors occurred.
*/
MaxentModel train(DataIndexer indexer) throws IOException;
MaxentModel train(DataIndexer<P> indexer) throws IOException;
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,14 @@
import opennlp.tools.commons.Trainer;
import opennlp.tools.ml.model.SequenceClassificationModel;
import opennlp.tools.ml.model.SequenceStream;
import opennlp.tools.util.Parameters;

public interface SequenceTrainer extends Trainer {
public interface SequenceTrainer<P extends Parameters> extends Trainer<P> {

String SEQUENCE_VALUE = "Sequence";

/**
* Trains a {@link SequenceClassificationModel} for given {@link SequenceStream<T> events}.
* Trains a {@link SequenceClassificationModel} for given {@link SequenceStream <T> events}.
*
* @param events The input {@link SequenceStream<T> events}.
* @param <T> The generic type of elements to process via the {@link SequenceStream}.
Expand Down
Loading