Skip to content

Commit d2ab68a

Browse files
srowen authored and Sumedh Wale committed
[SPARK-26966][ML] Update to JPMML 1.4.8
JPMML apparently supports Java 9 only in 1.4.2+. We are seeing test failures from JPMML relating to JAXB when running on Java 11. It's shaded and not a big change, so should be safe. Existing tests. Closes apache#23868 from srowen/SPARK-26966. Authored-by: Sean Owen <[email protected]> Signed-off-by: Sean Owen <[email protected]>
1 parent b896a6e commit d2ab68a

File tree

9 files changed

+43
-24
lines changed

9 files changed

+43
-24
lines changed

build.gradle

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ allprojects {
107107
jlineVersion = '2.14.6'
108108
xbeanAsm5Version = '4.5'
109109
breezeVersion = '0.13.2'
110-
pmmlVersion = '1.2.17'
110+
pmmlVersion = '1.4.15'
111111
classutilVersion = '1.4.0'
112112
scoptVersion = '3.7.1'
113113
mesosVersion = '1.0.4'

mllib/src/main/scala/org/apache/spark/mllib/pmml/export/BinaryClassificationPMMLModelExport.scala

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@ package org.apache.spark.mllib.pmml.export
1919

2020
import scala.{Array => SArray}
2121

22-
import org.dmg.pmml._
22+
import org.dmg.pmml.{DataDictionary, DataField, DataType, FieldName, MiningField,
23+
MiningFunction, MiningSchema, OpType}
24+
import org.dmg.pmml.regression.{NumericPredictor, RegressionModel, RegressionTable}
2325

2426
import org.apache.spark.mllib.regression.GeneralizedLinearModel
2527

@@ -29,7 +31,7 @@ import org.apache.spark.mllib.regression.GeneralizedLinearModel
2931
private[mllib] class BinaryClassificationPMMLModelExport(
3032
model: GeneralizedLinearModel,
3133
description: String,
32-
normalizationMethod: RegressionNormalizationMethodType,
34+
normalizationMethod: RegressionModel.NormalizationMethod,
3335
threshold: Double)
3436
extends PMMLModelExport {
3537

@@ -47,7 +49,7 @@ private[mllib] class BinaryClassificationPMMLModelExport(
4749
val miningSchema = new MiningSchema
4850
val regressionTableYES = new RegressionTable(model.intercept).setTargetCategory("1")
4951
var interceptNO = threshold
50-
if (RegressionNormalizationMethodType.LOGIT == normalizationMethod) {
52+
if (RegressionModel.NormalizationMethod.LOGIT == normalizationMethod) {
5153
if (threshold <= 0) {
5254
interceptNO = Double.MinValue
5355
} else if (threshold >= 1) {
@@ -58,7 +60,7 @@ private[mllib] class BinaryClassificationPMMLModelExport(
5860
}
5961
val regressionTableNO = new RegressionTable(interceptNO).setTargetCategory("0")
6062
val regressionModel = new RegressionModel()
61-
.setFunctionName(MiningFunctionType.CLASSIFICATION)
63+
.setMiningFunction(MiningFunction.CLASSIFICATION)
6264
.setMiningSchema(miningSchema)
6365
.setModelName(description)
6466
.setNormalizationMethod(normalizationMethod)
@@ -69,7 +71,7 @@ private[mllib] class BinaryClassificationPMMLModelExport(
6971
dataDictionary.addDataFields(new DataField(fields(i), OpType.CONTINUOUS, DataType.DOUBLE))
7072
miningSchema
7173
.addMiningFields(new MiningField(fields(i))
72-
.setUsageType(FieldUsageType.ACTIVE))
74+
.setUsageType(MiningField.UsageType.ACTIVE))
7375
regressionTableYES.addNumericPredictors(new NumericPredictor(fields(i), model.weights(i)))
7476
}
7577

@@ -79,7 +81,7 @@ private[mllib] class BinaryClassificationPMMLModelExport(
7981
.addDataFields(new DataField(targetField, OpType.CATEGORICAL, DataType.STRING))
8082
miningSchema
8183
.addMiningFields(new MiningField(targetField)
82-
.setUsageType(FieldUsageType.TARGET))
84+
.setUsageType(MiningField.UsageType.TARGET))
8385

8486
dataDictionary.setNumberOfFields(dataDictionary.getDataFields.size)
8587

mllib/src/main/scala/org/apache/spark/mllib/pmml/export/GeneralizedLinearPMMLModelExport.scala

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@ package org.apache.spark.mllib.pmml.export
1919

2020
import scala.{Array => SArray}
2121

22-
import org.dmg.pmml._
22+
import org.dmg.pmml.{DataDictionary, DataField, DataType, FieldName, MiningField,
23+
MiningFunction, MiningSchema, OpType}
24+
import org.dmg.pmml.regression.{NumericPredictor, RegressionModel, RegressionTable}
2325

2426
import org.apache.spark.mllib.regression.GeneralizedLinearModel
2527

@@ -45,7 +47,7 @@ private[mllib] class GeneralizedLinearPMMLModelExport(
4547
val miningSchema = new MiningSchema
4648
val regressionTable = new RegressionTable(model.intercept)
4749
val regressionModel = new RegressionModel()
48-
.setFunctionName(MiningFunctionType.REGRESSION)
50+
.setMiningFunction(MiningFunction.REGRESSION)
4951
.setMiningSchema(miningSchema)
5052
.setModelName(description)
5153
.addRegressionTables(regressionTable)
@@ -55,7 +57,7 @@ private[mllib] class GeneralizedLinearPMMLModelExport(
5557
dataDictionary.addDataFields(new DataField(fields(i), OpType.CONTINUOUS, DataType.DOUBLE))
5658
miningSchema
5759
.addMiningFields(new MiningField(fields(i))
58-
.setUsageType(FieldUsageType.ACTIVE))
60+
.setUsageType(MiningField.UsageType.ACTIVE))
5961
regressionTable.addNumericPredictors(new NumericPredictor(fields(i), model.weights(i)))
6062
}
6163

@@ -64,7 +66,7 @@ private[mllib] class GeneralizedLinearPMMLModelExport(
6466
dataDictionary.addDataFields(new DataField(targetField, OpType.CONTINUOUS, DataType.DOUBLE))
6567
miningSchema
6668
.addMiningFields(new MiningField(targetField)
67-
.setUsageType(FieldUsageType.TARGET))
69+
.setUsageType(MiningField.UsageType.TARGET))
6870

6971
dataDictionary.setNumberOfFields(dataDictionary.getDataFields.size)
7072

mllib/src/main/scala/org/apache/spark/mllib/pmml/export/KMeansPMMLModelExport.scala

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@ package org.apache.spark.mllib.pmml.export
1919

2020
import scala.{Array => SArray}
2121

22-
import org.dmg.pmml._
22+
import org.dmg.pmml.{Array, CompareFunction, ComparisonMeasure, DataDictionary, DataField, DataType,
23+
FieldName, MiningField, MiningFunction, MiningSchema, OpType, SquaredEuclidean}
24+
import org.dmg.pmml.clustering.{Cluster, ClusteringField, ClusteringModel}
2325

2426
import org.apache.spark.mllib.clustering.KMeansModel
2527

@@ -48,7 +50,7 @@ private[mllib] class KMeansPMMLModelExport(model: KMeansModel) extends PMMLModel
4850
.setModelName("k-means")
4951
.setMiningSchema(miningSchema)
5052
.setComparisonMeasure(comparisonMeasure)
51-
.setFunctionName(MiningFunctionType.CLUSTERING)
53+
.setMiningFunction(MiningFunction.CLUSTERING)
5254
.setModelClass(ClusteringModel.ModelClass.CENTER_BASED)
5355
.setNumberOfClusters(model.clusterCenters.length)
5456

@@ -57,9 +59,9 @@ private[mllib] class KMeansPMMLModelExport(model: KMeansModel) extends PMMLModel
5759
dataDictionary.addDataFields(new DataField(fields(i), OpType.CONTINUOUS, DataType.DOUBLE))
5860
miningSchema
5961
.addMiningFields(new MiningField(fields(i))
60-
.setUsageType(FieldUsageType.ACTIVE))
62+
.setUsageType(MiningField.UsageType.ACTIVE))
6163
clusteringModel.addClusteringFields(
62-
new ClusteringField(fields(i)).setCompareFunction(CompareFunctionType.ABS_DIFF))
64+
new ClusteringField(fields(i)).setCompareFunction(CompareFunction.ABS_DIFF))
6365
}
6466

6567
dataDictionary.setNumberOfFields(dataDictionary.getDataFields.size)

mllib/src/main/scala/org/apache/spark/mllib/pmml/export/PMMLModelExportFactory.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
package org.apache.spark.mllib.pmml.export
1919

20-
import org.dmg.pmml.RegressionNormalizationMethodType
20+
import org.dmg.pmml.regression.RegressionModel
2121

2222
import org.apache.spark.mllib.classification.LogisticRegressionModel
2323
import org.apache.spark.mllib.classification.SVMModel
@@ -44,12 +44,12 @@ private[mllib] object PMMLModelExportFactory {
4444
new GeneralizedLinearPMMLModelExport(lasso, "lasso regression")
4545
case svm: SVMModel =>
4646
new BinaryClassificationPMMLModelExport(
47-
svm, "linear SVM", RegressionNormalizationMethodType.NONE,
47+
svm, "linear SVM", RegressionModel.NormalizationMethod.NONE,
4848
svm.getThreshold.getOrElse(0.0))
4949
case logistic: LogisticRegressionModel =>
5050
if (logistic.numClasses == 2) {
5151
new BinaryClassificationPMMLModelExport(
52-
logistic, "logistic regression", RegressionNormalizationMethodType.LOGIT,
52+
logistic, "logistic regression", RegressionModel.NormalizationMethod.LOGIT,
5353
logistic.getThreshold.getOrElse(0.5))
5454
} else {
5555
throw new IllegalArgumentException(

mllib/src/test/scala/org/apache/spark/mllib/pmml/export/BinaryClassificationPMMLModelExportSuite.scala

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,7 @@
1717

1818
package org.apache.spark.mllib.pmml.export
1919

20-
import org.dmg.pmml.RegressionModel
21-
import org.dmg.pmml.RegressionNormalizationMethodType
20+
import org.dmg.pmml.regression.RegressionModel
2221

2322
import org.apache.spark.SparkFunSuite
2423
import org.apache.spark.mllib.classification.LogisticRegressionModel
@@ -51,7 +50,8 @@ class BinaryClassificationPMMLModelExportSuite extends SparkFunSuite {
5150
assert(pmmlRegressionModel.getRegressionTables.get(1).getTargetCategory === "0")
5251
assert(pmmlRegressionModel.getRegressionTables.get(1).getNumericPredictors.size === 0)
5352
// ensure logistic regression has normalization method set to LOGIT
54-
assert(pmmlRegressionModel.getNormalizationMethod() == RegressionNormalizationMethodType.LOGIT)
53+
assert(pmmlRegressionModel.getNormalizationMethod() ===
54+
RegressionModel.NormalizationMethod.LOGIT)
5555
}
5656

5757
test("linear SVM PMML export") {
@@ -78,7 +78,8 @@ class BinaryClassificationPMMLModelExportSuite extends SparkFunSuite {
7878
assert(pmmlRegressionModel.getRegressionTables.get(1).getTargetCategory === "0")
7979
assert(pmmlRegressionModel.getRegressionTables.get(1).getNumericPredictors.size === 0)
8080
// ensure linear SVM has normalization method set to NONE
81-
assert(pmmlRegressionModel.getNormalizationMethod() == RegressionNormalizationMethodType.NONE)
81+
assert(pmmlRegressionModel.getNormalizationMethod() ===
82+
RegressionModel.NormalizationMethod.NONE)
8283
}
8384

8485
}

mllib/src/test/scala/org/apache/spark/mllib/pmml/export/GeneralizedLinearPMMLModelExportSuite.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
package org.apache.spark.mllib.pmml.export
1919

20-
import org.dmg.pmml.RegressionModel
20+
import org.dmg.pmml.regression.RegressionModel
2121

2222
import org.apache.spark.SparkFunSuite
2323
import org.apache.spark.mllib.regression.{LassoModel, LinearRegressionModel, RidgeRegressionModel}

mllib/src/test/scala/org/apache/spark/mllib/pmml/export/KMeansPMMLModelExportSuite.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
package org.apache.spark.mllib.pmml.export
1919

20-
import org.dmg.pmml.ClusteringModel
20+
import org.dmg.pmml.clustering.ClusteringModel
2121

2222
import org.apache.spark.SparkFunSuite
2323
import org.apache.spark.mllib.clustering.KMeansModel

pom.xml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -381,6 +381,18 @@
381381
<version>14.0.1</version>
382382
<scope>provided</scope>
383383
</dependency>
384+
<dependency>
385+
<groupId>org.jpmml</groupId>
386+
<artifactId>pmml-model</artifactId>
387+
<version>1.4.8</version>
388+
<scope>provided</scope>
389+
<exclusions>
390+
<exclusion>
391+
<groupId>org.jpmml</groupId>
392+
<artifactId>pmml-agent</artifactId>
393+
</exclusion>
394+
</exclusions>
395+
</dependency>
384396
<!-- End of shaded deps -->
385397
<dependency>
386398
<groupId>org.apache.commons</groupId>

0 commit comments

Comments
 (0)