Skip to content

Commit 9ed60c2

Browse files
committed
[MINOR][TEST][ML] Speed up some tests of ML regression by loosening tolerance
## What changes were proposed in this pull request?

Loosen some tolerances in the ML regression-related tests, as they seem to account for some of the top slow tests in https://spark-tests.appspot.com/slow-tests. These changes are good for about a 25 second speedup on my laptop.

## How was this patch tested?

Existing tests.

Closes apache#24351 from srowen/SpeedReg.

Authored-by: Sean Owen <[email protected]>
Signed-off-by: Sean Owen <[email protected]>
1 parent 0407070 commit 9ed60c2

File tree

3 files changed

+25
-17
lines changed

3 files changed

+25
-17
lines changed

mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1140,8 +1140,10 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest {
11401140
test("binary logistic regression with intercept with ElasticNet regularization") {
11411141
val trainer1 = (new LogisticRegression).setFitIntercept(true).setMaxIter(120)
11421142
.setElasticNetParam(0.38).setRegParam(0.21).setStandardization(true).setWeightCol("weight")
1143+
.setTol(1e-5)
11431144
val trainer2 = (new LogisticRegression).setFitIntercept(true).setMaxIter(60)
11441145
.setElasticNetParam(0.38).setRegParam(0.21).setStandardization(false).setWeightCol("weight")
1146+
.setTol(1e-5)
11451147

11461148
val model1 = trainer1.fit(binaryDataset)
11471149
val model2 = trainer2.fit(binaryDataset)
@@ -1489,12 +1491,14 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest {
14891491
.setFitIntercept(true)
14901492
.setStandardization(true)
14911493
.setWeightCol("weight")
1494+
.setTol(1e-5)
14921495
val trainer2 = new LogisticRegression()
14931496
.setLowerBoundsOnCoefficients(lowerBoundsOnCoefficients)
14941497
.setLowerBoundsOnIntercepts(lowerBoundsOnIntercepts)
14951498
.setFitIntercept(true)
14961499
.setStandardization(false)
14971500
.setWeightCol("weight")
1501+
.setTol(1e-5)
14981502

14991503
val model1 = trainer1.fit(multinomialDataset)
15001504
val model2 = trainer2.fit(multinomialDataset)
@@ -1690,10 +1694,10 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest {
16901694
// use tighter constraints because OWL-QN solver takes longer to converge
16911695
val trainer1 = (new LogisticRegression).setFitIntercept(true)
16921696
.setElasticNetParam(1.0).setRegParam(0.05).setStandardization(true)
1693-
.setMaxIter(160).setTol(1e-10).setWeightCol("weight")
1697+
.setMaxIter(160).setTol(1e-5).setWeightCol("weight")
16941698
val trainer2 = (new LogisticRegression).setFitIntercept(true)
16951699
.setElasticNetParam(1.0).setRegParam(0.05).setStandardization(false)
1696-
.setMaxIter(110).setTol(1e-10).setWeightCol("weight")
1700+
.setMaxIter(110).setTol(1e-5).setWeightCol("weight")
16971701

16981702
val model1 = trainer1.fit(multinomialDataset)
16991703
val model2 = trainer2.fit(multinomialDataset)
@@ -1791,8 +1795,10 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest {
17911795
test("multinomial logistic regression without intercept with L1 regularization") {
17921796
val trainer1 = (new LogisticRegression).setFitIntercept(false)
17931797
.setElasticNetParam(1.0).setRegParam(0.05).setStandardization(true).setWeightCol("weight")
1798+
.setTol(1e-5)
17941799
val trainer2 = (new LogisticRegression).setFitIntercept(false)
17951800
.setElasticNetParam(1.0).setRegParam(0.05).setStandardization(false).setWeightCol("weight")
1801+
.setTol(1e-5)
17961802

17971803
val model1 = trainer1.fit(multinomialDataset)
17981804
val model2 = trainer2.fit(multinomialDataset)
@@ -2156,10 +2162,10 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest {
21562162
test("multinomial logistic regression with intercept with elasticnet regularization") {
21572163
val trainer1 = (new LogisticRegression).setFitIntercept(true).setWeightCol("weight")
21582164
.setElasticNetParam(0.5).setRegParam(0.1).setStandardization(true)
2159-
.setMaxIter(220).setTol(1e-10)
2165+
.setMaxIter(180).setTol(1e-5)
21602166
val trainer2 = (new LogisticRegression).setFitIntercept(true).setWeightCol("weight")
21612167
.setElasticNetParam(0.5).setRegParam(0.1).setStandardization(false)
2162-
.setMaxIter(220).setTol(1e-10)
2168+
.setMaxIter(150).setTol(1e-5)
21632169

21642170
val model1 = trainer1.fit(multinomialDataset)
21652171
val model2 = trainer2.fit(multinomialDataset)
@@ -2255,10 +2261,10 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest {
22552261
test("multinomial logistic regression without intercept with elasticnet regularization") {
22562262
val trainer1 = (new LogisticRegression).setFitIntercept(false).setWeightCol("weight")
22572263
.setElasticNetParam(0.5).setRegParam(0.1).setStandardization(true)
2258-
.setMaxIter(75).setTol(1e-10)
2264+
.setTol(1e-5)
22592265
val trainer2 = (new LogisticRegression).setFitIntercept(false).setWeightCol("weight")
22602266
.setElasticNetParam(0.5).setRegParam(0.1).setStandardization(false)
2261-
.setMaxIter(50).setTol(1e-10)
2267+
.setTol(1e-5)
22622268

22632269
val model1 = trainer1.fit(multinomialDataset)
22642270
val model2 = trainer2.fit(multinomialDataset)
@@ -2672,6 +2678,7 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest {
26722678
val trainer1 = new LogisticRegression()
26732679
.setRegParam(0.1)
26742680
.setElasticNetParam(1.0)
2681+
.setMaxIter(20)
26752682

26762683
// compressed row major is optimal
26772684
val model1 = trainer1.fit(multinomialDataset.limit(100))
@@ -2687,7 +2694,7 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest {
26872694

26882695
// coefficients are dense without L1 regularization
26892696
val trainer2 = new LogisticRegression()
2690-
.setElasticNetParam(0.0)
2697+
.setElasticNetParam(0.0).setMaxIter(1)
26912698
val model3 = trainer2.fit(multinomialDataset.limit(100))
26922699
assert(model3.coefficientMatrix.isInstanceOf[DenseMatrix])
26932700
}

mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ package org.apache.spark.ml.regression
1919

2020
import scala.util.Random
2121

22-
import org.apache.spark.{SparkConf, SparkFunSuite}
22+
import org.apache.spark.SparkConf
2323
import org.apache.spark.ml.classification.LogisticRegressionSuite._
2424
import org.apache.spark.ml.feature.{Instance, OffsetInstance}
2525
import org.apache.spark.ml.feature.{LabeledPoint, RFormula}
@@ -28,7 +28,6 @@ import org.apache.spark.ml.param.{ParamMap, ParamsSuite}
2828
import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTest, MLTestingUtils}
2929
import org.apache.spark.ml.util.TestingUtils._
3030
import org.apache.spark.mllib.random._
31-
import org.apache.spark.mllib.util.MLlibTestSparkContext
3231
import org.apache.spark.serializer.KryoSerializer
3332
import org.apache.spark.sql.{DataFrame, Row}
3433
import org.apache.spark.sql.functions._
@@ -269,7 +268,7 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest
269268
("inverse", datasetGaussianInverse))) {
270269
for (fitIntercept <- Seq(false, true)) {
271270
val trainer = new GeneralizedLinearRegression().setFamily("gaussian").setLink(link)
272-
.setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction")
271+
.setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction").setTol(1e-3)
273272
val model = trainer.fit(dataset)
274273
val actual = Vectors.dense(model.intercept, model.coefficients(0), model.coefficients(1))
275274
assert(actual ~= expected(idx) absTol 1e-4, "Model mismatch: GLM with gaussian family, " +
@@ -328,7 +327,7 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest
328327
for (fitIntercept <- Seq(false, true);
329328
regParam <- Seq(0.0, 0.1, 1.0)) {
330329
val trainer = new GeneralizedLinearRegression().setFamily("gaussian")
331-
.setFitIntercept(fitIntercept).setRegParam(regParam)
330+
.setFitIntercept(fitIntercept).setRegParam(regParam).setTol(1e-3)
332331
val model = trainer.fit(datasetGaussianIdentity)
333332
val actual = Vectors.dense(model.intercept, model.coefficients(0), model.coefficients(1))
334333
assert(actual ~= expected(idx) absTol 1e-4, "Model mismatch: GLM with gaussian family, " +
@@ -384,7 +383,7 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest
384383
("cloglog", datasetBinomial))) {
385384
for (fitIntercept <- Seq(false, true)) {
386385
val trainer = new GeneralizedLinearRegression().setFamily("binomial").setLink(link)
387-
.setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction")
386+
.setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction").setTol(1e-3)
388387
val model = trainer.fit(dataset)
389388
val actual = Vectors.dense(model.intercept, model.coefficients(0), model.coefficients(1),
390389
model.coefficients(2), model.coefficients(3))
@@ -457,7 +456,7 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest
457456
("sqrt", datasetPoissonSqrt))) {
458457
for (fitIntercept <- Seq(false, true)) {
459458
val trainer = new GeneralizedLinearRegression().setFamily("poisson").setLink(link)
460-
.setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction")
459+
.setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction").setTol(1e-3)
461460
val model = trainer.fit(dataset)
462461
val actual = Vectors.dense(model.intercept, model.coefficients(0), model.coefficients(1))
463462
assert(actual ~= expected(idx) absTol 1e-4, "Model mismatch: GLM with poisson family, " +
@@ -515,7 +514,7 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest
515514
val dataset = datasetPoissonLogWithZero
516515
for (fitIntercept <- Seq(false, true)) {
517516
val trainer = new GeneralizedLinearRegression().setFamily("poisson").setLink(link)
518-
.setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction")
517+
.setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction").setTol(1e-3)
519518
val model = trainer.fit(dataset)
520519
val actual = Vectors.dense(model.intercept, model.coefficients(0), model.coefficients(1))
521520
assert(actual ~= expected(idx) absTol 1e-4, "Model mismatch: GLM with poisson family, " +
@@ -573,7 +572,7 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest
573572
("identity", datasetGammaIdentity), ("log", datasetGammaLog))) {
574573
for (fitIntercept <- Seq(false, true)) {
575574
val trainer = new GeneralizedLinearRegression().setFamily("Gamma").setLink(link)
576-
.setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction")
575+
.setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction").setTol(1e-3)
577576
val model = trainer.fit(dataset)
578577
val actual = Vectors.dense(model.intercept, model.coefficients(0), model.coefficients(1))
579578
assert(actual ~= expected(idx) absTol 1e-4, "Model mismatch: GLM with gamma family, " +
@@ -659,7 +658,7 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest
659658
variancePower <- Seq(1.6, 2.5)) {
660659
val trainer = new GeneralizedLinearRegression().setFamily("tweedie")
661660
.setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction")
662-
.setVariancePower(variancePower).setLinkPower(linkPower)
661+
.setVariancePower(variancePower).setLinkPower(linkPower).setTol(1e-4)
663662
val model = trainer.fit(datasetTweedie)
664663
val actual = Vectors.dense(model.intercept, model.coefficients(0), model.coefficients(1))
665664
assert(actual ~= expected(idx) absTol 1e-4, "Model mismatch: GLM with tweedie family, " +
@@ -736,7 +735,7 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest
736735
for (variancePower <- Seq(0.0, 1.0, 2.0, 1.5)) {
737736
val trainer = new GeneralizedLinearRegression().setFamily("tweedie")
738737
.setFitIntercept(fitIntercept).setLinkPredictionCol("linkPrediction")
739-
.setVariancePower(variancePower)
738+
.setVariancePower(variancePower).setTol(1e-3)
740739
val model = trainer.fit(datasetTweedie)
741740
val actual = Vectors.dense(model.intercept, model.coefficients(0), model.coefficients(1))
742741
assert(actual ~= expected(idx) absTol 1e-4, "Model mismatch: GLM with tweedie family, " +

mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -892,6 +892,7 @@ class LinearRegressionSuite extends MLTest with DefaultReadWriteTest with PMMLRe
892892
.setRegParam(regParam)
893893
.setElasticNetParam(elasticNetParam)
894894
.setSolver(solver)
895+
.setMaxIter(1)
895896
MLTestingUtils.testArbitrarilyScaledWeights[LinearRegressionModel, LinearRegression](
896897
datasetWithStrongNoise.as[LabeledPoint], estimator, modelEquals)
897898
MLTestingUtils.testOutliersWithSmallWeights[LinearRegressionModel, LinearRegression](
@@ -908,6 +909,7 @@ class LinearRegressionSuite extends MLTest with DefaultReadWriteTest with PMMLRe
908909
.setFitIntercept(fitIntercept)
909910
.setStandardization(standardization)
910911
.setRegParam(regParam)
912+
.setMaxIter(1)
911913
MLTestingUtils.testArbitrarilyScaledWeights[LinearRegressionModel, LinearRegression](
912914
datasetWithOutlier.as[LabeledPoint], estimator, modelEquals)
913915
MLTestingUtils.testOutliersWithSmallWeights[LinearRegressionModel, LinearRegression](

0 commit comments

Comments
 (0)