
Commit 9ec0496

zhengruifeng authored and srowen committed
[SPARK-28044][ML][PYTHON] MulticlassClassificationEvaluator support more metrics
## What changes were proposed in this pull request?

Expose more metrics in the evaluator:
weightedTruePositiveRate / weightedFalsePositiveRate / weightedFMeasure / truePositiveRateByLabel / falsePositiveRateByLabel / precisionByLabel / recallByLabel / fMeasureByLabel

## How was this patch tested?

Existing cases and newly added cases.

Closes apache#24868 from zhengruifeng/multi_class_support_bylabel.

Authored-by: zhengruifeng <[email protected]>
Signed-off-by: Sean Owen <[email protected]>
1 parent 7b7f16f commit 9ec0496

File tree

6 files changed: +157 / -45 lines changed


mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala

Lines changed: 63 additions & 12 deletions
@@ -18,7 +18,7 @@
 package org.apache.spark.ml.evaluation
 
 import org.apache.spark.annotation.{Experimental, Since}
-import org.apache.spark.ml.param.{Param, ParamMap, ParamValidators}
+import org.apache.spark.ml.param.{DoubleParam, Param, ParamMap, ParamValidators}
 import org.apache.spark.ml.param.shared.{HasLabelCol, HasPredictionCol, HasWeightCol}
 import org.apache.spark.ml.util.{DefaultParamsReadable, DefaultParamsWritable, Identifiable, SchemaUtils}
 import org.apache.spark.mllib.evaluation.MulticlassMetrics
@@ -36,6 +36,8 @@ class MulticlassClassificationEvaluator @Since("1.5.0") (@Since("1.5.0") overrid
   extends Evaluator with HasPredictionCol with HasLabelCol
     with HasWeightCol with DefaultParamsWritable {
 
+  import MulticlassClassificationEvaluator.supportedMetricNames
+
   @Since("1.5.0")
   def this() = this(Identifiable.randomUID("mcEval"))
 
@@ -45,12 +47,9 @@ class MulticlassClassificationEvaluator @Since("1.5.0") (@Since("1.5.0") overrid
    * @group param
    */
   @Since("1.5.0")
-  val metricName: Param[String] = {
-    val allowedParams = ParamValidators.inArray(Array("f1", "weightedPrecision",
-      "weightedRecall", "accuracy"))
-    new Param(this, "metricName", "metric name in evaluation " +
-      "(f1|weightedPrecision|weightedRecall|accuracy)", allowedParams)
-  }
+  val metricName: Param[String] = new Param(this, "metricName",
+    s"metric name in evaluation ${supportedMetricNames.mkString("(", "|", ")")}",
+    ParamValidators.inArray(supportedMetricNames))
 
   /** @group getParam */
   @Since("1.5.0")
@@ -60,6 +59,8 @@ class MulticlassClassificationEvaluator @Since("1.5.0") (@Since("1.5.0") overrid
   @Since("1.5.0")
   def setMetricName(value: String): this.type = set(metricName, value)
 
+  setDefault(metricName -> "f1")
+
   /** @group setParam */
   @Since("1.5.0")
   def setPredictionCol(value: String): this.type = set(predictionCol, value)
@@ -72,7 +73,39 @@ class MulticlassClassificationEvaluator @Since("1.5.0") (@Since("1.5.0") overrid
   @Since("3.0.0")
   def setWeightCol(value: String): this.type = set(weightCol, value)
 
-  setDefault(metricName -> "f1")
+  @Since("3.0.0")
+  final val metricLabel: DoubleParam = new DoubleParam(this, "metricLabel",
+    "The class whose metric will be computed in " +
+      s"${supportedMetricNames.filter(_.endsWith("ByLabel")).mkString("(", "|", ")")}. " +
+      "Must be >= 0. The default value is 0.",
+    ParamValidators.gtEq(0.0))
+
+  /** @group getParam */
+  @Since("3.0.0")
+  def getMetricLabel: Double = $(metricLabel)
+
+  /** @group setParam */
+  @Since("3.0.0")
+  def setMetricLabel(value: Double): this.type = set(metricLabel, value)
+
+  setDefault(metricLabel -> 0.0)
+
+  @Since("3.0.0")
+  final val beta: DoubleParam = new DoubleParam(this, "beta",
+    "The beta value, which controls precision vs recall weighting, " +
+      "used in (weightedFMeasure|fMeasureByLabel). Must be > 0. The default value is 1.",
+    ParamValidators.gt(0.0))
+
+  /** @group getParam */
+  @Since("3.0.0")
+  def getBeta: Double = $(beta)
+
+  /** @group setParam */
+  @Since("3.0.0")
+  def setBeta(value: Double): this.type = set(beta, value)
+
+  setDefault(beta -> 1.0)
 
   @Since("2.0.0")
   override def evaluate(dataset: Dataset[_]): Double = {
@@ -87,17 +120,30 @@ class MulticlassClassificationEvaluator @Since("1.5.0") (@Since("1.5.0") overrid
       case Row(prediction: Double, label: Double, weight: Double) => (prediction, label, weight)
     }
     val metrics = new MulticlassMetrics(predictionAndLabelsWithWeights)
-    val metric = $(metricName) match {
+    $(metricName) match {
       case "f1" => metrics.weightedFMeasure
+      case "accuracy" => metrics.accuracy
       case "weightedPrecision" => metrics.weightedPrecision
      case "weightedRecall" => metrics.weightedRecall
-      case "accuracy" => metrics.accuracy
+      case "weightedTruePositiveRate" => metrics.weightedTruePositiveRate
+      case "weightedFalsePositiveRate" => metrics.weightedFalsePositiveRate
+      case "weightedFMeasure" => metrics.weightedFMeasure($(beta))
+      case "truePositiveRateByLabel" => metrics.truePositiveRate($(metricLabel))
+      case "falsePositiveRateByLabel" => metrics.falsePositiveRate($(metricLabel))
+      case "precisionByLabel" => metrics.precision($(metricLabel))
+      case "recallByLabel" => metrics.recall($(metricLabel))
+      case "fMeasureByLabel" => metrics.fMeasure($(metricLabel), $(beta))
     }
-    metric
   }
 
   @Since("1.5.0")
-  override def isLargerBetter: Boolean = true
+  override def isLargerBetter: Boolean = {
+    $(metricName) match {
+      case "weightedFalsePositiveRate" => false
+      case "falsePositiveRateByLabel" => false
+      case _ => true
+    }
+  }
 
   @Since("1.5.0")
   override def copy(extra: ParamMap): MulticlassClassificationEvaluator = defaultCopy(extra)
@@ -107,6 +153,11 @@ class MulticlassClassificationEvaluator @Since("1.5.0") (@Since("1.5.0") overrid
 object MulticlassClassificationEvaluator
   extends DefaultParamsReadable[MulticlassClassificationEvaluator] {
 
+  private val supportedMetricNames = Array("f1", "accuracy", "weightedPrecision", "weightedRecall",
+    "weightedTruePositiveRate", "weightedFalsePositiveRate", "weightedFMeasure",
+    "truePositiveRateByLabel", "falsePositiveRateByLabel", "precisionByLabel", "recallByLabel",
+    "fMeasureByLabel")
+
   @Since("1.6.0")
   override def load(path: String): MulticlassClassificationEvaluator = super.load(path)
 }
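For orientation, a minimal sketch of how the expanded metric set can be driven from the new params. The toy DataFrame, the SparkSession value `spark`, and the chosen metric settings are illustrative only, not part of the patch:

import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator

// Assumes an active SparkSession bound to `spark`; its implicits provide toDF on a Seq.
import spark.implicits._

// Hypothetical (prediction, label) pairs; "prediction" and "label" are the evaluator's default column names.
val df = Seq((0.0, 0.0), (0.0, 1.0), (1.0, 1.0), (1.0, 1.0), (2.0, 2.0)).toDF("prediction", "label")

val evaluator = new MulticlassClassificationEvaluator()
  .setMetricName("fMeasureByLabel")  // one of the newly exposed per-label metrics
  .setMetricLabel(1.0)               // the class whose metric is computed
  .setBeta(0.5)                      // precision-vs-recall weighting for the F-measure metrics

val score = evaluator.evaluate(df)

Note that when metricName is weightedFalsePositiveRate or falsePositiveRateByLabel, isLargerBetter now reports false, so model-selection code that consults it will treat lower values as better.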

mllib/src/main/scala/org/apache/spark/ml/evaluation/MultilabelClassificationEvaluator.scala

Lines changed: 10 additions & 9 deletions
@@ -63,18 +63,19 @@ class MultilabelClassificationEvaluator (override val uid: String)
 
   setDefault(metricName -> "f1Measure")
 
-  final val metricClass: DoubleParam = new DoubleParam(this, "metricClass",
-    "The class whose metric will be computed in precisionByLabel|recallByLabel|" +
-      "f1MeasureByLabel. Must be >= 0. The default value is 0.",
+  final val metricLabel: DoubleParam = new DoubleParam(this, "metricLabel",
+    "The class whose metric will be computed in " +
+      s"${supportedMetricNames.filter(_.endsWith("ByLabel")).mkString("(", "|", ")")}. " +
+      "Must be >= 0. The default value is 0.",
     ParamValidators.gtEq(0.0))
 
   /** @group getParam */
-  def getMetricClass: Double = $(metricClass)
+  def getMetricLabel: Double = $(metricLabel)
 
   /** @group setParam */
-  def setMetricClass(value: Double): this.type = set(metricClass, value)
+  def setMetricLabel(value: Double): this.type = set(metricLabel, value)
 
-  setDefault(metricClass -> 0.0)
+  setDefault(metricLabel -> 0.0)
 
   /** @group setParam */
   def setPredictionCol(value: String): this.type = set(predictionCol, value)
@@ -103,9 +104,9 @@ class MultilabelClassificationEvaluator (override val uid: String)
       case "precision" => metrics.precision
       case "recall" => metrics.recall
      case "f1Measure" => metrics.f1Measure
-      case "precisionByLabel" => metrics.precision($(metricClass))
-      case "recallByLabel" => metrics.recall($(metricClass))
-      case "f1MeasureByLabel" => metrics.f1Measure($(metricClass))
+      case "precisionByLabel" => metrics.precision($(metricLabel))
+      case "recallByLabel" => metrics.recall($(metricLabel))
+      case "f1MeasureByLabel" => metrics.f1Measure($(metricLabel))
      case "microPrecision" => metrics.microPrecision
      case "microRecall" => metrics.microRecall
      case "microF1Measure" => metrics.microF1Measure

mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala

Lines changed: 1 addition & 3 deletions
@@ -230,9 +230,7 @@ class MulticlassMetrics @Since("1.1.0") (predictionAndLabels: RDD[_ <: Product])
    * Returns weighted averaged f1-measure
    */
   @Since("1.1.0")
-  lazy val weightedFMeasure: Double = labelCountByClass.map { case (category, count) =>
-    fMeasure(category, 1.0) * count.toDouble / labelCount
-  }.sum
+  lazy val weightedFMeasure: Double = weightedFMeasure(1.0)
 
   /**
    * Returns the sequence of labels in ascending order
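For reference, the quantity the removed lines computed, and that weightedFMeasure(1.0) still computes, is the label-frequency-weighted average of the per-label F-beta scores. Writing P(l) and R(l) for per-label precision and recall, count(l) for the label's count and N for the total count, and taking F_beta in its standard form:

$$\mathrm{weightedFMeasure}(\beta) = \sum_{\ell} \frac{\mathrm{count}(\ell)}{N}\, F_\beta(\ell), \qquad F_\beta(\ell) = \frac{(1+\beta^2)\, P(\ell)\, R(\ell)}{\beta^2\, P(\ell) + R(\ell)}$$

So the previously hard-coded beta = 1.0 is simply the default case of the general form.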

mllib/src/test/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluatorSuite.scala

Lines changed: 20 additions & 0 deletions
@@ -21,10 +21,13 @@ import org.apache.spark.SparkFunSuite
 import org.apache.spark.ml.param.ParamsSuite
 import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTestingUtils}
 import org.apache.spark.mllib.util.MLlibTestSparkContext
+import org.apache.spark.mllib.util.TestingUtils._
 
 class MulticlassClassificationEvaluatorSuite
   extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
 
+  import testImplicits._
+
   test("params") {
     ParamsSuite.checkParams(new MulticlassClassificationEvaluator)
   }
@@ -34,10 +37,27 @@ class MulticlassClassificationEvaluatorSuite
       .setPredictionCol("myPrediction")
       .setLabelCol("myLabel")
       .setMetricName("accuracy")
+      .setMetricLabel(1.0)
+      .setBeta(2.0)
     testDefaultReadWrite(evaluator)
   }
 
   test("should support all NumericType labels and not support other types") {
     MLTestingUtils.checkNumericTypes(new MulticlassClassificationEvaluator, spark)
   }
+
+  test("evaluation metrics") {
+    val predictionAndLabels = Seq((0.0, 0.0), (0.0, 1.0),
+      (0.0, 0.0), (1.0, 0.0), (1.0, 1.0),
+      (1.0, 1.0), (1.0, 1.0), (2.0, 2.0), (2.0, 0.0)).toDF("prediction", "label")
+
+    val evaluator = new MulticlassClassificationEvaluator()
+      .setMetricName("precisionByLabel")
+      .setMetricLabel(0.0)
+    assert(evaluator.evaluate(predictionAndLabels) ~== 2.0 / 3 absTol 1e-5)
+
+    evaluator.setMetricName("truePositiveRateByLabel")
+      .setMetricLabel(1.0)
+    assert(evaluator.evaluate(predictionAndLabels) ~== 3.0 / 4 absTol 1e-5)
+  }
 }
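As a sanity check on the asserted values: label 0.0 is predicted three times and two of those predictions are correct, so precisionByLabel at label 0.0 is 2/3; label 1.0 occurs four times and is predicted as 1.0 three times, so truePositiveRateByLabel at label 1.0 is 3/4.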

mllib/src/test/scala/org/apache/spark/ml/evaluation/MultilabelClassificationEvaluatorSuite.scala

Lines changed: 2 additions & 2 deletions
@@ -47,15 +47,15 @@ class MultilabelClassificationEvaluatorSuite
     assert(evaluator.evaluate(scoreAndLabels) ~== 2.0 / 7 absTol 1e-5)
 
     evaluator.setMetricName("recallByLabel")
-      .setMetricClass(0.0)
+      .setMetricLabel(0.0)
     assert(evaluator.evaluate(scoreAndLabels) ~== 0.8 absTol 1e-5)
   }
 
   test("read/write") {
     val evaluator = new MultilabelClassificationEvaluator()
       .setPredictionCol("myPrediction")
       .setLabelCol("myLabel")
-      .setMetricClass(1.0)
+      .setMetricLabel(1.0)
       .setMetricName("precisionByLabel")
     testDefaultReadWrite(evaluator)
   }

python/pyspark/ml/evaluation.py

Lines changed: 61 additions & 19 deletions
@@ -292,6 +292,9 @@ class MulticlassClassificationEvaluator(JavaEvaluator, HasLabelCol, HasPredictio
     0.66...
     >>> evaluator.evaluate(dataset, {evaluator.metricName: "accuracy"})
     0.66...
+    >>> evaluator.evaluate(dataset, {evaluator.metricName: "truePositiveRateByLabel",
+    ...     evaluator.metricLabel: 1.0})
+    0.75...
     >>> mce_path = temp_path + "/mce"
     >>> evaluator.save(mce_path)
     >>> evaluator2 = MulticlassClassificationEvaluator.load(mce_path)
@@ -313,20 +316,31 @@ class MulticlassClassificationEvaluator(JavaEvaluator, HasLabelCol, HasPredictio
     """
     metricName = Param(Params._dummy(), "metricName",
                        "metric name in evaluation "
-                       "(f1|weightedPrecision|weightedRecall|accuracy)",
+                       "(f1|accuracy|weightedPrecision|weightedRecall|weightedTruePositiveRate|"
+                       "weightedFalsePositiveRate|weightedFMeasure|truePositiveRateByLabel|"
+                       "falsePositiveRateByLabel|precisionByLabel|recallByLabel|fMeasureByLabel)",
                        typeConverter=TypeConverters.toString)
+    metricLabel = Param(Params._dummy(), "metricLabel",
+                        "The class whose metric will be computed in truePositiveRateByLabel|"
+                        "falsePositiveRateByLabel|precisionByLabel|recallByLabel|fMeasureByLabel."
+                        " Must be >= 0. The default value is 0.",
+                        typeConverter=TypeConverters.toFloat)
+    beta = Param(Params._dummy(), "beta",
+                 "The beta value used in weightedFMeasure|fMeasureByLabel."
+                 " Must be > 0. The default value is 1.",
+                 typeConverter=TypeConverters.toFloat)
 
     @keyword_only
     def __init__(self, predictionCol="prediction", labelCol="label",
-                 metricName="f1", weightCol=None):
+                 metricName="f1", weightCol=None, metricLabel=0.0, beta=1.0):
         """
         __init__(self, predictionCol="prediction", labelCol="label", \
-                 metricName="f1", weightCol=None)
+                 metricName="f1", weightCol=None, metricLabel=0.0, beta=1.0)
         """
         super(MulticlassClassificationEvaluator, self).__init__()
         self._java_obj = self._new_java_obj(
             "org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator", self.uid)
-        self._setDefault(metricName="f1")
+        self._setDefault(metricName="f1", metricLabel=0.0, beta=1.0)
         kwargs = self._input_kwargs
         self._set(**kwargs)
 
@@ -344,13 +358,41 @@ def getMetricName(self):
         """
         return self.getOrDefault(self.metricName)
 
+    @since("3.0.0")
+    def setMetricLabel(self, value):
+        """
+        Sets the value of :py:attr:`metricLabel`.
+        """
+        return self._set(metricLabel=value)
+
+    @since("3.0.0")
+    def getMetricLabel(self):
+        """
+        Gets the value of metricLabel or its default value.
+        """
+        return self.getOrDefault(self.metricLabel)
+
+    @since("3.0.0")
+    def setBeta(self, value):
+        """
+        Sets the value of :py:attr:`beta`.
+        """
+        return self._set(beta=value)
+
+    @since("3.0.0")
+    def getBeta(self):
+        """
+        Gets the value of beta or its default value.
+        """
+        return self.getOrDefault(self.beta)
+
     @keyword_only
     @since("1.5.0")
     def setParams(self, predictionCol="prediction", labelCol="label",
-                  metricName="f1", weightCol=None):
+                  metricName="f1", weightCol=None, metricLabel=0.0, beta=1.0):
         """
         setParams(self, predictionCol="prediction", labelCol="label", \
-                  metricName="f1", weightCol=None)
+                  metricName="f1", weightCol=None, metricLabel=0.0, beta=1.0)
         Sets params for multiclass classification evaluator.
         """
         kwargs = self._input_kwargs
@@ -390,23 +432,23 @@ class MultilabelClassificationEvaluator(JavaEvaluator, HasLabelCol, HasPredictio
                        "precisionByLabel|recallByLabel|f1MeasureByLabel|microPrecision|"
                        "microRecall|microF1Measure)",
                        typeConverter=TypeConverters.toString)
-    metricClass = Param(Params._dummy(), "metricClass",
-                        "The label whose metric will be computed in precisionByLabel|"
+    metricLabel = Param(Params._dummy(), "metricLabel",
+                        "The class whose metric will be computed in precisionByLabel|"
                         "recallByLabel|f1MeasureByLabel. "
                         "Must be >= 0. The default value is 0.",
                        typeConverter=TypeConverters.toFloat)
 
     @keyword_only
     def __init__(self, predictionCol="prediction", labelCol="label",
-                 metricName="f1Measure", metricClass=0.0):
+                 metricName="f1Measure", metricLabel=0.0):
         """
         __init__(self, predictionCol="prediction", labelCol="label", \
-                 metricName="f1Measure", metricClass=0.0)
+                 metricName="f1Measure", metricLabel=0.0)
         """
         super(MultilabelClassificationEvaluator, self).__init__()
         self._java_obj = self._new_java_obj(
             "org.apache.spark.ml.evaluation.MultilabelClassificationEvaluator", self.uid)
-        self._setDefault(metricName="f1Measure", metricClass=0.0)
+        self._setDefault(metricName="f1Measure", metricLabel=0.0)
         kwargs = self._input_kwargs
         self._set(**kwargs)
 
@@ -425,26 +467,26 @@ def getMetricName(self):
         return self.getOrDefault(self.metricName)
 
     @since("3.0.0")
-    def setMetricClass(self, value):
+    def setMetricLabel(self, value):
         """
-        Sets the value of :py:attr:`metricClass`.
+        Sets the value of :py:attr:`metricLabel`.
         """
-        return self._set(metricClass=value)
+        return self._set(metricLabel=value)
 
     @since("3.0.0")
-    def getMetricClass(self):
+    def getMetricLabel(self):
         """
-        Gets the value of metricClass or its default value.
+        Gets the value of metricLabel or its default value.
         """
-        return self.getOrDefault(self.metricClass)
+        return self.getOrDefault(self.metricLabel)
 
     @keyword_only
     @since("3.0.0")
     def setParams(self, predictionCol="prediction", labelCol="label",
-                  metricName="f1Measure", metricClass=0.0):
+                  metricName="f1Measure", metricLabel=0.0):
         """
         setParams(self, predictionCol="prediction", labelCol="label", \
-                  metricName="f1Measure", metricClass=0.0)
+                  metricName="f1Measure", metricLabel=0.0)
         Sets params for multilabel classification evaluator.
         """
         kwargs = self._input_kwargs
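One note on the added doctest: passing {evaluator.metricName: "truePositiveRateByLabel", evaluator.metricLabel: 1.0} to evaluate() overrides those params for that single call only, since the evaluator is copied with the extra params applied; its own values and the defaults set here (metricName="f1", metricLabel=0.0, beta=1.0) are left untouched.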
