Skip to content

Commit fdcee4b

Browse files
committed
scoring: find usable scorers for non-built-in problem types
1 parent ea03b36 commit fdcee4b

File tree

4 files changed

+45
-17
lines changed

4 files changed

+45
-17
lines changed

Orange/evaluation/scoring.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,13 @@ class Score(metaclass=ScoreMetaType):
6666
name = None
6767
long_name = None #: A short user-readable name (e.g. a few words)
6868

69+
# Marks whether this Scorer is one of the built-in Scorers:
70+
is_built_in = True
71+
# If True, the Scorer is shown in the Scorer table by default.
72+
shown_by_default = True
73+
# Placeholder for a problem type of non-built-in Scorers
74+
problem_type = "built-in"
75+
6976
def __new__(cls, results=None, **kwargs):
7077
self = super().__new__(cls)
7178
if results is not None:
@@ -270,6 +277,7 @@ class LogLoss(ClassificationScore):
270277
271278
"""
272279
__wraps__ = skl_metrics.log_loss
280+
shown_by_default = False
273281

274282
def compute_score(self, results, eps=1e-15, normalize=True,
275283
sample_weight=None):
@@ -285,6 +293,7 @@ def compute_score(self, results, eps=1e-15, normalize=True,
285293

286294
class Specificity(ClassificationScore):
287295
is_binary = True
296+
shown_by_default = False
288297

289298
@staticmethod
290299
def calculate_weights(results):
@@ -360,6 +369,7 @@ class R2(RegressionScore):
360369

361370
class CVRMSE(RegressionScore):
362371
long_name = "Coefficient of variation of the RMSE"
372+
shown_by_default = False
363373

364374
def compute_score(self, results):
365375
mean = np.nanmean(results.actual)

Orange/widgets/evaluate/owpredictions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,7 @@ def _call_predictors(self):
283283
def _update_scores(self):
284284
model = self.score_table.model
285285
model.clear()
286-
scorers = usable_scorers(self.class_var) if self.class_var else []
286+
scorers = usable_scorers(self.data)
287287
self.score_table.update_header(scorers)
288288
errors = []
289289
for pred in self.predictors:

Orange/widgets/evaluate/owtestandscore.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -511,10 +511,10 @@ def _which_missing_data(self):
511511
# - we don't gain much with it
512512
# - it complicates the unit tests
513513
def _update_scorers(self):
514-
if self.data and self.data.domain.class_var:
515-
new_scorers = usable_scorers(self.data.domain.class_var)
516-
else:
517-
new_scorers = []
514+
new_scorers = []
515+
if self.data:
516+
new_scorers = usable_scorers(self.data)
517+
518518
# Don't unnecessarily reset the combo because this would always reset
519519
# comparison_criterion; we also set it explicitly, though, for clarity
520520
if new_scorers != self.scorers:

Orange/widgets/evaluate/utils.py

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import warnings
22
from functools import partial
3-
from itertools import chain
43

54
import numpy as np
65

@@ -11,7 +10,7 @@
1110
QSortFilterProxyModel
1211
from sklearn.exceptions import UndefinedMetricWarning
1312

14-
from Orange.data import Variable, DiscreteVariable, ContinuousVariable
13+
from Orange.data import Table, DiscreteVariable, ContinuousVariable
1514
from Orange.evaluation import scoring
1615
from Orange.widgets import gui
1716
from Orange.widgets.utils.tableview import table_selection_to_mime_data
@@ -78,14 +77,33 @@ def learner_name(learner):
7877
return getattr(learner, "name", type(learner).__name__)
7978

8079

81-
def usable_scorers(target: Variable):
82-
order = {name: i
83-
for i, name in enumerate(BUILTIN_SCORERS_ORDER[type(target)])}
80+
def usable_scorers(data: Table):
81+
if not data:
82+
return []
83+
84+
problem_type = data.attributes.get("problem_type", None)
85+
target = data.domain.class_var
86+
8487
# 'abstract' is retrieved from __dict__ to avoid inheriting
85-
usable = (cls for cls in scoring.Score.registry.values()
86-
if cls.is_scalar and not cls.__dict__.get("abstract")
87-
and isinstance(target, cls.class_types))
88-
return sorted(usable, key=lambda cls: order.get(cls.name, 99))
88+
scorer_candidates = [cls for cls in scoring.Score.registry.values()
89+
if cls.is_scalar and not cls.__dict__.get("abstract")]
90+
91+
# If problem_type is not specified and 'domain.class_var' is set
92+
# use builtin scorers and don't break the default behaviour.
93+
usable = []
94+
if problem_type is None and target:
95+
order = {name: i
96+
for i, name in enumerate(BUILTIN_SCORERS_ORDER[type(target)])}
97+
usable = sorted((cls for cls in scorer_candidates
98+
if isinstance(target, cls.class_types) and cls.is_built_in),
99+
key=lambda cls: order.get(cls.name, 99))
100+
101+
elif problem_type and data.domain.class_vars:
102+
usable = [cls for cls in scoring.Score.registry.values()
103+
if not cls.is_built_in and cls.problem_type == problem_type and
104+
all(isinstance(target, cls.class_types) for target in data.domain.class_vars)]
105+
106+
return usable
89107

90108

91109
def scorer_caller(scorer, ovr_results, target=None):
@@ -131,9 +149,9 @@ def is_bad(x):
131149

132150

133151
class ScoreTable(OWComponent, QObject):
134-
shown_scores = \
135-
Setting(set(chain(*BUILTIN_SCORERS_ORDER.values())))
136-
152+
shown_scores = Setting(set(scorer.name for scorer in
153+
scoring.Score.registry.values() if
154+
scorer.shown_by_default))
137155
shownScoresChanged = Signal()
138156

139157
class ItemDelegate(QStyledItemDelegate):

0 commit comments

Comments
 (0)