Leaderboard rank fix (#1191)

eddiebergman · web-flow · commit 53daf7ea4730 · 2021-07-27T23:23:38.000+02:00
* Fixes for valid parameters not being tested

* flake8'd
diff --git a/autosklearn/estimators.py b/autosklearn/estimators.py
@@ -1,5 +1,5 @@
 # -*- encoding: utf-8 -*-
-from typing import Optional, Dict, List, Tuple, Union, Iterable, ClassVar
+from typing import Optional, Dict, List, Tuple, Union, Iterable
 from typing_extensions import Literal
 
 from ConfigSpace.configuration_space import Configuration
@@ -22,18 +22,6 @@
 
 
 class AutoSklearnEstimator(BaseEstimator):
-    # Constants used by `def leaderboard` for columns and their sort order
-    _leaderboard_columns: ClassVar[Dict[str, List[str]]] = {
-        "all": [
-            "model_id", "rank", "ensemble_weight", "type", "cost", "duration",
-            "config_id", "train_loss", "seed", "start_time", "end_time",
-            "budget", "status", "data_preprocessors", "feature_preprocessors",
-            "balancing_strategy", "config_origin"
-        ],
-        "simple": [
-            "model_id", "rank", "ensemble_weight", "type", "cost", "duration"
-        ]
-    }
 
     def __init__(
         self,
@@ -642,11 +630,7 @@ def leaderboard(
         # TODO validate that `self` is fitted. This is required for
         #      self.ensemble_ to get the identifiers of models it will generate
         #      weights for.
-        column_types = {
-            'all': AutoSklearnEstimator._leaderboard_columns['all'],
-            'simple': AutoSklearnEstimator._leaderboard_columns['simple'],
-            'detailed': AutoSklearnEstimator._leaderboard_columns['all']
-        }
+        column_types = AutoSklearnEstimator._leaderboard_columns()
 
         # Validation of top_k
         if (
@@ -661,6 +645,9 @@ def leaderboard(
         if isinstance(include, str):
             include = [include]
 
+        if include == ['model_id']:
+            raise ValueError('Must provide more than just `model_id`')
+
         if include is not None:
             columns = [*include]
 
@@ -784,10 +771,10 @@ def has_key(rv, key):
         # Add the `rank` column if needed, dropping `cost` if it's not
         # requested by the user
         if 'rank' in columns:
-            dataframe.sort_values(by='cost', ascending=False, inplace=True)
+            dataframe.sort_values(by='cost', ascending=True, inplace=True)
             dataframe.insert(column='rank',
                              value=range(1, len(dataframe) + 1),
-                             loc=list(columns).index('rank'))
+                             loc=list(columns).index('rank') - 1)  # account for `model_id`
 
             if 'cost' not in columns:
                 dataframe.drop('cost', inplace=True)
@@ -806,9 +793,15 @@ def has_key(rv, key):
                                          "'model_id'")
             sort_by = 'model_id'
 
-        dataframe.sort_values(by=sort_by,
-                              ascending=ascending_param,
-                              inplace=True)
+        # Equal costs would leave `rank` out of order, so break ties on `rank`
+        if 'rank' in columns and sort_by == 'cost':
+            dataframe.sort_values(by=[sort_by, 'rank'],
+                                  ascending=[ascending_param, True],
+                                  inplace=True)
+        else:
+            dataframe.sort_values(by=sort_by,
+                                  ascending=ascending_param,
+                                  inplace=True)
 
         # Lastly, just grab the top_k
         if top_k == 'all' or top_k >= len(dataframe):
@@ -818,6 +811,20 @@ def has_key(rv, key):
 
         return dataframe
 
+    @staticmethod
+    def _leaderboard_columns() -> Dict[Literal['all', 'simple', 'detailed'], List[str]]:
+        """Return the ordered leaderboard column names for each column group."""
+        # 'all' extends 'simple'; 'detailed' is an alias sharing the 'all' list
+        simple_columns = [
+            "model_id", "rank", "ensemble_weight", "type", "cost", "duration"
+        ]
+        all_columns = simple_columns + [
+            "config_id", "train_loss", "seed", "start_time", "end_time",
+            "budget", "status", "data_preprocessors", "feature_preprocessors",
+            "balancing_strategy", "config_origin"
+        ]
+        return {'all': all_columns, 'detailed': all_columns, 'simple': simple_columns}
+
     def _get_automl_class(self):
         raise NotImplementedError()
 
diff --git a/test/test_automl/test_estimators.py b/test/test_automl/test_estimators.py
@@ -331,22 +331,9 @@ def test_leaderboard(
 ):
     # Comprehensive test tasks a substantial amount of time, manually set if
     # required.
-    MAX_COMBO_SIZE_FOR_INCLUDE_PARAM = 2  # [0, len(valid_columns) + 1]
+    MAX_COMBO_SIZE_FOR_INCLUDE_PARAM = 3  # [0, len(valid_columns) + 1]
+    column_types = AutoSklearnEstimator._leaderboard_columns()
 
-    X_train, Y_train, _, _ = putil.get_dataset(dataset_name)
-    model = estimator_type(
-        time_left_for_this_task=30,
-        per_run_time_limit=5,
-        tmp_folder=tmp_dir,
-        seed=1
-    )
-    model.fit(X_train, Y_train)
-
-    column_types = {
-        'all': AutoSklearnEstimator._leaderboard_columns['all'],
-        'simple': AutoSklearnEstimator._leaderboard_columns['simple'],
-        'detailed': AutoSklearnEstimator._leaderboard_columns['all']
-    }
     # Create a dict of all possible param values for each param
     # with some invalid one's of the incorrect type
     include_combinations = itertools.chain(
@@ -357,7 +344,7 @@ def test_leaderboard(
         'detailed': [True, False],
         'ensemble_only': [True, False],
         'top_k': [-10, 0, 1, 10, 'all'],
-        'sort_by': [column_types['all'], 'invalid'],
+        'sort_by': [*column_types['all'], 'invalid'],
         'sort_order': ['ascending', 'descending', 'auto', 'invalid', None],
         'include': itertools.chain([None, 'invalid', 'type'], include_combinations),
     }
@@ -368,7 +355,19 @@ def test_leaderboard(
         for param_values in itertools.product(*valid_params.values())
     )
 
+    X_train, Y_train, _, _ = putil.get_dataset(dataset_name)
+    model = estimator_type(
+        time_left_for_this_task=30,
+        per_run_time_limit=5,
+        tmp_folder=tmp_dir,
+        seed=1
+    )
+    model.fit(X_train, Y_train)
+
     for params in params_generator:
+        # Convert from iterator to solid list
+        if params['include'] is not None and not isinstance(params['include'], str):
+            params['include'] = list(params['include'])
 
         # Invalid top_k should raise an error, is a positive int or 'all'
         if not (params['top_k'] == 'all' or params['top_k'] > 0):
@@ -385,26 +384,32 @@ def test_leaderboard(
             with pytest.raises(ValueError):
                 model.leaderboard(**params)
 
-        # Invalid include item in a list
-        elif params['include'] is not None:
-            # Crash if just a str but invalid column
-            if (
-                isinstance(params['include'], str)
-                and params['include'] not in column_types['all']
-            ):
-                with pytest.raises(ValueError):
-                    model.leaderboard(**params)
-            # Crash if list but contains invalid column
-            elif (
-                not isinstance(params['include'], str)
-                and len(set(params['include']) - set(column_types['all'])) != 0
-            ):
-                with pytest.raises(ValueError):
-                    model.leaderboard(**params)
+        # include is single str but not valid
+        elif (
+            isinstance(params['include'], str)
+            and params['include'] not in column_types['all']
+        ):
+            with pytest.raises(ValueError):
+                model.leaderboard(**params)
+
+        # Crash if include is list but contains invalid column
+        elif (
+            isinstance(params['include'], list)
+            and len(set(params['include']) - set(column_types['all'])) != 0
+        ):
+            with pytest.raises(ValueError):
+                model.leaderboard(**params)
+
+        # Can't have just model_id, in both single str and list case
+        elif (
+            params['include'] == 'model_id'
+            or params['include'] == ['model_id']
+        ):
+            with pytest.raises(ValueError):
+                model.leaderboard(**params)
 
-        # Should run without an error if all params are valid
+        # Else all valid combinations should be validated
         else:
-            # Validate the outputs
             leaderboard = model.leaderboard(**params)
 
             # top_k should never be less than the rows given back
@@ -413,14 +418,23 @@ def test_leaderboard(
                 assert params['top_k'] >= len(leaderboard)
 
             # Check the right columns are present and in the right order
-            # The id is set as the index but is not included in pandas columns
+            # The model_id is set as the index, not included in pandas columns
             columns = list(leaderboard.columns)
+
+            def exclude(lst, s):
+                return [x for x in lst if x != s]
+
             if params['include'] is not None:
-                assert columns == list(params['include'])
+                # Include with only single str should be the only column
+                if isinstance(params['include'], str):
+                    assert params['include'] in columns and len(columns) == 1
+                # Include as a list should have all the columns without model_id
+                else:
+                    assert columns == exclude(params['include'], 'model_id')
             elif params['detailed']:
-                assert columns == column_types['detailed']
+                assert columns == exclude(column_types['detailed'], 'model_id')
             else:
-                assert columns == column_types['simple']
+                assert columns == exclude(column_types['simple'], 'model_id')
 
             # Ensure that if it's ensemble only
             # Can only check if 'ensemble_weight' is present