Refactor if-statements (#1219)

PGijsbers · web-flow · commit 687a0f11e7ee · 2023-03-01T08:41:17.000+01:00
* Refactor if-statements

* Add explicit names to conditional expression

* Add 'dependencies' to better mimic OpenMLFlow
diff --git a/openml/_api_calls.py b/openml/_api_calls.py
@@ -303,9 +303,7 @@ def __is_checksum_equal(downloaded_file, md5_checksum=None):
     md5 = hashlib.md5()
     md5.update(downloaded_file.encode("utf-8"))
     md5_checksum_download = md5.hexdigest()
-    if md5_checksum == md5_checksum_download:
-        return True
-    return False
+    return md5_checksum == md5_checksum_download
 
 
 def _send_request(request_method, url, data, files=None, md5_checksum=None):
diff --git a/openml/datasets/dataset.py b/openml/datasets/dataset.py
@@ -275,7 +275,7 @@ def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]:
 
     def __eq__(self, other):
 
-        if type(other) != OpenMLDataset:
+        if not isinstance(other, OpenMLDataset):
             return False
 
         server_fields = {
@@ -287,14 +287,12 @@ def __eq__(self, other):
             "data_file",
         }
 
-        # check that the keys are identical
+        # check that both objects have the same keys and identical values for them
         self_keys = set(self.__dict__.keys()) - server_fields
         other_keys = set(other.__dict__.keys()) - server_fields
-        if self_keys != other_keys:
-            return False
-
-        # check that values of the common keys are identical
-        return all(self.__dict__[key] == other.__dict__[key] for key in self_keys)
+        return self_keys == other_keys and all(
+            self.__dict__[key] == other.__dict__[key] for key in self_keys
+        )
 
     def _download_data(self) -> None:
         """Download ARFF data file to standard cache directory. Set `self.data_file`."""
diff --git a/openml/extensions/sklearn/extension.py b/openml/extensions/sklearn/extension.py
@@ -38,19 +38,16 @@
 
 logger = logging.getLogger(__name__)
 
-
 if sys.version_info >= (3, 5):
     from json.decoder import JSONDecodeError
 else:
     JSONDecodeError = ValueError
 
-
 DEPENDENCIES_PATTERN = re.compile(
     r"^(?P<name>[\w\-]+)((?P<operation>==|>=|>)"
     r"(?P<version>(\d+\.)?(\d+\.)?(\d+)?(dev)?[0-9]*))?$"
 )
 
-
 SIMPLE_NUMPY_TYPES = [
     nptype
     for type_cat, nptypes in np.sctypes.items()
@@ -580,15 +577,11 @@ def _is_cross_validator(self, o: Any) -> bool:
 
     @classmethod
     def _is_sklearn_flow(cls, flow: OpenMLFlow) -> bool:
-        if getattr(flow, "dependencies", None) is not None and "sklearn" in flow.dependencies:
-            return True
-        if flow.external_version is None:
-            return False
-        else:
-            return (
-                flow.external_version.startswith("sklearn==")
-                or ",sklearn==" in flow.external_version
-            )
+        sklearn_dependency = isinstance(flow.dependencies, str) and "sklearn" in flow.dependencies
+        sklearn_as_external = isinstance(flow.external_version, str) and (
+            flow.external_version.startswith("sklearn==") or ",sklearn==" in flow.external_version
+        )
+        return sklearn_dependency or sklearn_as_external
 
     def _get_sklearn_description(self, model: Any, char_lim: int = 1024) -> str:
         """Fetches the sklearn function docstring for the flow description
@@ -1867,24 +1860,22 @@ def is_subcomponent_specification(values):
                 # checks whether the current value can be a specification of
                 # subcomponents, as for example the value for steps parameter
                 # (in Pipeline) or transformers parameter (in
-                # ColumnTransformer). These are always lists/tuples of lists/
-                # tuples, size bigger than 2 and an OpenMLFlow item involved.
-                if not isinstance(values, (tuple, list)):
-                    return False
-                for item in values:
-                    if not isinstance(item, (tuple, list)):
-                        return False
-                    if len(item) < 2:
-                        return False
-                    if not isinstance(item[1], (openml.flows.OpenMLFlow, str)):
-                        if (
+                # ColumnTransformer).
+                return (
+                    # Specification requires a list/tuple whose items are each
+                    # a list/tuple of at least length 2.
+                    isinstance(values, (tuple, list))
+                    and all(isinstance(item, (tuple, list)) and len(item) > 1 for item in values)
+                    # And each component needs to be a flow or interpretable string
+                    and all(
+                        isinstance(item[1], openml.flows.OpenMLFlow)
+                        or (
                             isinstance(item[1], str)
                             and item[1] in SKLEARN_PIPELINE_STRING_COMPONENTS
-                        ):
-                            pass
-                        else:
-                            return False
-                return True
+                        )
+                        for item in values
+                    )
+                )
 
             # _flow is openml flow object, _param dict maps from flow name to flow
             # id for the main call, the param dict can be overridden (useful for
diff --git a/openml/flows/functions.py b/openml/flows/functions.py
@@ -261,10 +261,7 @@ def flow_exists(name: str, external_version: str) -> Union[int, bool]:
 
     result_dict = xmltodict.parse(xml_response)
     flow_id = int(result_dict["oml:flow_exists"]["oml:id"])
-    if flow_id > 0:
-        return flow_id
-    else:
-        return False
+    return flow_id if flow_id > 0 else False
 
 
 def get_flow_id(
diff --git a/openml/setups/functions.py b/openml/setups/functions.py
@@ -55,10 +55,7 @@ def setup_exists(flow) -> int:
     )
     result_dict = xmltodict.parse(result)
     setup_id = int(result_dict["oml:setup_exists"]["oml:id"])
-    if setup_id > 0:
-        return setup_id
-    else:
-        return False
+    return setup_id if setup_id > 0 else False
 
 
 def _get_cached_setup(setup_id):
diff --git a/openml/tasks/split.py b/openml/tasks/split.py
@@ -47,12 +47,10 @@ def __eq__(self, other):
             or self.name != other.name
             or self.description != other.description
             or self.split.keys() != other.split.keys()
-        ):
-            return False
-
-        if any(
-            self.split[repetition].keys() != other.split[repetition].keys()
-            for repetition in self.split
+            or any(
+                self.split[repetition].keys() != other.split[repetition].keys()
+                for repetition in self.split
+            )
         ):
             return False
 
diff --git a/openml/utils.py b/openml/utils.py
@@ -174,10 +174,7 @@ def _delete_entity(entity_type, entity_id):
     url_suffix = "%s/%d" % (entity_type, entity_id)
     result_xml = openml._api_calls._perform_api_call(url_suffix, "delete")
     result = xmltodict.parse(result_xml)
-    if "oml:%s_delete" % entity_type in result:
-        return True
-    else:
-        return False
+    return "oml:%s_delete" % entity_type in result
 
 
 def _list_all(listing_call, output_format="dict", *args, **filters):
diff --git a/tests/test_extensions/test_functions.py b/tests/test_extensions/test_functions.py
@@ -9,6 +9,7 @@
 
 class DummyFlow:
     external_version = "DummyFlow==0.1"
+    dependencies = None
 
 
 class DummyModel:
@@ -18,15 +19,11 @@ class DummyModel:
 class DummyExtension1:
     @staticmethod
     def can_handle_flow(flow):
-        if not inspect.stack()[2].filename.endswith("test_functions.py"):
-            return False
-        return True
+        return inspect.stack()[2].filename.endswith("test_functions.py")
 
     @staticmethod
     def can_handle_model(model):
-        if not inspect.stack()[2].filename.endswith("test_functions.py"):
-            return False
-        return True
+        return inspect.stack()[2].filename.endswith("test_functions.py")
 
 
 class DummyExtension2:
diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
@@ -127,7 +127,7 @@ def _wait_for_processed_run(self, run_id, max_waiting_time_seconds):
             "evaluated correctly on the server".format(run_id)
         )
 
-    def _compare_predictions(self, predictions, predictions_prime):
+    def _assert_predictions_equal(self, predictions, predictions_prime):
         self.assertEqual(
             np.array(predictions_prime["data"]).shape, np.array(predictions["data"]).shape
         )
@@ -151,8 +151,6 @@ def _compare_predictions(self, predictions, predictions_prime):
                 else:
                     self.assertEqual(val_1, val_2)
 
-        return True
-
     def _rerun_model_and_compare_predictions(self, run_id, model_prime, seed, create_task_obj):
         run = openml.runs.get_run(run_id)
 
@@ -183,7 +181,7 @@ def _rerun_model_and_compare_predictions(self, run_id, model_prime, seed, create
 
         predictions_prime = run_prime._generate_arff_dict()
 
-        self._compare_predictions(predictions, predictions_prime)
+        self._assert_predictions_equal(predictions, predictions_prime)
         pd.testing.assert_frame_equal(
             run.predictions,
             run_prime.predictions,