Merge pull request #931 from automl/improve_file_output

franchuterivera · web-flow · commit 1126453429dc · 2020-08-28T15:12:58.000+02:00
Add additional output to log files
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -4,5 +4,6 @@ recursive-include autosklearn/metalearning/files *.txt
 include autosklearn/util/logging.yaml
 recursive-include autosklearn *.pyx
 include requirements.txt
+include autosklearn/requirements.txt
 recursive-include autosklearn/experimental/askl2_portfolios *.json
 include autosklearn/experimental/askl2_training_data.json
diff --git a/autosklearn/__init__.py b/autosklearn/__init__.py
@@ -1,21 +1,16 @@
 # -*- encoding: utf-8 -*-
 import os
+import pkg_resources
 import sys
 
 from autosklearn.util import dependencies
 from autosklearn.__version__ import __version__  # noqa (imported but unused)
 
 
-__MANDATORY_PACKAGES__ = '''
-numpy>=1.9
-scikit-learn>=0.22.0,<0.23
-lockfile>=0.10
-smac>=0.12
-pyrfr>=0.6.1,<0.8
-ConfigSpace>=0.4.0,<0.5
-'''
+requirements = pkg_resources.resource_string('autosklearn', 'requirements.txt')
+requirements = requirements.decode('utf-8')
 
-dependencies.verify_packages(__MANDATORY_PACKAGES__)
+dependencies.verify_packages(requirements)
 
 if os.name != 'posix':
     raise ValueError(
diff --git a/autosklearn/automl.py b/autosklearn/automl.py
@@ -2,7 +2,9 @@
 import io
 import json
 import multiprocessing
+import platform
 import os
+import sys
 from typing import Optional, List, Union
 import unittest.mock
 import warnings
@@ -11,6 +13,7 @@
 import numpy as np
 import numpy.ma as ma
 import pandas as pd
+import pkg_resources
 import scipy.stats
 from sklearn.base import BaseEstimator
 from sklearn.model_selection._split import _RepeatedSplits, \
@@ -33,14 +36,22 @@
 from autosklearn.metrics import calculate_score
 from autosklearn.util.stopwatch import StopWatch
 from autosklearn.util.logging_ import get_logger, setup_logger
-from autosklearn.util import pipeline
+from autosklearn.util import pipeline, RE_PATTERN
 from autosklearn.ensemble_builder import EnsembleBuilder
 from autosklearn.ensembles.singlebest_ensemble import SingleBest
 from autosklearn.smbo import AutoMLSMBO
 from autosklearn.util.hash import hash_array_or_matrix
 from autosklearn.metrics import f1_macro, accuracy, r2
 from autosklearn.constants import MULTILABEL_CLASSIFICATION, MULTICLASS_CLASSIFICATION, \
     REGRESSION_TASKS, REGRESSION, BINARY_CLASSIFICATION, MULTIOUTPUT_REGRESSION
+from autosklearn.pipeline.components.classification import ClassifierChoice
+from autosklearn.pipeline.components.regression import RegressorChoice
+from autosklearn.pipeline.components.feature_preprocessing import FeaturePreprocessorChoice
+from autosklearn.pipeline.components.data_preprocessing.categorical_encoding import OHEChoice
+from autosklearn.pipeline.components.data_preprocessing.minority_coalescense import (
+    CoalescenseChoice
+)
+from autosklearn.pipeline.components.data_preprocessing.rescaling import RescalingChoice
 
 
 def _model_predict(model, X, batch_size, logger, task):
@@ -356,6 +367,80 @@ def fit(
         elif feat_type is None and self.InputValidator.feature_types:
             feat_type = self.InputValidator.feature_types
 
+        # Produce debug information to the logfile
+        self._logger.debug('Starting to print environment information')
+        self._logger.debug('  Python version: %s', sys.version.split('\n'))
+        try:
+            self._logger.debug('  Distribution: %s', platform.linux_distribution())
+        except AttributeError:
+            # platform.linux_distribution() was removed in Python 3.8.
+            # We should move to the distro package as soon as it supports Windows and macOS.
+            pass
+        self._logger.debug('  System: %s', platform.system())
+        self._logger.debug('  Machine: %s', platform.machine())
+        self._logger.debug('  Platform: %s', platform.platform())
+        # uname is not logged because it appears to leak sensitive information
+        # self._logger.debug('  uname: %s', platform.uname())
+        self._logger.debug('  Version: %s', platform.version())
+        self._logger.debug('  Mac version: %s', platform.mac_ver())
+        requirements = pkg_resources.resource_string('autosklearn', 'requirements.txt')
+        requirements = requirements.decode('utf-8')
+        requirements = [requirement for requirement in requirements.split('\n')]
+        for requirement in requirements:
+            if not requirement:
+                continue
+            match = RE_PATTERN.match(requirement)
+            if match:
+                name = match.group('name')
+                module_dist = pkg_resources.get_distribution(name)
+                self._logger.debug('  %s', module_dist)
+            else:
+                raise ValueError('Unable to read requirement: %s' % requirement)
+        self._logger.debug('Done printing environment information')
+        self._logger.debug('Starting to print arguments to auto-sklearn')
+        self._logger.debug('  output_folder: %s', self._backend.context._output_directory)
+        self._logger.debug('  tmp_folder: %s', self._backend.context._temporary_directory)
+        self._logger.debug('  time_left_for_this_task: %f', self._time_for_task)
+        self._logger.debug('  per_run_time_limit: %f', self._per_run_time_limit)
+        self._logger.debug(
+            '  initial_configurations_via_metalearning: %d',
+            self._initial_configurations_via_metalearning,
+        )
+        self._logger.debug('  ensemble_size: %d', self._ensemble_size)
+        self._logger.debug('  ensemble_nbest: %f', self._ensemble_nbest)
+        self._logger.debug('  max_models_on_disc: %d', self._max_models_on_disc)
+        self._logger.debug('  ensemble_memory_limit: %d', self._ensemble_memory_limit)
+        self._logger.debug('  seed: %d', self._seed)
+        self._logger.debug('  ml_memory_limit: %d', self._ml_memory_limit)
+        self._logger.debug('  metadata_directory: %s', self._metadata_directory)
+        self._logger.debug('  debug_mode: %s', self._debug_mode)
+        self._logger.debug('  include_estimators: %s', str(self._include_estimators))
+        self._logger.debug('  exclude_estimators: %s', str(self._exclude_estimators))
+        self._logger.debug('  include_preprocessors: %s', str(self._include_preprocessors))
+        self._logger.debug('  exclude_preprocessors: %s', str(self._exclude_preprocessors))
+        self._logger.debug('  resampling_strategy: %s', str(self._resampling_strategy))
+        self._logger.debug('  resampling_strategy_arguments: %s',
+                           str(self._resampling_strategy_arguments))
+        self._logger.debug('  shared_mode: %s', str(self._shared_mode))
+        self._logger.debug('  precision: %s', str(self.precision))
+        self._logger.debug('  disable_evaluator_output: %s', str(self._disable_evaluator_output))
+        self._logger.debug('  get_smac_objective_callback: %s', str(self._get_smac_object_callback))
+        self._logger.debug('  smac_scenario_args: %s', str(self._smac_scenario_args))
+        self._logger.debug('  logging_config: %s', str(self.logging_config))
+        self._logger.debug('  metric: %s', str(self._metric))
+        self._logger.debug('Done printing arguments to auto-sklearn')
+        self._logger.debug('Starting to print available components')
+        for choice in (
+            ClassifierChoice, RegressorChoice, FeaturePreprocessorChoice,
+            OHEChoice, RescalingChoice, CoalescenseChoice,
+        ):
+            self._logger.debug(
+                '%s: %s',
+                choice.__name__,
+                choice.get_components(),
+            )
+        self._logger.debug('Done printing available components')
+
         datamanager = XYDataManager(
             X, y,
             X_test=X_test,
diff --git a/autosklearn/pipeline/components/classification/__init__.py b/autosklearn/pipeline/components/classification/__init__.py
@@ -21,6 +21,7 @@ def add_classifier(classifier):
 
 class ClassifierChoice(AutoSklearnChoice):
 
+    @classmethod
     def get_components(cls):
         components = OrderedDict()
         components.update(_classifiers)
diff --git a/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py b/autosklearn/pipeline/components/data_preprocessing/categorical_encoding/__init__.py
@@ -17,7 +17,9 @@ def add_ohe(ohe):
 
 
 class OHEChoice(AutoSklearnChoice):
-    def get_components(self):
+
+    @classmethod
+    def get_components(cls):
         components = OrderedDict()
         components.update(_ohes)
         components.update(_addons.components)
diff --git a/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py b/autosklearn/pipeline/components/data_preprocessing/minority_coalescense/__init__.py
@@ -16,7 +16,9 @@ def add_mc(mc):
 
 
 class CoalescenseChoice(AutoSklearnChoice):
-    def get_components(self):
+
+    @classmethod
+    def get_components(cls):
         components = OrderedDict()
         components.update(_mcs)
         components.update(_addons.components)
diff --git a/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py b/autosklearn/pipeline/components/data_preprocessing/rescaling/__init__.py
@@ -18,7 +18,9 @@ def add_rescaler(rescaler):
 
 
 class RescalingChoice(AutoSklearnChoice):
-    def get_components(self):
+
+    @classmethod
+    def get_components(cls):
         components = OrderedDict()
         components.update(_rescalers)
         components.update(_addons.components)
diff --git a/autosklearn/pipeline/components/feature_preprocessing/__init__.py b/autosklearn/pipeline/components/feature_preprocessing/__init__.py
@@ -19,7 +19,8 @@ def add_preprocessor(preprocessor):
 
 class FeaturePreprocessorChoice(AutoSklearnChoice):
 
-    def get_components(self):
+    @classmethod
+    def get_components(cls):
         components = OrderedDict()
         components.update(_preprocessors)
         components.update(_addons.components)
diff --git a/autosklearn/requirements.txt b/autosklearn/requirements.txt
@@ -0,0 +1 @@
+../requirements.txt
diff --git a/autosklearn/util/__init__.py b/autosklearn/util/__init__.py
@@ -1 +1,7 @@
 # -*- encoding: utf-8 -*-
+import re
+
+
+SUBPATTERN = r'((?P<operation%d>==|>=|>|<)(?P<version%d>(\d+)?(\.[a-zA-Z0-9]+)?(\.\d+)?))'
+RE_PATTERN = re.compile(
+    r'^(?P<name>[\w\-]+)%s?(,%s)?$' % (SUBPATTERN % (1, 1), SUBPATTERN % (2, 2)))
diff --git a/autosklearn/util/backend.py b/autosklearn/util/backend.py
@@ -92,7 +92,7 @@ def __init__(self,
         self._tmp_dir_created = False
         self._output_dir_created = False
 
-        self.__temporary_directory, self.__output_directory = (
+        self._temporary_directory, self._output_directory = (
             get_randomized_directory_names(
                 temporary_directory=temporary_directory,
                 output_directory=output_directory,
@@ -104,12 +104,12 @@ def __init__(self,
     @property
     def output_directory(self) -> str:
         # make sure that tilde does not appear on the path.
-        return os.path.expanduser(os.path.expandvars(self.__output_directory))
+        return os.path.expanduser(os.path.expandvars(self._output_directory))
 
     @property
     def temporary_directory(self) -> str:
         # make sure that tilde does not appear on the path.
-        return os.path.expanduser(os.path.expandvars(self.__temporary_directory))
+        return os.path.expanduser(os.path.expandvars(self._temporary_directory))
 
     def create_directories(self) -> None:
         if self.shared_mode:
diff --git a/autosklearn/util/dependencies.py b/autosklearn/util/dependencies.py
@@ -1,14 +1,10 @@
 import importlib
-import re
 from distutils.version import LooseVersion
 from typing import List, Optional, Union, no_type_check
 
 import pkg_resources
 
-
-SUBPATTERN = r'((?P<operation%d>==|>=|>|<)(?P<version%d>(\d+)?(\.[a-zA-Z0-9]+)?(\.\d+)?))'
-RE_PATTERN = re.compile(
-    r'^(?P<name>[\w\-]+)%s?(,%s)?$' % (SUBPATTERN % (1, 1), SUBPATTERN % (2, 2)))
+from autosklearn.util import RE_PATTERN
 
 
 def verify_packages(packages: Optional[Union[str, List[str]]]) -> None:
@@ -53,6 +49,8 @@ def _verify_package(name: str, operation: Optional[str], version: str) -> None:
         check = required_version == installed_version
     elif operation == '>':
         check = installed_version > required_version
+    elif operation == '<':
+        check = installed_version < required_version
     elif operation == '>=':
         check = installed_version > required_version or \
                 installed_version == required_version