|
2 | 2 | import io |
3 | 3 | import json |
4 | 4 | import multiprocessing |
| 5 | +import platform |
5 | 6 | import os |
| 7 | +import sys |
6 | 8 | from typing import Optional, List, Union |
7 | 9 | import unittest.mock |
8 | 10 | import warnings |
|
11 | 13 | import numpy as np |
12 | 14 | import numpy.ma as ma |
13 | 15 | import pandas as pd |
| 16 | +import pkg_resources |
14 | 17 | import scipy.stats |
15 | 18 | from sklearn.base import BaseEstimator |
16 | 19 | from sklearn.model_selection._split import _RepeatedSplits, \ |
|
33 | 36 | from autosklearn.metrics import calculate_score |
34 | 37 | from autosklearn.util.stopwatch import StopWatch |
35 | 38 | from autosklearn.util.logging_ import get_logger, setup_logger |
36 | | -from autosklearn.util import pipeline |
| 39 | +from autosklearn.util import pipeline, RE_PATTERN |
37 | 40 | from autosklearn.ensemble_builder import EnsembleBuilder |
38 | 41 | from autosklearn.ensembles.singlebest_ensemble import SingleBest |
39 | 42 | from autosklearn.smbo import AutoMLSMBO |
40 | 43 | from autosklearn.util.hash import hash_array_or_matrix |
41 | 44 | from autosklearn.metrics import f1_macro, accuracy, r2 |
42 | 45 | from autosklearn.constants import MULTILABEL_CLASSIFICATION, MULTICLASS_CLASSIFICATION, \ |
43 | 46 | REGRESSION_TASKS, REGRESSION, BINARY_CLASSIFICATION, MULTIOUTPUT_REGRESSION |
| 47 | +from autosklearn.pipeline.components.classification import ClassifierChoice |
| 48 | +from autosklearn.pipeline.components.regression import RegressorChoice |
| 49 | +from autosklearn.pipeline.components.feature_preprocessing import FeaturePreprocessorChoice |
| 50 | +from autosklearn.pipeline.components.data_preprocessing.categorical_encoding import OHEChoice |
| 51 | +from autosklearn.pipeline.components.data_preprocessing.minority_coalescense import ( |
| 52 | + CoalescenseChoice |
| 53 | +) |
| 54 | +from autosklearn.pipeline.components.data_preprocessing.rescaling import RescalingChoice |
44 | 55 |
|
45 | 56 |
|
46 | 57 | def _model_predict(model, X, batch_size, logger, task): |
@@ -356,6 +367,80 @@ def fit( |
356 | 367 | elif feat_type is None and self.InputValidator.feature_types: |
357 | 368 | feat_type = self.InputValidator.feature_types |
358 | 369 |
|
| 370 | + # Produce debug information to the logfile |
| 371 | + self._logger.debug('Starting to print environment information') |
| 372 | + self._logger.debug(' Python version: %s', sys.version.split('\n')) |
| 373 | + try: |
| 374 | + self._logger.debug(' Distribution: %s', platform.linux_distribution()) |
| 375 | + except AttributeError: |
| 376 | + # platform.linux_distribution() was removed in Python3.8 |
| 377 | + # We should move to the distro package as soon as it supports Windows and OSX |
| 378 | + pass |
| 379 | + self._logger.debug(' System: %s', platform.system()) |
| 380 | + self._logger.debug(' Machine: %s', platform.machine()) |
| 381 | + self._logger.debug(' Platform: %s', platform.platform()) |
| 382 | + # uname appears to leak sensitive information |
| 383 | + # self._logger.debug(' uname: %s', platform.uname()) |
| 384 | + self._logger.debug(' Version: %s', platform.version()) |
| 385 | + self._logger.debug(' Mac version: %s', platform.mac_ver()) |
| 386 | + requirements = pkg_resources.resource_string('autosklearn', 'requirements.txt') |
| 387 | + requirements = requirements.decode('utf-8') |
| 388 | + requirements = [requirement for requirement in requirements.split('\n')] |
| 389 | + for requirement in requirements: |
| 390 | + if not requirement: |
| 391 | + continue |
| 392 | + match = RE_PATTERN.match(requirement) |
| 393 | + if match: |
| 394 | + name = match.group('name') |
| 395 | + module_dist = pkg_resources.get_distribution(name) |
| 396 | + self._logger.debug(' %s', module_dist) |
| 397 | + else: |
| 398 | + raise ValueError('Unable to read requirement: %s' % requirement) |
| 399 | + self._logger.debug('Done printing environment information') |
| 400 | + self._logger.debug('Starting to print arguments to auto-sklearn') |
| 401 | + self._logger.debug(' output_folder: %s', self._backend.context._output_directory) |
| 402 | + self._logger.debug(' tmp_folder: %s', self._backend.context._temporary_directory) |
| 403 | + self._logger.debug(' time_left_for_this_task: %f', self._time_for_task) |
| 404 | + self._logger.debug(' per_run_time_limit: %f', self._per_run_time_limit) |
| 405 | + self._logger.debug( |
| 406 | + ' initial_configurations_via_metalearning: %d', |
| 407 | + self._initial_configurations_via_metalearning, |
| 408 | + ) |
| 409 | + self._logger.debug(' ensemble_size: %d', self._ensemble_size) |
| 410 | + self._logger.debug(' ensemble_nbest: %f', self._ensemble_nbest) |
| 411 | + self._logger.debug(' max_models_on_disc: %d', self._max_models_on_disc) |
| 412 | + self._logger.debug(' ensemble_memory_limit: %d', self._ensemble_memory_limit) |
| 413 | + self._logger.debug(' seed: %d', self._seed) |
| 414 | + self._logger.debug(' ml_memory_limit: %d', self._ml_memory_limit) |
| 415 | + self._logger.debug(' metadata_directory: %s', self._metadata_directory) |
| 416 | + self._logger.debug(' debug_mode: %s', self._debug_mode) |
| 417 | + self._logger.debug(' include_estimators: %s', str(self._include_estimators)) |
| 418 | + self._logger.debug(' exclude_estimators: %s', str(self._exclude_estimators)) |
| 419 | + self._logger.debug(' include_preprocessors: %s', str(self._include_preprocessors)) |
| 420 | + self._logger.debug(' exclude_preprocessors: %s', str(self._exclude_preprocessors)) |
| 421 | + self._logger.debug(' resampling_strategy: %s', str(self._resampling_strategy)) |
| 422 | + self._logger.debug(' resampling_strategy_arguments: %s', |
| 423 | + str(self._resampling_strategy_arguments)) |
| 424 | + self._logger.debug(' shared_mode: %s', str(self._shared_mode)) |
| 425 | + self._logger.debug(' precision: %s', str(self.precision)) |
| 426 | + self._logger.debug(' disable_evaluator_output: %s', str(self._disable_evaluator_output)) |
| 427 | + self._logger.debug(' get_smac_objective_callback: %s', str(self._get_smac_object_callback)) |
| 428 | + self._logger.debug(' smac_scenario_args: %s', str(self._smac_scenario_args)) |
| 429 | + self._logger.debug(' logging_config: %s', str(self.logging_config)) |
| 430 | + self._logger.debug(' metric: %s', str(self._metric)) |
| 431 | + self._logger.debug('Done printing arguments to auto-sklearn') |
| 432 | + self._logger.debug('Starting to print available components') |
| 433 | + for choice in ( |
| 434 | + ClassifierChoice, RegressorChoice, FeaturePreprocessorChoice, |
| 435 | + OHEChoice, RescalingChoice, CoalescenseChoice, |
| 436 | + ): |
| 437 | + self._logger.debug( |
| 438 | + '%s: %s', |
| 439 | + choice.__name__, |
| 440 | + choice.get_components(), |
| 441 | + ) |
| 442 | + self._logger.debug('Done printing available components') |
| 443 | + |
359 | 444 | datamanager = XYDataManager( |
360 | 445 | X, y, |
361 | 446 | X_test=X_test, |
|
0 commit comments