|
| 1 | +# -*- encoding: utf-8 -*- |
| 2 | +""" |
| 3 | +===================== |
| 4 | +Logging and debugging |
| 5 | +===================== |
| 6 | +
|
| 7 | +This example shows how to provide a custom logging configuration to *auto-sklearn*. |
| 8 | +We will be fitting 2 pipelines and showing any INFO-level message on the console. |
| 9 | +Even if you do not provide a `logging_config`, *auto-sklearn* creates a log file |
| 10 | +in the temporary working directory. This directory can be specified via the `tmp_folder` |
| 11 | +argument, as exemplified below. |
| 12 | +
|
| 13 | +This example also highlights additional information about the internal |
| 14 | +directory structure of *auto-sklearn*. |
| 15 | +""" |
| 16 | +import pathlib |
| 17 | + |
| 18 | +import sklearn.datasets |
| 19 | +import sklearn.metrics |
| 20 | +import sklearn.model_selection |
| 21 | + |
| 22 | +import autosklearn.classification |
| 23 | + |
| 24 | + |
| 25 | +############################################################################ |
| 26 | +# Data Loading |
| 27 | +# ============ |
| 28 | +# Load kr-vs-kp dataset from https://www.openml.org/d/3 |
| 29 | +X, y = data = sklearn.datasets.fetch_openml(data_id=3, return_X_y=True, as_frame=True) |
| 30 | + |
| 31 | +X_train, X_test, y_train, y_test = \ |
| 32 | + sklearn.model_selection.train_test_split(X, y, random_state=1) |
| 33 | + |
| 34 | + |
| 35 | +############################################################################ |
| 36 | +# Create a logging config |
| 37 | +# ======================= |
| 38 | +# *auto-sklearn* uses a default |
| 39 | +# `logging config <https://github.com/automl/auto-sklearn/blob/master/autosklearn/util/logging.yaml>`_. |
| 40 | +# We will instead create a custom one as follows: |
| 41 | + |
| 42 | +logging_config = { |
| 43 | + 'version': 1, |
| 44 | + 'disable_existing_loggers': True, |
| 45 | + 'formatters': { |
| 46 | + 'custom': { |
| 47 | + # More format options are available in the official |
| 48 | + # `documentation <https://docs.python.org/3/howto/logging-cookbook.html>`_ |
| 49 | + 'format': '%(asctime)s - %(name)s - %(levelname)s - %(message)s' |
| 50 | + } |
| 51 | + }, |
| 52 | + |
| 53 | + # Any INFO-level message will be printed to the console |
| 54 | + 'handlers': { |
| 55 | + 'console': { |
| 56 | + 'level': 'INFO', |
| 57 | + 'formatter': 'custom', |
| 58 | + 'class': 'logging.StreamHandler', |
| 59 | + 'stream': 'ext://sys.stdout', |
| 60 | + }, |
| 61 | + }, |
| 62 | + |
| 63 | + 'loggers': { |
| 64 | + '': { # root logger |
| 65 | + 'level': 'DEBUG', |
| 66 | + }, |
| 67 | + 'Client-EnsembleBuilder': { |
| 68 | + 'level': 'DEBUG', |
| 69 | + 'handlers': ['console'], |
| 70 | + }, |
| 71 | + }, |
| 72 | +} |
| 73 | + |
| 74 | + |
| 75 | +############################################################################ |
| 76 | +# Build and fit a classifier |
| 77 | +# ========================== |
| 78 | +cls = autosklearn.classification.AutoSklearnClassifier( |
| 79 | + time_left_for_this_task=30, |
| 80 | + # The two flags below are provided to speed up calculations |
| 81 | + # Not recommended for a real implementation |
| 82 | + initial_configurations_via_metalearning=0, |
| 83 | + smac_scenario_args={'runcount_limit': 2}, |
| 84 | + # Pass the config file we created |
| 85 | + logging_config=logging_config, |
| 86 | + # *auto-sklearn* generates temporary files under tmp_folder |
| 87 | + tmp_folder='./tmp_folder', |
| 88 | + # By default tmp_folder is deleted. We will preserve it |
| 89 | + # for debug purposes |
| 90 | + delete_tmp_folder_after_terminate=False, |
| 91 | +) |
| 92 | +cls.fit(X_train, y_train, X_test, y_test) |
| 93 | + |
| 94 | +# *auto-sklearn* generates intermediate files which can be of interest. |
| 95 | +# Dask multiprocessing information, useful on multi-core runs: |
| 96 | +# * tmp_folder/distributed.log |
| 97 | +# The individual fitted estimators are written to disk in: |
| 98 | +# * tmp_folder/.auto-sklearn/runs |
| 99 | +# SMAC output is stored in the directory listed below. |
| 100 | +# For more info, you can check the `SMAC documentation <https://github.com/automl/SMAC3>`_ |
| 101 | +# * tmp_folder/smac3-output |
| 102 | +# *auto-sklearn* always outputs to this log file: |
| 103 | +# * tmp_folder/AutoML*.log |
| 104 | +for filename in pathlib.Path('./tmp_folder').glob('*'): |
| 105 | + print(filename) |
0 commit comments