13 | 13 | import sklearn.metrics |
14 | 14 | from sklearn.utils.multiclass import type_of_target |
15 | 15 |
16 | | - |
17 | 16 | import autosklearn.classification |
18 | 17 |
19 | 18 |
20 | | -############################################################################ |
21 | | -# Data Loading |
22 | | -# ============ |
23 | | - |
24 | | -# Using reuters multilabel dataset -- https://www.openml.org/d/40594 |
25 | | -X, y = sklearn.datasets.fetch_openml(data_id=40594, return_X_y=True, as_frame=False) |
26 | | - |
27 | | -# fetch openml downloads a numpy array with TRUE/FALSE strings. Re-map it to |
28 | | -# integer dtype with ones and zeros |
29 | | -# This is to comply with Scikit-learn requirement: |
30 | | -# "Positive classes are indicated with 1 and negative classes with 0 or -1." |
31 | | -# More information on: https://scikit-learn.org/stable/modules/multiclass.html |
32 | | -y[y == 'TRUE'] = 1 |
33 | | -y[y == 'FALSE'] = 0 |
34 | | -y = y.astype(np.int) |
35 | | - |
36 | | -# Using type of target is a good way to make sure your data |
37 | | -# is properly formatted |
38 | | -print(f"type_of_target={type_of_target(y)}") |
39 | | - |
40 | | -X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( |
41 | | -    X, y, random_state=1
42 | | -) |
43 | | - |
44 | | -############################################################################ |
45 | | -# Building the classifier |
46 | | -# ======================= |
47 | | - |
48 | | -automl = autosklearn.classification.AutoSklearnClassifier( |
49 | | -    time_left_for_this_task=60,
50 | | -    per_run_time_limit=30,
51 | | -    # Bellow two flags are provided to speed up calculations
52 | | -    # Not recommended for a real implementation
53 | | -    initial_configurations_via_metalearning=0,
54 | | -    smac_scenario_args={'runcount_limit': 1},
55 | | -) |
56 | | -automl.fit(X_train, y_train, dataset_name='reuters') |
57 | | - |
58 | | -############################################################################ |
59 | | -# Print the final ensemble constructed by auto-sklearn |
60 | | -# ==================================================== |
61 | | - |
62 | | -print(automl.show_models()) |
63 | | - |
64 | | -############################################################################ |
65 | | -# Print statistics about the auto-sklearn run |
66 | | -# =========================================== |
67 | | - |
68 | | -# Print statistics about the auto-sklearn run such as number of |
69 | | -# iterations, number of models failed with a time out. |
70 | | -print(automl.sprint_statistics()) |
71 | | - |
72 | | -############################################################################ |
73 | | -# Get the Score of the final ensemble |
74 | | -# =================================== |
75 | | - |
76 | | -predictions = automl.predict(X_test) |
77 | | -print("Accuracy score", sklearn.metrics.accuracy_score(y_test, predictions)) |
| 19 | +if __name__ == "__main__": |
| 20 | +    ############################################################################
| 21 | +    # Data Loading
| 22 | +    # ============
| 23 | +
| 24 | +    # Using the reuters multilabel dataset -- https://www.openml.org/d/40594
| 25 | +    X, y = sklearn.datasets.fetch_openml(data_id=40594, return_X_y=True, as_frame=False)
| 26 | +
| 27 | +    # fetch_openml downloads a numpy array with TRUE/FALSE strings. Re-map it to
| 28 | +    # an integer dtype with ones and zeros.
| 29 | +    # This is to comply with the scikit-learn requirement:
| 30 | +    # "Positive classes are indicated with 1 and negative classes with 0 or -1."
| 31 | +    # More information on: https://scikit-learn.org/stable/modules/multiclass.html
| 32 | +    y[y == 'TRUE'] = 1
| 33 | +    y[y == 'FALSE'] = 0
| 34 | +    y = y.astype(int)
| 35 | +
| 36 | +    # Using type_of_target is a good way to make sure your data
| 37 | +    # is properly formatted
| 38 | +    print(f"type_of_target={type_of_target(y)}")
| 39 | +
| 40 | +    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
| 41 | +        X, y, random_state=1
| 42 | +    )
| 43 | + |
| 44 | +    ############################################################################
| 45 | +    # Building the classifier
| 46 | +    # =======================
| 47 | +
| 48 | +    automl = autosklearn.classification.AutoSklearnClassifier(
| 49 | +        time_left_for_this_task=60,
| 50 | +        per_run_time_limit=30,
| 51 | +        # The two flags below are provided to speed up the example and are
| 52 | +        # not recommended for a real implementation.
| 53 | +        initial_configurations_via_metalearning=0,
| 54 | +        smac_scenario_args={'runcount_limit': 1},
| 55 | +    )
| 56 | +    automl.fit(X_train, y_train, dataset_name='reuters')
| 57 | + |
| 58 | +    ############################################################################
| 59 | +    # Print the final ensemble constructed by auto-sklearn
| 60 | +    # ====================================================
| 61 | +
| 62 | +    print(automl.show_models())
| 63 | +
| 64 | +    ############################################################################
| 65 | +    # Print statistics about the auto-sklearn run
| 66 | +    # ===========================================
| 67 | +
| 68 | +    # Print statistics about the auto-sklearn run, such as the number of
| 69 | +    # iterations and the number of models that failed with a time out.
| 70 | +    print(automl.sprint_statistics())
| 71 | +
| 72 | +    ############################################################################
| 73 | +    # Get the Score of the final ensemble
| 74 | +    # ===================================
| 75 | +
| 76 | +    predictions = automl.predict(X_test)
| 77 | +    print("Accuracy score", sklearn.metrics.accuracy_score(y_test, predictions))
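The `if __name__ == "__main__":` guard added above matters because auto-sklearn runs its model fitting in worker subprocesses; with the 'spawn' start method (the default on Windows and on macOS since Python 3.8), each worker re-imports the main module, so unguarded module-level code would execute again in every child. Below is a minimal, generic sketch of that pattern using plain `multiprocessing` (not auto-sklearn internals), for illustration only:

```python
import multiprocessing as mp

def fit_one(seed):
    # Stand-in for work a library might dispatch to a worker process
    return seed * seed

if __name__ == "__main__":
    # Without this guard, 'spawn' workers re-importing the module would
    # reach the Pool creation again and fail during bootstrapping.
    mp.set_start_method("spawn", force=True)
    with mp.Pool(processes=2) as pool:
        print(pool.map(fit_one, [1, 2, 3]))  # -> [1, 4, 9]
```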
0 commit comments
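The target remapping in the example follows the scikit-learn multilabel convention quoted in its comments. A minimal standalone sketch of that format is shown below; the `y_raw` array here is made up for illustration and is not data from the commit:

```python
import numpy as np
from sklearn.utils.multiclass import type_of_target

# Hypothetical TRUE/FALSE string labels, shaped like a multilabel target
y_raw = np.array([["TRUE", "FALSE", "TRUE"],
                  ["FALSE", "FALSE", "TRUE"]])

# Positive classes as 1, negative classes as 0, integer dtype
y = np.where(y_raw == "TRUE", 1, 0)

print(type_of_target(y))  # -> 'multilabel-indicator'
```

For a multilabel-indicator target like this, `sklearn.metrics.accuracy_score` (as used at the end of the example) reports subset accuracy: a sample only counts as correct when every one of its labels matches.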