"""
Example demonstrating the use of FLAML's preprocess() API.

This script shows how to use both task-level and estimator-level preprocessing
APIs exposed by FLAML AutoML.
"""

import numpy as np
from flaml import AutoML
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

# Horizontal rule printed above and below every section title.
SECTION_RULE = "=" * 60


def print_section(title):
    """Print ``title`` framed by two rules, preceded by a blank line."""
    print("\n" + SECTION_RULE)
    print(title)
    print(SECTION_RULE)


# Load and split data (fixed random_state keeps the split reproducible).
print("Loading breast cancer dataset...")
X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(f"Training data shape: {X_train.shape}")
print(f"Test data shape: {X_test.shape}")

# Train AutoML model
print("\nTraining AutoML model...")
automl = AutoML()
automl_settings = {
    "time_budget": 10,  # 10 seconds
    "task": "classification",
    "metric": "accuracy",
    "estimator_list": ["lgbm", "xgboost"],
    "verbose": 0,
}
automl.fit(X_train, y_train, **automl_settings)

print(f"Best estimator: {automl.best_estimator}")
# The loss is reported as 1 - accuracy here, so invert it for display.
print(f"Best accuracy: {1 - automl.best_loss:.4f}")

# Example 1: Using task-level preprocessing
print_section("Example 1: Task-level preprocessing")
X_test_task = automl.preprocess(X_test)
print(f"Original test data shape: {X_test.shape}")
print(f"After task preprocessing: {X_test_task.shape}")

# Example 2: Using estimator-level preprocessing
print_section("Example 2: Estimator-level preprocessing")
# automl.model is FLAML's wrapper around the best trained estimator; its
# preprocess() is applied on top of the task-level output from Example 1.
estimator = automl.model
X_test_estimator = estimator.preprocess(X_test_task)
print(f"After estimator preprocessing: {X_test_estimator.shape}")

# Example 3: Complete preprocessing pipeline
print_section("Example 3: Complete preprocessing pipeline")
# Apply both levels of preprocessing
X_preprocessed = automl.preprocess(X_test)
X_final = automl.model.preprocess(X_preprocessed)

# Manual prediction using fully preprocessed data. The underlying fitted model
# is reached through the wrapper's public `estimator` property rather than the
# private `_model` attribute.
y_pred_manual = automl.model.estimator.predict(X_final)

# Compare with AutoML's predict method (which does preprocessing internally)
y_pred_auto = automl.predict(X_test)

print(f"Predictions match: {np.array_equal(y_pred_manual, y_pred_auto)}")
print(f"Manual prediction sample: {y_pred_manual[:5]}")
print(f"Auto prediction sample: {y_pred_auto[:5]}")

# Example 4: Using preprocessing for custom inference
print_section("Example 4: Custom inference with preprocessing")
# You might want to apply preprocessing separately for:
# - Debugging
# - Custom inference pipelines
# - Integration with other tools

# Get preprocessed features
X_features = automl.preprocess(X_test)
X_features = automl.model.preprocess(X_features)

# Now you can use these features with the underlying model or for analysis
print(f"Preprocessed features ready for custom use: {X_features.shape}")
print(f"Feature statistics - Mean: {np.mean(X_features):.4f}, Std: {np.std(X_features):.4f}")

print_section("Summary")
print("The preprocess() API allows you to:")
print("1. Apply task-level preprocessing with automl.preprocess()")
print("2. Apply estimator-level preprocessing with estimator.preprocess()")
print("3. Chain both for complete preprocessing pipeline")
print("4. Use preprocessed data for custom inference or analysis")
print("\nNote: Task-level preprocessing should always be applied before")
print("      estimator-level preprocessing.")