Skip to content

Commit bdf6c53

Browse files
Copilot and thinkall committed
Add example script demonstrating preprocess() API usage
Co-authored-by: thinkall <3197038+thinkall@users.noreply.github.com>
1 parent 382350d commit bdf6c53

File tree

1 file changed

+96
-0
lines changed

1 file changed

+96
-0
lines changed

notebook/preprocess_api_example.py

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
"""
Example demonstrating the use of FLAML's preprocess() API.

This script shows how to use both task-level and estimator-level preprocessing
APIs exposed by FLAML AutoML.

Run it directly (``python preprocess_api_example.py``). All work happens in
``main()`` so that importing this module does not train a model.
"""

from flaml import AutoML
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
import numpy as np

# Horizontal rule used to frame every section title in the console output.
_RULE = "=" * 60


def _print_section(title):
    """Print a blank line, then *title* framed above and below by a rule."""
    print("\n" + _RULE)
    print(title)
    print(_RULE)


def main():
    """Train a small AutoML classifier and walk through four preprocess() examples."""
    # Load and split data
    print("Loading breast cancer dataset...")
    X, y = load_breast_cancer(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    print(f"Training data shape: {X_train.shape}")
    print(f"Test data shape: {X_test.shape}")

    # Train AutoML model
    print("\nTraining AutoML model...")
    automl = AutoML()
    automl_settings = {
        "time_budget": 10,  # 10 seconds
        "task": "classification",
        "metric": "accuracy",
        "estimator_list": ["lgbm", "xgboost"],
        "verbose": 0,
    }
    automl.fit(X_train, y_train, **automl_settings)

    print(f"Best estimator: {automl.best_estimator}")
    # The script reports accuracy as the complement of the recorded best loss.
    print(f"Best accuracy: {1 - automl.best_loss:.4f}")

    # Example 1: Using task-level preprocessing
    _print_section("Example 1: Task-level preprocessing")
    X_test_task = automl.preprocess(X_test)
    print(f"Original test data shape: {X_test.shape}")
    print(f"After task preprocessing: {X_test_task.shape}")

    # Example 2: Using estimator-level preprocessing
    _print_section("Example 2: Estimator-level preprocessing")
    estimator = automl.model
    # Estimator-level preprocessing is fed the output of the task-level step.
    X_test_estimator = estimator.preprocess(X_test_task)
    print(f"After estimator preprocessing: {X_test_estimator.shape}")

    # Example 3: Complete preprocessing pipeline
    _print_section("Example 3: Complete preprocessing pipeline")
    # Apply both levels of preprocessing
    X_preprocessed = automl.preprocess(X_test)
    X_final = automl.model.preprocess(X_preprocessed)

    # Manual prediction using fully preprocessed data.
    # NOTE(review): `_model` is a private attribute (leading underscore),
    # presumably the underlying fitted model -- confirm it is stable before
    # relying on it outside of examples.
    y_pred_manual = automl.model._model.predict(X_final)

    # Compare with AutoML's predict method (which does preprocessing internally)
    y_pred_auto = automl.predict(X_test)

    print(f"Predictions match: {np.array_equal(y_pred_manual, y_pred_auto)}")
    print(f"Manual prediction sample: {y_pred_manual[:5]}")
    print(f"Auto prediction sample: {y_pred_auto[:5]}")

    # Example 4: Using preprocessing for custom inference
    _print_section("Example 4: Custom inference with preprocessing")
    # You might want to apply preprocessing separately for:
    # - Debugging
    # - Custom inference pipelines
    # - Integration with other tools

    # Get preprocessed features (task level first, then estimator level)
    X_features = automl.preprocess(X_test)
    X_features = automl.model.preprocess(X_features)

    # Now you can use these features with the underlying model or for analysis
    print(f"Preprocessed features ready for custom use: {X_features.shape}")
    print(f"Feature statistics - Mean: {np.mean(X_features):.4f}, Std: {np.std(X_features):.4f}")

    _print_section("Summary")
    print("The preprocess() API allows you to:")
    print("1. Apply task-level preprocessing with automl.preprocess()")
    print("2. Apply estimator-level preprocessing with estimator.preprocess()")
    print("3. Chain both for complete preprocessing pipeline")
    print("4. Use preprocessed data for custom inference or analysis")
    print("\nNote: Task-level preprocessing should always be applied before")
    print("      estimator-level preprocessing.")


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)