Skip to content

Commit aa17295

Browse files
Removed first batch from default algorithm (#4215)
* Initial commit
* Fixed failing tests
* Updated test case
* Updated release notes
* Reduced processing time
* Test updates
1 parent 2481ed1 commit aa17295

File tree

10 files changed

+61
-65
lines changed

10 files changed

+61
-65
lines changed

docs/source/demos/lead_scoring.ipynb

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -171,7 +171,7 @@
171171
" objective=lead_scoring_objective,\n",
172172
" additional_objectives=[\"auc\"],\n",
173173
" allowed_model_families=[\"extra_trees\", \"linear_model\"],\n",
174-
" max_batches=3,\n",
174+
" max_batches=2,\n",
175175
" verbose=True,\n",
176176
")\n",
177177
"\n",
@@ -273,7 +273,7 @@
273273
" objective=\"auc\",\n",
274274
" additional_objectives=[lead_scoring_objective],\n",
275275
" allowed_model_families=[\"extra_trees\", \"linear_model\"],\n",
276-
" max_batches=3,\n",
276+
" max_batches=2,\n",
277277
" verbose=True,\n",
278278
")\n",
279279
"\n",

docs/source/release_notes.rst

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@ Release Notes
2828

2929
**Breaking Changes**
3030
* Removed Decision Tree and CatBoost Estimators from AutoML search :pr:`4205`
31+
* Removed first batch from default algorithm :pr:`4215`
3132

3233

3334
**v0.77.0 Jun. 07, 2023**

docs/source/start.ipynb

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -127,7 +127,7 @@
127127
" y_train=y_train,\n",
128128
" problem_type=\"binary\",\n",
129129
" objective=\"f1\",\n",
130-
" max_batches=3,\n",
130+
" max_batches=2,\n",
131131
" verbose=True,\n",
132132
")"
133133
]
@@ -170,7 +170,7 @@
170170
" y_train=y_train,\n",
171171
" problem_type=\"binary\",\n",
172172
" objective=\"f1\",\n",
173-
" max_batches=3,\n",
173+
" max_batches=2,\n",
174174
" verbose=False,\n",
175175
")\n",
176176
"automl.search()"

evalml/automl/automl_algorithm/default_algorithm.py

Lines changed: 15 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -169,11 +169,9 @@ def __init__(
169169
def default_max_batches(self):
170170
"""Returns the number of max batches AutoMLSearch should run by default."""
171171
if self.ensembling:
172-
return 4
173-
elif is_time_series(self.problem_type):
174-
return 2 # we do not run feature selection for time series
175-
else:
176172
return 3
173+
else:
174+
return 2
177175

178176
def num_pipelines_per_batch(self, batch_number):
179177
"""Return the number of pipelines in the nth batch.
@@ -184,17 +182,17 @@ def num_pipelines_per_batch(self, batch_number):
184182
Returns:
185183
int: number of pipelines in the given batch.
186184
"""
187-
if batch_number == 0 or batch_number == 1:
185+
if batch_number == 0:
188186
return len(self._naive_estimators())
189-
elif batch_number == 2:
187+
elif batch_number == 1:
190188
return len(self._non_naive_estimators())
191189
if self.ensembling:
192-
if batch_number % 2 != 0:
190+
if batch_number % 2 == 0:
193191
return 1
194-
elif batch_number == 4:
192+
elif batch_number == 3:
195193
return self.num_long_explore_pipelines * self.top_n
196194
else:
197-
if batch_number == 3:
195+
if batch_number == 2:
198196
return self.num_long_explore_pipelines * self.top_n
199197
return self.num_long_pipelines_per_batch * self.top_n
200198

@@ -452,20 +450,18 @@ def next_batch(self):
452450
"""
453451
if self.ensembling:
454452
if self._batch_number == 0:
455-
next_batch = self._create_naive_pipelines()
456-
elif self._batch_number == 1:
457453
next_batch = self._create_naive_pipelines(
458454
use_features=self.run_feature_selection,
459455
)
460-
elif self._batch_number == 2:
456+
elif self._batch_number == 1:
461457
next_batch = self._create_fast_final()
462-
elif self.batch_number == 3:
458+
elif self.batch_number == 2:
463459
next_batch = self._create_ensemble(
464460
self._pipeline_parameters.get("Label Encoder", {}),
465461
)
466-
elif self.batch_number == 4:
462+
elif self.batch_number == 3:
467463
next_batch = self._create_long_exploration(n=self.top_n)
468-
elif self.batch_number % 2 != 0:
464+
elif self.batch_number % 2 == 0:
469465
next_batch = self._create_ensemble(
470466
self._pipeline_parameters.get("Label Encoder", {}),
471467
)
@@ -489,14 +485,12 @@ def next_batch(self):
489485
)
490486
else:
491487
if self._batch_number == 0:
492-
next_batch = self._create_naive_pipelines()
493-
elif self._batch_number == 1:
494488
next_batch = self._create_naive_pipelines(
495489
use_features=self.run_feature_selection,
496490
)
497-
elif self._batch_number == 2:
491+
elif self._batch_number == 1:
498492
next_batch = self._create_fast_final()
499-
elif self.batch_number == 3:
493+
elif self.batch_number == 2:
500494
next_batch = self._create_long_exploration(n=self.top_n)
501495
else:
502496
next_batch = self._create_n_pipelines(
@@ -583,15 +577,15 @@ def add_result(
583577
"""
584578
cached_data = cached_data or {}
585579
if pipeline.model_family != ModelFamily.ENSEMBLE:
586-
if self.batch_number >= 3:
580+
if self.batch_number >= 2:
587581
super().add_result(
588582
score_to_minimize,
589583
pipeline,
590584
trained_pipeline_results,
591585
)
592586

593587
if (
594-
self.batch_number == 2
588+
self.batch_number == 1
595589
and self._selected_cols is None
596590
and not is_time_series(self.problem_type)
597591
):

evalml/automl/automl_search.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -180,7 +180,7 @@ def search(
180180

181181
max_batches = None
182182
if mode == "fast":
183-
max_batches = 4 # corresponds to end of 'fast' mode
183+
max_batches = 3 # corresponds to end of 'fast' mode
184184
elif mode == "long" and max_time:
185185
max_batches = 999 # defers to stopping criterion
186186
elif mode == "long" and max_time is None:

evalml/tests/automl_tests/test_automl.py

Lines changed: 24 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -266,8 +266,7 @@ def test_search_batch_times(caplog, X_y_binary, AutoMLTestEnv):
266266

267267
assert len(batch_times) == 3
268268
assert len(batch_times[1]) == 2
269-
assert len(batch_times[2]) == 2
270-
assert len(batch_times[3]) == 6
269+
assert len(batch_times[2]) == 6
271270

272271
assert "Batch Time Stats" in out
273272
assert "Batch 1 time stats" in out
@@ -363,11 +362,11 @@ def test_pipeline_limits(
363362
automl.search()
364363
out = caplog.text
365364
if verbose:
366-
assert "Using default limit of max_batches=3." in out
367-
assert "Searching up to 3 batches for a total of" in out
365+
assert "Using default limit of max_batches=2." in out
366+
assert "Searching up to 2 batches for a total of" in out
368367
else:
369-
assert "Using default limit of max_batches=3." not in out
370-
assert "Searching up to 3 batches for a total of" not in out
368+
assert "Using default limit of max_batches=2." not in out
369+
assert "Searching up to 2 batches for a total of" not in out
371370
assert len(automl.results["pipeline_results"]) > 0
372371

373372
caplog.clear()
@@ -1806,6 +1805,7 @@ def test_pipelines_in_batch_return_nan(
18061805
y_train=y,
18071806
problem_type="binary",
18081807
max_batches=3,
1808+
automl_algorithm="iterative",
18091809
allowed_component_graphs={"Name": [dummy_classifier_estimator_class]},
18101810
n_jobs=1,
18111811
)
@@ -1819,7 +1819,10 @@ def test_pipelines_in_batch_return_nan(
18191819
automl.search()
18201820
assert len(automl.errors) > 0
18211821
for pipeline_name, pipeline_error in automl.errors.items():
1822-
assert "Label Encoder" in pipeline_error["Parameters"]
1822+
assert (
1823+
"Label Encoder" in pipeline_error["Parameters"]
1824+
or "Mock Classifier" in pipeline_error["Parameters"]
1825+
)
18231826
assert isinstance(pipeline_error["Exception"], TypeError)
18241827
assert "line" in pipeline_error["Traceback"]
18251828

@@ -1858,7 +1861,8 @@ def test_pipelines_in_batch_return_none(
18581861
X_train=X,
18591862
y_train=y,
18601863
problem_type="binary",
1861-
max_batches=3,
1864+
max_batches=2,
1865+
automl_algorithm="iterative",
18621866
allowed_component_graphs={"Name": [dummy_classifier_estimator_class]},
18631867
n_jobs=1,
18641868
)
@@ -2295,7 +2299,7 @@ def test_time_series_regression_with_parameters(ts_data):
22952299
allowed_component_graphs={"Name_0": ["Imputer", "Linear Regressor"]},
22962300
objective="auto",
22972301
problem_configuration=problem_configuration,
2298-
max_batches=3,
2302+
max_batches=2,
22992303
)
23002304
assert (
23012305
automl.automl_algorithm.search_parameters["pipeline"] == problem_configuration
@@ -2340,7 +2344,7 @@ def test_automl_accepts_component_graphs(graph_type, X_y_binary):
23402344
problem_type="binary",
23412345
allowed_component_graphs={"Dummy_Name": component_graph},
23422346
objective="auto",
2343-
max_batches=3,
2347+
max_batches=2,
23442348
)
23452349
for pipeline_ in automl.allowed_pipelines:
23462350
assert isinstance(pipeline_, BinaryClassificationPipeline)
@@ -4154,7 +4158,7 @@ def test_automl_drop_unknown_columns(columns, AutoMLTestEnv, X_y_binary, caplog)
41544158
y_train=y,
41554159
problem_type="binary",
41564160
optimize_thresholds=False,
4157-
max_batches=3,
4161+
max_batches=2,
41584162
verbose=True,
41594163
)
41604164
env = AutoMLTestEnv("binary")
@@ -4296,7 +4300,7 @@ def dummy_mock_get_preprocessing_components(*args, **kwargs):
42964300
max_batches=1,
42974301
verbose=verbose,
42984302
)
4299-
env = AutoMLTestEnv("binary")
4303+
env = AutoMLTestEnv("regression")
43004304
with env.test_context(score_return_value={automl.objective.name: 1.0}):
43014305
automl.search()
43024306

@@ -4466,7 +4470,7 @@ def test_automl_ensembler_allowed_component_graphs(
44664470
problem_type="regression",
44674471
allowed_component_graphs=component_graphs,
44684472
ensembling=True,
4469-
max_batches=4,
4473+
max_batches=3,
44704474
verbose=True,
44714475
)
44724476
automl.search()
@@ -4569,7 +4573,7 @@ def test_automl_passes_known_in_advance_pipeline_parameters_to_all_pipelines(
45694573
X_train=X,
45704574
y_train=y,
45714575
problem_type=problem_type,
4572-
max_batches=3,
4576+
max_batches=2,
45734577
problem_configuration={
45744578
"time_index": "date",
45754579
"max_delay": 3,
@@ -4620,7 +4624,7 @@ def test_cv_ranking_scores(
46204624
X_train=X,
46214625
y_train=y,
46224626
problem_type="binary",
4623-
max_batches=3,
4627+
max_batches=2,
46244628
data_splitter=data_splitter,
46254629
allowed_component_graphs={"Name": [dummy_classifier_estimator_class]},
46264630
n_jobs=1,
@@ -4724,7 +4728,7 @@ def test_search_parameters_held_automl(
47244728
],
47254729
},
47264730
}
4727-
batches = 2 if algorithm == "default" else batches
4731+
batches = 2 if algorithm == "default" else batches
47284732

47294733
search_parameters = {
47304734
"Imputer": {"numeric_impute_strategy": parameter},
@@ -4814,7 +4818,7 @@ def test_automl_accepts_features(
48144818
y_train=y,
48154819
problem_type="binary",
48164820
optimize_thresholds=False,
4817-
max_batches=3,
4821+
max_batches=2,
48184822
features=features,
48194823
automl_algorithm=automl_algorithm,
48204824
)
@@ -4858,7 +4862,7 @@ def test_automl_with_empty_features_list(
48584862
y_train=y,
48594863
problem_type="binary",
48604864
optimize_thresholds=False,
4861-
max_batches=3,
4865+
max_batches=2,
48624866
features=[],
48634867
automl_algorithm=automl_algorithm,
48644868
)
@@ -5071,7 +5075,7 @@ def test_default_algorithm_uses_n_jobs(X_y_binary, AutoMLTestEnv):
50715075
X_train=X,
50725076
y_train=y,
50735077
problem_type="binary",
5074-
max_batches=3,
5078+
max_batches=2,
50755079
automl_algorithm="default",
50765080
n_jobs=2,
50775081
)
@@ -5521,7 +5525,7 @@ def test_holdout_set_results_and_rankings(caplog, AutoMLTestEnv):
55215525
X_train=X,
55225526
y_train=y,
55235527
problem_type="binary",
5524-
max_batches=3,
5528+
max_batches=2,
55255529
automl_algorithm="default",
55265530
verbose=True,
55275531
holdout_set_size=0.1,

evalml/tests/automl_tests/test_automl_search_classification.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1022,7 +1022,7 @@ def test_automl_search_dictionary_undersampler(
10221022
optimize_thresholds=False,
10231023
sampler_method="Undersampler",
10241024
search_parameters=search_parameters,
1025-
max_batches=3,
1025+
max_batches=2,
10261026
)
10271027
# check that the sampling dict got set properly
10281028
automl.search()
@@ -1077,7 +1077,7 @@ def test_automl_search_dictionary_oversampler(
10771077
sampler_method="Oversampler",
10781078
optimize_thresholds=False,
10791079
search_parameters=search_parameters,
1080-
max_batches=3,
1080+
max_batches=2,
10811081
)
10821082
# check that the sampling dict got set properly
10831083
pipelines = automl.allowed_pipelines
@@ -1122,7 +1122,7 @@ def test_automl_search_sampler_dictionary_keys(
11221122
sampler_method=sampler,
11231123
optimize_thresholds=False,
11241124
search_parameters=search_parameters,
1125-
max_batches=3,
1125+
max_batches=2,
11261126
)
11271127
if errors:
11281128
with pytest.raises(
@@ -1254,7 +1254,7 @@ def test_automl_passes_allow_long_running_models(
12541254
objective="Log Loss Multiclass",
12551255
allow_long_running_models=allow_long_running_models,
12561256
automl_algorithm=algo,
1257-
max_batches=3,
1257+
max_batches=2,
12581258
verbose=True,
12591259
)
12601260
assert (
@@ -1280,7 +1280,7 @@ def test_automl_threshold_score(fraud_100):
12801280
X_train,
12811281
y_train,
12821282
problem_type="binary",
1283-
max_batches=4,
1283+
max_batches=2,
12841284
ensembling=True,
12851285
verbose=False,
12861286
automl_algorithm="default",

0 commit comments

Comments
 (0)