
Commit f94672e

Measuring runtimes (#1031)
* [skip ci] addressing #248
* Unit test to test existence of refit time
* Refactoring unit test
* Fixing unit test failures
* Unit test fixing + removing redundant parameter
* Debugging stochastic failure of test_joblib_backends unit test
* Unit test fix with decorators
* Flaky for failing unit test
* Adding flaky reruns for unit tests
* Fixing setup bug
* pytest rerun debug
* Fixing coverage failure
* Debugging coverage failure
* Debugging coverage failure
* Adding __init__ files in test/ for pytest-cov
* Debugging coverage failure
* Debugging lean unit test
* Debugging loky failure in unit tests
* Clean up of debugging stuff
1 parent 4aec00a commit f94672e

File tree: 10 files changed, +49 −46 lines


.github/workflows/ubuntu-test.yml

Lines changed: 5 additions & 2 deletions
@@ -29,6 +29,8 @@ jobs:
 
     steps:
     - uses: actions/checkout@v2
+      with:
+        fetch-depth: 2
     - name: Setup Python ${{ matrix.python-version }}
       uses: actions/setup-python@v2
       with:
@@ -51,7 +53,7 @@ jobs:
     - name: Run tests
       run: |
         if [ ${{ matrix.code-cov }} ]; then codecov='--cov=openml --long --cov-report=xml'; fi
-        pytest -n 4 --durations=20 --timeout=600 --timeout-method=thread --dist load -sv $codecov
+        pytest -n 4 --durations=20 --timeout=600 --timeout-method=thread --dist load -sv $codecov --reruns 5 --reruns-delay 1
     - name: Check for files left behind by test
       if: ${{ always() }}
       run: |
@@ -67,5 +69,6 @@ jobs:
       if: matrix.code-cov && always()
       uses: codecov/codecov-action@v1
       with:
+        files: coverage.xml
         fail_ci_if_error: true
-        verbose: true
+        verbose: true
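The `--reruns 5 --reruns-delay 1` flags come from the pytest-rerunfailures plugin that this commit adds to setup.py: a failing test is retried up to five times, with a one-second pause between attempts, before being reported as a failure. A minimal sketch of the same behaviour expressed as a per-test marker; the test itself is hypothetical and not part of this commit:

# Hypothetical test illustrating pytest-rerunfailures; the marker arguments
# mirror the CLI flags added to the workflow above.
import random

import pytest


@pytest.mark.flaky(reruns=5, reruns_delay=1)  # per-test override of the CLI defaults
def test_occasionally_fails():
    # An intermittently failing assertion is retried up to 5 times with a
    # 1-second delay before pytest reports it as a failure.
    assert random.random() < 0.9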

openml/config.py

Lines changed: 9 additions & 9 deletions
@@ -211,15 +211,6 @@ def _setup(config=None):
     else:
         cache_exists = True
 
-    if cache_exists:
-        _create_log_handlers()
-    else:
-        _create_log_handlers(create_file_handler=False)
-        openml_logger.warning(
-            "No permission to create OpenML directory at %s! This can result in OpenML-Python "
-            "not working properly." % config_dir
-        )
-
     if config is None:
         config = _parse_config(config_file)
 
@@ -240,6 +231,15 @@ def _get(config, key):
     connection_n_retries = int(_get(config, "connection_n_retries"))
     max_retries = int(_get(config, "max_retries"))
 
+    if cache_exists:
+        _create_log_handlers()
+    else:
+        _create_log_handlers(create_file_handler=False)
+        openml_logger.warning(
+            "No permission to create OpenML directory at %s! This can result in OpenML-Python "
+            "not working properly." % config_dir
+        )
+
     cache_directory = os.path.expanduser(short_cache_dir)
     # create the cache subdirectory
     if not os.path.exists(cache_directory):
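This change defers the log-handler setup (and the "no permission" warning) until after the configuration file has been parsed instead of issuing it beforehand. For reference, a generic sketch of the conditional file-handler pattern behind `_create_log_handlers(create_file_handler=False)`; the function below is illustrative and not OpenML's actual implementation:

# Illustrative only: a console handler is always attached, a file handler only
# when the target directory is writable. Names here are hypothetical.
import logging
import os


def create_log_handlers(logger: logging.Logger, log_dir: str, create_file_handler: bool = True) -> None:
    logger.addHandler(logging.StreamHandler())  # console logging always works
    if create_file_handler and os.access(log_dir, os.W_OK):
        # Write a log file only when the directory can actually be written to.
        logger.addHandler(logging.FileHandler(os.path.join(log_dir, "openml_python.log")))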

openml/extensions/sklearn/extension.py

Lines changed: 2 additions & 0 deletions
@@ -1744,6 +1744,8 @@ def _prediction_to_probabilities(
             user_defined_measures["usercpu_time_millis_training"] = modelfit_dur_cputime
 
         modelfit_dur_walltime = (time.time() - modelfit_start_walltime) * 1000
+        if hasattr(model_copy, "refit_time_"):
+            modelfit_dur_walltime += model_copy.refit_time_
         if can_measure_wallclocktime:
             user_defined_measures["wall_clock_time_millis_training"] = modelfit_dur_walltime
 
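The new branch folds scikit-learn's refit_time_ into the measured training wall-clock time. Search estimators such as GridSearchCV and RandomizedSearchCV (scikit-learn 0.20 and later, with refit=True) record, in seconds, how long refitting the best model on the full training set took, and the attribute only exists after fit. A standalone sketch of the attribute being read here; the dataset and search space are arbitrary and not part of this commit:

# Arbitrary example showing where refit_time_ comes from.
import sklearn.datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

X, y = sklearn.datasets.load_iris(return_X_y=True)
search = GridSearchCV(RandomForestClassifier(), {"n_estimators": [5, 10]}, cv=3)
search.fit(X, y)

# refit_time_ is reported in seconds and is only present on fitted search
# objects (scikit-learn >= 0.20) created with refit=True (the default).
print(hasattr(search, "refit_time_"), search.refit_time_)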

openml/runs/functions.py

Lines changed: 7 additions & 10 deletions
@@ -271,7 +271,6 @@ def run_flow_on_task(
 
     # execute the run
     res = _run_task_get_arffcontent(
-        flow=flow,
         model=flow.model,
         task=task,
         extension=flow.extension,
@@ -432,7 +431,6 @@ def run_exists(task_id: int, setup_id: int) -> Set[int]:
 
 
 def _run_task_get_arffcontent(
-    flow: OpenMLFlow,
     model: Any,
     task: OpenMLTask,
     extension: "Extension",
@@ -476,7 +474,6 @@ def _run_task_get_arffcontent(
     job_rvals = Parallel(verbose=0, n_jobs=n_jobs)(
         delayed(_run_task_get_arffcontent_parallel_helper)(
            extension=extension,
-            flow=flow,
            fold_no=fold_no,
            model=model,
            rep_no=rep_no,
@@ -613,7 +610,6 @@ def _calculate_local_measure(sklearn_fn, openml_name):
 
 def _run_task_get_arffcontent_parallel_helper(
     extension: "Extension",
-    flow: OpenMLFlow,
     fold_no: int,
     model: Any,
     rep_no: int,
@@ -661,12 +657,13 @@ def _run_task_get_arffcontent_parallel_helper(
     else:
         raise NotImplementedError(task.task_type)
     config.logger.info(
-        "Going to execute flow '%s' on task %d for repeat %d fold %d sample %d.",
-        flow.name,
-        task.task_id,
-        rep_no,
-        fold_no,
-        sample_no,
+        "Going to run model {} on dataset {} for repeat {} fold {} sample {}".format(
+            str(model),
+            openml.datasets.get_dataset(task.dataset_id).name,
+            rep_no,
+            fold_no,
+            sample_no,
+        )
     )
     pred_y, proba_y, user_defined_measures_fold, trace, = extension._run_model_on_fold(
         model=model,
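The log message now names the model and the dataset instead of the flow, which is what allows the flow argument to be dropped from these helpers; resolving a human-readable dataset name goes through openml.datasets.get_dataset(task.dataset_id). A small usage sketch of that lookup; the task id is an arbitrary example, not taken from this commit:

# Arbitrary example of how the new log line resolves a dataset name.
import openml

task = openml.tasks.get_task(31)  # any existing task id works here
dataset_name = openml.datasets.get_dataset(task.dataset_id).name
print("Going to run model {} on dataset {}".format("SGDClassifier()", dataset_name))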

setup.py

Lines changed: 1 addition & 0 deletions
@@ -69,6 +69,7 @@
             "flaky",
             "pre-commit",
             "pytest-cov",
+            "pytest-rerunfailures",
             "mypy",
         ],
         "examples": [

tests/test_evaluations/__init__.py

Whitespace-only changes.

tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py

Lines changed: 10 additions & 3 deletions
@@ -1254,7 +1254,7 @@ def test_paralizable_check(self):
         # using this param distribution should raise an exception
         illegal_param_dist = {"base__n_jobs": [-1, 0, 1]}
         # using this param distribution should not raise an exception
-        legal_param_dist = {"base__max_depth": [2, 3, 4]}
+        legal_param_dist = {"n_estimators": [2, 3, 4]}
 
         legal_models = [
             sklearn.ensemble.RandomForestClassifier(),
@@ -1282,12 +1282,19 @@ def test_paralizable_check(self):
 
         can_measure_cputime_answers = [True, False, False, True, False, False, True, False, False]
         can_measure_walltime_answers = [True, True, False, True, True, False, True, True, False]
+        if LooseVersion(sklearn.__version__) < "0.20":
+            has_refit_time = [False, False, False, False, False, False, False, False, False]
+        else:
+            has_refit_time = [False, False, False, False, False, False, True, True, False]
 
-        for model, allowed_cputime, allowed_walltime in zip(
-            legal_models, can_measure_cputime_answers, can_measure_walltime_answers
+        X, y = sklearn.datasets.load_iris(return_X_y=True)
+        for model, allowed_cputime, allowed_walltime, refit_time in zip(
+            legal_models, can_measure_cputime_answers, can_measure_walltime_answers, has_refit_time
         ):
             self.assertEqual(self.extension._can_measure_cputime(model), allowed_cputime)
             self.assertEqual(self.extension._can_measure_wallclocktime(model), allowed_walltime)
+            model.fit(X, y)
+            self.assertEqual(refit_time, hasattr(model, "refit_time_"))
 
         for model in illegal_models:
             with self.assertRaises(PyOpenMLError):

tests/test_runs/test_run_functions.py

Lines changed: 11 additions & 20 deletions
@@ -10,6 +10,7 @@
 import unittest.mock
 
 import numpy as np
+import joblib
 from joblib import parallel_backend
 
 import openml
@@ -1187,13 +1188,10 @@ def test__run_task_get_arffcontent(self):
         num_folds = 10
         num_repeats = 1
 
-        flow = unittest.mock.Mock()
-        flow.name = "dummy"
         clf = make_pipeline(
             OneHotEncoder(handle_unknown="ignore"), SGDClassifier(loss="log", random_state=1)
         )
         res = openml.runs.functions._run_task_get_arffcontent(
-            flow=flow,
             extension=self.extension,
             model=clf,
             task=task,
@@ -1404,8 +1402,6 @@ def test_run_on_dataset_with_missing_labels_dataframe(self):
         # Check that _run_task_get_arffcontent works when one of the class
         # labels only declared in the arff file, but is not present in the
         # actual data
-        flow = unittest.mock.Mock()
-        flow.name = "dummy"
         task = openml.tasks.get_task(2)  # anneal; crossvalidation
 
         from sklearn.compose import ColumnTransformer
@@ -1420,7 +1416,6 @@ def test_run_on_dataset_with_missing_labels_dataframe(self):
         )  # build a sklearn classifier
 
         data_content, _, _, _ = _run_task_get_arffcontent(
-            flow=flow,
             model=model,
             task=task,
             extension=self.extension,
@@ -1442,8 +1437,6 @@ def test_run_on_dataset_with_missing_labels_array(self):
         # Check that _run_task_get_arffcontent works when one of the class
         # labels only declared in the arff file, but is not present in the
         # actual data
-        flow = unittest.mock.Mock()
-        flow.name = "dummy"
         task = openml.tasks.get_task(2)  # anneal; crossvalidation
         # task_id=2 on test server has 38 columns with 6 numeric columns
         cont_idx = [3, 4, 8, 32, 33, 34]
@@ -1465,7 +1458,6 @@ def test_run_on_dataset_with_missing_labels_array(self):
         )  # build a sklearn classifier
 
         data_content, _, _, _ = _run_task_get_arffcontent(
-            flow=flow,
             model=model,
             task=task,
             extension=self.extension,
@@ -1581,20 +1573,18 @@ def test_format_prediction_task_regression(self):
         LooseVersion(sklearn.__version__) < "0.21",
         reason="couldn't perform local tests successfully w/o bloating RAM",
     )
-    @unittest.mock.patch("openml.extensions.sklearn.SklearnExtension._run_model_on_fold")
+    @unittest.mock.patch("openml.extensions.sklearn.SklearnExtension._prevent_optimize_n_jobs")
     def test__run_task_get_arffcontent_2(self, parallel_mock):
         """ Tests if a run executed in parallel is collated correctly. """
         task = openml.tasks.get_task(7)  # Supervised Classification on kr-vs-kp
         x, y = task.get_X_and_y(dataset_format="dataframe")
         num_instances = x.shape[0]
         line_length = 6 + len(task.class_labels)
-        flow = unittest.mock.Mock()
-        flow.name = "dummy"
         clf = SGDClassifier(loss="log", random_state=1)
         n_jobs = 2
-        with parallel_backend("loky", n_jobs=n_jobs):
+        backend = "loky" if LooseVersion(joblib.__version__) > "0.11" else "multiprocessing"
+        with parallel_backend(backend, n_jobs=n_jobs):
             res = openml.runs.functions._run_task_get_arffcontent(
-                flow=flow,
                 extension=self.extension,
                 model=clf,
                 task=task,
@@ -1606,6 +1596,9 @@ def test__run_task_get_arffcontent_2(self, parallel_mock):
         # function _run_model_on_fold is being mocked out. However, for a new spawned worker, it
         # is not and the mock call_count should remain 0 while the subsequent check of actual
         # results should also hold, only on successful distribution of tasks to workers.
+        # The _prevent_optimize_n_jobs() is a function executed within the _run_model_on_fold()
+        # block and mocking this function doesn't affect rest of the pipeline, but is adequately
+        # indicative if _run_model_on_fold() is being called or not.
         self.assertEqual(parallel_mock.call_count, 0)
         self.assertIsInstance(res[0], list)
         self.assertEqual(len(res[0]), num_instances)
@@ -1638,13 +1631,12 @@ def test_joblib_backends(self, parallel_mock):
         x, y = task.get_X_and_y(dataset_format="dataframe")
         num_instances = x.shape[0]
         line_length = 6 + len(task.class_labels)
-        flow = unittest.mock.Mock()
-        flow.name = "dummy"
 
+        backend_choice = "loky" if LooseVersion(joblib.__version__) > "0.11" else "multiprocessing"
         for n_jobs, backend, len_time_stats, call_count in [
-            (1, "loky", 7, 10),
-            (2, "loky", 4, 10),
-            (-1, "loky", 1, 10),
+            (1, backend_choice, 7, 10),
+            (2, backend_choice, 4, 10),
+            (-1, backend_choice, 1, 10),
             (1, "threading", 7, 20),
             (-1, "threading", 1, 30),
             (1, "sequential", 7, 40),
@@ -1668,7 +1660,6 @@ def test_joblib_backends(self, parallel_mock):
             )
             with parallel_backend(backend, n_jobs=n_jobs):
                 res = openml.runs.functions._run_task_get_arffcontent(
-                    flow=flow,
                     extension=self.extension,
                     model=clf,
                     task=task,
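The tests now pick the joblib backend at runtime because the loky backend is only available in joblib releases newer than 0.11; older versions fall back to multiprocessing. A standalone sketch of the same selection outside the test suite; the parallel workload (squaring integers) is a placeholder:

# Placeholder workload demonstrating the version-dependent backend choice.
from distutils.version import LooseVersion

import joblib
from joblib import Parallel, delayed, parallel_backend


def square(x):
    return x * x


if __name__ == "__main__":
    # loky ships with joblib > 0.11; earlier releases fall back to multiprocessing.
    backend = "loky" if LooseVersion(joblib.__version__) > "0.11" else "multiprocessing"
    with parallel_backend(backend, n_jobs=2):
        results = Parallel()(delayed(square)(i) for i in range(10))
    print(backend, results)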

tests/test_study/__init__.py

Whitespace-only changes.

tests/test_study/test_study_functions.py

Lines changed: 4 additions & 2 deletions
@@ -4,6 +4,7 @@
 import openml.study
 from openml.testing import TestBase
 import pandas as pd
+import pytest
 
 
 class TestStudyFunctions(TestBase):
@@ -113,6 +114,7 @@ def test_publish_benchmark_suite(self):
         self.assertEqual(study_downloaded.status, "deactivated")
         # can't delete study, now it's not longer in preparation
 
+    @pytest.mark.flaky()
     def test_publish_study(self):
         # get some random runs to attach
         run_list = openml.evaluations.list_evaluations("predictive_accuracy", size=10)
@@ -133,8 +135,8 @@ def test_publish_study(self):
             run_ids=list(run_list.keys()),
         )
         study.publish()
-        # not tracking upload for delete since _delete_entity called end of function
-        # asserting return status from openml.study.delete_study()
+        TestBase._mark_entity_for_removal("study", study.id)
+        TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], study.id))
         self.assertGreater(study.id, 0)
         study_downloaded = openml.study.get_study(study.id)
         self.assertEqual(study_downloaded.alias, fixt_alias)
