Skip to content

Commit 3c4264b

Browse files
committed
CrossValidation & co.: ensure exec'd code was run as __main__
1 parent 22d7caa commit 3c4264b

File tree

1 file changed

+44
-14
lines changed

1 file changed

+44
-14
lines changed

Orange/evaluation/testing.py

Lines changed: 44 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ def __init__(self, data=None, nmethods=0, *, learners=None, train_data=None,
8585
store_data=False, store_models=False,
8686
domain=None, actual=None, row_indices=None,
8787
predicted=None, probabilities=None,
88-
preprocessor=None, callback=None):
88+
preprocessor=None, callback=None, n_jobs=-1):
8989
"""
9090
Construct an instance with default values: `None` for :obj:`data` and
9191
:obj:`models`.
@@ -120,10 +120,15 @@ def __init__(self, data=None, nmethods=0, *, learners=None, train_data=None,
120120
:param callback: Function for reporting back the progress as a value
121121
between 0 and 1
122122
:type callback: callable
123+
:param n_jobs: The number of processes to parallelize the evaluation
124+
on. -1 to parallelize on all but one CPU (the default). 1 for no
125+
parallelization.
126+
:type n_jobs: int
123127
"""
124128
self.store_data = store_data
125129
self.store_models = store_models
126130
self.dtype = np.float32
131+
self.n_jobs = max(1, joblib.cpu_count() - 1 if n_jobs < 0 else n_jobs)
127132

128133
self.models = None
129134
self.folds = None
@@ -313,7 +318,7 @@ def fit(self, train_data, test_data=None):
313318
self._prepare_arrays(test_data)
314319

315320
n_callbacks = len(self.learners) * len(self.indices)
316-
n_jobs = max(1, min(joblib.cpu_count(), n_callbacks) - 1)
321+
n_jobs = max(1, min(self.n_jobs, n_callbacks))
317322

318323
def _is_picklable(obj):
319324
try:
@@ -329,9 +334,33 @@ def _is_picklable(obj):
329334
# Workaround for NumPy locking on Macintosh.
330335
# https://pythonhosted.org/joblib/parallel.html#bad-interaction-of-multiprocessing-and-third-party-libraries
331336
mp_ctx = mp.get_context(
332-
'forkserver' if sys.platform == 'darwin' else None)
337+
'forkserver' if sys.platform == 'darwin' and n_jobs > 1 else None)
333338

334-
mp_queue = mp_ctx.Manager().Queue()
339+
try:
340+
# Use context-adapted Queue or just the regular Queue if no
341+
# multiprocessing (otherwise it fails, at least on Windows)
342+
mp_queue = mp_ctx.Manager().Queue() if n_jobs > 1 else mp.Queue()
343+
except (EOFError, RuntimeError):
344+
raise RuntimeError('''
345+
346+
Can't run multiprocessing code without a __main__ guard.
347+
348+
Multiprocessing strategies 'forkserver' (used by Orange's evaluation
349+
methods by default on Mac OS X) and 'spawn' (default on Windows)
350+
require the main code entry point be guarded with:
351+
352+
if __name__ == '__main__':
353+
import multiprocessing as mp
354+
mp.freeze_support() # Needed only on Windows
355+
... # Rest of your code
356+
... # See: https://docs.python.org/3/library/__main__.html
357+
358+
Otherwise, as the module is re-imported in another process, infinite
359+
recursion ensues.
360+
361+
Guard your executed code with above Python idiom, or pass n_jobs=1
362+
to evaluation methods, i.e. {}(..., n_jobs=1).
363+
'''.format(self.__class__.__name__)) from None
335364

336365
data_splits = (
337366
(fold_i, self.preprocessor(train_data[train_i]), test_data[test_i])
@@ -452,7 +481,8 @@ class CrossValidation(Results):
452481
453482
"""
454483
def __init__(self, data, learners, k=10, stratified=True, random_state=0, store_data=False,
455-
store_models=False, preprocessor=None, callback=None, warnings=None):
484+
store_models=False, preprocessor=None, callback=None, warnings=None,
485+
n_jobs=-1):
456486
self.k = k
457487
self.stratified = stratified
458488
self.random_state = random_state
@@ -463,7 +493,7 @@ def __init__(self, data, learners, k=10, stratified=True, random_state=0, store_
463493

464494
super().__init__(data, learners=learners, store_data=store_data,
465495
store_models=store_models, preprocessor=preprocessor,
466-
callback=callback)
496+
callback=callback, n_jobs=n_jobs)
467497

468498
def setup_indices(self, train_data, test_data):
469499
self.indices = None
@@ -485,10 +515,10 @@ class LeaveOneOut(Results):
485515
score_by_folds = False
486516

487517
def __init__(self, data, learners, store_data=False, store_models=False,
488-
preprocessor=None, callback=None):
518+
preprocessor=None, callback=None, n_jobs=-1):
489519
super().__init__(data, learners=learners, store_data=store_data,
490520
store_models=store_models, preprocessor=preprocessor,
491-
callback=callback)
521+
callback=callback, n_jobs=n_jobs)
492522

493523
def setup_indices(self, train_data, test_data):
494524
self.indices = skl_cross_validation.LeaveOneOut(len(test_data))
@@ -503,7 +533,7 @@ def prepare_arrays(self, test_data):
503533
class ShuffleSplit(Results):
504534
def __init__(self, data, learners, n_resamples=10, train_size=None,
505535
test_size=0.1, stratified=True, random_state=0, store_data=False,
506-
store_models=False, preprocessor=None, callback=None):
536+
store_models=False, preprocessor=None, callback=None, n_jobs=-1):
507537
self.n_resamples = n_resamples
508538
self.train_size = train_size
509539
self.test_size = test_size
@@ -512,7 +542,7 @@ def __init__(self, data, learners, n_resamples=10, train_size=None,
512542

513543
super().__init__(data, learners=learners, store_data=store_data,
514544
store_models=store_models, preprocessor=preprocessor,
515-
callback=callback)
545+
callback=callback, n_jobs=n_jobs)
516546

517547
def setup_indices(self, train_data, test_data):
518548
if self.stratified and test_data.domain.has_discrete_class:
@@ -532,11 +562,11 @@ class TestOnTestData(Results):
532562
Test on a separate test data set.
533563
"""
534564
def __init__(self, train_data, test_data, learners, store_data=False,
535-
store_models=False, preprocessor=None, callback=None):
565+
store_models=False, preprocessor=None, callback=None, n_jobs=-1):
536566
super().__init__(test_data, train_data=train_data, learners=learners,
537567
store_data=store_data,
538568
store_models=store_models, preprocessor=preprocessor,
539-
callback=callback)
569+
callback=callback, n_jobs=n_jobs)
540570

541571
def setup_indices(self, train_data, test_data):
542572
self.indices = ((Ellipsis, Ellipsis),)
@@ -553,14 +583,14 @@ class TestOnTrainingData(TestOnTestData):
553583
"""
554584

555585
def __init__(self, data, learners, store_data=False, store_models=False,
556-
preprocessor=None, callback=None):
586+
preprocessor=None, callback=None, n_jobs=-1):
557587

558588
if preprocessor is not None:
559589
data = preprocessor(data)
560590

561591
super().__init__(train_data=data, test_data=data, learners=learners,
562592
store_data=store_data, store_models=store_models,
563-
preprocessor=None, callback=callback)
593+
preprocessor=None, callback=callback, n_jobs=n_jobs)
564594
self.preprocessor = preprocessor
565595

566596

0 commit comments

Comments
 (0)