Skip to content

Commit a4c0937

Browse files
authored
Alternative to 1115 (#1124)
* Alternative to 1115
* fix meta-data generation
1 parent c2b0e73 commit a4c0937

File tree

2 files changed

+86
-88
lines changed

2 files changed

+86
-88
lines changed

autosklearn/smbo.py

Lines changed: 84 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -69,61 +69,73 @@
6969
}
7070

7171

72+
def get_send_warnings_to_logger(logger):
73+
def _send_warnings_to_log(message, category, filename, lineno, file, line):
74+
logger.debug('%s:%s: %s:%s', filename, lineno, category.__name__, message)
75+
return _send_warnings_to_log
76+
77+
7278
# metalearning helpers
7379
def _calculate_metafeatures(data_feat_type, data_info_task, basename,
74-
x_train, y_train, watcher, logger):
75-
# == Calculate metafeatures
76-
task_name = 'CalculateMetafeatures'
77-
watcher.start_task(task_name)
78-
categorical = [True if feat_type.lower() in ['categorical'] else False
79-
for feat_type in data_feat_type]
80+
x_train, y_train, watcher, logger_):
81+
with warnings.catch_warnings():
82+
warnings.showwarning = get_send_warnings_to_logger(logger_)
83+
84+
# == Calculate metafeatures
85+
task_name = 'CalculateMetafeatures'
86+
watcher.start_task(task_name)
87+
categorical = [True if feat_type.lower() in ['categorical'] else False
88+
for feat_type in data_feat_type]
89+
90+
EXCLUDE_META_FEATURES = EXCLUDE_META_FEATURES_CLASSIFICATION \
91+
if data_info_task in CLASSIFICATION_TASKS else EXCLUDE_META_FEATURES_REGRESSION
92+
93+
if data_info_task in [MULTICLASS_CLASSIFICATION, BINARY_CLASSIFICATION,
94+
MULTILABEL_CLASSIFICATION, REGRESSION,
95+
MULTIOUTPUT_REGRESSION]:
96+
logger_.info('Start calculating metafeatures for %s', basename)
97+
result = calculate_all_metafeatures_with_labels(
98+
x_train, y_train, categorical=categorical,
99+
dataset_name=basename,
100+
dont_calculate=EXCLUDE_META_FEATURES, logger=logger_)
101+
for key in list(result.metafeature_values.keys()):
102+
if result.metafeature_values[key].type_ != 'METAFEATURE':
103+
del result.metafeature_values[key]
80104

81-
EXCLUDE_META_FEATURES = EXCLUDE_META_FEATURES_CLASSIFICATION \
82-
if data_info_task in CLASSIFICATION_TASKS else EXCLUDE_META_FEATURES_REGRESSION
105+
else:
106+
result = None
107+
logger_.info('Metafeatures not calculated')
108+
watcher.stop_task(task_name)
109+
logger_.info(
110+
'Calculating Metafeatures (categorical attributes) took %5.2f',
111+
watcher.wall_elapsed(task_name))
112+
return result
83113

84-
if data_info_task in [MULTICLASS_CLASSIFICATION, BINARY_CLASSIFICATION,
85-
MULTILABEL_CLASSIFICATION, REGRESSION,
86-
MULTIOUTPUT_REGRESSION]:
87-
logger.info('Start calculating metafeatures for %s', basename)
88-
result = calculate_all_metafeatures_with_labels(
89-
x_train, y_train, categorical=categorical,
90-
dataset_name=basename,
91-
dont_calculate=EXCLUDE_META_FEATURES, logger=logger)
92-
for key in list(result.metafeature_values.keys()):
93-
if result.metafeature_values[key].type_ != 'METAFEATURE':
94-
del result.metafeature_values[key]
95114

96-
else:
97-
result = None
98-
logger.info('Metafeatures not calculated')
99-
watcher.stop_task(task_name)
100-
logger.info(
101-
'Calculating Metafeatures (categorical attributes) took %5.2f',
102-
watcher.wall_elapsed(task_name))
103-
return result
115+
def _calculate_metafeatures_encoded(data_feat_type, basename, x_train, y_train, watcher,
116+
task, logger_):
117+
with warnings.catch_warnings():
118+
warnings.showwarning = get_send_warnings_to_logger(logger_)
104119

120+
EXCLUDE_META_FEATURES = EXCLUDE_META_FEATURES_CLASSIFICATION \
121+
if task in CLASSIFICATION_TASKS else EXCLUDE_META_FEATURES_REGRESSION
105122

106-
def _calculate_metafeatures_encoded(data_feat_type, basename, x_train, y_train, watcher,
107-
task, logger):
108-
EXCLUDE_META_FEATURES = EXCLUDE_META_FEATURES_CLASSIFICATION \
109-
if task in CLASSIFICATION_TASKS else EXCLUDE_META_FEATURES_REGRESSION
123+
task_name = 'CalculateMetafeaturesEncoded'
124+
watcher.start_task(task_name)
125+
categorical = [True if feat_type.lower() in ['categorical'] else False
126+
for feat_type in data_feat_type]
110127

111-
task_name = 'CalculateMetafeaturesEncoded'
112-
watcher.start_task(task_name)
113-
categorical = [True if feat_type.lower() in ['categorical'] else False
114-
for feat_type in data_feat_type]
115-
116-
result = calculate_all_metafeatures_encoded_labels(
117-
x_train, y_train, categorical=categorical,
118-
dataset_name=basename, dont_calculate=EXCLUDE_META_FEATURES, logger=logger)
119-
for key in list(result.metafeature_values.keys()):
120-
if result.metafeature_values[key].type_ != 'METAFEATURE':
121-
del result.metafeature_values[key]
122-
watcher.stop_task(task_name)
123-
logger.info(
124-
'Calculating Metafeatures (encoded attributes) took %5.2fsec',
125-
watcher.wall_elapsed(task_name))
126-
return result
128+
result = calculate_all_metafeatures_encoded_labels(
129+
x_train, y_train, categorical=categorical,
130+
dataset_name=basename, dont_calculate=EXCLUDE_META_FEATURES, logger=logger_)
131+
for key in list(result.metafeature_values.keys()):
132+
if result.metafeature_values[key].type_ != 'METAFEATURE':
133+
del result.metafeature_values[key]
134+
watcher.stop_task(task_name)
135+
logger_.info(
136+
'Calculating Metafeatures (encoded attributes) took %5.2fsec',
137+
watcher.wall_elapsed(task_name))
138+
return result
127139

128140

129141
def _get_metalearning_configurations(meta_base, basename, metric,
@@ -286,11 +298,6 @@ def __init__(self, config_space, dataset_name,
286298
port=self.port,
287299
)
288300

289-
def _send_warnings_to_log(self, message, category, filename, lineno,
290-
file=None, line=None):
291-
self.logger.debug('%s:%s: %s:%s', filename, lineno, category.__name__,
292-
message)
293-
294301
def reset_data_manager(self, max_mem=None):
295302
if max_mem is None:
296303
max_mem = self.data_memory_limit
@@ -323,20 +330,6 @@ def collect_metalearning_suggestions(self, meta_base):
323330

324331
return metalearning_configurations
325332

326-
def _calculate_metafeatures(self):
327-
with warnings.catch_warnings():
328-
warnings.showwarning = self._send_warnings_to_log
329-
330-
meta_features = _calculate_metafeatures(
331-
data_feat_type=self.datamanager.feat_type,
332-
data_info_task=self.datamanager.info['task'],
333-
x_train=self.datamanager.data['X_train'],
334-
y_train=self.datamanager.data['Y_train'],
335-
basename=self.dataset_name,
336-
watcher=self.watcher,
337-
logger=self.logger)
338-
return meta_features
339-
340333
def _calculate_metafeatures_with_limits(self, time_limit):
341334
res = None
342335
time_limit = max(time_limit, 1)
@@ -348,37 +341,42 @@ def _calculate_metafeatures_with_limits(self, time_limit):
348341
grace_period_in_s=30,
349342
context=context,
350343
logger=self.logger)(
351-
self._calculate_metafeatures)
352-
res = safe_mf()
344+
_calculate_metafeatures)
345+
res = safe_mf(
346+
data_feat_type=self.datamanager.feat_type,
347+
data_info_task=self.datamanager.info['task'],
348+
x_train=self.datamanager.data['X_train'],
349+
y_train=self.datamanager.data['Y_train'],
350+
basename=self.dataset_name,
351+
watcher=self.watcher,
352+
logger_=self.logger
353+
)
353354
except Exception as e:
354355
self.logger.error('Error getting metafeatures: %s', str(e))
355356

356357
return res
357358

358-
def _calculate_metafeatures_encoded(self):
359-
with warnings.catch_warnings():
360-
warnings.showwarning = self._send_warnings_to_log
361-
362-
meta_features_encoded = _calculate_metafeatures_encoded(
363-
self.datamanager.feat_type,
364-
self.dataset_name,
365-
self.datamanager.data['X_train'],
366-
self.datamanager.data['Y_train'],
367-
self.watcher,
368-
self.datamanager.info['task'],
369-
self.logger)
370-
return meta_features_encoded
371-
372359
def _calculate_metafeatures_encoded_with_limits(self, time_limit):
373360
res = None
374361
time_limit = max(time_limit, 1)
375362
try:
363+
context = multiprocessing.get_context(self.pynisher_context)
364+
preload_modules(context)
376365
safe_mf = pynisher.enforce_limits(mem_in_mb=self.memory_limit,
377366
wall_time_in_s=int(time_limit),
378367
grace_period_in_s=30,
368+
context=context,
379369
logger=self.logger)(
380-
self._calculate_metafeatures_encoded)
381-
res = safe_mf()
370+
_calculate_metafeatures_encoded)
371+
res = safe_mf(
372+
data_feat_type=self.datamanager.feat_type,
373+
task=self.datamanager.info['task'],
374+
x_train=self.datamanager.data['X_train'],
375+
y_train=self.datamanager.data['Y_train'],
376+
basename=self.dataset_name,
377+
watcher=self.watcher,
378+
logger_=self.logger
379+
)
382380
except Exception as e:
383381
self.logger.error('Error getting metafeatures (encoded) : %s',
384382
str(e))
@@ -612,7 +610,7 @@ def get_metalearning_suggestions(self):
612610
meta_features_encoded = None
613611
else:
614612
with warnings.catch_warnings():
615-
warnings.showwarning = self._send_warnings_to_log
613+
warnings.showwarning = get_send_warnings_to_logger(self.logger)
616614
meta_features_encoded = \
617615
self._calculate_metafeatures_encoded_with_limits(
618616
metafeature_calculation_time_limit)
@@ -636,7 +634,7 @@ def get_metalearning_suggestions(self):
636634
inplace=True)
637635

638636
with warnings.catch_warnings():
639-
warnings.showwarning = self._send_warnings_to_log
637+
warnings.showwarning = get_send_warnings_to_logger(self.logger)
640638
metalearning_configurations = self.collect_metalearning_suggestions(
641639
meta_base)
642640
if metalearning_configurations is None:

scripts/03_calculate_metafeatures.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,13 @@ def calculate_metafeatures(task_id):
3838

3939
_metafeatures_labels = _calculate_metafeatures(
4040
x_train=X_train, y_train=y_train, data_feat_type=cat,
41-
data_info_task=task_type, basename=dataset_name, logger=logger,
41+
data_info_task=task_type, basename=dataset_name, logger_=logger,
4242
watcher=watch,
4343
)
4444

4545
_metafeatures_encoded_labels = _calculate_metafeatures_encoded(
4646
x_train=X_train, y_train=y_train, data_feat_type=cat,
47-
task=task_type, basename=dataset_name, logger=logger,
47+
task=task_type, basename=dataset_name, logger_=logger,
4848
watcher=watch,
4949
)
5050

0 commit comments

Comments
 (0)