77import inspect
88import json
99import json .decoder
10+ import logging
1011import re
1112import six
1213import warnings
@@ -92,7 +93,8 @@ def _is_cross_validator(o):
9293 return isinstance (o , sklearn .model_selection .BaseCrossValidator )
9394
9495
95- def flow_to_sklearn (o , components = None , initialize_with_defaults = False ):
96+ def flow_to_sklearn (o , components = None , initialize_with_defaults = False ,
97+ recursion_depth = 0 ):
9698 """Initializes a sklearn model based on a flow.
9799
98100 Parameters
@@ -108,11 +110,19 @@ def flow_to_sklearn(o, components=None, initialize_with_defaults=False):
108110 If this flag is set, the hyperparameter values of flows will be
109111 ignored and a flow with its defaults is returned.
110112
113+ recursion_depth : int
114+ The depth at which this flow is called, mostly for debugging
115+ purposes
116+
111117 Returns
112118 -------
113119 mixed
114120
115121 """
122+ logging .info ('-%s flow_to_sklearn START o=%s, components=%s, '
123+ 'init_defaults=%s' % ('-' * recursion_depth , o , components ,
124+ initialize_with_defaults ))
125+ depth_pp = recursion_depth + 1 # shortcut var, depth plus plus
116126
117127 # First, we need to check whether the presented object is a json string.
118128 # JSON strings are used to encoder parameter values. By passing around
@@ -139,10 +149,14 @@ def flow_to_sklearn(o, components=None, initialize_with_defaults=False):
139149 elif serialized_type == 'function' :
140150 rval = deserialize_function (value )
141151 elif serialized_type == 'component_reference' :
142- value = flow_to_sklearn (value )
152+ value = flow_to_sklearn (value , recursion_depth = depth_pp )
143153 step_name = value ['step_name' ]
144154 key = value ['key' ]
145- component = flow_to_sklearn (components [key ], initialize_with_defaults = initialize_with_defaults )
155+ component = flow_to_sklearn (
156+ components [key ],
157+ initialize_with_defaults = initialize_with_defaults ,
158+ recursion_depth = depth_pp
159+ )
146160 # The component is now added to where it should be used
147161 # later. It should not be passed to the constructor of the
148162 # main flow object.
@@ -154,25 +168,39 @@ def flow_to_sklearn(o, components=None, initialize_with_defaults=False):
154168 else :
155169 rval = (step_name , component , value ['argument_1' ])
156170 elif serialized_type == 'cv_object' :
157- rval = _deserialize_cross_validator (value )
171+ rval = _deserialize_cross_validator (
172+ value , recursion_depth = recursion_depth
173+ )
158174 else :
159175 raise ValueError ('Cannot flow_to_sklearn %s' % serialized_type )
160176
161177 else :
162- rval = OrderedDict ((flow_to_sklearn (key , components , initialize_with_defaults ),
163- flow_to_sklearn (value , components , initialize_with_defaults ))
178+ rval = OrderedDict ((flow_to_sklearn (key ,
179+ components ,
180+ initialize_with_defaults ,
181+ recursion_depth = depth_pp ),
182+ flow_to_sklearn (value ,
183+ components ,
184+ initialize_with_defaults ,
185+ recursion_depth = depth_pp ))
164186 for key , value in sorted (o .items ()))
165187 elif isinstance (o , (list , tuple )):
166- rval = [flow_to_sklearn (element , components , initialize_with_defaults ) for element in o ]
188+ rval = [flow_to_sklearn (element ,
189+ components ,
190+ initialize_with_defaults ,
191+ depth_pp ) for element in o ]
167192 if isinstance (o , tuple ):
168193 rval = tuple (rval )
169194 elif isinstance (o , (bool , int , float , six .string_types )) or o is None :
170195 rval = o
171196 elif isinstance (o , OpenMLFlow ):
172- rval = _deserialize_model (o , initialize_with_defaults )
197+ rval = _deserialize_model (o ,
198+ initialize_with_defaults ,
199+ recursion_depth = recursion_depth )
173200 else :
174201 raise TypeError (o )
175-
202+ logging .info ('-%s flow_to_sklearn END o=%s, rval=%s'
203+ % ('-' * recursion_depth , o , rval ))
176204 return rval
177205
178206
@@ -207,6 +235,143 @@ def openml_param_name_to_sklearn(openml_parameter, flow):
207235 return '__' .join (flow_structure [name ] + [openml_parameter .parameter_name ])
208236
209237
def obtain_parameter_values(flow):
    """Extract all parameter settings of the model inside a flow.

    The settings are returned in OpenML format.

    Parameters
    ----------
    flow : OpenMLFlow
        openml flow object (containing flow ids, i.e., it has to be
        downloaded from the server). Its ``model`` attribute is the model
        whose parameter values are read.

    Returns
    -------
    list
        A list of dicts, where each dict has the following names:
         - oml:name (str): The OpenML parameter name
         - oml:value (str): JSON representation of the parameter value
         - oml:component (int): flow id to which the parameter belongs

    Raises
    ------
    ValueError
        If the parameter names of a model do not match the parameters and
        components listed by its flow, or if a subcomponent entry does not
        have two or three elements.
    TypeError
        If a subcomponent entry has a non-string identifier, a flow that is
        not an OpenMLFlow, or a third element that is not a list.
    """

    # NOTE(review): presumably rejects flows that carry no server ids;
    # confirm against openml.flows.functions.
    openml.flows.functions._check_flow_for_server_id(flow)

    def get_flow_dict(_flow):
        # Map the name of this flow and of all nested subflows to flow ids.
        flow_map = {_flow.name: _flow.flow_id}
        for subflow in _flow.components:
            flow_map.update(get_flow_dict(_flow.components[subflow]))
        return flow_map

    def is_subcomponent_specification(values):
        # Checks whether the current value can be a specification of
        # subcomponents, as for example the value for the steps parameter
        # (in Pipeline) or the transformers parameter (in
        # ColumnTransformer). These are always lists/tuples of lists/tuples
        # with at least two elements, the second one being an OpenMLFlow.
        if not isinstance(values, (tuple, list)):
            return False
        for item in values:
            if not isinstance(item, (tuple, list)):
                return False
            if len(item) < 2:
                return False
            if not isinstance(item[1], openml.flows.OpenMLFlow):
                return False
        return True

    def serialize_subcomponents(subcomponents):
        # Replace every (name, flow[, argument]) entry by a JSON-serializable
        # component reference and return the resulting list.
        references = []
        for subcomponent in subcomponents:
            # scikit-learn stores usually tuples in the form
            # (name (str), subcomponent (mixed), argument (mixed)).
            # OpenML replaces the subcomponent by an OpenMLFlow object.
            if len(subcomponent) < 2 or len(subcomponent) > 3:
                raise ValueError('Component reference should be '
                                 'size {2,3}. ')

            subcomponent_identifier = subcomponent[0]
            subcomponent_flow = subcomponent[1]
            if not isinstance(subcomponent_identifier, six.string_types):
                raise TypeError('Subcomponent identifier should be '
                                'string')
            if not isinstance(subcomponent_flow, openml.flows.OpenMLFlow):
                raise TypeError('Subcomponent flow should be OpenMLFlow')

            reference = {
                "oml-python:serialized_object": "component_reference",
                "value": {
                    "key": subcomponent_identifier,
                    "step_name": subcomponent_identifier
                }
            }
            if len(subcomponent) == 3:
                if not isinstance(subcomponent[2], list):
                    raise TypeError('Subcomponent argument should be '
                                    'list')
                reference['value']['argument_1'] = subcomponent[2]
            references.append(reference)
        return references

    def extract_parameters(_flow, _flow_dict, component_model,
                           _main_call=False, main_id=None):
        # _flow is an openml flow object and _flow_dict maps flow names to
        # flow ids. For the main call the component id is taken from
        # main_id instead of _flow_dict, so the id can be overridden
        # (useful for unit tests / sentinels) and flows without subflows do
        # not have to rely on _flow_dict.

        # Read the model parameters once; every lookup below reuses them.
        model_params = component_model.get_params()

        exp_parameters = set(_flow.parameters)
        exp_components = set(_flow.components)
        # Names containing '__' address parameters of nested components;
        # only the model's own parameters are compared against the flow.
        model_parameters = {mp for mp in model_params if '__' not in mp}
        expected = exp_parameters | exp_components
        if expected != model_parameters:
            raise ValueError('Parameters of the model do not match the '
                             'parameters expected by the '
                             'flow:\n expected flow parameters: '
                             '%s\n model parameters: %s'
                             % (sorted(expected),
                                sorted(model_parameters)))

        _params = []
        for _param_name in _flow.parameters:
            current_param_values = openml.flows.sklearn_to_flow(
                model_params[_param_name])

            # Try to filter out components (a.k.a. subflows) which are
            # handled further down in the code (by recursively calling
            # this function)!
            if isinstance(current_param_values, openml.flows.OpenMLFlow):
                continue

            if is_subcomponent_specification(current_param_values):
                # complex parameter value, with subcomponents
                parsed_values = json.dumps(
                    serialize_subcomponents(current_param_values))
            else:
                # vanilla parameter value
                parsed_values = json.dumps(current_param_values)

            _current = OrderedDict()
            _current['oml:name'] = _param_name
            _current['oml:value'] = parsed_values
            if _main_call:
                _current['oml:component'] = main_id
            else:
                _current['oml:component'] = _flow_dict[_flow.name]
            _params.append(_current)

        # Recurse into the subflows, pairing each with the model object
        # stored under the same name in the parent model's parameters.
        for _identifier in _flow.components:
            subcomponent_model = model_params[_identifier]
            _params.extend(extract_parameters(_flow.components[_identifier],
                                              _flow_dict,
                                              subcomponent_model))
        return _params

    flow_dict = get_flow_dict(flow)
    return extract_parameters(flow, flow_dict, flow.model,
                              True, flow.flow_id)
373+
374+
210375def _serialize_model (model ):
211376 """Create an OpenMLFlow.
212377
@@ -466,8 +631,8 @@ def _get_fn_arguments_with_defaults(fn_name):
466631 return params_with_defaults , params_without_defaults
467632
468633
469- def _deserialize_model (flow , keep_defaults ):
470-
634+ def _deserialize_model (flow , keep_defaults , recursion_depth ):
635+ logging . info ( '-%s deserialize %s' % ( '-' * recursion_depth , flow . name ))
471636 model_name = flow .class_name
472637 _check_dependencies (flow .dependencies )
473638
@@ -484,7 +649,12 @@ def _deserialize_model(flow, keep_defaults):
484649
485650 for name in parameters :
486651 value = parameters .get (name )
487- rval = flow_to_sklearn (value , components = components_ , initialize_with_defaults = keep_defaults )
652+ logging .info ('--%s flow_parameter=%s, value=%s' %
653+ ('-' * recursion_depth , name , value ))
654+ rval = flow_to_sklearn (value ,
655+ components = components_ ,
656+ initialize_with_defaults = keep_defaults ,
657+ recursion_depth = recursion_depth + 1 )
488658 parameter_dict [name ] = rval
489659
490660 for name in components :
@@ -493,7 +663,10 @@ def _deserialize_model(flow, keep_defaults):
493663 if name not in components_ :
494664 continue
495665 value = components [name ]
496- rval = flow_to_sklearn (value , ** kwargs )
666+ logging .info ('--%s flow_component=%s, value=%s'
667+ % ('-' * recursion_depth , name , value ))
668+ rval = flow_to_sklearn (value ,
669+ recursion_depth = recursion_depth + 1 )
497670 parameter_dict [name ] = rval
498671
499672 module_name = model_name .rsplit ('.' , 1 )
@@ -723,15 +896,17 @@ def check(param_grid, restricted_parameter_name, legal_values):
723896 return check (model .get_params (), 'n_jobs' , [1 , None ])
724897
725898
def _deserialize_cross_validator(value, recursion_depth):
    """Instantiate a cross-validator from its deserialized description.

    Parameters
    ----------
    value : dict
        Mapping with the keys ``'name'`` (dotted import path of the class,
        i.e. ``<module path>.<class name>``) and ``'parameters'`` (mapping
        from constructor argument names to serialized values).
    recursion_depth : int
        Depth of the calling ``flow_to_sklearn`` invocation;
        ``recursion_depth + 1`` is passed on to the nested calls.

    Returns
    -------
    mixed
        The result of calling the imported class with the deserialized
        parameters as keyword arguments.
    """
    model_name = value['name']
    serialized_parameters = value['parameters']

    module_name = model_name.rsplit('.', 1)
    model_class = getattr(importlib.import_module(module_name[0]),
                          module_name[1])

    # Collect the deserialized values in a new dict so that the caller's
    # ``value['parameters']`` mapping is left untouched.
    parameters = {}
    for parameter in serialized_parameters:
        parameters[parameter] = flow_to_sklearn(
            serialized_parameters[parameter],
            recursion_depth=recursion_depth + 1
        )
    return model_class(**parameters)
736911
737912
0 commit comments