5151from bigml .multivotelist import MultiVoteList
5252from bigml .util import cast , check_no_missing_numerics , use_cache , load , \
5353 dump , dumps , NUMERIC
54+ from bigml .constants import DECIMALS
5455from bigml .supervised import SupervisedModel
5556from bigml .modelfields import ModelFields
5657from bigml .tree_utils import add_distribution
@@ -248,7 +249,7 @@ def predict_probability(self, input_data,
248249 each possible output class, based on input values. The input
249250 fields must be a dictionary keyed by field name or field ID.
250251
251- For regressions, the output is a single element list
252+ For regressions, the output is a single element
252253 containing the prediction.
253254
254255 :param input_data: Input data to be predicted
@@ -264,6 +265,7 @@ def predict_probability(self, input_data,
264265 if not self .missing_numerics :
265266 check_no_missing_numerics (input_data , self .model_fields )
266267
268+ weights = []
267269 for models_split in self .models_splits :
268270 models = []
269271 for model in models_split :
@@ -287,35 +289,34 @@ def predict_probability(self, input_data,
287289 continue
288290 if self .regression :
289291 prediction = prediction [0 ]
290- if self .weights is not None :
291- prediction = self .weigh (prediction , model .resource_id )
292- else :
293- if self .weights is not None :
294- prediction = self .weigh ( \
295- prediction , model .resource_id )
296- # we need to check that all classes in the fusion
297- # are also in the composing model
298- if not self .regression and \
299- self .class_names != model .class_names :
300- try :
301- prediction = rearrange_prediction ( \
302- model .class_names ,
303- self .class_names ,
304- prediction )
305- except AttributeError :
306- # class_names should be defined, but just in case
307- pass
292+ if self .weights is not None :
293+ weights .append (1 if not self .weights else self .weights [
294+ self .model_ids .index (model .resource_id )])
295+ prediction = self .weigh (prediction , model .resource_id )
296+ # we need to check that all classes in the fusion
297+ # are also in the composing model
298+ if not self .regression and \
299+ self .class_names != model .class_names :
300+ try :
301+ prediction = rearrange_prediction ( \
302+ model .class_names ,
303+ self .class_names ,
304+ prediction )
305+ except AttributeError :
306+ # class_names should be defined, but just in case
307+ pass
308308 votes_split .append (prediction )
309309 votes .extend (votes_split )
310310 if self .regression :
311- total_weight = len (votes .predictions ) if self .weights is None \
312- else sum (self .weights )
313- prediction = sum (votes .predictions ) / float (total_weight )
311+ prediction = 0
312+ total_weight = sum (weights )
313+ for index , pred in enumerate (votes .predictions ):
314+ prediction += pred # the weight is already considered in pred
315+ prediction /= float (total_weight )
314316 if compact :
315317 output = [prediction ]
316318 else :
317319 output = {"prediction" : prediction }
318-
319320 else :
320321 output = votes .combine_to_distribution (normalize = True )
321322 if not compact :
@@ -326,6 +327,97 @@ def predict_probability(self, input_data,
326327
327328 return output
328329
def predict_confidence(self, input_data,
                       missing_strategy=LAST_PREDICTION,
                       compact=False):
    """Predicts the confidence associated to the fusion's prediction.

    For classification fusions, a confidence is computed for each
    possible output class, based on input values. The input fields
    must be a dictionary keyed by field name or field ID.

    For regressions, the output contains the weighted prediction and
    the average of the confidences of the component models.

    WARNING: Only decision-tree based models in the Fusion object will
    have an associated confidence, so the result for fusions where no
    component model produces a confidence is None.

    :param input_data: Input data to be predicted
    :param missing_strategy: LAST_PREDICTION|PROPORTIONAL missing strategy
                             for missing fields
    :param compact: For classifications, if False, the result is a list
                    of maps, one per class, with the keys "category" and
                    "confidence" mapped to the name of the class and its
                    confidence, respectively. If True, returns the list
                    of confidences in the order of self.class_names.
                    For regressions, if False, the result is a map with
                    the keys "prediction" and "confidence". If True,
                    returns the list [prediction, confidence].
    :return: The structure described in `compact`, or None when none of
             the component models could provide a confidence.
    """
    if not self.missing_numerics:
        check_no_missing_numerics(input_data, self.model_fields)

    predictions = []
    weights = []
    for models_split in self.models_splits:
        for model in models_split:
            # the resource type must be evaluated for every component
            # model: it decides both which local class wraps the model
            # and whether `missing_strategy` is an accepted argument
            model_type = get_resource_type(model)
            if model_type == "fusion":
                local_model = Fusion(model, api=self.api)
            else:
                local_model = SupervisedModel(model, api=self.api)
            kwargs = {"compact": False}
            if model_type in ["model", "ensemble", "fusion"]:
                kwargs["missing_strategy"] = missing_strategy
            try:
                prediction = local_model.predict_confidence(
                    input_data, **kwargs)
            except Exception:
                # best-effort on purpose: logistic regressions can raise
                # an error if they have missing_numerics=False and some
                # numeric missings are found, and Linear Regressions
                # have no confidence. Those models are left out.
                continue
            predictions.append(prediction)
            weights.append(1 if not self.weights else self.weights[
                self.model_ids.index(local_model.resource_id)])

    if not predictions:
        # no component model was able to produce a confidence
        return None

    if self.regression:
        total_weight = float(sum(weights))
        # the prediction is weighted, the confidence is a plain average
        prediction = sum(
            pred.get("prediction") * weight
            for pred, weight in zip(predictions, weights)) / total_weight
        confidence = sum(
            pred.get("confidence")
            for pred in predictions) / float(len(predictions))
        if compact:
            return [prediction, confidence]
        return {"prediction": prediction, "confidence": confidence}

    output = self._combine_confidences(predictions)
    if not compact:
        output = [{'category': class_name,
                   'confidence': confidence}
                  for class_name, confidence in
                  zip(self.class_names, output)]
    return output
406+
def _combine_confidences(self, predictions):
    """Combining the confidences per class of classification models

    :param predictions: List with one entry per component model. Each
                        entry is a list of maps that contain the keys
                        "category" and "confidence".
    :return: List with one value per class, in the order of
             self.class_names. Each value is the confidence for the
             class averaged over all the entries in `predictions` and
             rounded to DECIMALS digits. A model that does not report a
             class adds nothing for it but still counts in the average.
             When `predictions` is empty there is nothing to average and
             every value is None.
    """
    if not predictions:
        # avoids dividing by zero and keeps the one-value-per-class shape
        return [None] * len(self.class_names)

    count = float(len(predictions))
    output = []
    for class_name in self.class_names:
        # only the first matching category of each model is used
        total = sum(
            next((category_info.get("confidence")
                  for category_info in prediction
                  if category_info["category"] == class_name), 0)
            for prediction in predictions)
        output.append(round(total / count, DECIMALS))
    return output
420+
329421 def weigh (self , prediction , model_id ):
330422 """Weighs the prediction according to the weight associated to the
331423 current model in the fusion.
@@ -421,16 +513,28 @@ def _predict(self, input_data, missing_strategy=LAST_PREDICTION,
421513 missing_strategy = missing_strategy ,
422514 operating_point = operating_point )
423515 return prediction
424-
425516 result = self .predict_probability ( \
426517 input_data ,
427518 missing_strategy = missing_strategy ,
428519 compact = False )
520+ confidence_result = self .predict_confidence ( \
521+ input_data ,
522+ missing_strategy = missing_strategy ,
523+ compact = False )
429524
430525 if not self .regression :
526+ try :
527+ for index , value in enumerate (result ):
528+ result [index ].update (
529+ {"confidence" : confidence_result [index ]["confidence" ]})
530+ except Exception as exc :
531+ pass
431532 result = sorted (result , key = lambda x : - x ["probability" ])[0 ]
432533 result ["prediction" ] = result ["category" ]
433534 del result ["category" ]
535+ else :
536+ result .update (
537+ {"confidence" : confidence_result ["confidence" ]})
434538
435539 # adding unused fields, if any
436540 if unused_fields :
0 commit comments