Skip to content

Commit fcaf51b

Browse files
committed
Adding confidence to the output of local Fusions
1 parent 66c1e9b commit fcaf51b

File tree

9 files changed

+190
-29
lines changed

9 files changed

+190
-29
lines changed

HISTORY.rst

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,16 @@
33
History
44
-------
55

6+
9.5.0 (2023-06-16)
7+
------------------
8+
9+
- Extending Local Fusions output to include confidence.
10+
611
9.4.0 (2023-06-14)
712
------------------
813

914
- Extending LocalModel class to handle Time Series locally.
1015

11-
1216
9.3.0 (2023-06-09)
1317
------------------
1418

bigml/deepnet.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -397,6 +397,8 @@ def predict(self, input_data, operating_point=None, operating_kind=None,
397397
if not isinstance(prediction, dict):
398398
prediction = {"prediction": round(prediction, DECIMALS)}
399399
prediction.update({"unused_fields": unused_fields})
400+
if "probability" in prediction:
401+
prediction["confidence"] = prediction.get("probability")
400402
else:
401403
if isinstance(prediction, dict):
402404
prediction = prediction["prediction"]
@@ -489,6 +491,16 @@ def predict_probability(self, input_data, compact=False):
489491
return [category['probability'] for category in distribution]
490492
return distribution
491493

494+
def predict_confidence(self, input_data, compact=False):
    """Returns the predicted probabilities relabeled as confidences.

    When `compact` is set, or the deepnet is a regression, the output of
    `predict_probability` is handed back untouched. Otherwise every entry
    of the returned distribution is rebuilt keeping its "category" and
    storing its "probability" value under the "confidence" key.
    """
    passthrough = compact or self.regression
    probabilities = self.predict_probability(input_data, compact=compact)
    if passthrough:
        return probabilities
    confidences = []
    for info in probabilities:
        confidences.append({"category": info["category"],
                            "confidence": info["probability"]})
    return confidences
503+
492504
#pylint: disable=locally-disabled,invalid-name
493505
def _sort_predictions(self, a, b, criteria):
494506
"""Sorts the categories in the predicted node according to the
@@ -516,6 +528,8 @@ def predict_operating_kind(self, input_data, operating_kind=None):
516528
prediction = predictions[0]
517529
prediction["prediction"] = prediction["category"]
518530
del prediction["category"]
531+
if "probability" in prediction:
532+
prediction["confidence"] = prediction.get("probability")
519533
return prediction
520534

521535
def predict_operating(self, input_data, operating_point=None):
@@ -543,6 +557,8 @@ def predict_operating(self, input_data, operating_point=None):
543557
prediction = prediction[0]
544558
prediction["prediction"] = prediction["category"]
545559
del prediction["category"]
560+
if "probability" in prediction:
561+
prediction["confidence"] = prediction.get("probability")
546562
return prediction
547563

548564
def data_transformations(self):

bigml/ensemble.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -860,6 +860,8 @@ def predict(self, input_data, method=None,
860860
set(prediction.get("unused_fields", [])))
861861
if not isinstance(result, dict):
862862
result = {"prediction": round(result, DECIMALS)}
863+
if "probability" in result and "confidence" not in result:
864+
result["confidence"] = result["probability"]
863865
result['unused_fields'] = list(unused_fields)
864866

865867
return result

bigml/fusion.py

Lines changed: 128 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
from bigml.multivotelist import MultiVoteList
5252
from bigml.util import cast, check_no_missing_numerics, use_cache, load, \
5353
dump, dumps, NUMERIC
54+
from bigml.constants import DECIMALS
5455
from bigml.supervised import SupervisedModel
5556
from bigml.modelfields import ModelFields
5657
from bigml.tree_utils import add_distribution
@@ -248,7 +249,7 @@ def predict_probability(self, input_data,
248249
each possible output class, based on input values. The input
249250
fields must be a dictionary keyed by field name or field ID.
250251
251-
For regressions, the output is a single element list
252+
For regressions, the output is a single element
252253
containing the prediction.
253254
254255
:param input_data: Input data to be predicted
@@ -264,6 +265,7 @@ def predict_probability(self, input_data,
264265
if not self.missing_numerics:
265266
check_no_missing_numerics(input_data, self.model_fields)
266267

268+
weights = []
267269
for models_split in self.models_splits:
268270
models = []
269271
for model in models_split:
@@ -287,35 +289,34 @@ def predict_probability(self, input_data,
287289
continue
288290
if self.regression:
289291
prediction = prediction[0]
290-
if self.weights is not None:
291-
prediction = self.weigh(prediction, model.resource_id)
292-
else:
293-
if self.weights is not None:
294-
prediction = self.weigh( \
295-
prediction, model.resource_id)
296-
# we need to check that all classes in the fusion
297-
# are also in the composing model
298-
if not self.regression and \
299-
self.class_names != model.class_names:
300-
try:
301-
prediction = rearrange_prediction( \
302-
model.class_names,
303-
self.class_names,
304-
prediction)
305-
except AttributeError:
306-
# class_names should be defined, but just in case
307-
pass
292+
if self.weights is not None:
293+
weights.append(1 if not self.weights else self.weights[
294+
self.model_ids.index(model.resource_id)])
295+
prediction = self.weigh(prediction, model.resource_id)
296+
# we need to check that all classes in the fusion
297+
# are also in the composing model
298+
if not self.regression and \
299+
self.class_names != model.class_names:
300+
try:
301+
prediction = rearrange_prediction( \
302+
model.class_names,
303+
self.class_names,
304+
prediction)
305+
except AttributeError:
306+
# class_names should be defined, but just in case
307+
pass
308308
votes_split.append(prediction)
309309
votes.extend(votes_split)
310310
if self.regression:
311-
total_weight = len(votes.predictions) if self.weights is None \
312-
else sum(self.weights)
313-
prediction = sum(votes.predictions) / float(total_weight)
311+
prediction = 0
312+
total_weight = sum(weights)
313+
for index, pred in enumerate(votes.predictions):
314+
prediction += pred # the weight is already considered in pred
315+
prediction /= float(total_weight)
314316
if compact:
315317
output = [prediction]
316318
else:
317319
output = {"prediction": prediction}
318-
319320
else:
320321
output = votes.combine_to_distribution(normalize=True)
321322
if not compact:
@@ -326,6 +327,97 @@ def predict_probability(self, input_data,
326327

327328
return output
328329

330+
def predict_confidence(self, input_data,
                       missing_strategy=LAST_PREDICTION,
                       compact=False):
    """For classification models, predicts a confidence for
    each possible output class, based on input values. The input
    fields must be a dictionary keyed by field name or field ID.

    For regressions, the output is a single element
    containing the prediction and the associated confidence.

    WARNING: Component models whose `predict_confidence` call raises
    (e.g. models that offer no confidence) are skipped. If none of the
    component models produces a confidence, the confidence values in the
    result are None instead of raising a division error.

    :param input_data: Input data to be predicted
    :param missing_strategy: LAST_PREDICTION|PROPORTIONAL missing strategy
                             for missing fields
    :param compact: If False, a classification is returned as a list of
                    maps, one per class, with the keys "category" and
                    "confidence" mapped to the name of the class and its
                    confidence, respectively, and a regression as a map
                    with the keys "prediction" and "confidence".
                    If True, a classification is returned as a list of
                    confidences following the order of
                    `self.class_names`, and a regression as the list
                    [prediction, confidence].
    """
    if not self.missing_numerics:
        check_no_missing_numerics(input_data, self.model_fields)

    predictions = []
    weights = []
    for models_split in self.models_splits:
        # Each local model is stored with its own resource type. Reusing
        # the type of the last model built in the split would forward (or
        # drop) `missing_strategy` for the wrong models.
        models = []
        for model in models_split:
            model_type = get_resource_type(model)
            if model_type == "fusion":
                models.append((model_type, Fusion(model, api=self.api)))
            else:
                models.append(
                    (model_type, SupervisedModel(model, api=self.api)))
        for model_type, model in models:
            kwargs = {"compact": False}
            if model_type in ["model", "ensemble", "fusion"]:
                kwargs["missing_strategy"] = missing_strategy
            try:
                prediction = model.predict_confidence(input_data, **kwargs)
            except Exception:
                # logistic regressions can raise this error if they
                # have missing_numerics=False and some numeric missings
                # are found and Linear Regressions have no confidence
                continue
            predictions.append(prediction)
            weights.append(1 if not self.weights else self.weights[
                self.model_ids.index(model.resource_id)])

    if self.regression:
        if predictions:
            total_weight = float(sum(weights))
            prediction = sum(
                pred.get("prediction") * weight
                for pred, weight in zip(predictions, weights)) / total_weight
            # The confidence is a plain (unweighted) average. Models that
            # report no confidence are left out of it.
            confidences = [pred.get("confidence") for pred in predictions
                           if pred.get("confidence") is not None]
            confidence = sum(confidences) / float(len(confidences)) \
                if confidences else None
        else:
            # no component model could be used
            prediction = None
            confidence = None
        if compact:
            return [prediction, confidence]
        return {"prediction": prediction, "confidence": confidence}

    if predictions:
        output = self._combine_confidences(predictions)
    else:
        # no component model could be used: no confidence is available
        output = [None for _ in self.class_names]
    if not compact:
        output = [{'category': class_name,
                   'confidence': confidence}
                  for class_name, confidence in
                  zip(self.class_names, output)]
    return output
406+
407+
def _combine_confidences(self, predictions):
    """Combining the confidences per class of classification models.

    :param predictions: list with one element per component model, each
                        being a list of maps that contain the "category"
                        and "confidence" keys
    :return: list of averaged confidences, rounded to DECIMALS places and
             ordered like `self.class_names`. A class that a model does
             not report, or reports without confidence, adds nothing to
             the sum but the model still counts in the average. When
             `predictions` is empty, a list of None (one per class) is
             returned instead of dividing by zero.
    """
    if not predictions:
        return [None for _ in self.class_names]
    count = float(len(predictions))
    output = []
    for class_name in self.class_names:
        confidence = 0
        for prediction in predictions:
            for category_info in prediction:
                if category_info["category"] == class_name:
                    # a missing/None confidence contributes nothing
                    confidence += category_info.get("confidence") or 0
                    break
        output.append(round(confidence / count, DECIMALS))
    return output
420+
329421
def weigh(self, prediction, model_id):
330422
"""Weighs the prediction according to the weight associated to the
331423
current model in the fusion.
@@ -421,16 +513,28 @@ def _predict(self, input_data, missing_strategy=LAST_PREDICTION,
421513
missing_strategy=missing_strategy,
422514
operating_point=operating_point)
423515
return prediction
424-
425516
result = self.predict_probability( \
426517
input_data,
427518
missing_strategy=missing_strategy,
428519
compact=False)
520+
confidence_result = self.predict_confidence( \
521+
input_data,
522+
missing_strategy=missing_strategy,
523+
compact=False)
429524

430525
if not self.regression:
526+
try:
527+
for index, value in enumerate(result):
528+
result[index].update(
529+
{"confidence": confidence_result[index]["confidence"]})
530+
except Exception as exc:
531+
pass
431532
result = sorted(result, key=lambda x: - x["probability"])[0]
432533
result["prediction"] = result["category"]
433534
del result["category"]
535+
else:
536+
result.update(
537+
{"confidence": confidence_result["confidence"]})
434538

435539
# adding unused fields, if any
436540
if unused_fields:

bigml/logistic.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,17 @@ def predict_probability(self, input_data, compact=False):
264264
return [category['probability'] for category in distribution]
265265
return distribution
266266

267+
def predict_confidence(self, input_data, compact=False):
    """Reports the logistic regression probabilities as confidences.

    With `compact` set, the output of `predict_probability` is returned
    unchanged. Otherwise each entry of the distribution is rebuilt keeping
    its "category" and storing its "probability" under "confidence".
    """
    probabilities = self.predict_probability(input_data, compact=compact)
    if compact:
        return probabilities
    confidences = []
    for info in probabilities:
        confidences.append({"category": info["category"],
                            "confidence": info["probability"]})
    return confidences
277+
267278
def predict_operating(self, input_data,
268279
operating_point=None):
269280
"""Computes the prediction based on a user-given operating point.
@@ -290,6 +301,7 @@ def predict_operating(self, input_data,
290301
prediction = prediction[0]
291302
prediction["prediction"] = prediction["category"]
292303
del prediction["category"]
304+
prediction['confidence'] = prediction['probability']
293305
return prediction
294306

295307
def predict_operating_kind(self, input_data,
@@ -310,6 +322,7 @@ def predict_operating_kind(self, input_data,
310322
prediction = predictions[0]
311323
prediction["prediction"] = prediction["category"]
312324
del prediction["category"]
325+
prediction['confidence'] = prediction['probability']
313326
return prediction
314327

315328
#pylint: disable=locally-disabled,consider-using-dict-items
@@ -422,7 +435,8 @@ def predict(self, input_data,
422435
for category, probability in predictions]}
423436

424437
if full:
425-
result.update({'unused_fields': unused_fields})
438+
result.update({'unused_fields': unused_fields, 'confidence':
439+
result['probability']})
426440
else:
427441
result = result["prediction"]
428442

bigml/supervised.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,16 @@ def predict_probability(self, *args, **kwargs):
154154
del new_kwargs["missing_strategy"]
155155
return self.local_model.predict_probability(*args, **new_kwargs)
156156

157+
def predict_confidence(self, *args, **kwargs):
    """Delegating method to local model object.

    The call is forwarded as given. If it raises TypeError and a
    `missing_strategy` keyword was passed, the call is retried without
    that keyword, as the wrapped model may not accept it. A TypeError
    raised when no `missing_strategy` was passed is propagated unchanged
    (it used to be masked by a KeyError).
    """
    try:
        return self.local_model.predict_confidence(*args, **kwargs)
    except TypeError:
        if "missing_strategy" not in kwargs:
            # nothing to strip: this is a genuine error
            raise
    new_kwargs = {key: value for key, value in kwargs.items()
                  if key != "missing_strategy"}
    return self.local_model.predict_confidence(*args, **new_kwargs)
166+
157167
def data_transformations(self):
158168
"""Returns the pipeline transformations previous to the modeling
159169
step as a pipeline, so that they can be used in local predictions.

bigml/tests/compare_predictions_steps.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,14 @@ def the_local_probability_is(step, probability):
474474
eq_(local_probability, probability, precision=4)
475475

476476

477+
def the_local_confidence_is(step, confidence):
    """Step: the local confidence is <confidence>

    Reads the "confidence" of the local prediction stored in `step.bigml`
    and compares it to the expected value (converted to float when given
    as a string) using a precision of 4.
    """
    local_confidence = step.bigml["local_prediction"]["confidence"]
    expected = float(confidence) if isinstance(confidence, str) \
        else confidence
    eq_(local_confidence, expected, precision=4)
483+
484+
477485
def eq_local_and_remote_probability(step):
478486
"""Step: check local and remote probability"""
479487
local_probability = round(step.bigml["local_prediction"]["probability"], 3)

bigml/tests/test_39_optiml_fusion.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -252,17 +252,18 @@ def test_scenario4(self):
252252
And I create a local fusion prediction for "<input_data>"
253253
Then the local fusion prediction is "<prediction>"
254254
And the local fusion probability for the prediction is "<probability>"
255+
And the local fusion confidence for the prediction is "<confidence>"
255256
"""
256257
show_doc(self.test_scenario4)
257258
headers = ["data", "source_wait", "dataset_wait", "model_wait",
258259
"fusion_wait", "model_conf", "tag", "input_data",
259-
"objective_id", "prediction", "probability"]
260+
"objective_id", "prediction", "probability", "confidence"]
260261
examples = [
261262
['data/iris.csv', '10', '10', '30', '30',
262263
'{"tags":["my_fusion_4_tag"], "missing_numerics": true}',
263264
'my_fusion_4_tag',
264265
'{"petal width": 1.75, "petal length": 2.45}', "000004",
265-
"Iris-setosa", '0.4726']]
266+
"Iris-setosa", '0.4726', '0.4726']]
266267
for example in examples:
267268
example = dict(zip(headers, example))
268269
show_method(self, self.bigml["method"], example)
@@ -299,6 +300,8 @@ def test_scenario4(self):
299300
self, example["prediction"])
300301
compare_pred.the_local_probability_is(
301302
self, example["probability"])
303+
compare_pred.the_local_confidence_is(
304+
self, example["confidence"])
302305

303306
def test_scenario5(self):
304307
"""

bigml/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = '9.4.0'
1+
__version__ = '9.5.0'

0 commit comments

Comments
 (0)