Commit 47fd12b

add: functions for calculating utility measures directly from classification probabilities
1 parent ee48499 commit 47fd12b

2 files changed: +66 -0 lines changed

modAL/uncertainty.py

Lines changed: 48 additions & 0 deletions
@@ -12,6 +12,54 @@
 from modAL.utils.data import modALinput


+def _proba_uncertainty(proba: np.ndarray) -> np.ndarray:
+    """
+    Calculates the uncertainty of the prediction probabilities.
+
+    Args:
+        proba: Prediction probabilities.
+
+    Returns:
+        Uncertainty of the prediction probabilities.
+    """
+
+    return 1 - np.max(proba, axis=1)
+
+
+def _proba_margin(proba: np.ndarray) -> np.ndarray:
+    """
+    Calculates the margin of the prediction probabilities.
+
+    Args:
+        proba: Prediction probabilities.
+
+    Returns:
+        Margin of the prediction probabilities.
+    """
+
+    if proba.shape[1] == 1:
+        return np.zeros(shape=len(proba))
+
+    part = np.partition(-proba, 1, axis=1)
+    margin = - part[:, 0] + part[:, 1]
+
+    return margin
+
+
+def _proba_entropy(proba: np.ndarray) -> np.ndarray:
+    """
+    Calculates the entropy of the prediction probabilities.
+
+    Args:
+        proba: Prediction probabilities.
+
+    Returns:
+        Entropy of the prediction probabilities.
+    """
+
+    return np.transpose(entropy(np.transpose(proba)))
+
+
 def classifier_uncertainty(classifier: BaseEstimator, X: modALinput, **predict_proba_kwargs) -> np.ndarray:
     """
     Classification uncertainty of the classifier for the provided samples.
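Taken together, these helpers compute the three standard uncertainty-sampling utilities directly from a probability matrix, without a fitted estimator: 1 minus the row maximum (least confidence), the gap between the two largest values in each row (the margin helper finds them by applying np.partition to the negated probabilities), and the row-wise Shannon entropy. A minimal usage sketch follows; the sample data and print statements are my own illustration rather than part of the commit, and these are underscore-prefixed private helpers, so importing them directly is done at the caller's own risk:

    import numpy as np
    from modAL.uncertainty import _proba_uncertainty, _proba_margin, _proba_entropy

    # each row is a predicted class-probability distribution for one sample
    proba = np.array([[0.1, 0.2, 0.7],
                      [0.6, 0.3, 0.1]])

    print(_proba_uncertainty(proba))  # 1 - row maximum            -> [0.3, 0.4]
    print(_proba_margin(proba))       # largest minus second-largest -> [0.5, 0.3]
    print(_proba_entropy(proba))      # Shannon entropy of each row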

tests/core_tests.py

Lines changed: 18 additions & 0 deletions
@@ -411,6 +411,12 @@ def test_classifier_uncertainty(self):
         test_cases = (Test(p * np.ones(shape=(k, l)), (1 - p) * np.ones(shape=(k, )))
                       for k in range(1, 100) for l in range(1, 10) for p in np.linspace(0, 1, 11))
         for case in test_cases:
+            # testing _proba_uncertainty
+            np.testing.assert_almost_equal(
+                modAL.uncertainty._proba_uncertainty(case.input),
+                case.output
+            )
+
             # fitted estimator
             fitted_estimator = mock.MockEstimator(predict_proba_return=case.input)
             np.testing.assert_almost_equal(
@@ -432,6 +438,12 @@ def test_classifier_margin(self):
                               p * np.ones(shape=(l, ))*int(k!=1))
                          for k in range(1, 10) for l in range(1, 100) for p in np.linspace(0, 1, 11))
         for case in chain(test_cases_1, test_cases_2):
+            # _proba_margin
+            np.testing.assert_almost_equal(
+                modAL.uncertainty._proba_margin(case.input),
+                case.output
+            )
+
             # fitted estimator
             fitted_estimator = mock.MockEstimator(predict_proba_return=case.input)
             np.testing.assert_almost_equal(
@@ -453,6 +465,12 @@ def test_classifier_entropy(self):
         for sample_idx in range(n_samples):
             proba[sample_idx, np.random.choice(range(n_classes))] = 1.0

+        # _proba_entropy
+        np.testing.assert_almost_equal(
+            modAL.uncertainty._proba_entropy(proba),
+            np.zeros(shape=(n_samples,))
+        )
+
         # fitted estimator
         fitted_estimator = mock.MockEstimator(predict_proba_return=proba)
         np.testing.assert_equal(
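The added assertions run the new helpers on the same synthetic cases already used for the classifier-level functions, so each private helper is checked against the same expected values as its public counterpart. As a quick standalone check in the same spirit (this snippet is my own illustration, not part of the test suite): for a one-hot probability matrix, both the uncertainty and the entropy of every row must be zero.

    import numpy as np
    from modAL.uncertainty import _proba_uncertainty, _proba_entropy

    # one-hot rows: the model is fully confident about every sample
    proba = np.eye(4)

    np.testing.assert_almost_equal(_proba_uncertainty(proba), np.zeros(4))  # 1 - max(row) = 0
    np.testing.assert_almost_equal(_proba_entropy(proba), np.zeros(4))      # entropy of a one-hot row = 0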
