ENH: Ensure BA data point sufficiency for salient point identification

jhlegarreta · jhlegarreta · commit b0a5e7b882cc · 2025-05-07T21:56:29.000-04:00
Ensure BA data point sufficiency for salient point identification.

Add the corresponding tests.
diff --git a/src/nifreeze/analysis/measure_agreement.py b/src/nifreeze/analysis/measure_agreement.py
@@ -245,6 +245,18 @@ def identify_bland_altman_salient_data(
     reliability_mask = get_reliability_mask(diff, loa_lower, loa_upper)
     reliability_idx = np.where(reliability_mask)[0]
 
+    # Check that there are enough data points left to identify the requested
+    # number of salient data points
+    reliability_point_count = len(reliability_idx)
+    salient_point_count = 2 * top_n
+    if reliability_point_count < salient_point_count:
+        raise ValueError(
+            f"Too few reliable data points ({reliability_point_count}) to "
+            f"identify the requested Bland-Altman salient points "
+            f"(2 * {top_n}). Reduce the number of salient data points "
+            f"requested ({top_n})"
+        )
+
     # Select the top_n lowest median values from the left side of the BA plot
     lower_idx = np.argsort(mean[reliability_idx])[:top_n]
     left_indices = reliability_idx[lower_idx]
@@ -262,6 +274,15 @@ def identify_bland_altman_salient_data(
     top_p_count = int(percentile * len(right_sort_mean))
     top_p_sorted = right_sort_mean[:top_p_count]
 
+    # Check that there are enough data points left to identify the requested
+    # number of rightmost points
+    if top_p_count < top_n:
+        raise ValueError(
+            f"Too few data points ({top_p_count}) to identify the requested "
+            f"Bland-Altman right-most salient points ({top_n}). Increase the "
+            f"percentile requested ({top_n})"
+        )
+
     # Get absolute difference from mean_diff (closeness to zero mean difference)
     diff_distance = np.abs(diff[top_p_sorted] - mean_diff)
 
diff --git a/test/test_analysis.py b/test/test_analysis.py
@@ -114,11 +114,25 @@ def test_identify_bland_altman_salient_data():
     _data2 = np.array([1.1, 2.1, 1.1, 2.7, 3.4, 5.1, 2.2, 6.3, 7.6, 8.2])
 
     ci = 0.95
-    top_n = 2
 
-    # Generate measurements
+    # Verify that an error is raised when there are too few reliable data points
+    # to identify the requested number of salient data points
+    top_n = 6
+    with pytest.raises(ValueError):
+        identify_bland_altman_salient_data(_data1, _data2, ci, top_n)
+
+    top_n = 4
+
+    # Verify that an error is raised when the percentile is too small to yield
+    # the requested number of rightmost salient data points
     percentile = 0.75
-    salient_data = identify_bland_altman_salient_data(_data1, _data2, ci, top_n, percentile=percentile)
+    with pytest.raises(ValueError):
+        identify_bland_altman_salient_data(_data1, _data2, ci, top_n, percentile=percentile)
+
+    percentile = 0.8
+    salient_data = identify_bland_altman_salient_data(
+        _data1, _data2, ci, top_n, percentile=percentile
+    )
 
     assert len(salient_data[BASalientEntity.RELIABILITY_MASK.value]) == len(_data1)