ENH: Fix Bland-Altman plot salient data identification

jhlegarreta · jhlegarreta · commit eca7996d22fd · 2025-05-07T21:50:20.000-04:00
Fix Bland-Altman plot salient data identification: - Fix the indices of the rightmost/closest-to-zero-mean-difference salient points: the previous implementation was not keeping only the `top_n` points, and was instead returning all of the points in the percentile count. - The mask corresponding to the rightmost/closest-to-zero-mean-difference salient points was not, in reality, a (boolean) mask; instead, it contained the indices of the salient points at issue. These indices turned out to be correctly computed, and hence when they were used to index the BA plot data array, the plot matched what was expected. This error went unnoticed in commit b2f544b because only the mask is used to highlight the salient data. Add a test that ensures that the returned data dimensionality is the one expected.
diff --git a/src/nifreeze/analysis/measure_agreement.py b/src/nifreeze/analysis/measure_agreement.py
@@ -258,18 +258,20 @@ def identify_bland_altman_salient_data(
     # Sort indices by descending mean (rightmost values first)
     right_sort_mean = remaining_idx[np.argsort(mean[remaining_idx])[::-1]]
 
-    # Take top percentile of the rightmost points
+    # Take a percentile of the rightmost points
     top_p_count = int(percentile * len(right_sort_mean))
     top_p_sorted = right_sort_mean[:top_p_count]
 
     # Get absolute difference from mean_diff (closeness to zero mean difference)
     diff_distance = np.abs(diff[top_p_sorted] - mean_diff)
 
     # Sort rightmost points by closeness to zero diff
-    upper_idx = np.argsort(diff_distance)
+    top_p_idx = np.argsort(diff_distance)
 
     # Take top_n of them
-    right_mask = right_sort_mean[upper_idx[:top_n]]
+    upper_idx = top_p_sorted[top_p_idx][:top_n]
+    right_mask = np.zeros_like(reliability_mask, dtype=bool)
+    right_mask[upper_idx] = True
 
     return {
         BASalientEntity.RELIABILITY_INDICES.value: reliability_idx,
diff --git a/test/test_analysis.py b/test/test_analysis.py
@@ -26,8 +26,10 @@
 import pytest
 
 from nifreeze.analysis.measure_agreement import (
+    BASalientEntity,
     compute_bland_altman_features,
     compute_z_score,
+    identify_bland_altman_salient_data,
 )
 
 
@@ -105,3 +107,23 @@ def test_compute_bland_altman_features(request):
     assert loa_lower < loa_upper
     assert np.isscalar(ci_mean)
     assert np.isscalar(ci_loa)
+
+
+def test_identify_bland_altman_salient_data():
+    _data1 = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
+    _data2 = np.array([1.1, 2.1, 1.1, 2.7, 3.4, 5.1, 2.2, 6.3, 7.6, 8.2])
+
+    ci = 0.95
+    top_n = 2
+
+    # Identify the salient data
+    percentile = 0.75
+    salient_data = identify_bland_altman_salient_data(_data1, _data2, ci, top_n, percentile=percentile)
+
+    assert len(salient_data[BASalientEntity.RELIABILITY_MASK.value]) == len(_data1)
+
+    assert len(salient_data[BASalientEntity.LEFT_INDICES.value]) == top_n
+    assert len(salient_data[BASalientEntity.LEFT_MASK.value]) == len(_data1)
+
+    assert len(salient_data[BASalientEntity.RIGHT_INDICES.value]) == top_n
+    assert len(salient_data[BASalientEntity.RIGHT_MASK.value]) == len(_data1)