Skip to content

Commit b0a5e7b

Browse files
committed
ENH: Ensure BA data point sufficiency for salient point identification
Ensure BA data point sufficiency for salient point identification. Add the corresponding tests.
1 parent eca7996 commit b0a5e7b

File tree

2 files changed

+38
-3
lines changed

2 files changed

+38
-3
lines changed

src/nifreeze/analysis/measure_agreement.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,18 @@ def identify_bland_altman_salient_data(
245245
reliability_mask = get_reliability_mask(diff, loa_lower, loa_upper)
246246
reliability_idx = np.where(reliability_mask)[0]
247247

248+
# Check that there are enough data points left to identify the requested
249+
# number of salient data points
250+
reliability_point_count = len(reliability_idx)
251+
salient_point_count = 2 * top_n
252+
if reliability_point_count < salient_point_count:
253+
raise ValueError(
254+
f"Too few reliable data points ({reliability_point_count}) to "
255+
f"identify the requested Bland-Altman salient points "
256+
f"(2 * {top_n}). Reduce the number of salient data points "
257+
f"requested ({top_n})"
258+
)
259+
248260
# Select the top_n lowest median values from the left side of the BA plot
249261
lower_idx = np.argsort(mean[reliability_idx])[:top_n]
250262
left_indices = reliability_idx[lower_idx]
@@ -262,6 +274,15 @@ def identify_bland_altman_salient_data(
262274
top_p_count = int(percentile * len(right_sort_mean))
263275
top_p_sorted = right_sort_mean[:top_p_count]
264276

277+
# Check that there are enough data points left to identify the requested
278+
# number of rightmost points
279+
if top_p_count < top_n:
280+
raise ValueError(
281+
f"Too few data points ({top_p_count}) to identify the requested "
282+
f"Bland-Altman right-most salient points ({top_n}). Increase the "
283+
f"percentile requested ({top_n})"
284+
)
285+
265286
# Get absolute difference from mean_diff (closeness to zero mean difference)
266287
diff_distance = np.abs(diff[top_p_sorted] - mean_diff)
267288

test/test_analysis.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -114,11 +114,25 @@ def test_identify_bland_altman_salient_data():
114114
_data2 = np.array([1.1, 2.1, 1.1, 2.7, 3.4, 5.1, 2.2, 6.3, 7.6, 8.2])
115115

116116
ci = 0.95
117-
top_n = 2
118117

119-
# Generate measurements
118+
# Verify that an error is raised when there are too few data points to get
119+
# the requested number of salient data points
120+
top_n = 6
121+
with pytest.raises(ValueError):
122+
identify_bland_altman_salient_data(_data1, _data2, ci, top_n)
123+
124+
top_n = 4
125+
126+
# Verify that an error is raised when the percentile is too restrictive to get
127+
# the requested number of rightmost salient data points
120128
percentile = 0.75
121-
salient_data = identify_bland_altman_salient_data(_data1, _data2, ci, top_n, percentile=percentile)
129+
with pytest.raises(ValueError):
130+
identify_bland_altman_salient_data(_data1, _data2, ci, top_n, percentile=percentile)
131+
132+
percentile = 0.8
133+
salient_data = identify_bland_altman_salient_data(
134+
_data1, _data2, ci, top_n, percentile=percentile
135+
)
122136

123137
assert len(salient_data[BASalientEntity.RELIABILITY_MASK.value]) == len(_data1)
124138

0 commit comments

Comments
 (0)