@@ -206,8 +206,9 @@ def identify_bland_altman_salient_data(
206
206
`top_n` data points from the BA plot.
207
207
208
208
Once the left-most data points are identified, the right-most `percentile` data
209
- points are considered from the remaining data points, and `top_n` data
210
- points are identified out of these.
209
+ points are considered from the remaining data points.
210
+ The ``top_n`` data points closest to the zero mean difference are
211
+ identified among these.
211
212
212
213
Parameters
213
214
----------
@@ -245,6 +246,18 @@ def identify_bland_altman_salient_data(
245
246
reliability_mask = get_reliability_mask (diff , loa_lower , loa_upper )
246
247
reliability_idx = np .where (reliability_mask )[0 ]
247
248
249
+ # Check that there are enough data points left to identify the requested
250
+ # number of salient data points
251
+ reliability_point_count = len (reliability_idx )
252
+ salient_point_count = 2 * top_n
253
+ if reliability_point_count < salient_point_count :
254
+ raise ValueError (
255
+ f"Too few reliable data points ({ reliability_point_count } ) to "
256
+ f"identify the requested Bland-Altman salient points "
257
+ f"(2 * { top_n } ). Reduce the number of salient data points "
258
+ f"requested ({ top_n } )"
259
+ )
260
+
248
261
# Select the top_n lowest mean values from the left side of the BA plot
249
262
lower_idx = np .argsort (mean [reliability_idx ])[:top_n ]
250
263
left_indices = reliability_idx [lower_idx ]
@@ -258,18 +271,29 @@ def identify_bland_altman_salient_data(
258
271
# Sort indices by descending mean (rightmost values first)
259
272
right_sort_mean = remaining_idx [np .argsort (mean [remaining_idx ])[::- 1 ]]
260
273
261
- # Take top percentile of the rightmost points
274
+ # Take a percentile of the rightmost points
262
275
top_p_count = int (percentile * len (right_sort_mean ))
263
276
top_p_sorted = right_sort_mean [:top_p_count ]
264
277
278
+ # Check that there are enough data points left to identify the requested
279
+ # number of rightmost points
280
+ if top_p_count < top_n :
281
+ raise ValueError (
282
+ f"Too few data points ({ top_p_count } ) to identify the requested "
283
+ f"Bland-Altman right-most salient points ({ top_n } ). Increase the "
284
+ f"percentile requested ({ top_n } )"
285
+ )
286
+
265
287
# Get absolute difference from mean_diff (closeness to zero mean difference)
266
288
diff_distance = np .abs (diff [top_p_sorted ] - mean_diff )
267
289
268
290
# Sort rightmost points by closeness to zero diff
269
- upper_idx = np .argsort (diff_distance )
291
+ top_p_idx = np .argsort (diff_distance )
270
292
271
293
# Take top_n of them
272
- right_mask = right_sort_mean [upper_idx [:top_n ]]
294
+ upper_idx = top_p_sorted [top_p_idx ][:top_n ]
295
+ right_mask = np .zeros_like (reliability_mask , dtype = bool )
296
+ right_mask [upper_idx ] = True
273
297
274
298
return {
275
299
BASalientEntity .RELIABILITY_INDICES .value : reliability_idx ,
0 commit comments