@@ -226,6 +226,166 @@ returns a vector of quantiles, respectively at `[0.0, 0.2, 0.4, 0.6, 0.8, 1.0]`.
226
226
"""
227
227
function nquantile(x, n::Integer)
    # Evenly spaced probabilities 0/n, 1/n, ..., n/n.
    probs = (0:n) / n
    return quantile(x, probs)
end
228
228
229
"""
    quantilerank(itr, value; method=:inc)

Compute the quantile position in the [0, 1] interval of `value` relative to collection `itr`.

Different definitions can be chosen via the `method` keyword argument.
Let `count_less` be the number of elements of `itr` that are less than `value`,
`count_equal` the number of elements of `itr` that are equal to `value`, `n` the length of `itr`,
`greatest_smaller` the highest value below `value` and `smallest_greater` the lowest value above `value`.
Then `method` supports the following definitions:

- `:inc` (default): Return a value in the range 0 to 1 inclusive.
  Return `count_less / (n - 1)` if `value ∈ itr`, otherwise apply interpolation based on
  definition 7 of quantile in Hyndman and Fan (1996)
  (equivalent to Excel `PERCENTRANK` and `PERCENTRANK.INC`).
  This definition corresponds to the lower semi-continuous inverse of
  [`quantile`](@ref) with its default parameters.

- `:exc`: Return a value in the range 0 to 1 exclusive.
  Return `(count_less + 1) / (n + 1)` if `value ∈ itr` otherwise apply interpolation
  based on definition 6 of quantile in Hyndman and Fan (1996)
  (equivalent to Excel `PERCENTRANK.EXC`).

- `:compete`: Return `count_less / (n - 1)` if `value ∈ itr`, otherwise
  return `(count_less - 1) / (n - 1)`, without interpolation
  (equivalent to MariaDB `PERCENT_RANK`, dplyr `percent_rank`).

- `:tied`: Return `(count_less + count_equal/2) / n`, without interpolation.
  Based on the definition in Roscoe, J. T. (1975)
  (equivalent to `"mean"` kind of SciPy `percentileofscore`).

- `:strict`: Return `count_less / n`, without interpolation
  (equivalent to `"strict"` kind of SciPy `percentileofscore`).

- `:weak`: Return `(count_less + count_equal) / n`, without interpolation
  (equivalent to `"weak"` kind of SciPy `percentileofscore`).

For `:inc`, `:exc` and `:compete`, a `value` smaller than every element of `itr` gives `0.0`
and a `value` greater than every element of `itr` gives `1.0`.

`itr` is traversed exactly once, so single-pass iterators are supported.

!!! note
    An `ArgumentError` is thrown if `value` is `NaN` or `missing`,
    if `itr` contains `NaN` or `missing` values,
    if `itr` contains fewer than two elements,
    or if `method` is not one of the symbols listed above.

# References
Roscoe, J. T. (1975). [Fundamental Research Statistics for the Behavioral Sciences](http://www.bryanburnham.net/wp-content/uploads/2014/07/Fundamental-Statistics-for-the-Behavioral-Sciences-v2.0.pdf#page=57),
2nd ed., New York : Holt, Rinehart and Winston.

Hyndman, R.J and Fan, Y. (1996) "[Sample Quantiles in Statistical Packages](https://www.amherst.edu/media/view/129116/original/Sample+Quantiles.pdf)",
*The American Statistician*, Vol. 50, No. 4, pp. 361-365.

# Examples
```julia
julia> using StatsBase

julia> v1 = [1, 1, 1, 2, 3, 4, 8, 11, 12, 13];

julia> v2 = [1, 2, 3, 5, 6, missing, 8];

julia> v3 = [1, 2, 3, 4, 4, 5, 6, 7, 8, 9];

julia> quantilerank(v1, 2)
0.3333333333333333

julia> quantilerank(v1, 2, method=:exc), quantilerank(v1, 2, method=:tied)
(0.36363636363636365, 0.35)

# use `skipmissing` for vectors with missing entries.
julia> quantilerank(skipmissing(v2), 4)
0.5

# use broadcasting with `Ref` to compute quantile rank for multiple values
julia> quantilerank.(Ref(v3), [4, 8])
2-element Vector{Float64}:
 0.3333333333333333
 0.8888888888888888
```
"""
function quantilerank(itr, value; method::Symbol=:inc)
    ((value isa Number && isnan(value)) || ismissing(value)) &&
        throw(ArgumentError("`value` cannot be NaN or missing"))

    # Single pass over `itr`: validate every entry and gather all the statistics any
    # method needs, so `itr` is never iterated a second time.
    count_less = count_equal = count_greater = 0
    # These hold `value` only as a placeholder until the first smaller/greater element
    # is seen; `count_less`/`count_greater` tell whether they are meaningful.
    greatest_smaller = smallest_greater = value
    for x in itr
        # Must run before the comparisons: `missing == value` is not a `Bool`.
        (ismissing(x) || (x isa Number && isnan(x))) &&
            throw(ArgumentError("`itr` cannot contain missing or NaN entries"))
        if x == value
            count_equal += 1
        elseif x < value
            if count_less == 0 || greatest_smaller < x
                greatest_smaller = x
            end
            count_less += 1
        else
            if count_greater == 0 || smallest_greater > x
                smallest_greater = x
            end
            count_greater += 1
        end
    end
    n = count_less + count_equal + count_greater

    n == 0 && throw(ArgumentError("`itr` is empty. Pass a collection with at least two elements"))
    n == 1 && throw(ArgumentError("`itr` has only 1 value. Pass a collection with at least two elements"))

    if method == :inc
        if count_less == 0
            # `value` is at or below the smallest element.
            return 0.0
        elseif count_equal > 0
            return count_less / (n - 1)
        elseif count_greater == 0
            # `value` is strictly above the largest element.
            return 1.0
        else
            # `value` falls strictly between two elements: interpolate linearly between
            # the ranks of its two neighbours.
            lower = (count_less - 1) / (n - 1)
            upper = count_less / (n - 1)
            ratio = (value - greatest_smaller) / (smallest_greater - greatest_smaller)
            return lower + ratio * (upper - lower)
        end
    elseif method == :exc
        if count_less == 0 && count_equal == 0
            # `value` is strictly below the smallest element.
            return 0.0
        elseif count_less == 0
            # `value` equals the smallest element.
            return 1.0 / (n + 1)
        elseif count_equal > 0
            return (count_less + 1) / (n + 1)
        elseif count_greater == 0
            # `value` is strictly above the largest element.
            return 1.0
        else
            lower = count_less / (n + 1)
            upper = (count_less + 1) / (n + 1)
            ratio = (value - greatest_smaller) / (smallest_greater - greatest_smaller)
            return lower + ratio * (upper - lower)
        end
    elseif method == :compete
        if count_less == n
            # Every element is smaller, i.e. `value` exceeds the maximum.
            return 1.0
        elseif count_less == 0
            # No element is smaller, i.e. `value` is at or below the minimum.
            return 0.0
        else
            rank = count_equal > 0 ? count_less : count_less - 1
            return rank / (n - 1)
        end
    elseif method == :tied
        return (count_less + count_equal / 2) / n
    elseif method == :strict
        return count_less / n
    elseif method == :weak
        return (count_less + count_equal) / n
    else
        throw(ArgumentError("method=:$method is not valid. Pass :inc, :exc, :compete, :tied, :strict or :weak."))
    end
end
380
+
381
"""
    percentilerank(itr, value; method=:inc)

Return the percentile rank of `value` relative to collection `itr`, on a 0 to 100 scale,
i.e. `quantilerank(itr, value; method=method) * 100`.

See the [`quantilerank`](@ref) docstring for more details.
"""
function percentilerank(itr, value; method::Symbol=:inc)
    rank = quantilerank(itr, value, method=method)
    return rank * 100
end
229
389
230
390
# ############################
231
391
#
0 commit comments