@@ -88,6 +88,11 @@ the last interval, which is closed on both ends, i.e. `[lower, upper]`.
88
88
If `x` accepts missing values (i.e. `eltype(x) >: Missing`), the returned array will
89
89
also accept them.
90
90
91
+ !!! note
92
+ For floating point data, breaks may be rounded to `sigdigits` significant digits
93
+ when generating interval labels, meaning that the labels may not exactly reflect the cutpoints
94
+ used.
95
+
91
96
# Keyword arguments
92
97
* `extend::Union{Bool, Missing}=false`: when `false`, an error is raised if some values
93
98
in `x` fall outside of the breaks; when `true`, breaks are automatically added to include
@@ -312,24 +317,20 @@ in (sorted) `qs`.
312
317
function find_breaks (v:: AbstractVector , qs:: AbstractVector )
313
318
n = length (qs)
314
319
breaks = similar (v, n)
315
- breaks_prev = similar (v, n)
316
- n == 0 && return (breaks, breaks_prev)
320
+ n == 0 && return breaks
317
321
318
322
i = 1
319
323
q = qs[1 ]
320
- @inbounds for j in eachindex (v)
321
- x = v[j]
324
+ @inbounds for x in v
322
325
# Use isless and isequal to differentiate -0.0 from 0.0
323
326
if isless (q, x) || isequal (q, x)
324
327
breaks[i] = x
325
- # FIXME : handle duplicated breaks
326
- breaks_prev[i] = v[clamp (j- 1 , firstindex (v), lastindex (v))]
327
328
i += 1
328
329
i > n && break
329
330
q = qs[i]
330
331
end
331
332
end
332
- return ( breaks, breaks_prev)
333
+ return breaks
333
334
end
334
335
335
336
"""
@@ -346,6 +347,11 @@ but breaks are taken from actual data values instead of estimated quantiles.
346
347
If `x` contains `missing` values, they are automatically skipped when computing
347
348
quantiles.
348
349
350
+ !!! note
351
+ For floating point data, breaks may be rounded to `sigdigits` significant digits
352
+ when generating interval labels, meaning that the labels may not exactly reflect the cutpoints
353
+ used.
354
+
349
355
# Keyword arguments
350
356
* `labels::Union{AbstractVector, Function}`: a vector of strings, characters
351
357
or numbers giving the names to use for the intervals; or a function
@@ -376,8 +382,7 @@ function cut(x::AbstractArray, ngroups::Integer;
376
382
throw (ArgumentError (" NaN values are not allowed in input vector" ))
377
383
end
378
384
qs = quantile! (sorted_x, (1 : (ngroups- 1 ))/ ngroups, sorted= true )
379
- breaks, breaks_prev = find_breaks (sorted_x, qs)
380
- breaks = [min_x; breaks; max_x]
385
+ breaks = [min_x; find_breaks (sorted_x, qs); max_x]
381
386
if ! allowempty && ! allunique (@view breaks[1 : end - 1 ])
382
387
throw (ArgumentError (" cannot compute $ngroups quantiles due to " *
383
388
" too many duplicated values in `x`. " *
@@ -386,38 +391,6 @@ function cut(x::AbstractArray, ngroups::Integer;
386
391
end
387
392
if labels === nothing
388
393
labels = allowempty ? numbered_formatter : default_formatter
389
-
390
- if eltype (breaks) <: AbstractFloat
391
- while true
392
- local i
393
- for outer i in 2 : lastindex (breaks)
394
- b1 = breaks[i- 1 ]
395
- b2 = breaks[i]
396
- isequal (b1, b2) && continue
397
-
398
- # Find minimal number of digits so that `floor` does not
399
- # return a value that is lower than value immediately below break
400
- # We skip the first break, which is the minimum and has no equivalent
401
- # in `breaks_prev`
402
- b1_rounded = round (b1, sigdigits= sigdigits)
403
- b2_rounded = round (b2, sigdigits= sigdigits)
404
- if i < lastindex (breaks) &&
405
- (isequal (b2_rounded, breaks_prev[i- 1 ]) || isless (b2_rounded, breaks_prev[i- 1 ]))
406
- sigdigits += 1
407
- break
408
- end
409
-
410
- # Find minimal number of digits so that breaks are unique
411
- b1_str = Printf. format (CUT_FMT, sigdigits, b1)
412
- b2_str = Printf. format (CUT_FMT, sigdigits, b2)
413
- if b1_str == b2_str
414
- sigdigits += 1
415
- break
416
- end
417
- end
418
- i == lastindex (breaks) && break
419
- end
420
- end
421
394
end
422
395
return cut (x, breaks; labels= labels, sigdigits= sigdigits, allowempty= allowempty)
423
396
end
0 commit comments