Skip to content

Commit e1acb38

Browse files
committed
Simplify logic
1 parent 062efb7 commit e1acb38

File tree

1 file changed

+14
-41
lines changed

1 file changed

+14
-41
lines changed

src/extras.jl

Lines changed: 14 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,11 @@ the last interval, which is closed on both ends, i.e. `[lower, upper]`.
8888
If `x` accepts missing values (i.e. `eltype(x) >: Missing`) the returned array will
8989
also accept them.
9090
91+
!!! note
92+
For floating point data, breaks may be rounded to `sigdigits` significant digits
93+
when generating interval labels, meaning that the labels may not exactly reflect the cutpoints
94+
used.
95+
9196
# Keyword arguments
9297
* `extend::Union{Bool, Missing}=false`: when `false`, an error is raised if some values
9398
in `x` fall outside of the breaks; when `true`, breaks are automatically added to include
@@ -312,24 +317,20 @@ in (sorted) `qs`.
312317
function find_breaks(v::AbstractVector, qs::AbstractVector)
    # For each entry of `qs` (assumed sorted), record the first entry of `v`
    # (assumed sorted) that is greater than or equal to it.
    # Returns a vector of `length(qs)` elements allocated with `similar(v, n)`.
    # Entries of `qs` that are greater than every element of `v` have no match
    # and their slot is left unset; quantiles computed from `v` itself never
    # exceed its maximum, so this does not happen for the intended use.
    n = length(qs)
    breaks = similar(v, n)
    n == 0 && return breaks

    i = 1
    q = qs[1]
    @inbounds for x in v
        # Use isless and isequal to differentiate -0.0 from 0.0
        # `while` rather than `if`: a single value can be the first one reaching
        # several consecutive quantiles (e.g. when there are more quantiles than
        # distinct values). Advancing only one quantile per value would assign
        # later values to those quantiles and could leave trailing entries of
        # `breaks` uninitialized once `v` is exhausted.
        while isless(q, x) || isequal(q, x)
            breaks[i] = x
            i += 1
            i > n && return breaks
            q = qs[i]
        end
    end
    return breaks
end
334335

335336
"""
@@ -346,6 +347,11 @@ but breaks are taken from actual data values instead of estimated quantiles.
346347
If `x` contains `missing` values, they are automatically skipped when computing
347348
quantiles.
348349
350+
!!! note
351+
For floating point data, breaks may be rounded to `sigdigits` significant digits
352+
when generating interval labels, meaning that the labels may not exactly reflect the cutpoints
353+
used.
354+
349355
# Keyword arguments
350356
* `labels::Union{AbstractVector, Function}`: a vector of strings, characters
351357
or numbers giving the names to use for the intervals; or a function
@@ -376,8 +382,7 @@ function cut(x::AbstractArray, ngroups::Integer;
376382
throw(ArgumentError("NaN values are not allowed in input vector"))
377383
end
378384
qs = quantile!(sorted_x, (1:(ngroups-1))/ngroups, sorted=true)
379-
breaks, breaks_prev = find_breaks(sorted_x, qs)
380-
breaks = [min_x; breaks; max_x]
385+
breaks = [min_x; find_breaks(sorted_x, qs); max_x]
381386
if !allowempty && !allunique(@view breaks[1:end-1])
382387
throw(ArgumentError("cannot compute $ngroups quantiles due to " *
383388
"too many duplicated values in `x`. " *
@@ -386,38 +391,6 @@ function cut(x::AbstractArray, ngroups::Integer;
386391
end
387392
if labels === nothing
388393
labels = allowempty ? numbered_formatter : default_formatter
389-
390-
if eltype(breaks) <: AbstractFloat
391-
while true
392-
local i
393-
for outer i in 2:lastindex(breaks)
394-
b1 = breaks[i-1]
395-
b2 = breaks[i]
396-
isequal(b1, b2) && continue
397-
398-
# Find minimal number of digits so that `floor` does not
399-
# return a value that is lower than value immediately below break
400-
# We skip the first break, which is the minimum and has no equivalent
401-
# in `breaks_prev`
402-
b1_rounded = round(b1, sigdigits=sigdigits)
403-
b2_rounded = round(b2, sigdigits=sigdigits)
404-
if i < lastindex(breaks) &&
405-
(isequal(b2_rounded, breaks_prev[i-1]) || isless(b2_rounded, breaks_prev[i-1]))
406-
sigdigits += 1
407-
break
408-
end
409-
410-
# Find minimal number of digits so that breaks are unique
411-
b1_str = Printf.format(CUT_FMT, sigdigits, b1)
412-
b2_str = Printf.format(CUT_FMT, sigdigits, b2)
413-
if b1_str == b2_str
414-
sigdigits += 1
415-
break
416-
end
417-
end
418-
i == lastindex(breaks) && break
419-
end
420-
end
421394
end
422395
return cut(x, breaks; labels=labels, sigdigits=sigdigits, allowempty=allowempty)
423396
end

0 commit comments

Comments
 (0)