@@ -27,17 +27,67 @@ function fill_refs!(refs::AbstractArray, X::AbstractArray,
2727 end
2828end
2929
# Precompiled `%.*g` format, where the precision is supplied as an argument at
# format time. Only defined on Julia 1.10 and later.
# NOTE(review): presumably because `*` precision is unsupported by Printf
# before 1.10 — confirm.
@static if VERSION >= v"1.10"
    const CUT_FMT = Printf.Format("%.*g")
end
33+
"""
    CategoricalArrays.default_formatter(from, to, i::Integer;
                                        leftclosed::Bool, rightclosed::Bool,
                                        sigdigits::Integer)

Provide the default label format for the `cut(x, breaks)` method.

The label is made of `from` and `to` separated by `", "`. It starts with `"["`
if `leftclosed` is `true` and with `"("` otherwise, and it ends with `"]"` if
`rightclosed` is `true` and with `")"` otherwise (for example `"[from, to)"`
or `"(from, to]"`). The group index `i` does not appear in the label.

If `from` or `to` is a floating point value, it is turned into a string using
`@sprintf("%.*g", sigdigits, value)`. Values of any other type are converted
with `string`.
"""
function default_formatter(from, to, i::Integer;
                           leftclosed::Bool, rightclosed::Bool,
                           sigdigits::Integer)
    opening = leftclosed ? "[" : "("
    closing = rightclosed ? "]" : ")"
    return string(opening, _cut_break_string(from, sigdigits), ", ",
                  _cut_break_string(to, sigdigits), closing)
end

# Render one interval bound for a label: floating point values are formatted
# with `sigdigits` significant digits, anything else goes through `string`.
_cut_break_string(x, sigdigits::Integer) = string(x)
function _cut_break_string(x::AbstractFloat, sigdigits::Integer)
    @static if VERSION >= v"1.10"
        # Reuse the precompiled format; the precision is passed as an argument
        return Printf.format(CUT_FMT, sigdigits, x)
    else
        # No precompiled format here: the precision is baked into the format string
        return Printf.format(Printf.Format("%.$(sigdigits)g"), x)
    end
end
66+
"""
    CategoricalArrays.numbered_formatter(from, to, i::Integer;
                                         leftclosed::Bool, rightclosed::Bool,
                                         sigdigits::Integer)

Provide the default label format for the `cut(x, ngroups)` method
when `allowempty=true`.

The label is the group index `i`, followed by `": "` and by the label that
[`CategoricalArrays.default_formatter`](@ref) generates from the same
arguments, for example `"i: [from, to)"` when `leftclosed` is `true` and
`rightclosed` is `false`.
"""
function numbered_formatter(from, to, i::Integer;
                            leftclosed::Bool, rightclosed::Bool,
                            sigdigits::Integer)
    # Build the interval part first, then prefix it with the group number
    interval = default_formatter(from, to, i,
                                 leftclosed=leftclosed, rightclosed=rightclosed,
                                 sigdigits=sigdigits)
    return string(i, ": ", interval)
end
3786
3887@doc raw """
3988 cut(x::AbstractArray, breaks::AbstractVector;
4089 labels::Union{AbstractVector,Function},
90+ sigdigits::Integer=3,
4191 extend::Union{Bool,Missing}=false, allowempty::Bool=false)
4292
4393Cut a numeric array into intervals at values `breaks`
@@ -49,15 +99,25 @@ the last interval, which is closed on both ends, i.e. `[lower, upper]`.
4999If `x` accepts missing values (i.e. `eltype(x) >: Missing`) the returned array will
50100also accept them.
51101
102+ !!! note
103+ For floating point data, breaks may be rounded to `sigdigits` significant digits
104+ when generating interval labels, meaning that they may not reflect exactly the cutpoints
105+ used.
106+
52107# Keyword arguments
53108* `extend::Union{Bool, Missing}=false`: when `false`, an error is raised if some values
54109 in `x` fall outside of the breaks; when `true`, breaks are automatically added to include
55110 all values in `x`; when `missing`, values outside of the breaks generate `missing` entries.
56111* `labels::Union{AbstractVector, Function}`: a vector of strings, characters
57- or numbers giving the names to use for
58- the intervals; or a function `f(from, to, i; leftclosed, rightclosed)` that generates
112+ or numbers giving the names to use for the intervals; or a function
113+ `f(from, to, i::Integer; leftclosed::Bool, rightclosed::Bool, sigdigits::Integer)` that generates
59114 the labels from the left and right interval boundaries and the group index. Defaults to
60- `"[from, to)"` (or `"[from, to]"` for the rightmost interval if `extend == true`).
115+ [`CategoricalArrays.default_formatter`](@ref), giving `"[from, to)"` (or `"[from, to]"`
116+ for the rightmost interval if `extend == true`).
117+ * `sigdigits::Integer=3`: the minimum number of significant digits to use in labels.
118+ This value is increased automatically if necessary so that rounded breaks are unique.
119+ Only used for floating point types and when `labels` is a function, in which case it
120+ is passed to it as a keyword argument.
61121* `allowempty::Bool=false`: when `false`, an error is raised if some breaks other than
62122 the last one appear multiple times, generating empty intervals; when `true`,
63123 duplicate breaks are allowed and the intervals they generate are kept as
@@ -69,19 +129,19 @@ julia> using CategoricalArrays
69129
70130julia> cut(-1:0.5:1, [0, 1], extend=true)
711315-element CategoricalArray{String,1,UInt32}:
72- "[-1.0, 0. 0)"
73- "[-1.0, 0. 0)"
74- "[0.0 , 1.0 ]"
75- "[0.0 , 1.0 ]"
76- "[0.0 , 1.0 ]"
132+ "[-1, 0)"
133+ "[-1, 0)"
134+ "[0, 1]"
135+ "[0, 1]"
136+ "[0, 1]"
77137
78138julia> cut(-1:0.5:1, 2)
791395-element CategoricalArray{String,1,UInt32}:
80- "Q1: [-1.0, 0. 0)"
81- "Q1: [-1.0, 0. 0)"
82- "Q2: [0.0 , 1.0 ]"
83- "Q2: [0.0 , 1.0 ]"
84- "Q2: [0.0 , 1.0 ]"
140+ "[-1, 0)"
141+ "[-1, 0)"
142+ "[0, 1]"
143+ "[0, 1]"
144+ "[0, 1]"
85145
86146julia> cut(-1:0.5:1, 2, labels=["A", "B"])
871475-element CategoricalArray{String,1,UInt32}:
@@ -114,15 +174,17 @@ julia> cut(-1:0.5:1, 3, labels=fmt)
@inline function cut(x::AbstractArray, breaks::AbstractVector;
                     extend::Union{Bool, Missing}=false,
                     labels::Union{AbstractVector{<:SupportedTypes},Function}=default_formatter,
                     sigdigits::Integer=3,
                     allowempty::Bool=false)
    # Keyword front end only: every keyword is handed to `_cut` positionally,
    # in the order `extend`, `labels`, `sigdigits`, `allowempty`.
    result = _cut(x, breaks, extend, labels, sigdigits, allowempty)
    return result
end
120181
121182# Separate function for inferability (thanks to inlining of cut)
122183function _cut (x:: AbstractArray{T, N} , breaks:: AbstractVector ,
123184 extend:: Union{Bool, Missing} ,
124185 labels:: Union{AbstractVector{<:SupportedTypes},Function} ,
125- allowempty:: Bool = false ) where {T, N}
186+ sigdigits:: Integer ,
187+ allowempty:: Bool ) where {T, N}
126188 if ! issorted (breaks)
127189 breaks = sort (breaks)
128190 end
@@ -179,21 +241,60 @@ function _cut(x::AbstractArray{T, N}, breaks::AbstractVector,
179241 end
180242 end
181243
# Find the smallest number of significant digits (starting from `sigdigits`)
# for which consecutive distinct breaks still produce distinct strings
if eltype(breaks) <: AbstractFloat
    while true
        # Every pair is rechecked after each increase: a collision on the last
        # pair must not end the search before the new precision is verified
        collision = false
        for i in 2:lastindex(breaks)
            b1 = breaks[i-1]
            b2 = breaks[i]
            # Equal breaks give equal strings at any precision, so skip them
            isequal(b1, b2) && continue

            # `CUT_FMT` only exists on Julia 1.10 and later, so this guard has
            # to use the same version as its definition
            @static if VERSION >= v"1.10"
                b1_str = Printf.format(CUT_FMT, sigdigits, b1)
                b2_str = Printf.format(CUT_FMT, sigdigits, b2)
            else
                fmt = Printf.Format("%.$(sigdigits)g")
                b1_str = Printf.format(fmt, b1)
                b2_str = Printf.format(fmt, b2)
            end
            if b1_str == b2_str
                collision = true
                break
            end
        end
        # With fewer than two breaks the scan is empty and the loop ends here,
        # leaving it to the length check that follows to report the error
        collision || break
        sigdigits += 1
    end
end
182268 n = length (breaks)
183269 n >= 2 || throw (ArgumentError (" at least two breaks must be provided when extend is not true" ))
184270 if labels isa Function
185271 from = breaks[1 : n- 1 ]
186272 to = breaks[2 : n]
187- firstlevel = labels (from[1 ], to[1 ], 1 ,
188- leftclosed= ! isequal (breaks[1 ], breaks[2 ]), rightclosed= false )
273+ local firstlevel
274+ try
275+ firstlevel = labels (from[1 ], to[1 ], 1 ,
276+ leftclosed= ! isequal (breaks[1 ], breaks[2 ]), rightclosed= false ,
277+ sigdigits= sigdigits)
278+ catch
279+ # Support functions defined before v1.0, where sigdigits did not exist
280+ Base. depwarn (" `labels` function is now required to accept a `sigdigits` keyword argument" ,
281+ :cut )
282+ labels_orig = labels
283+ labels = (from, to, i; leftclosed, rightclosed, sigdigits) ->
284+ labels_orig (from, to, i; leftclosed, rightclosed)
285+ firstlevel = labels_orig (from[1 ], to[1 ], 1 ,
286+ leftclosed= ! isequal (breaks[1 ], breaks[2 ]), rightclosed= false )
287+ end
189288 levs = Vector {typeof(firstlevel)} (undef, n- 1 )
190289 levs[1 ] = firstlevel
191290 for i in 2 : n- 2
192291 levs[i] = labels (from[i], to[i], i,
193- leftclosed= ! isequal (breaks[i], breaks[i+ 1 ]), rightclosed= false )
292+ leftclosed= ! isequal (breaks[i], breaks[i+ 1 ]), rightclosed= false ,
293+ sigdigits= sigdigits)
194294 end
195295 levs[end ] = labels (from[end ], to[end ], n- 1 ,
196- leftclosed= true , rightclosed= true )
296+ leftclosed= true , rightclosed= true ,
297+ sigdigits= sigdigits)
197298 else
198299 length (labels) == n- 1 ||
199300 throw (ArgumentError (" labels must be of length $(n- 1 ) , but got length $(length (labels)) " ))
@@ -213,14 +314,6 @@ function _cut(x::AbstractArray{T, N}, breaks::AbstractVector,
213314 CategoricalArray {S, N} (refs, pool)
214315end
215316
216- """
217- quantile_formatter(from, to, i; leftclosed, rightclosed)
218-
219- Provide the default label format for the `cut(x, ngroups)` method.
220- """
221- quantile_formatter (from, to, i; leftclosed, rightclosed) =
222- string (" Q" , i, " : " , leftclosed ? " [" : " (" , from, " , " , to, rightclosed ? " ]" : " )" )
223-
224317"""
225318Find first value in (sorted) `v` which is greater than or equal to each quantile
226319in (sorted) `qs`.
247340"""
248341 cut(x::AbstractArray, ngroups::Integer;
249342 labels::Union{AbstractVector{<:AbstractString},Function},
343+ sigdigits::Integer=3,
250344 allowempty::Bool=false)
251345
252346Cut a numeric array into `ngroups` quantiles.
@@ -257,19 +351,32 @@ but breaks are taken from actual data values instead of estimated quantiles.
257351If `x` contains `missing` values, they are automatically skipped when computing
258352quantiles.
259353
354+ !!! note
355+ For floating point data, breaks may be rounded to `sigdigits` significant digits
356+ when generating interval labels, meaning that they may not reflect exactly the cutpoints
357+ used.
358+
260359# Keyword arguments
261360* `labels::Union{AbstractVector, Function}`: a vector of strings, characters
262- or numbers giving the names to use for
263- the intervals; or a function `f(from, to, i; leftclosed, rightclosed)` that generates
361+ or numbers giving the names to use for the intervals; or a function
362+ `f(from, to, i::Integer; leftclosed::Bool, rightclosed::Bool, sigdigits::Integer)` that generates
264363 the labels from the left and right interval boundaries and the group index. Defaults to
265- `"Qi: [from, to)"` (or `"Qi: [from, to]"` for the rightmost interval).
364+ [`CategoricalArrays.default_formatter`](@ref), giving `"[from, to)"` (or `"[from, to]"`
365+ for the rightmost interval) if `allowempty=false`, otherwise to
366+ [`CategoricalArrays.numbered_formatter`](@ref), which prefixes the label with the quantile
367+ number to ensure uniqueness.
368+ * `sigdigits::Integer=3`: the minimum number of significant digits to use when rounding
369+ breaks for inclusion in generated labels. This value is increased automatically if necessary
370+ so that rounded breaks are unique. Only used for floating point types and when `labels` is a
371+ function, in which case it is passed to it as a keyword argument.
266372* `allowempty::Bool=false`: when `false`, an error is raised if some quantiles breakpoints
267373 other than the last one are equal, generating empty intervals;
268374 when `true`, duplicate breaks are allowed and the intervals they generate are kept as
269375 unused levels (but duplicate labels are not allowed).
270376"""
271377function cut (x:: AbstractArray , ngroups:: Integer ;
272- labels:: Union{AbstractVector{<:SupportedTypes},Function} = quantile_formatter,
378+ labels:: Union{AbstractVector{<:SupportedTypes},Function,Nothing} = nothing ,
379+ sigdigits:: Integer = 3 ,
273380 allowempty:: Bool = false )
274381 ngroups >= 1 || throw (ArgumentError (" ngroups must be strictly positive (got $ngroups )" ))
275382 sorted_x = eltype (x) >: Missing ? sort! (collect (skipmissing (x))) : sort (x)
@@ -286,5 +393,8 @@ function cut(x::AbstractArray, ngroups::Integer;
286393 " Pass `allowempty=true` to allow empty quantiles or " *
287394 " choose a lower value for `ngroups`." ))
288395 end
289- cut (x, breaks; labels= labels, allowempty= allowempty)
396+ if labels === nothing
397+ labels = allowempty ? numbered_formatter : default_formatter
398+ end
399+ return cut (x, breaks; labels= labels, sigdigits= sigdigits, allowempty= allowempty)
290400end
0 commit comments