Skip to content

Commit bbac8dc

Browse files
authored
Make cut close last interval on the right (#409)
This is much more useful, though slightly breaking.
1 parent d61d911 commit bbac8dc

File tree

3 files changed

+37
-29
lines changed

3 files changed

+37
-29
lines changed

src/extras.jl

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@ function fill_refs!(refs::AbstractArray, X::AbstractArray,
1111

1212
if ismissing(x)
1313
refs[i] = 0
14-
elseif extend === true && x == upper
14+
elseif x == upper
1515
refs[i] = n-1
16-
elseif extend !== true && !(lower <= x < upper)
16+
elseif extend !== true && !(lower <= x <= upper)
1717
extend === missing ||
1818
throw(ArgumentError("value $x (at index $i) does not fall inside the breaks: " *
1919
"adapt them manually, or pass extend=true or extend=missing"))
@@ -41,17 +41,15 @@ Cut a numeric array into intervals at values `breaks`
4141
and return an ordered `CategoricalArray` indicating
4242
the interval into which each entry falls. Intervals are of the form `[lower, upper)`,
4343
i.e. the lower bound is included and the upper bound is excluded, except
44-
if `extend=true` the last interval, which is then closed on both ends,
45-
i.e. `[lower, upper]`.
44+
for the last interval, which is closed on both ends, i.e. `[lower, upper]`.
4645
4746
If `x` accepts missing values (i.e. `eltype(x) >: Missing`) the returned array will
4847
also accept them.
4948
5049
# Keyword arguments
5150
* `extend::Union{Bool, Missing}=false`: when `false`, an error is raised if some values
5251
in `x` fall outside of the breaks; when `true`, breaks are automatically added to include
53-
all values in `x`, and the upper bound is included in the last interval; when `missing`,
54-
values outside of the breaks generate `missing` entries.
52+
all values in `x`; when `missing`, values outside of the breaks generate `missing` entries.
5553
* `labels::Union{AbstractVector, Function}`: a vector of strings, characters
5654
or numbers giving the names to use for
5755
the intervals; or a function `f(from, to, i; leftclosed, rightclosed)` that generates
@@ -200,7 +198,7 @@ function _cut(x::AbstractArray{T, N}, breaks::AbstractVector,
200198
end
201199
levs[end] = labels(from[end], to[end], n-1,
202200
leftclosed=breaks[end-1] != breaks[end],
203-
rightclosed=coalesce(extend, false))
201+
rightclosed=true)
204202
else
205203
length(labels) == n-1 ||
206204
throw(ArgumentError("labels must be of length $(n-1), but got length $(length(labels))"))

test/15_extras.jl

Lines changed: 31 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -6,27 +6,37 @@ const ≅ = isequal
66

77
@testset "cut($(Union{Int, T})[...])" for T in (Union{}, Missing)
88
x = @inferred cut(Vector{Union{Int, T}}([2, 3, 5]), [1, 3, 6])
9-
@test x == ["[1, 3)", "[3, 6)", "[3, 6)"]
9+
@test x == ["[1, 3)", "[3, 6]", "[3, 6]"]
1010
@test isa(x, CategoricalVector{Union{String, T}})
1111
@test isordered(x)
12-
@test levels(x) == ["[1, 3)", "[3, 6)"]
12+
@test levels(x) == ["[1, 3)", "[3, 6]"]
13+
14+
@test cut(Vector{Union{T, Int}}([2, 3, 5]), [2, 5], extend=false) ==
15+
["[2, 5]", "[2, 5]", "[2, 5]"]
1316

1417
err = @test_throws ArgumentError cut(Vector{Union{T, Int}}([2, 3, 5]), [3, 6])
1518
@test err.value.msg == "value 2 (at index 1) does not fall inside the breaks: adapt them manually, or pass extend=true or extend=missing"
1619

1720

18-
err = @test_throws ArgumentError cut(Vector{Union{T, Int}}([2, 3, 5]), [2, 5])
19-
@test err.value.msg == "value 5 (at index 3) does not fall inside the breaks: adapt them manually, or pass extend=true or extend=missing"
20-
2121
if T === Missing
2222
x = @inferred cut(Vector{Union{T, Int}}([2, 3, 5]), [2, 5], extend=missing)
2323
else
2424
x = cut(Vector{Union{T, Int}}([2, 3, 5]), [2, 5], extend=missing)
2525
end
26-
@test x ≅ ["[2, 5)", "[2, 5)", missing]
26+
@test x ≅ ["[2, 5]", "[2, 5]", "[2, 5]"]
2727
@test isa(x, CategoricalVector{Union{String, Missing}})
2828
@test isordered(x)
29-
@test levels(x) == ["[2, 5)"]
29+
@test levels(x) == ["[2, 5]"]
30+
31+
if T === Missing
32+
x = @inferred cut(Vector{Union{T, Int}}([2, 3, 6]), [2, 5], extend=missing)
33+
else
34+
x = cut(Vector{Union{T, Int}}([2, 3, 6]), [2, 5], extend=missing)
35+
end
36+
@test x ≅ ["[2, 5]", "[2, 5]", missing]
37+
@test isa(x, CategoricalVector{Union{String, Missing}})
38+
@test isordered(x)
39+
@test levels(x) == ["[2, 5]"]
3040

3141
x = @inferred cut(Vector{Union{T, Int}}([2, 3, 5]), [3, 6], extend=true)
3242
@test x == ["[2, 3)", "[3, 6]", "[3, 6]"]
@@ -40,10 +50,10 @@ const ≅ = isequal
4050
@test levels(x) == ["[2, 3)", "[3, 6]"]
4151

4252
x = @inferred cut(Vector{Union{T, Int}}([1, 2, 4]), [1, 3, 6])
43-
@test x == ["[1, 3)", "[1, 3)", "[3, 6)"]
53+
@test x == ["[1, 3)", "[1, 3)", "[3, 6]"]
4454
@test isa(x, CategoricalVector{Union{String, T}})
4555
@test isordered(x)
46-
@test levels(x) == ["[1, 3)", "[3, 6)"]
56+
@test levels(x) == ["[1, 3)", "[3, 6]"]
4757

4858
x = @inferred cut(Vector{Union{T, Int}}([1, 2, 4]), [3, 6], extend=true)
4959
@test x == ["[1, 3)", "[1, 3)", "[3, 6]"]
@@ -67,10 +77,10 @@ const ≅ = isequal
6777
breaks = [18, 25, 35, 60, 100]
6878
x = @inferred cut(Vector{Union{T, Int}}(ages), breaks)
6979
@test x == ["[18, 25)", "[18, 25)", "[25, 35)", "[25, 35)", "[18, 25)", "[18, 25)",
70-
"[35, 60)", "[25, 35)", "[60, 100)", "[35, 60)", "[35, 60)", "[25, 35)"]
80+
"[35, 60)", "[25, 35)", "[60, 100]", "[35, 60)", "[35, 60)", "[25, 35)"]
7181
@test isa(x, CategoricalVector{Union{String, T}})
7282
@test isordered(x)
73-
@test levels(x) == ["[18, 25)", "[25, 35)", "[35, 60)", "[60, 100)"]
83+
@test levels(x) == ["[18, 25)", "[25, 35)", "[35, 60)", "[60, 100]"]
7484

7585
breaks = [1, 6, 3] # Unsorted breaks
7686
labels = ["b", "a"] # Differs from lexical ordering
@@ -83,10 +93,10 @@ const ≅ = isequal
8393
@test levels(x) == ["b", "a"]
8494

8595
x = @inferred cut(Matrix{Union{Float64, T}}([-1.1 3.0; 1.456 10.394]), [-2.134, 3.0, 12.5])
86-
@test x == ["[-2.134, 3.0)" "[3.0, 12.5)"; "[-2.134, 3.0)" "[3.0, 12.5)"]
96+
@test x == ["[-2.134, 3.0)" "[3.0, 12.5]"; "[-2.134, 3.0)" "[3.0, 12.5]"]
8797
@test isa(x, CategoricalMatrix{Union{String, T}})
8898
@test isordered(x)
89-
@test levels(x) == ["[-2.134, 3.0)", "[3.0, 12.5)"]
99+
@test levels(x) == ["[-2.134, 3.0)", "[3.0, 12.5]"]
90100

91101
labels = 0:2:8
92102
x = @inferred cut(Vector{Union{T, Int}}(1:8), 0:2:10, labels=labels)
@@ -179,7 +189,7 @@ end
179189
@test_throws ArgumentError cut(1:10, [1, 5, 5, 11])
180190
y = cut(1:10, [1, 5, 5, 11], allowempty=true)
181191
@test y == cut(1:10, [1, 5, 11])
182-
@test levels(y) == ["[1, 5)", "(5, 5)", "[5, 11)"]
192+
@test levels(y) == ["[1, 5)", "(5, 5)", "[5, 11]"]
183193

184194
@test_throws ArgumentError cut(1:10, [1, 5, 5, 5, 11])
185195
@test_throws ArgumentError cut(1:10, [1, 5, 5, 11],
@@ -191,29 +201,29 @@ end
191201

192202
@test_throws ArgumentError cut(1:10, [1, 5, 5, 11], labels=string.(1:3))
193203
y = cut(1:10, [1, 5, 5, 11], allowempty=true, labels=string.(1:3))
194-
@test y == recode(cut(1:10, [1, 5, 11]), "[1, 5)" => "1", "[5, 11)" => "3")
204+
@test y == recode(cut(1:10, [1, 5, 11]), "[1, 5)" => "1", "[5, 11]" => "3")
195205
@test levels(y) == string.(1:3)
196206

197207
@test_throws ArgumentError cut(1:10, [1, 5, 5, 5, 11], labels=string.(1:4))
198208
y = cut(1:10, [1, 5, 5, 5, 11], allowempty=true, labels=string.(1:4))
199-
@test y == recode(cut(1:10, [1, 5, 11]), "[1, 5)" => "1", "[5, 11)" => "4")
209+
@test y == recode(cut(1:10, [1, 5, 11]), "[1, 5)" => "1", "[5, 11]" => "4")
200210
@test levels(y) == string.(1:4)
201211

202212
@test_throws ArgumentError cut(1:10, [1, 5, 5, 5, 5, 11], labels=string.(1:5))
203213
y = cut(1:10, [1, 5, 5, 5, 5, 11], allowempty=true, labels=string.(1:5))
204-
@test y == recode(cut(1:10, [1, 5, 11]), "[1, 5)" => "1", "[5, 11)" => "5")
214+
@test y == recode(cut(1:10, [1, 5, 11]), "[1, 5)" => "1", "[5, 11]" => "5")
205215
@test levels(y) == string.(1:5)
206216

207217
@test_throws ArgumentError cut(1:10, [1, 3, 3, 5, 5, 11], labels=string.(1:5))
208218
y = cut(1:10, [1, 3, 3, 5, 5, 11], allowempty=true, labels=string.(1:5))
209219
@test y == recode(cut(1:10, [1, 3, 5, 11]),
210-
"[1, 3)" => "1", "[3, 5)" => "3", "[5, 11)" => "5")
220+
"[1, 3)" => "1", "[3, 5)" => "3", "[5, 11]" => "5")
211221
@test levels(y) == string.(1:5)
212222

213223
@test_throws ArgumentError cut(1:10, [1, 3, 3, 3, 5, 5, 5, 11], labels=string.(1:7))
214224
y = cut(1:10, [1, 3, 3, 3, 5, 5, 5, 11], allowempty=true, labels=string.(1:7))
215225
@test y == recode(cut(1:10, [1, 3, 5, 11]),
216-
"[1, 3)" => "1", "[3, 5)" => "4", "[5, 11)" => "7")
226+
"[1, 3)" => "1", "[3, 5)" => "4", "[5, 11]" => "7")
217227
@test levels(y) == string.(1:7)
218228

219229
@test_throws ArgumentError cut(1:10, [1, 3, 5, 5, 11],
@@ -255,9 +265,9 @@ end
255265
end
256266

257267
@testset "cut with extend=missing" begin
258-
x = @inferred cut([-0.0, 0.0, 1.0, 2.0, 3.0, 4.0], [-0.0, 0.0, 3.0],
268+
x = @inferred cut([-0.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0], [-0.0, 0.0, 3.0],
259269
labels=[-0.0, 0.0], extend=missing)
260-
@test x ≅ [-0.0, 0.0, 0.0, 0.0, missing, missing]
270+
@test x ≅ [-0.0, 0.0, 0.0, 0.0, 0.0, missing, missing]
261271
@test x isa CategoricalArray{Union{Missing, Float64},1,UInt32}
262272
@test isordered(x)
263273
@test levels(x) == [-0.0, 0.0]

test/17_deprecated.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ const ≅ = isequal
1010
@test x ≅ ["a", missing, missing]
1111

1212
x = cut([1, missing, 100], [1, 2], allow_missing=true)
13-
@test x ≅ ["[1, 2)", missing, missing]
13+
@test x ≅ ["[1, 2]", missing, missing]
1414
end
1515

1616
end

0 commit comments

Comments
 (0)