Skip to content

Commit 847a745

Browse files
authored
Implement skipmissing argument to levels (#391)
The argument is added by DataAPI 1.10 (JuliaData/DataAPI.jl#46). When `skipmissing=true`, the method for `CategoricalArray` can be slightly more efficient than the fallback defined in DataAPI as it avoids calling `unique`.
1 parent 3b2773a commit 847a745

File tree

3 files changed

+45
-3
lines changed

3 files changed

+45
-3
lines changed

src/array.jl

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -753,14 +753,29 @@ end
753753
leveltype(::Type{T}) where {T <: CategoricalArray} = leveltype(nonmissingtype(eltype(T)))
754754

755755
"""
756-
levels(x::CategoricalArray)
756+
levels(x::CategoricalArray; skipmissing=true)
757757
levels(x::CategoricalValue)
758758
759759
Return the levels of categorical array or value `x`.
760760
This may include levels which do not actually appear in the data
761761
(see [`droplevels!`](@ref)).
762+
`missing` will be included only if it appears in the data and
763+
`skipmissing=false` is passed.
764+
765+
The returned vector is an internal field of `x` which must not be mutated
766+
as doing so would corrupt it.
762767
"""
763-
DataAPI.levels(A::CategoricalArray) = levels(A.pool)
768+
@inline function DataAPI.levels(A::CatArrOrSub{T}; skipmissing::Bool=true) where T
769+
if eltype(A) >: Missing && !skipmissing
770+
if any(==(0), refs(A))
771+
T[levels(pool(A)); missing]
772+
else
773+
convert(Vector{T}, levels(pool(A)))
774+
end
775+
else
776+
levels(pool(A))
777+
end
778+
end
764779

765780
"""
766781
levels!(A::CategoricalArray, newlevels::Vector; allowmissing::Bool=false)

src/subarray.jl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
# delegate methods for SubArrays to support view
22

3-
DataAPI.levels(sa::SubArray{T,N,P}) where {T,N,P<:CategoricalArray} = levels(parent(sa))
43
isordered(sa::SubArray{T,N,P}) where {T,N,P<:CategoricalArray} = isordered(parent(sa))
54
# This method cannot support allowmissing=true since that would modify the parent
65
levels!(sa::SubArray{T,N,P}, newlevels::Vector) where {T,N,P<:CategoricalArray} =

test/13_arraycommon.jl

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2260,4 +2260,32 @@ end
22602260
Vector{CategoricalVector{<:Any, <:Integer, <:Any, <:Any, Union{}}}
22612261
end
22622262

2263+
@testset "levels with skipmissing argument" begin
2264+
for x in (categorical(["a", "b", "a"], levels=["b", "c", "a"]),
2265+
view(categorical(["c", "b", "a"], levels=["b", "c", "a"]), 2:3))
2266+
@test @inferred(levels(x)) == ["b", "c", "a"]
2267+
@test @inferred(levels(x, skipmissing=true)) == ["b", "c", "a"]
2268+
@test @inferred(levels(x, skipmissing=false)) == ["b", "c", "a"]
2269+
end
2270+
2271+
for x in (categorical(Union{String, Missing}["a", "b", "a"], levels=["b", "c", "a"]),
2272+
view(categorical(Union{String, Missing}["c", "b", "a"], levels=["b", "c", "a"]), 2:3),
2273+
view(categorical(Union{String, Missing}[missing, "b", "a"], levels=["b", "c", "a"]), 2:3))
2274+
@test @inferred(levels(x)) == ["b", "c", "a"]
2275+
@test levels(x, skipmissing=true) == ["b", "c", "a"]
2276+
@test levels(x, skipmissing=true) isa Vector{String}
2277+
@test levels(x, skipmissing=false) == ["b", "c", "a"]
2278+
@test levels(x, skipmissing=false) isa Vector{Union{String, Missing}}
2279+
end
2280+
2281+
for x in (categorical(Union{String, Missing}["a", "b", missing], levels=["b", "c", "a"]),
2282+
view(categorical(Union{String, Missing}["c", "b", missing], levels=["b", "c", "a"]), 2:3))
2283+
@test @inferred(levels(x)) == ["b", "c", "a"]
2284+
@test levels(x, skipmissing=true) == ["b", "c", "a"]
2285+
@test levels(x, skipmissing=true) isa Vector{String}
2286+
@test levels(x, skipmissing=false) ≅ ["b", "c", "a", missing]
2287+
@test levels(x, skipmissing=false) isa Vector{Union{String, Missing}}
2288+
end
2289+
end
2290+
22632291
end

0 commit comments

Comments
 (0)