Skip to content

Commit 831f010

Browse files
authored
fix nonunique bug (#3393)
1 parent 1a5da8a commit 831f010

File tree

5 files changed

+20
-5
lines changed

5 files changed

+20
-5
lines changed

NEWS.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@
2020
instead of using the interactive thread pool when Julia was started
2121
with `-tM,N` with N > 0
2222
([#3385](https://github.com/JuliaData/DataFrames.jl/pull/3385))
23+
* Correctly return `Bool[]` in the `nonunique` function applied to a data frame
24+
with a pooled column that has zero levels in the pool
25+
([#3393](https://github.com/JuliaData/DataFrames.jl/pull/3393))
2326

2427
# DataFrames.jl v1.6.1 Release Notes
2528

src/abstractdataframe/unique.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ function nonunique(df::AbstractDataFrame; keep::Symbol=:first)
8787
if !(keep in (:first, :last, :noduplicates))
8888
throw(ArgumentError("`keep` must be :first, :last, or :noduplicates"))
8989
end
90-
ncol(df) == 0 && return Bool[]
90+
nrow(df) == 0 && return Bool[]
9191
res = fill(true, nrow(df))
9292
cols = ntuple(i -> df[!, i], ncol(df))
9393
if keep == :first

src/groupeddataframe/utils.jl

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -337,7 +337,11 @@ function row_group_slots!(cols::NTuple{N, AbstractVector},
337337
nt = max(1, lg ÷ 100_000)
338338
end
339339
# if there are few rows per group limit the number of threads used
340-
nt = clamp(round(Int, (lg / 4) / ngroups - 2), 1, nt)
340+
if ngroups == 0
341+
nt = 1
342+
else
343+
nt = clamp(round(Int, (lg / 4) / ngroups - 2), 1, nt)
344+
end
341345

342346
seen = fill(false, ngroups)
343347
seen_vec = Vector{Vector{Bool}}(undef, nt)

test/duplicates.jl

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
module TestDuplicates
22

3-
using Test, DataFrames, CategoricalArrays, Random
3+
using Test, DataFrames, CategoricalArrays, Random, PooledArrays
44
const ≅ = isequal
55

66
@testset "nonunique" begin
@@ -30,15 +30,18 @@ const ≅ = isequal
3030
@test_throws ArgumentError unique!(df)
3131
@test_throws ArgumentError unique(df, true)
3232

33-
pdf = view(DataFrame(a=CategoricalArray(["a", "a", missing, missing, "b", missing, "a", missing]),
34-
b=CategoricalArray(["a", "b", missing, missing, "b", "a", "a", "a"])), :, :)
33+
pdf = view(DataFrame(a=CategoricalArray(["a", "a", missing, missing, "b", missing, "a", missing]),
34+
b=CategoricalArray(["a", "b", missing, missing, "b", "a", "a", "a"])), :, :)
3535
updf = DataFrame(a=CategoricalArray(["a", "a", missing, "b", missing]),
3636
b=CategoricalArray(["a", "b", missing, "b", "a"]))
3737
@test nonunique(pdf) == [false, false, false, true, false, false, true, true]
3838
@test nonunique(updf) == falses(5)
3939
@test updf ≅ unique(pdf)
4040
@test_throws ArgumentError unique!(pdf)
4141
@test_throws ArgumentError unique(pdf, true)
42+
43+
@test isempty(nonunique(DataFrame(a=PooledArray(Int[]))))
44+
@test typeof(nonunique(DataFrame(a=PooledArray(Int[])))) === Vector{Bool}
4245
end
4346

4447
@testset "nonunique, nonunique, unique! with extra argument" begin

test/grouping.jl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4531,4 +4531,9 @@ end
45314531
end
45324532
end
45334533

4534+
@testset "no levels in pooled grouping bug #3393" begin
4535+
@test isempty(groupby_checked(DataFrame(x=PooledArray([missing])), :x, skipmissing=true))
4536+
@test isempty(groupby_checked(DataFrame(x=categorical([missing])), :x, skipmissing=true))
4537+
end
4538+
45344539
end # module

0 commit comments

Comments
 (0)