Skip to content

Commit 85fa306

Browse files
authored
Faster computation of quantiles in describe (#2909)
Computing all three quantiles when only the median is needed is significantly slower than computing just the requested ones. Also avoid trying to compute quantiles for columns whose element type does not support basic arithmetic: the failure only occurs after the vector has been sorted, by which point almost all of the work has already been done.
1 parent 115bcc9 commit 85fa306

File tree

2 files changed

+18
-5
lines changed

2 files changed

+18
-5
lines changed

src/abstractdataframe/abstractdataframe.jl

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -703,10 +703,23 @@ function get_stats(@nospecialize(col::Union{AbstractVector, Base.SkipMissing}),
703703
d = Dict{Symbol, Any}()
704704

705705
if :q25 in stats || :median in stats || :q75 in stats
706-
q = try quantile(col, [.25, .5, .75]) catch; (nothing, nothing, nothing) end
707-
d[:q25] = q[1]
708-
d[:median] = q[2]
709-
d[:q75] = q[3]
706+
# types that do not support basic arithmetic (like strings) will only fail
707+
# after sorting the data, so check this beforehand to fail early
708+
T = eltype(col)
709+
if isconcretetype(T) && !hasmethod(-, Tuple{T, T})
710+
d[:q25] = d[:median] = d[:q75] = nothing
711+
else
712+
mcol = Base.copymutable(col)
713+
if :q25 in stats
714+
d[:q25] = try quantile!(mcol, 0.25) catch; nothing; end
715+
end
716+
if :median in stats
717+
d[:median] = try quantile!(mcol, 0.50) catch; nothing; end
718+
end
719+
if :q75 in stats
720+
d[:q75] = try quantile!(mcol, 0.75) catch; nothing; end
721+
end
722+
end
710723
end
711724

712725
if :min in stats || :max in stats

test/dataframe.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -668,7 +668,7 @@ end
668668
nothing, nothing, nothing],
669669
min = [1.0, 1.0, "a", "a", Date(2000), 1],
670670
q25 = [1.75, 1.5, nothing, nothing, nothing, nothing],
671-
median = [2.5, 2.0, nothing, nothing, nothing, nothing],
671+
median = [2.5, 2.0, nothing, nothing, VERSION >= v"1.7.0-beta1.2" ? Date(2002) : nothing, nothing],
672672
q75 = [3.25, 2.5, nothing, nothing, nothing, nothing],
673673
max = [4.0, 3.0, "d", "c", Date(2004), 2],
674674
nunique = [nothing, nothing, 4, 3, 4, 2],

0 commit comments

Comments
 (0)