-
Notifications
You must be signed in to change notification settings - Fork 13
Open
Description
If length of pool is much smaller than the number of entries we can run the following (working code):
function median_fast(x::PooledVector)
n = length(x)
p = sortperm(x.pool)
counts = zeros(Int, length(p))
for v in x.refs
counts[v] += 1
end
cum = 0
for (j,i) in enumerate(p)
cum += counts[i]
if isodd(n)
if cum >= div(n + 1, 2)
return middle(x.pool[i])
end
else
if cum >= div(n, 2)
if cum == div(n, 2)
return middle(x.pool[i], x.pool[p[j+1]])
else
return middle(x.pool[i])
end
end
end
end
error("unreachable reached")
end
Example timings:
julia> x = PooledArray(rand(1:100, 10^8));
julia> @time median_fast(x);
0.063734 seconds (4 allocations: 1.781 KiB)
julia> @time median_fast(x);
0.070556 seconds (4 allocations: 1.781 KiB)
julia> @time median(x);
0.931585 seconds (3 allocations: 762.940 MiB, 9.86% gc time)
julia> @time median(x);
0.955555 seconds (3 allocations: 762.940 MiB, 14.29% gc time)
Metadata
Metadata
Assignees
Labels
No labels