Skip to content

knuths_sample! is incorrect when used with offset axes #645

@yurivish

Description

@yurivish

The function knuths_sample! is documented here.

julia> StatsBase.knuths_sample!(OffsetArray(collect(1:11), -5:5), zeros(11))
11-element Array{Float64,1}:
      7.0
 559108.0
      0.0
      0.0
      4.557992976e9
     10.0
     11.0
     11.0
      9.0
      8.0
      4.417700624e9

The behavior is due to multiple incorrect uses of @inbounds.

function knuths_sample!(rng::AbstractRNG, a::AbstractArray, x::AbstractArray;
initshuffle::Bool=true)
n = length(a)
k = length(x)
k <= n || error("length(x) should not exceed length(a)")
# initialize
for i = 1:k
@inbounds x[i] = a[i]
end
if initshuffle
@inbounds for j = 1:k
l = rand(rng, j:k)
if l != j
t = x[j]
x[j] = x[l]
x[l] = t
end
end
end
# scan remaining
s = Sampler(rng, 1:k)
for i = k+1:n
if rand(rng) * i < k # keep it with probability k / i
@inbounds x[rand(rng, s)] = a[i]
end
end
return x
end

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions