diff --git a/src/host/random.jl b/src/host/random.jl index 256ecea9..7759e841 100644 --- a/src/host/random.jl +++ b/src/host/random.jl @@ -96,9 +96,10 @@ function Random.randn!(rng::RNG, A::AnyGPUArray{T}) where T <: Number threads = (length(A) - 1) ÷ 2 + 1 @kernel function randn!(a, randstates) i = @index(Global, Linear) + threadidx = @index(Local, Linear) idx = 2*(i - 1) + 1 - U1 = gpu_rand(T, i, randstates) - U2 = gpu_rand(T, i, randstates) + U1 = gpu_rand(T, threadidx, randstates) + U2 = gpu_rand(T, threadidx, randstates) Z0 = sqrt(T(-2.0)*log(U1))*cos(T(2pi)*U2) Z1 = sqrt(T(-2.0)*log(U1))*sin(T(2pi)*U2) @inbounds a[idx] = Z0 diff --git a/test/testsuite/random.jl b/test/testsuite/random.jl index f2cf832a..f9430258 100644 --- a/test/testsuite/random.jl +++ b/test/testsuite/random.jl @@ -44,7 +44,7 @@ @testset "randn" begin # normally-distributed # XXX: randn calls sqrt, and Base's sqrt(::Complex) performs # checked type conversions that throw boxed numbers. - for T in filter(isrealfloattype, eltypes), d in (2, (2,2)) + for T in filter(isrealfloattype, eltypes), d in (2, (2, 2), (128, 128)) A = AT{T}(undef, d) B = copy(A) randn!(rng, A) @@ -56,7 +56,7 @@ randn!(rng, A) Random.seed!(rng, 1) randn!(rng, B) - @test Array(A) == Array(B) + @test Array(A) ≈ Array(B) if rng != cpu_rng randn!(cpu_rng, A)