Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion lib/JLArrays/Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "JLArrays"
uuid = "27aeb0d3-9eb9-45fb-866b-73c2ecf80fcb"
authors = ["Tim Besard <[email protected]>"]
version = "0.3.0"
version = "0.3.1"

[deps]
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
Expand Down
3 changes: 2 additions & 1 deletion lib/JLArrays/src/JLArrays.jl
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,7 @@ KernelAbstractions.allocate(::JLBackend, ::Type{T}, dims::Tuple) where T = JLArr
end

if KernelAbstractions.workgroupsize(kernel) <: DynamicSize && workgroupsize === nothing
workgroupsize = (1024,) # Vectorization, 4x unrolling, minimal grain size
workgroupsize = (MAXTHREADS,) # Vectorization, 4x unrolling, minimal grain size
end
iterspace, dynamic = partition(kernel, ndrange, workgroupsize)
# partition checked that the ndranges agreed
Expand All @@ -403,6 +403,7 @@ else
end

function (obj::Kernel{JLBackend})(args...; ndrange=nothing, workgroupsize=nothing)
ndrange, workgroupsize, _, _ = launch_config(obj, ndrange, workgroupsize)
device_args = jlconvert.(args)
new_obj = convert_to_cpu(obj)
new_obj(device_args...; ndrange, workgroupsize)
Expand Down
5 changes: 3 additions & 2 deletions src/host/random.jl
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,10 @@ function Random.randn!(rng::RNG, A::AnyGPUArray{T}) where T <: Number
threads = (length(A) - 1) ÷ 2 + 1
@kernel function randn!(a, randstates)
i = @index(Global, Linear)
threadidx = @index(Local, Linear)
idx = 2*(i - 1) + 1
U1 = gpu_rand(T, i, randstates)
U2 = gpu_rand(T, i, randstates)
U1 = gpu_rand(T, threadidx, randstates)
U2 = gpu_rand(T, threadidx, randstates)
Z0 = sqrt(T(-2.0)*log(U1))*cos(T(2pi)*U2)
Z1 = sqrt(T(-2.0)*log(U1))*sin(T(2pi)*U2)
@inbounds a[idx] = Z0
Expand Down
16 changes: 12 additions & 4 deletions test/testsuite/random.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,25 @@
end
cpu_rng = Random.default_rng()

SEEDING_BROKEN = (rng != cpu_rng) && !contains(string(AT), "JLArray")

@testset "rand" begin # uniform
for T in eltypes, d in (10, (10,10))
@testset "$d $T" for T in eltypes, d in (10, (10, 10), (1024, 1024))
A = AT{T}(undef, d)
B = copy(A)
rand!(rng, A)
rand!(rng, B)
@test Array(A) != Array(B)

A = AT(rand(T, d))
B = AT(rand(T, d))

Random.seed!(rng)
Random.seed!(rng, 1)
rand!(rng, A)
Random.seed!(rng, 1)
rand!(rng, B)
@test all(Array(A) .== Array(B))
@test Array(A) == Array(B) broken=SEEDING_BROKEN && (prod(d) > length(rng.state))

if rng != cpu_rng
rand!(cpu_rng, A)
Expand All @@ -44,19 +49,22 @@
@testset "randn" begin # normally-distributed
# XXX: randn calls sqrt, and Base's sqrt(::Complex) performs
# checked type conversions that throw boxed numbers.
for T in filter(isrealfloattype, eltypes), d in (2, (2,2))
@testset "$d $T" for T in filter(isrealfloattype, eltypes), d in (2, (2, 2), (1024, 1024))
A = AT{T}(undef, d)
B = copy(A)
randn!(rng, A)
randn!(rng, B)
@test Array(A) != Array(B)

A = AT(rand(T, d))
B = AT(rand(T, d))

Random.seed!(rng)
Random.seed!(rng, 1)
randn!(rng, A)
Random.seed!(rng, 1)
randn!(rng, B)
@test Array(A) == Array(B)
@test Array(A) == Array(B) broken=SEEDING_BROKEN && (prod(d) > (2 * length(rng.state)))

if rng != cpu_rng
randn!(cpu_rng, A)
Expand Down
Loading