Skip to content

Commit 3f9cdbe

Browse files
authored
Merge pull request #822
use correct wavefrontsize for device-side rng
2 parents 20012bb + b724f6b commit 3f9cdbe

File tree

5 files changed

+19
-14
lines changed

5 files changed

+19
-14
lines changed

src/compiler/codegen.jl

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,14 +118,13 @@ end
118118

119119
function compiler_config(dev::HIP.HIPDevice;
120120
name::Union{String, Nothing} = nothing, kernel::Bool = true,
121-
unsafe_fp_atomics::Bool = true,
121+
unsafe_fp_atomics::Bool = true, wavefrontsize64::Bool = HIP.wavefrontsize(dev) == 64,
122122
)
123123
dev_isa, features = parse_llvm_features(HIP.gcn_arch(dev))
124124
if !isempty(features)
125125
features = "$features,"
126126
end
127127

128-
wavefrontsize64 = HIP.wavefrontsize(dev) == 64
129128
features = if wavefrontsize64
130129
features * "-wavefrontsize32,+wavefrontsize64"
131130
else

src/device/random.jl

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -82,11 +82,10 @@ end
8282
threadId = workitemIdx().x +
8383
(workitemIdx().y - Int32(1)) * workgroupDim().x +
8484
(workitemIdx().z - Int32(1)) * workgroupDim().x * workgroupDim().y
85-
warpId = (threadId - Int32(1)) >> 0x5 + Int32(1) # fld1 by 32
85+
wavefrontsize_log2 = ifelse(wavefrontsize() == UInt32(32), 0x5, 0x6)
86+
warpId = (threadId - Int32(1)) >> wavefrontsize_log2 + Int32(1) # fld1 by the wavefront size (32 or 64)
8687

87-
if field === :seed
88-
@inbounds global_random_seed()[1]
89-
elseif field === :key
88+
if field === :key
9089
@inbounds global_random_keys()[warpId]
9190
elseif field === :ctr1
9291
@inbounds global_random_counters()[warpId]
@@ -104,7 +103,8 @@ end
104103
threadId = workitemIdx().x +
105104
(workitemIdx().y - Int32(1)) * workgroupDim().x +
106105
(workitemIdx().z - Int32(1)) * workgroupDim().x * workgroupDim().y
107-
warpId = (threadId - Int32(1)) >> 0x5 + Int32(1) # fld1 by 32
106+
wavefrontsize_log2 = ifelse(wavefrontsize() == UInt32(32), 0x5, 0x6)
107+
warpId = (threadId - Int32(1)) >> wavefrontsize_log2 + Int32(1) # fld1 by the wavefront size (32 or 64)
108108

109109
if field === :key
110110
@inbounds global_random_keys()[warpId] = x

src/highlevel.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ register methods for the `AMDGPU.Adaptor` type.
9393
rocconvert(arg) = adapt(Runtime.Adaptor(), arg)
9494

9595
const MACRO_KWARGS = [:launch]
96-
const COMPILER_KWARGS = [:name, :unsafe_fp_atomics]
96+
const COMPILER_KWARGS = [:name, :unsafe_fp_atomics, :wavefrontsize64]
9797
const LAUNCH_KWARGS = [:gridsize, :groupsize, :shmem, :stream, :cooperative]
9898

9999
"""

test/device/random.jl

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,11 @@ function apply_seed(seed)
1818
end
1919
end
2020

21-
@testset "rand($T), seed $seed" for T in (
21+
wavefrontsize64 = AMDGPU.HIP.wavefrontsize(AMDGPU.device()) == 32 ? [false, true] : [true]
22+
23+
@testset "rand($T), seed $seed, wavefrontsize64 = $wavefrontsize64" for T in (
2224
Int32, UInt32, Int64, UInt64, Int128, UInt128, Float16, Float32, Float64,
23-
), seed in (nothing, #=missing,=# 1234)
25+
), seed in (nothing, #=missing,=# 1234), wavefrontsize64 in wavefrontsize64
2426
# different kernel invocations should get different numbers
2527
@testset "across launches" begin
2628
function kernel(A::AbstractArray{T}, seed) where {T}
@@ -85,9 +87,9 @@ end
8587
end
8688
end
8789

88-
@testset "basic randn($T), seed $seed" for T in (
90+
@testset "basic randn($T), seed $seed, wavefrontsize64 = $wavefrontsize64" for T in (
8991
Float16, Float32, Float64,
90-
), seed in (nothing, #=missing,=# 1234)
92+
), seed in (nothing, #=missing,=# 1234), wavefrontsize64 in wavefrontsize64
9193
function kernel(A::AbstractArray{T}, seed) where {T}
9294
apply_seed(seed)
9395
tid = workitemIdx().x
@@ -108,9 +110,9 @@ end
108110
end
109111
end
110112

111-
@testset "basic randexp($T), seed $seed" for T in (
113+
@testset "basic randexp($T), seed $seed, wavefrontsize64 = $wavefrontsize64" for T in (
112114
Float16, Float32, Float64,
113-
), seed in (nothing, #=missing,=# 1234)
115+
), seed in (nothing, #=missing,=# 1234), wavefrontsize64 in wavefrontsize64
114116
function kernel(A::AbstractArray{T}, seed) where {T}
115117
apply_seed(seed)
116118
tid = workitemIdx().x

test/device/wavefront.jl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,4 +145,8 @@ end
145145
@roc kernel(RX)
146146
AMDGPU.synchronize()
147147
@allowscalar @test RX[1] == wavefrontsize
148+
149+
@roc wavefrontsize64=true kernel(RX)
150+
AMDGPU.synchronize()
151+
@allowscalar @test RX[1] == 64
148152
end

0 commit comments

Comments
 (0)