Skip to content

Commit 7561d29

Browse files
committed
add ability to force wavefrontsize64,
test rng with wavefrontsize64 on RDNA as well
1 parent cd25fb6 commit 7561d29

File tree

4 files changed

+14
-9
lines changed

4 files changed

+14
-9
lines changed

src/compiler/codegen.jl

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,14 +118,13 @@ end
118118

119119
function compiler_config(dev::HIP.HIPDevice;
120120
name::Union{String, Nothing} = nothing, kernel::Bool = true,
121-
unsafe_fp_atomics::Bool = true,
121+
unsafe_fp_atomics::Bool = true, wavefrontsize64::Bool = HIP.wavefrontsize(dev) == 64,
122122
)
123123
dev_isa, features = parse_llvm_features(HIP.gcn_arch(dev))
124124
if !isempty(features)
125125
features = "$features,"
126126
end
127127

128-
wavefrontsize64 = HIP.wavefrontsize(dev) == 64
129128
features = if wavefrontsize64
130129
features * "-wavefrontsize32,+wavefrontsize64"
131130
else

src/highlevel.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ register methods for the `AMDGPU.Adaptor` type.
9393
rocconvert(arg) = adapt(Runtime.Adaptor(), arg)
9494

9595
const MACRO_KWARGS = [:launch]
96-
const COMPILER_KWARGS = [:name, :unsafe_fp_atomics]
96+
const COMPILER_KWARGS = [:name, :unsafe_fp_atomics, :wavefrontsize64]
9797
const LAUNCH_KWARGS = [:gridsize, :groupsize, :shmem, :stream, :cooperative]
9898

9999
"""

test/device/random.jl

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,11 @@ function apply_seed(seed)
1818
end
1919
end
2020

21-
@testset "rand($T), seed $seed" for T in (
21+
wavefrontsize64 = AMDGPU.HIP.wavefrontsize(AMDGPU.device()) == 32 ? [false, true] : [true]
22+
23+
@testset "rand($T), seed $seed, wavefrontsize64 = $wavefrontsize64" for T in (
2224
Int32, UInt32, Int64, UInt64, Int128, UInt128, Float16, Float32, Float64,
23-
), seed in (nothing, #=missing,=# 1234)
25+
), seed in (nothing, #=missing,=# 1234), wavefrontsize64 in wavefrontsize64
2426
# different kernel invocations should get different numbers
2527
@testset "across launches" begin
2628
function kernel(A::AbstractArray{T}, seed) where {T}
@@ -85,9 +87,9 @@ end
8587
end
8688
end
8789

88-
@testset "basic randn($T), seed $seed" for T in (
90+
@testset "basic randn($T), seed $seed, wavefrontsize64 = $wavefrontsize64" for T in (
8991
Float16, Float32, Float64,
90-
), seed in (nothing, #=missing,=# 1234)
92+
), seed in (nothing, #=missing,=# 1234), wavefrontsize64 in wavefrontsize64
9193
function kernel(A::AbstractArray{T}, seed) where {T}
9294
apply_seed(seed)
9395
tid = workitemIdx().x
@@ -108,9 +110,9 @@ end
108110
end
109111
end
110112

111-
@testset "basic randexp($T), seed $seed" for T in (
113+
@testset "basic randexp($T), seed $seed, wavefrontsize64 = $wavefrontsize64" for T in (
112114
Float16, Float32, Float64,
113-
), seed in (nothing, #=missing,=# 1234)
115+
), seed in (nothing, #=missing,=# 1234), wavefrontsize64 in wavefrontsize64
114116
function kernel(A::AbstractArray{T}, seed) where {T}
115117
apply_seed(seed)
116118
tid = workitemIdx().x

test/device/wavefront.jl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,4 +145,8 @@ end
145145
@roc kernel(RX)
146146
AMDGPU.synchronize()
147147
@allowscalar @test RX[1] == wavefrontsize
148+
149+
@roc wavefrontsize64=true kernel(RX)
150+
AMDGPU.synchronize()
151+
@allowscalar @test RX[1] == 64
148152
end

0 commit comments

Comments
 (0)