Skip to content

Commit 5cfe97b

Browse files
authored
Re-enable ci for amd math fns (#1537)
* Re-enable ci for amd math fns
* better amd gpu errs
* print
* amd intrs
* Update Project.toml
1 parent a93b618 commit 5cfe97b

File tree

3 files changed

+62
-20
lines changed

3 files changed

+62
-20
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ EnzymeStaticArraysExt = "StaticArrays"
3030
CEnum = "0.4, 0.5"
3131
ChainRulesCore = "1"
3232
EnzymeCore = "0.7.5"
33-
Enzyme_jll = "0.0.122"
33+
Enzyme_jll = "0.0.123"
3434
GPUCompiler = "0.21, 0.22, 0.23, 0.24, 0.25, 0.26"
3535
LLVM = "6.1, 7"
3636
ObjectFile = "0.4"

src/compiler.jl

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1632,7 +1632,7 @@ function emit_error(B::LLVM.IRBuilder, orig, string)
16321632
string*=sprint(io->Base.show_backtrace(io, bt))
16331633
end
16341634

1635-
ct = if occursin("ptx", LLVM.triple(mod))
1635+
ct = if occursin("ptx", LLVM.triple(mod)) || occursin("amdgcn", LLVM.triple(mod))
16361636
GPUCompiler.emit_exception!(B, string, orig)
16371637
else
16381638
call!(B, funcT, func, LLVM.Value[globalstring_ptr!(B, string)])
@@ -5932,6 +5932,46 @@ function GPUCompiler.codegen(output::Symbol, job::CompilerJob{<:EnzymeTarget};
59325932
end
59335933
end
59345934
end
5935+
if parent_job.config.target isa GPUCompiler.GCNCompilerTarget
    # For AMD GPU (GCN) targets: make sure a function named
    # `__ocml_<name>_<f64|f32>` exists in `mod` for every name below, and tag
    # each newly created function with an "implements" string attribute whose
    # value is `llvm.<name>.<f64|f32>`.
    #
    # NOTE(review): every entry is declared with exactly one floating-point
    # parameter (see `FT` below), yet the list contains names such as "atan2",
    # "fma" and "pow" that look like multi-argument routines — confirm that the
    # single-parameter declaration is intended for those.
    # NOTE(review): "log2" used to appear twice in this list (the second
    # occurrence was a no-op because of the `haskey` guard). By analogy with
    # the other `native_*` pairs, "native_log2" may have been intended —
    # confirm before adding it.
    arg1 = ("acos", "acosh", "asin",
            "asinh", "atan2", "atan",
            "atanh", "cbrt", "ceil",
            "copysign", "cos", "native_cos",
            "cosh", "cospi", "i0",
            "i1", "erfc", "erfcinv",
            "erfcx", "erf", "erfinv",
            "exp10", "native_exp10", "exp2",
            "exp", "native_exp", "expm1",
            "fabs", "fdim", "floor",
            "fma", "fmax", "fmin",
            "fmod", "frexp", "hypot",
            "ilogb", "isfinite", "isinf",
            "isnan", "j0", "j1",
            "ldexp", "lgamma", "log10",
            "native_log10", "log1p", "log2",
            "logb", "log",
            "native_log", "modf", "nearbyint",
            "nextafter", "len3", "len4",
            "ncdf", "ncdfinv", "pow",
            "pown", "rcbrt", "remainder",
            "remquo", "rhypot", "rint",
            "rlen3", "rlen4", "round",
            "rsqrt", "scalb", "scalbn",
            "signbit", "sincos", "sincospi",
            "sin", "native_sin", "sinh",
            "sinpi", "sqrt", "native_sqrt",
            "tan", "tanh", "tgamma",
            "trunc", "y0", "y1")
    # (LLVM scalar type, suffix used in both the `__ocml_*` and `llvm.*` names).
    # Built once instead of once per name.
    precisions = ((LLVM.DoubleType(), "f64"), (LLVM.FloatType(), "f32"))
    for n in arg1, (T, lpf) in precisions
        fname = "__ocml_"*n*"_"*lpf
        # Functions that are already present in the module are left untouched.
        if !haskey(functions(mod), fname)
            FT = LLVM.FunctionType(T, [T], vararg=false)
            wrapper_f = LLVM.Function(mod, fname, FT)
            llname = "llvm."*n*"."*lpf
            push!(function_attributes(wrapper_f), StringAttribute("implements", llname))
        end
    end
end
59355975
end
59365976
for (name, fnty) in fnsToInject
59375977
for (T, JT, pf) in ((LLVM.DoubleType(), Float64, ""), (LLVM.FloatType(), Float32, "f"))

test/amdgpu.jl

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -38,15 +38,17 @@ function grad_exp_kernel(A, dA)
3838
return nothing
3939
end
4040

41-
# @testset "exp_kernel" begin
42-
# A = AMDGPU.ones(64,)
43-
# @roc groupsize=length(A) exp_kernel(A)
44-
# A = AMDGPU.ones(64,)
45-
# dA = similar(A)
46-
# dA .= 1
47-
# @roc groupsize=length(A) grad_exp_kernel(A, dA)
48-
# @test all(dA .== exp(1.f0))
49-
# end
41+
# NOTE(review): switches on Enzyme's `printall!` setting for everything that
# runs after this line in the file. This looks like a debugging aid (verbose
# output) — confirm it should stay enabled in CI.
Enzyme.API.printall!(true)
42+
43+
# Launches `exp_kernel` once, then launches `grad_exp_kernel` on an all-ones
# input `A` with an all-ones `dA`, and checks that every entry of `dA` ends up
# approximately equal to exp(1).
@testset "exp_kernel" begin
    A = AMDGPU.ones(64,)
    @roc groupsize=length(A) exp_kernel(A)
    # Fresh all-ones input for the gradient launch.
    # NOTE(review): presumably needed because the launch above mutates `A`
    # in place — verify against `exp_kernel`.
    A = AMDGPU.ones(64,)
    dA = similar(A)
    dA .= 1
    @roc groupsize=length(A) grad_exp_kernel(A, dA)
    # Approximate comparison: the value is a Float32 computed on the device,
    # so bit-exact equality with the host `exp` is not guaranteed. This also
    # matches the `cos_kernel` testset, which uses `≈`.
    @test all(dA .≈ exp(1.f0))
end
5052

5153
function cos_kernel(A)
5254
i = workitemIdx().x
@@ -61,12 +63,12 @@ function grad_cos_kernel(A, dA)
6163
return nothing
6264
end
6365

64-
# @testset "cos_kernel" begin
65-
# A = AMDGPU.ones(64,)
66-
# @roc groupsize=length(A) cos_kernel(A)
67-
# A = AMDGPU.ones(64,)
68-
# dA = similar(A)
69-
# dA .= 1
70-
# @roc groupsize=length(A) grad_cos_kernel(A, dA)
71-
# @test all(dA .≈ -sin(1.f0))
72-
# end
66+
# Launches `cos_kernel` once, then launches `grad_cos_kernel` on an all-ones
# input with an all-ones second argument, and checks that every entry of that
# second array is approximately -sin(1).
@testset "cos_kernel" begin
    x = AMDGPU.ones(64)
    @roc groupsize=length(x) cos_kernel(x)

    # Start again from a fresh all-ones input for the gradient launch.
    x = AMDGPU.ones(64)
    dx = fill!(similar(x), 1)
    @roc groupsize=length(x) grad_cos_kernel(x, dx)

    expected = -sin(1.0f0)
    @test all(dx .≈ expected)
end

0 commit comments

Comments
 (0)