SIMD support for math intrinsics #379

simeonschaub · 2025-10-10T09:43:19Z

closes #376

(Removing the overrides doesn't really buy us much either, since I then get a bunch of miscompilations, likely due to the presence of throw statements.) I exempted `hypot` for now, since that actually seems to be used in `GPUArrays`.

github-actions · 2025-10-10T09:44:20Z

Your PR requires formatting changes to meet the project's style guidelines.
Please consider running Runic (git runic sds/float16) to apply these changes.

Click here to view the suggested changes.

diff --git a/lib/intrinsics/ext/SPIRVIntrinsicsSIMDExt.jl b/lib/intrinsics/ext/SPIRVIntrinsicsSIMDExt.jl
index 1ecaa61..b1a3cd9 100644
--- a/lib/intrinsics/ext/SPIRVIntrinsicsSIMDExt.jl
+++ b/lib/intrinsics/ext/SPIRVIntrinsicsSIMDExt.jl
@@ -9,8 +9,8 @@ const known_intrinsics = String[]
 
 # Generate vectorized math intrinsics
 for N in [2, 3, 4, 8, 16], T in [Float16, Float32, Float64]
-    VT = :(Vec{$N,$T})
-    LVT = :(SIMD.LVec{$N,$T})
+    VT = :(Vec{$N, $T})
+    LVT = :(SIMD.LVec{$N, $T})
 
     @eval begin
         # Unary operations
@@ -98,8 +98,8 @@ for N in [2, 3, 4, 8, 16], T in [Float16, Float32, Float64]
     end
 
     # Special operations with Int32 parameters
-    VIntT = :(Vec{$N,Int32})
-    LVIntT = :(SIMD.LVec{$N,Int32})
+    VIntT = :(Vec{$N, Int32})
+    LVIntT = :(SIMD.LVec{$N, Int32})
 
     @eval begin
         @device_function SPIRVIntrinsics.ilogb(x::$VT) = $VIntT(@builtin_ccall("ilogb", $LVIntT, ($LVT,), x.data))
@@ -112,9 +112,9 @@ end
 # nan functions - take unsigned integer codes and return floats
 for N in [2, 3, 4, 8, 16]
     @eval begin
-        @device_function SPIRVIntrinsics.nan(nancode::Vec{$N,UInt16}) = Vec{$N,Float16}(@builtin_ccall("nan", SIMD.LVec{$N,Float16}, (SIMD.LVec{$N,UInt16},), nancode.data))
-        @device_function SPIRVIntrinsics.nan(nancode::Vec{$N,UInt32}) = Vec{$N,Float32}(@builtin_ccall("nan", SIMD.LVec{$N,Float32}, (SIMD.LVec{$N,UInt32},), nancode.data))
-        @device_function SPIRVIntrinsics.nan(nancode::Vec{$N,UInt64}) = Vec{$N,Float64}(@builtin_ccall("nan", SIMD.LVec{$N,Float64}, (SIMD.LVec{$N,UInt64},), nancode.data))
+        @device_function SPIRVIntrinsics.nan(nancode::Vec{$N, UInt16}) = Vec{$N, Float16}(@builtin_ccall("nan", SIMD.LVec{$N, Float16}, (SIMD.LVec{$N, UInt16},), nancode.data))
+        @device_function SPIRVIntrinsics.nan(nancode::Vec{$N, UInt32}) = Vec{$N, Float32}(@builtin_ccall("nan", SIMD.LVec{$N, Float32}, (SIMD.LVec{$N, UInt32},), nancode.data))
+        @device_function SPIRVIntrinsics.nan(nancode::Vec{$N, UInt64}) = Vec{$N, Float64}(@builtin_ccall("nan", SIMD.LVec{$N, Float64}, (SIMD.LVec{$N, UInt64},), nancode.data))
     end
 end
 
diff --git a/lib/intrinsics/src/utils.jl b/lib/intrinsics/src/utils.jl
index 3e81fe7..0fdb74b 100644
--- a/lib/intrinsics/src/utils.jl
+++ b/lib/intrinsics/src/utils.jl
@@ -88,7 +88,7 @@ Base.Experimental.@MethodTable(method_table)
 
 macro device_override(ex)
     esc(quote
-        Base.Experimental.@overlay($method_table, $ex)
+            Base.Experimental.@overlay($method_table, $ex)
     end)
 end
 
diff --git a/src/compiler/compilation.jl b/src/compiler/compilation.jl
index 4ed2811..d68668b 100644
--- a/src/compiler/compilation.jl
+++ b/src/compiler/compilation.jl
@@ -17,8 +17,8 @@ GPUCompiler.isintrinsic(job::OpenCLCompilerJob, fn::String) =
            job, fn) ||
     in(fn, known_intrinsics) ||
     let SPIRVIntrinsicsSIMDExt = Base.get_extension(SPIRVIntrinsics, :SPIRVIntrinsicsSIMDExt)
-        SPIRVIntrinsicsSIMDExt !== nothing && in(fn, SPIRVIntrinsicsSIMDExt.known_intrinsics)
-    end ||
+    SPIRVIntrinsicsSIMDExt !== nothing && in(fn, SPIRVIntrinsicsSIMDExt.known_intrinsics)
+end ||
     contains(fn, "__spirv_")
 
 
diff --git a/test/intrinsics.jl b/test/intrinsics.jl
index ff6ded4..677e9ec 100644
--- a/test/intrinsics.jl
+++ b/test/intrinsics.jl
@@ -165,61 +165,61 @@ end
     @test call_on_device(OpenCL.mad, x, y, z) ≈ x * y + z
 end
 
-@testset "SIMD - $N x $T" for N in simd_ns, T in float_types
-    v = Vec{N, T}(ntuple(_ -> rand(T), N))
-
-    # unary ops: sin, cos, sqrt
-    a = call_on_device(sin, v)
-    @test all(a[i] ≈ sin(v[i]) for i in 1:N)
-
-    b = call_on_device(cos, v)
-    @test all(b[i] ≈ cos(v[i]) for i in 1:N)
-
-    c = call_on_device(sqrt, v)
-    @test all(c[i] ≈ sqrt(v[i]) for i in 1:N)
-
-    # binary ops: max, hypot
-    w = Vec{N, T}(ntuple(_ -> rand(T), N))
-    d = call_on_device(max, v, w)
-    @test all(d[i] == max(v[i], w[i]) for i in 1:N)
-
-    broken = ispocl && T == Float16
-    if !broken
-        h = call_on_device(hypot, v, w)
-        @test all(h[i] ≈ hypot(v[i], w[i]) for i in 1:N)
-    end
-
-    # ternary op: fma
-    x = Vec{N, T}(ntuple(_ -> rand(T), N))
-    e = call_on_device(fma, v, w, x)
-    @test all(e[i] ≈ fma(v[i], w[i], x[i]) for i in 1:N)
-
-    # special cases: ilogb, ldexp, ^ with Int32, rootn
-    v_pos = Vec{N, T}(ntuple(_ -> rand(T) + T(1), N))
-    @test call_on_device(OpenCL.ilogb, v_pos) isa Vec{N, Int32} broken = broken
-
-    k = Vec{N, Int32}(ntuple(_ -> rand(Int32.(-5:5)), N))
-    @test let
-        ldexp_result = call_on_device(ldexp, v_pos, k)
-        all(ldexp_result[i] ≈ ldexp(v_pos[i], k[i]) for i in 1:N)
-    end broken = broken
-
-    base = Vec{N, T}(ntuple(_ -> rand(T) + T(0.5), N))
-    exp_int = Vec{N, Int32}(ntuple(_ -> rand(Int32.(0:3)), N))
-    @test let
-        pow_result = call_on_device(^, base, exp_int)
-        all(pow_result[i] ≈ base[i] ^ exp_int[i] for i in 1:N)
-    end broken = broken
-
-    rootn_base = Vec{N, T}(ntuple(_ -> rand(T) * T(10) + T(1), N))
-    rootn_n = Vec{N, Int32}(ntuple(_ -> rand(Int32.(2:4)), N))
-    @test call_on_device(OpenCL.rootn, rootn_base, rootn_n) isa Vec{N, T} broken = broken
-
-    # special cases: nan
-    nan_code = Vec{N, Base.uinttype(T)}(ntuple(_ -> rand(Base.uinttype(T)), N))
-    nan_result = call_on_device(OpenCL.nan, nan_code)
-    @test all(isnan(nan_result[i]) for i in 1:N)
-end
+        @testset "SIMD - $N x $T" for N in simd_ns, T in float_types
+            v = Vec{N, T}(ntuple(_ -> rand(T), N))
+
+            # unary ops: sin, cos, sqrt
+            a = call_on_device(sin, v)
+            @test all(a[i] ≈ sin(v[i]) for i in 1:N)
+
+            b = call_on_device(cos, v)
+            @test all(b[i] ≈ cos(v[i]) for i in 1:N)
+
+            c = call_on_device(sqrt, v)
+            @test all(c[i] ≈ sqrt(v[i]) for i in 1:N)
+
+            # binary ops: max, hypot
+            w = Vec{N, T}(ntuple(_ -> rand(T), N))
+            d = call_on_device(max, v, w)
+            @test all(d[i] == max(v[i], w[i]) for i in 1:N)
+
+            broken = ispocl && T == Float16
+            if !broken
+                h = call_on_device(hypot, v, w)
+                @test all(h[i] ≈ hypot(v[i], w[i]) for i in 1:N)
+            end
+
+            # ternary op: fma
+            x = Vec{N, T}(ntuple(_ -> rand(T), N))
+            e = call_on_device(fma, v, w, x)
+            @test all(e[i] ≈ fma(v[i], w[i], x[i]) for i in 1:N)
+
+            # special cases: ilogb, ldexp, ^ with Int32, rootn
+            v_pos = Vec{N, T}(ntuple(_ -> rand(T) + T(1), N))
+            @test call_on_device(OpenCL.ilogb, v_pos) isa Vec{N, Int32} broken = broken
+
+            k = Vec{N, Int32}(ntuple(_ -> rand(Int32.(-5:5)), N))
+            @test let
+                ldexp_result = call_on_device(ldexp, v_pos, k)
+                all(ldexp_result[i] ≈ ldexp(v_pos[i], k[i]) for i in 1:N)
+            end broken = broken
+
+            base = Vec{N, T}(ntuple(_ -> rand(T) + T(0.5), N))
+            exp_int = Vec{N, Int32}(ntuple(_ -> rand(Int32.(0:3)), N))
+            @test let
+                pow_result = call_on_device(^, base, exp_int)
+                all(pow_result[i] ≈ base[i]^exp_int[i] for i in 1:N)
+            end broken = broken
+
+            rootn_base = Vec{N, T}(ntuple(_ -> rand(T) * T(10) + T(1), N))
+            rootn_n = Vec{N, Int32}(ntuple(_ -> rand(Int32.(2:4)), N))
+            @test call_on_device(OpenCL.rootn, rootn_base, rootn_n) isa Vec{N, T} broken = broken
+
+            # special cases: nan
+            nan_code = Vec{N, Base.uinttype(T)}(ntuple(_ -> rand(Base.uinttype(T)), N))
+            nan_result = call_on_device(OpenCL.nan, nan_code)
+            @test all(isnan(nan_result[i]) for i in 1:N)
+        end
 
 end

codecov · 2025-10-10T09:45:48Z

Codecov Report

✅ All modified and coverable lines are covered by tests.
✅ Project coverage is 79.04%. Comparing base (d0e3372) to head (7e0917b).

Additional details and impacted files

@@               Coverage Diff               @@
##           sds/float16     #379      +/-   ##
===============================================
+ Coverage        79.01%   79.04%   +0.03%     
===============================================
  Files               12       12              
  Lines              672      673       +1     
===============================================
+ Hits               531      532       +1     
  Misses             141      141

☔ View full report in Codecov by Sentry.
📢 Have feedback on the report? Share it here.

🚀 New features to boost your workflow:

❄️ Test Analytics: Detect flaky tests, report on failures, and find test suite problems.

simeonschaub added 5 commits October 5, 2025 17:52

add Float16 math functions

8923030

add tests

edb3228

mark intrinsics missing in pocl as broken

8d2478e

(Removing the overrides doesn't really buy us much either, since I then get a bunch of miscompilations, likely due to the presence of throw statements.) I exempted `hypot` for now, since that actually seems to be used in `GPUArrays`.

more extensive testing

d0e3372

SIMD support for math intrinsics

cfce757

simeonschaub added 3 commits October 10, 2025 13:07

fix test failures

b5a3685

mark ldexp as broken for pocl Float16

0e8dfb6

pown is also broken

7e0917b

simeonschaub linked an issue Oct 13, 2025 that may be closed by this pull request

support vectorized math intrinsics #376

Open

Base automatically changed from sds/float16 to master October 14, 2025 11:21

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

SIMD support for math intrinsics #379

SIMD support for math intrinsics #379

Uh oh!

simeonschaub commented Oct 10, 2025 •

edited

Loading

Uh oh!

github-actions bot commented Oct 10, 2025 •

edited

Loading

Uh oh!

codecov bot commented Oct 10, 2025 •

edited

Loading

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

1 participant

SIMD support for math intrinsics #379

Are you sure you want to change the base?

SIMD support for math intrinsics #379

Uh oh!

Conversation

simeonschaub commented Oct 10, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

github-actions bot commented Oct 10, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

codecov bot commented Oct 10, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Codecov Report

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

1 participant

simeonschaub commented Oct 10, 2025 •

edited

Loading

github-actions bot commented Oct 10, 2025 •

edited

Loading

codecov bot commented Oct 10, 2025 •

edited

Loading