Skip to content

Conversation

@simeonschaub
Copy link
Member

@simeonschaub simeonschaub commented Oct 10, 2025

closes #376

(Removing the overrides doesn't really buy us much either, since I then
get a bunch of miscompilations, likely due to the presence of
throw statements)

I exempted `hypot` for now, since it actually seems to be used in
`GPUArrays`.
@github-actions
Copy link
Contributor

github-actions bot commented Oct 10, 2025

Your PR requires formatting changes to meet the project's style guidelines.
Please consider running Runic (git runic sds/float16) to apply these changes.

Click here to view the suggested changes.
diff --git a/lib/intrinsics/ext/SPIRVIntrinsicsSIMDExt.jl b/lib/intrinsics/ext/SPIRVIntrinsicsSIMDExt.jl
index 1ecaa61..b1a3cd9 100644
--- a/lib/intrinsics/ext/SPIRVIntrinsicsSIMDExt.jl
+++ b/lib/intrinsics/ext/SPIRVIntrinsicsSIMDExt.jl
@@ -9,8 +9,8 @@ const known_intrinsics = String[]
 
 # Generate vectorized math intrinsics
 for N in [2, 3, 4, 8, 16], T in [Float16, Float32, Float64]
-    VT = :(Vec{$N,$T})
-    LVT = :(SIMD.LVec{$N,$T})
+    VT = :(Vec{$N, $T})
+    LVT = :(SIMD.LVec{$N, $T})
 
     @eval begin
         # Unary operations
@@ -98,8 +98,8 @@ for N in [2, 3, 4, 8, 16], T in [Float16, Float32, Float64]
     end
 
     # Special operations with Int32 parameters
-    VIntT = :(Vec{$N,Int32})
-    LVIntT = :(SIMD.LVec{$N,Int32})
+    VIntT = :(Vec{$N, Int32})
+    LVIntT = :(SIMD.LVec{$N, Int32})
 
     @eval begin
         @device_function SPIRVIntrinsics.ilogb(x::$VT) = $VIntT(@builtin_ccall("ilogb", $LVIntT, ($LVT,), x.data))
@@ -112,9 +112,9 @@ end
 # nan functions - take unsigned integer codes and return floats
 for N in [2, 3, 4, 8, 16]
     @eval begin
-        @device_function SPIRVIntrinsics.nan(nancode::Vec{$N,UInt16}) = Vec{$N,Float16}(@builtin_ccall("nan", SIMD.LVec{$N,Float16}, (SIMD.LVec{$N,UInt16},), nancode.data))
-        @device_function SPIRVIntrinsics.nan(nancode::Vec{$N,UInt32}) = Vec{$N,Float32}(@builtin_ccall("nan", SIMD.LVec{$N,Float32}, (SIMD.LVec{$N,UInt32},), nancode.data))
-        @device_function SPIRVIntrinsics.nan(nancode::Vec{$N,UInt64}) = Vec{$N,Float64}(@builtin_ccall("nan", SIMD.LVec{$N,Float64}, (SIMD.LVec{$N,UInt64},), nancode.data))
+        @device_function SPIRVIntrinsics.nan(nancode::Vec{$N, UInt16}) = Vec{$N, Float16}(@builtin_ccall("nan", SIMD.LVec{$N, Float16}, (SIMD.LVec{$N, UInt16},), nancode.data))
+        @device_function SPIRVIntrinsics.nan(nancode::Vec{$N, UInt32}) = Vec{$N, Float32}(@builtin_ccall("nan", SIMD.LVec{$N, Float32}, (SIMD.LVec{$N, UInt32},), nancode.data))
+        @device_function SPIRVIntrinsics.nan(nancode::Vec{$N, UInt64}) = Vec{$N, Float64}(@builtin_ccall("nan", SIMD.LVec{$N, Float64}, (SIMD.LVec{$N, UInt64},), nancode.data))
     end
 end
 
diff --git a/lib/intrinsics/src/utils.jl b/lib/intrinsics/src/utils.jl
index 3e81fe7..0fdb74b 100644
--- a/lib/intrinsics/src/utils.jl
+++ b/lib/intrinsics/src/utils.jl
@@ -88,7 +88,7 @@ Base.Experimental.@MethodTable(method_table)
 
 macro device_override(ex)
     esc(quote
-        Base.Experimental.@overlay($method_table, $ex)
+            Base.Experimental.@overlay($method_table, $ex)
     end)
 end
 
diff --git a/src/compiler/compilation.jl b/src/compiler/compilation.jl
index 4ed2811..d68668b 100644
--- a/src/compiler/compilation.jl
+++ b/src/compiler/compilation.jl
@@ -17,8 +17,8 @@ GPUCompiler.isintrinsic(job::OpenCLCompilerJob, fn::String) =
            job, fn) ||
     in(fn, known_intrinsics) ||
     let SPIRVIntrinsicsSIMDExt = Base.get_extension(SPIRVIntrinsics, :SPIRVIntrinsicsSIMDExt)
-        SPIRVIntrinsicsSIMDExt !== nothing && in(fn, SPIRVIntrinsicsSIMDExt.known_intrinsics)
-    end ||
+    SPIRVIntrinsicsSIMDExt !== nothing && in(fn, SPIRVIntrinsicsSIMDExt.known_intrinsics)
+end ||
     contains(fn, "__spirv_")
 
 
diff --git a/test/intrinsics.jl b/test/intrinsics.jl
index ff6ded4..677e9ec 100644
--- a/test/intrinsics.jl
+++ b/test/intrinsics.jl
@@ -165,61 +165,61 @@ end
     @test call_on_device(OpenCL.mad, x, y, z) ≈ x * y + z
 end
 
-@testset "SIMD - $N x $T" for N in simd_ns, T in float_types
-    v = Vec{N, T}(ntuple(_ -> rand(T), N))
-
-    # unary ops: sin, cos, sqrt
-    a = call_on_device(sin, v)
-    @test all(a[i] ≈ sin(v[i]) for i in 1:N)
-
-    b = call_on_device(cos, v)
-    @test all(b[i] ≈ cos(v[i]) for i in 1:N)
-
-    c = call_on_device(sqrt, v)
-    @test all(c[i] ≈ sqrt(v[i]) for i in 1:N)
-
-    # binary ops: max, hypot
-    w = Vec{N, T}(ntuple(_ -> rand(T), N))
-    d = call_on_device(max, v, w)
-    @test all(d[i] == max(v[i], w[i]) for i in 1:N)
-
-    broken = ispocl && T == Float16
-    if !broken
-        h = call_on_device(hypot, v, w)
-        @test all(h[i] ≈ hypot(v[i], w[i]) for i in 1:N)
-    end
-
-    # ternary op: fma
-    x = Vec{N, T}(ntuple(_ -> rand(T), N))
-    e = call_on_device(fma, v, w, x)
-    @test all(e[i] ≈ fma(v[i], w[i], x[i]) for i in 1:N)
-
-    # special cases: ilogb, ldexp, ^ with Int32, rootn
-    v_pos = Vec{N, T}(ntuple(_ -> rand(T) + T(1), N))
-    @test call_on_device(OpenCL.ilogb, v_pos) isa Vec{N, Int32} broken = broken
-
-    k = Vec{N, Int32}(ntuple(_ -> rand(Int32.(-5:5)), N))
-    @test let
-        ldexp_result = call_on_device(ldexp, v_pos, k)
-        all(ldexp_result[i] ≈ ldexp(v_pos[i], k[i]) for i in 1:N)
-    end broken = broken
-
-    base = Vec{N, T}(ntuple(_ -> rand(T) + T(0.5), N))
-    exp_int = Vec{N, Int32}(ntuple(_ -> rand(Int32.(0:3)), N))
-    @test let
-        pow_result = call_on_device(^, base, exp_int)
-        all(pow_result[i] ≈ base[i] ^ exp_int[i] for i in 1:N)
-    end broken = broken
-
-    rootn_base = Vec{N, T}(ntuple(_ -> rand(T) * T(10) + T(1), N))
-    rootn_n = Vec{N, Int32}(ntuple(_ -> rand(Int32.(2:4)), N))
-    @test call_on_device(OpenCL.rootn, rootn_base, rootn_n) isa Vec{N, T} broken = broken
-
-    # special cases: nan
-    nan_code = Vec{N, Base.uinttype(T)}(ntuple(_ -> rand(Base.uinttype(T)), N))
-    nan_result = call_on_device(OpenCL.nan, nan_code)
-    @test all(isnan(nan_result[i]) for i in 1:N)
-end
+        @testset "SIMD - $N x $T" for N in simd_ns, T in float_types
+            v = Vec{N, T}(ntuple(_ -> rand(T), N))
+
+            # unary ops: sin, cos, sqrt
+            a = call_on_device(sin, v)
+            @test all(a[i] ≈ sin(v[i]) for i in 1:N)
+
+            b = call_on_device(cos, v)
+            @test all(b[i] ≈ cos(v[i]) for i in 1:N)
+
+            c = call_on_device(sqrt, v)
+            @test all(c[i] ≈ sqrt(v[i]) for i in 1:N)
+
+            # binary ops: max, hypot
+            w = Vec{N, T}(ntuple(_ -> rand(T), N))
+            d = call_on_device(max, v, w)
+            @test all(d[i] == max(v[i], w[i]) for i in 1:N)
+
+            broken = ispocl && T == Float16
+            if !broken
+                h = call_on_device(hypot, v, w)
+                @test all(h[i] ≈ hypot(v[i], w[i]) for i in 1:N)
+            end
+
+            # ternary op: fma
+            x = Vec{N, T}(ntuple(_ -> rand(T), N))
+            e = call_on_device(fma, v, w, x)
+            @test all(e[i] ≈ fma(v[i], w[i], x[i]) for i in 1:N)
+
+            # special cases: ilogb, ldexp, ^ with Int32, rootn
+            v_pos = Vec{N, T}(ntuple(_ -> rand(T) + T(1), N))
+            @test call_on_device(OpenCL.ilogb, v_pos) isa Vec{N, Int32} broken = broken
+
+            k = Vec{N, Int32}(ntuple(_ -> rand(Int32.(-5:5)), N))
+            @test let
+                ldexp_result = call_on_device(ldexp, v_pos, k)
+                all(ldexp_result[i] ≈ ldexp(v_pos[i], k[i]) for i in 1:N)
+            end broken = broken
+
+            base = Vec{N, T}(ntuple(_ -> rand(T) + T(0.5), N))
+            exp_int = Vec{N, Int32}(ntuple(_ -> rand(Int32.(0:3)), N))
+            @test let
+                pow_result = call_on_device(^, base, exp_int)
+                all(pow_result[i] ≈ base[i]^exp_int[i] for i in 1:N)
+            end broken = broken
+
+            rootn_base = Vec{N, T}(ntuple(_ -> rand(T) * T(10) + T(1), N))
+            rootn_n = Vec{N, Int32}(ntuple(_ -> rand(Int32.(2:4)), N))
+            @test call_on_device(OpenCL.rootn, rootn_base, rootn_n) isa Vec{N, T} broken = broken
+
+            # special cases: nan
+            nan_code = Vec{N, Base.uinttype(T)}(ntuple(_ -> rand(Base.uinttype(T)), N))
+            nan_result = call_on_device(OpenCL.nan, nan_code)
+            @test all(isnan(nan_result[i]) for i in 1:N)
+        end
 
 end
 

@codecov
Copy link

codecov bot commented Oct 10, 2025

Codecov Report

✅ All modified and coverable lines are covered by tests.
✅ Project coverage is 79.04%. Comparing base (d0e3372) to head (7e0917b).

Additional details and impacted files
@@               Coverage Diff               @@
##           sds/float16     #379      +/-   ##
===============================================
+ Coverage        79.01%   79.04%   +0.03%     
===============================================
  Files               12       12              
  Lines              672      673       +1     
===============================================
+ Hits               531      532       +1     
  Misses             141      141              

☔ View full report in Codecov by Sentry.
📢 Have feedback on the report? Share it here.

🚀 New features to boost your workflow:
  • ❄️ Test Analytics: Detect flaky tests, report on failures, and find test suite problems.

@simeonschaub simeonschaub linked an issue Oct 13, 2025 that may be closed by this pull request
Base automatically changed from sds/float16 to master October 14, 2025 11:21
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

support vectorized math intrinsics

1 participant