Auto-detect SPIR-V extensions from device capabilities#410

Open
SimonDanisch wants to merge 5 commits into master from auto-spirv-extensions
Conversation

@SimonDanisch (Member)

This is needed to support Atomix in KernelAbstractions, since we can't set the `extensions` parameter from a KA kernel launch.
This seems to be the most minimal fix; I'm not sure whether there are any problems with it or whether it would need to be more complete.

When using KernelAbstractions with features like Atomix.@atomic on Float32,
the required SPIR-V extensions (e.g., SPV_EXT_shader_atomic_float_add) were
not being enabled, because KernelAbstractions calls @opencl internally without
the `extensions` parameter.

This change automatically queries the device's OpenCL extensions and maps them
to the corresponding SPIR-V extensions that should be enabled during compilation.

Currently supported mappings:
- cl_ext_float_atomics -> SPV_EXT_shader_atomic_float_add,
                          SPV_EXT_shader_atomic_float_min_max

The mapping dictionary can be extended as needed for other extension pairs.

Fixes the issue where Float32 atomics fail through KernelAbstractions even
when the device supports cl_ext_float_atomics.
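
The approach described above can be sketched as follows. This is an illustrative sketch, not the exact code added in this PR: the dictionary name, helper name, and the restriction to the single cl_ext_float_atomics mapping are assumptions based on the description; the real device extension list would come from `dev.extensions` in OpenCL.jl.

```julia
# Hypothetical sketch of the OpenCL -> SPIR-V extension mapping described
# in this PR. Names are illustrative, not the PR's actual identifiers.
const OPENCL_TO_SPIRV_EXTENSIONS = Dict(
    "cl_ext_float_atomics" => [
        "SPV_EXT_shader_atomic_float_add",
        "SPV_EXT_shader_atomic_float_min_max",
    ],
)

# Given the extensions a device reports (e.g. `dev.extensions`), collect
# every SPIR-V extension that should be enabled during compilation.
function auto_spirv_extensions(device_extensions::Vector{String})
    exts = String[]
    for cl_ext in device_extensions
        append!(exts, get(OPENCL_TO_SPIRV_EXTENSIONS, cl_ext, String[]))
    end
    return unique!(exts)
end
```

The compiler config would then merge this list with any user-supplied `extensions` (the PR's diff does exactly that with `unique!(vcat(extensions, auto_extensions))`), so manual overrides keep working.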

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
@github-actions bot (Contributor) commented Jan 8, 2026

Your PR requires formatting changes to meet the project's style guidelines.
Please consider running Runic (git runic master) to apply these changes.

Suggested changes:
diff --git a/lib/intrinsics/src/integer.jl b/lib/intrinsics/src/integer.jl
index 1a76c38..9e82919 100644
--- a/lib/intrinsics/src/integer.jl
+++ b/lib/intrinsics/src/integer.jl
@@ -36,7 +36,7 @@ for gentype in generic_integer_types
 
 @device_function popcount(x::$gentype) = @builtin_ccall("popcount", $gentype, ($gentype,), x)
 
-@device_override Base.bitreverse(x::$gentype) = @builtin_ccall("bit_reverse", $gentype, ($gentype,), x)
+        @device_override Base.bitreverse(x::$gentype) = @builtin_ccall("bit_reverse", $gentype, ($gentype,), x)
 
 @device_function mad24(x::$gentype, y::$gentype, z::$gentype) = @builtin_ccall("mad24", $gentype, ($gentype, $gentype, $gentype), x, y, z)
 @device_function mul24(x::$gentype, y::$gentype) = @builtin_ccall("mul24", $gentype, ($gentype, $gentype), x, y)
diff --git a/src/compiler/compilation.jl b/src/compiler/compilation.jl
index d084f3e..b02fb21 100644
--- a/src/compiler/compilation.jl
+++ b/src/compiler/compilation.jl
@@ -159,8 +159,10 @@ function compiler_config(dev::cl.Device; kwargs...)
     end
     return config
 end
-@noinline function _compiler_config(dev; kernel=true, name=nothing, always_inline=false,
-                                     extensions::Vector{String}=String[], kwargs...)
+@noinline function _compiler_config(
+        dev; kernel = true, name = nothing, always_inline = false,
+        extensions::Vector{String} = String[], kwargs...
+    )
     supports_fp16 = "cl_khr_fp16" in dev.extensions
     supports_fp64 = "cl_khr_fp64" in dev.extensions
 
@@ -170,8 +172,10 @@ end
     all_extensions = unique!(vcat(extensions, auto_extensions))
 
     # create GPUCompiler objects
-    target = SPIRVCompilerTarget(; supports_fp16, supports_fp64, validate=true,
-                                  extensions=all_extensions, kwargs...)
+    target = SPIRVCompilerTarget(;
+        supports_fp16, supports_fp64, validate = true,
+        extensions = all_extensions, kwargs...
+    )
     params = OpenCLCompilerParams()
     CompilerConfig(target, params; kernel, name, always_inline)
 end
diff --git a/test/spirv_extensions.jl b/test/spirv_extensions.jl
index 2645153..6f3db93 100644
--- a/test/spirv_extensions.jl
+++ b/test/spirv_extensions.jl
@@ -3,63 +3,63 @@ using Atomix: Atomix
 
 @testset "spirv_extensions" begin
 
-@testset "bitreverse KernelAbstractions kernel" begin
-    @kernel function bitreverse_ka!(out, inp)
-        i = @index(Global)
-        @inbounds out[i] = bitreverse(inp[i])
-    end
+    @testset "bitreverse KernelAbstractions kernel" begin
+        @kernel function bitreverse_ka!(out, inp)
+            i = @index(Global)
+            @inbounds out[i] = bitreverse(inp[i])
+        end
 
-    @testset "$T" for T in [Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64]
-        N = 64
-        inp = CLArray(rand(T, N))
-        out = similar(inp)
-        bitreverse_ka!(OpenCLBackend())(out, inp; ndrange=N)
-        synchronize(OpenCLBackend())
+        @testset "$T" for T in [Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64]
+            N = 64
+            inp = CLArray(rand(T, N))
+            out = similar(inp)
+            bitreverse_ka!(OpenCLBackend())(out, inp; ndrange = N)
+            synchronize(OpenCLBackend())
 
-        @test Array(out) == bitreverse.(Array(inp))
+            @test Array(out) == bitreverse.(Array(inp))
+        end
     end
-end
 
-@testset "atomic float accumulation without manual extensions" begin
-    # The auto-spirv-extensions feature detects cl_ext_float_atomics and enables
-    # SPV_EXT_shader_atomic_float_add automatically. Previously this required
-    # passing extensions=["SPV_EXT_shader_atomic_float_add"] to @opencl manually.
-    #
-    # We test with a concurrent accumulation pattern where multiple work-items
-    # write to the same output locations. Without atomics this would race;
-    # with Atomix.@atomic (which emits OpAtomicFAddEXT) the result must be exact.
-    if "cl_ext_float_atomics" in cl.device().extensions
-        @kernel function atomic_accum_kernel!(out, arr)
-            i, j = @index(Global, NTuple)
-            for k in 1:size(out, 1)
-                Atomix.@atomic out[k, i] += arr[i, j]
+    @testset "atomic float accumulation without manual extensions" begin
+        # The auto-spirv-extensions feature detects cl_ext_float_atomics and enables
+        # SPV_EXT_shader_atomic_float_add automatically. Previously this required
+        # passing extensions=["SPV_EXT_shader_atomic_float_add"] to @opencl manually.
+        #
+        # We test with a concurrent accumulation pattern where multiple work-items
+        # write to the same output locations. Without atomics this would race;
+        # with Atomix.@atomic (which emits OpAtomicFAddEXT) the result must be exact.
+        if "cl_ext_float_atomics" in cl.device().extensions
+            @kernel function atomic_accum_kernel!(out, arr)
+                i, j = @index(Global, NTuple)
+                for k in 1:size(out, 1)
+                    Atomix.@atomic out[k, i] += arr[i, j]
+                end
             end
-        end
 
-        @testset "$T" for T in [Float32, Float64]
-            if T == Float64 && !("cl_khr_fp64" in cl.device().extensions)
-                continue
-            end
+            @testset "$T" for T in [Float32, Float64]
+                if T == Float64 && !("cl_khr_fp64" in cl.device().extensions)
+                    continue
+                end
 
-            M, N = 32, 64
-            img = zeros(T, M, N)
-            img[5:15, 5:15] .= one(T)
-            img[20:30, 20:30] .= T(2)
+                M, N = 32, 64
+                img = zeros(T, M, N)
+                img[5:15, 5:15] .= one(T)
+                img[20:30, 20:30] .= T(2)
 
-            cl_img = CLArray(img)
-            out = KernelAbstractions.zeros(OpenCLBackend(), T, M, N)
-            atomic_accum_kernel!(OpenCLBackend())(out, cl_img; ndrange=(M, N))
-            synchronize(OpenCLBackend())
+                cl_img = CLArray(img)
+                out = KernelAbstractions.zeros(OpenCLBackend(), T, M, N)
+                atomic_accum_kernel!(OpenCLBackend())(out, cl_img; ndrange = (M, N))
+                synchronize(OpenCLBackend())
 
-            # Each out[k, i] = sum(img[i, :]) — accumulate row i across all columns
-            out_host = Array(out)
-            expected = zeros(T, M, N)
-            for i in 1:M
-                expected[:, i] .= sum(img[i, :])
+                # Each out[k, i] = sum(img[i, :]) — accumulate row i across all columns
+                out_host = Array(out)
+                expected = zeros(T, M, N)
+                for i in 1:M
+                    expected[:, i] .= sum(img[i, :])
+                end
+                @test out_host ≈ expected
             end
-            @test out_host ≈ expected
         end
     end
-end
 
 end

@codecov bot commented Jan 8, 2026

Codecov Report

❌ Patch coverage is 0% with 12 lines in your changes missing coverage. Please review.
✅ Project coverage is 31.17%. Comparing base (a1f79db) to head (33228a2).
⚠️ Report is 6 commits behind head on master.

Files with missing lines Patch % Lines
src/compiler/compilation.jl 0.00% 12 Missing ⚠️
Additional details and impacted files
@@             Coverage Diff             @@
##           master     #410       +/-   ##
===========================================
- Coverage   80.84%   31.17%   -49.67%     
===========================================
  Files          12       11        -1     
  Lines         736      696       -40     
===========================================
- Hits          595      217      -378     
- Misses        141      479      +338     


@simeonschaub (Member)

Sorry for taking so long to review! I think this is a good approach for now. I was initially thinking it might make more sense to detect this from the generated code rather than unconditionally enabling it on all supported devices, but that seems a lot more complex and I am not sure it gains us much.

Could you add some tests? I was also thinking it might make sense to separate the bitreverse changes into a separate PR, but I will leave that up to you. Thanks for the contribution!
