|
193 | 193 | # Check that we can call this function from the CPU, to support deferred codegen for Enzyme. |
194 | 194 | @test ccall("extern deferred_codegen", llvmcall, UInt, (UInt,), 3) == 3 |
195 | 195 | end |
| 196 | + |
@testset "@device_function macro" begin
    # `@device_function` is expected to do two things for each definition:
    #   1. define a CPU-visible function with the declared return type, and
    #   2. register an overlay in `GLOBAL_METHOD_TABLE` for GPU compilation.

    # Define the device functions inside a throwaway, uniquely named module so
    # the test does not leak names into the enclosing test namespace.
    test_mod = @eval module $(gensym("DeviceFunctionTest"))
        using GPUCompiler

        # Pointer return type (common for runtime functions).
        GPUCompiler.@device_function(Ptr{Nothing},
            @inline test_device_ptr() = ccall("extern gpu_test", llvmcall, Ptr{Nothing}, ())
        )

        # `Nothing` return type (a function called only for its effect).
        GPUCompiler.@device_function(Nothing,
            @inline test_device_nothing() = ccall("extern gpu_test2", llvmcall, Nothing, ())
        )
    end

    # Both names must be bound in the test module.
    @test isdefined(test_mod, :test_device_ptr)
    @test isdefined(test_mod, :test_device_nothing)

    # Look the two zero-argument signatures up through a view of the global
    # method table at the current world age.
    mt_view = GPUCompiler.get_method_table_view(
        Base.get_world_counter(), GPUCompiler.GLOBAL_METHOD_TABLE)
    sig_ptr = Tuple{typeof(test_mod.test_device_ptr)}
    sig_nothing = Tuple{typeof(test_mod.test_device_nothing)}

    result_ptr = findsup(sig_ptr, mt_view)
    result_nothing = findsup(sig_nothing, mt_view)

    # `findsup` reports its outcome as a tuple whose first element is the
    # method match (or `nothing` when no method applies), so comparing the
    # whole result against `nothing` would pass even on a failed lookup.
    # Inspect the match itself; the leading guard keeps the check meaningful
    # should a Julia version return a bare `nothing` instead of a tuple.
    # NOTE(review): a lookup through an overlay view may fall back to the
    # regular method table, so a match here presumably does not by itself
    # prove that it came from the overlay -- confirm whether a stricter
    # origin check is wanted.
    @test result_ptr !== nothing && first(result_ptr) !== nothing
    @test result_nothing !== nothing && first(result_nothing) !== nothing
end
0 commit comments