|
1 | 1 | using Test |
2 | 2 | import ParallelStencil |
3 | 3 | using ParallelStencil.ParallelKernel |
4 | | -import ParallelStencil.ParallelKernel: @reset_parallel_kernel, @is_initialized, SUPPORTED_PACKAGES, PKG_CUDA, PKG_AMDGPU, PKG_METAL, PKG_THREADS, PKG_POLYESTER |
| 4 | +import ParallelStencil.ParallelKernel: @reset_parallel_kernel, @is_initialized, SUPPORTED_PACKAGES, PKG_CUDA, PKG_AMDGPU, PKG_METAL, PKG_THREADS, PKG_POLYESTER, select_hardware, current_hardware |
5 | 5 | import ParallelStencil.ParallelKernel: @require, @prettystring, @iscpu |
6 | 6 | import ParallelStencil.ParallelKernel: checknoargs, checkargs_sharedMem, Dim3 |
7 | 7 | using ParallelStencil.ParallelKernel.Exceptions |
8 | | -TEST_PACKAGES = SUPPORTED_PACKAGES |
# Resolve the identifier of the KernelAbstractions package. `isdefined` is used instead of
# `hasproperty`, because the latter consults `propertynames`, which for a `Module` may list
# only exported names and would then miss a constant that is defined but not exported.
# The literal fallback covers ParallelKernel versions that do not define the constant.
const PKG_KERNELABSTRACTIONS = isdefined(ParallelStencil.ParallelKernel, :PKG_KERNELABSTRACTIONS) ? ParallelStencil.ParallelKernel.PKG_KERNELABSTRACTIONS : :KernelAbstractions

# `collect` yields a fresh Vector, so the in-place `push!` here (and any later in-place
# filtering of TEST_PACKAGES) does not modify SUPPORTED_PACKAGES itself.
TEST_PACKAGES = collect(SUPPORTED_PACKAGES)
if PKG_KERNELABSTRACTIONS ∉ TEST_PACKAGES
    push!(TEST_PACKAGES, PKG_KERNELABSTRACTIONS)
end
9 | 14 | @static if PKG_CUDA in TEST_PACKAGES |
10 | 15 | import CUDA |
11 | 16 | if !CUDA.functional() TEST_PACKAGES = filter!(x->x≠PKG_CUDA, TEST_PACKAGES) end |
|
21 | 26 | @static if PKG_POLYESTER in TEST_PACKAGES |
22 | 27 | import Polyester |
23 | 28 | end |
# Load KernelAbstractions only when it can be located in the current environment;
# otherwise remove it from the list of packages to test.
@static if PKG_KERNELABSTRACTIONS in TEST_PACKAGES
    if Base.find_package("KernelAbstractions") !== nothing
        import KernelAbstractions
    else
        TEST_PACKAGES = filter!(pkg -> pkg != PKG_KERNELABSTRACTIONS, TEST_PACKAGES)
    end
end
24 | 36 | Base.retry_load_extensions() # Potentially needed to load the extensions after the packages have been filtered. |
25 | 37 |
|
"""
    kernelabstractions_gpu_symbols()

Return the hardware symbols (`:gpu_cuda`, `:gpu_amd`, `:gpu_metal`) of those GPU packages
that are loaded in this module and report themselves as functional.

The result is an empty `Vector{Symbol}` when `PKG_KERNELABSTRACTIONS` is not part of
`TEST_PACKAGES` or when no functional GPU package is available.
"""
function kernelabstractions_gpu_symbols()
    symbols = Symbol[]
    # A single definition with a runtime guard replaces the former pattern of defining a
    # fallback method and then overwriting it under `@static if`.
    PKG_KERNELABSTRACTIONS in TEST_PACKAGES || return symbols
    # `isdefined` short-circuits so that a package that was never imported is not referenced.
    if isdefined(@__MODULE__, :CUDA) && CUDA.functional()
        push!(symbols, :gpu_cuda)
    end
    if isdefined(@__MODULE__, :AMDGPU) && AMDGPU.functional()
        push!(symbols, :gpu_amd)
    end
    if isdefined(@__MODULE__, :Metal) && Sys.isapple() && Metal.functional()
        push!(symbols, :gpu_metal)
    end
    return symbols
end
| 57 | + |
26 | 58 |
|
27 | 59 | macro expr_allocated(ex) |
28 | 60 | expanded = Base.macroexpand(__module__, ex; recursive=true) |
@@ -75,6 +107,49 @@ eval(:( |
75 | 107 | @test @prettystring(1, @sharedMem($FloatDefault, (2,3))) == "ParallelStencil.ParallelKernel.@sharedMem_metal $(nameof($FloatDefault)) (2, 3)" |
76 | 108 | # @test @prettystring(1, @pk_show()) == "Metal.@mtlshow" #TODO: not yet supported for Metal |
77 | 109 | # @test @prettystring(1, @pk_println()) == "Metal.@mtlprintln" #TODO: not yet supported for Metal |
| 110 | + elseif $package == $PKG_KERNELABSTRACTIONS |
| 111 | + select_hardware(:cpu) |
| 112 | + @test current_hardware() == :cpu |
| 113 | + @test @prettystring(1, @gridDim()) == "ParallelStencil.ParallelKernel.@gridDim_cpu" |
| 114 | + @test @prettystring(1, @blockIdx()) == "ParallelStencil.ParallelKernel.@blockIdx_cpu" |
| 115 | + @test @prettystring(1, @blockDim()) == "ParallelStencil.ParallelKernel.@blockDim_cpu" |
| 116 | + @test @prettystring(1, @threadIdx()) == "ParallelStencil.ParallelKernel.@threadIdx_cpu" |
| 117 | + @test @prettystring(1, @sync_threads()) == "ParallelStencil.ParallelKernel.@sync_threads_cpu" |
| 118 | + @test @prettystring(1, @sharedMem($FloatDefault, (2,3))) == "ParallelStencil.ParallelKernel.@sharedMem_cpu $(nameof($FloatDefault)) (2, 3)" |
| 119 | + for symbol in kernelabstractions_gpu_symbols() |
| 120 | + select_hardware(symbol) |
| 121 | + if symbol == :gpu_cuda |
| 122 | + @test @prettystring(1, @gridDim()) == "CUDA.gridDim()" |
| 123 | + @test @prettystring(1, @blockIdx()) == "CUDA.blockIdx()" |
| 124 | + @test @prettystring(1, @blockDim()) == "CUDA.blockDim()" |
| 125 | + @test @prettystring(1, @threadIdx()) == "CUDA.threadIdx()" |
| 126 | + @test @prettystring(1, @sync_threads()) == "CUDA.sync_threads()" |
| 127 | + @test @prettystring(1, @sharedMem($FloatDefault, (2,3))) == "CUDA.@cuDynamicSharedMem $(nameof($FloatDefault)) (2, 3)" |
| 128 | + elseif symbol == :gpu_amd |
| 129 | + @test @prettystring(1, @gridDim()) == "AMDGPU.gridGroupDim()" |
| 130 | + @test @prettystring(1, @blockIdx()) == "AMDGPU.workgroupIdx()" |
| 131 | + @test @prettystring(1, @blockDim()) == "AMDGPU.workgroupDim()" |
| 132 | + @test @prettystring(1, @threadIdx()) == "AMDGPU.workitemIdx()" |
| 133 | + @test @prettystring(1, @sync_threads()) == "AMDGPU.sync_workgroup()" |
| 134 | + # @test @prettystring(1, @sharedMem($FloatDefault, (2,3))) == "" #TODO: not yet supported for AMDGPU |
| 135 | + elseif symbol == :gpu_metal |
| 136 | + @test @prettystring(1, @gridDim()) == "Metal.threadgroups_per_grid_3d()" |
| 137 | + @test @prettystring(1, @blockIdx()) == "Metal.threadgroup_position_in_grid_3d()" |
| 138 | + @test @prettystring(1, @blockDim()) == "Metal.threads_per_threadgroup_3d()" |
| 139 | + @test @prettystring(1, @threadIdx()) == "Metal.thread_position_in_threadgroup_3d()" |
| 140 | + @test @prettystring(1, @sync_threads()) == "Metal.threadgroup_barrier(; flag = Metal.MemoryFlagThreadGroup)" |
| 141 | + @test @prettystring(1, @sharedMem($FloatDefault, (2,3))) == "ParallelStencil.ParallelKernel.@sharedMem_metal $(nameof($FloatDefault)) (2, 3)" |
| 142 | + end |
| 143 | + @test current_hardware() == symbol |
| 144 | + end |
| 145 | + select_hardware(:cpu) |
| 146 | + @test current_hardware() == :cpu |
| 147 | + @test @prettystring(1, @gridDim()) == "ParallelStencil.ParallelKernel.@gridDim_cpu" |
| 148 | + @test @prettystring(1, @blockIdx()) == "ParallelStencil.ParallelKernel.@blockIdx_cpu" |
| 149 | + @test @prettystring(1, @blockDim()) == "ParallelStencil.ParallelKernel.@blockDim_cpu" |
| 150 | + @test @prettystring(1, @threadIdx()) == "ParallelStencil.ParallelKernel.@threadIdx_cpu" |
| 151 | + @test @prettystring(1, @sync_threads()) == "ParallelStencil.ParallelKernel.@sync_threads_cpu" |
| 152 | + @test @prettystring(1, @sharedMem($FloatDefault, (2,3))) == "ParallelStencil.ParallelKernel.@sharedMem_cpu $(nameof($FloatDefault)) (2, 3)" |
78 | 153 | elseif @iscpu($package) |
79 | 154 | @test @prettystring(1, @gridDim()) == "ParallelStencil.ParallelKernel.@gridDim_cpu" |
80 | 155 | @test @prettystring(1, @blockIdx()) == "ParallelStencil.ParallelKernel.@blockIdx_cpu" |
|
0 commit comments