diff --git a/test/Project.toml b/test/Project.toml index a44cae6b..ad73c325 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -8,6 +8,7 @@ InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +ParallelTestRunner = "d3525ed8-44d0-4b2c-a655-542cee43accc" Preferences = "21216c6a-2e73-6563-6e65-726566657250" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" REPL = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" @@ -23,4 +24,8 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" pocl_jll = "627d6b7a-bbe6-5189-83e7-98cc0a5aeadd" [compat] -pocl_jll = "7.0" +pocl_jll = "~7.0" +ParallelTestRunner = "1.0.1" + +[sources] +ParallelTestRunner = {url="https://github.com/JuliaTesting/ParallelTestRunner.jl", rev="tb/testsuite"} diff --git a/test/array.jl b/test/array.jl index afe97e23..53fcd9b5 100644 --- a/test/array.jl +++ b/test/array.jl @@ -1,107 +1,105 @@ using LinearAlgebra import Adapt -@testset "CLArray" begin - @testset "constructors" begin - xs = CLArray{Int, 2, cl.Buffer}(undef, 2, 3) - @test collect(CLArray([1 2; 3 4])) == [1 2; 3 4] - @test testf(vec, rand(Float32, 5, 3)) - @test Base.elsize(xs) == sizeof(Int) - @test CLArray{Int, 2}(xs) === xs - - @test device_accessible(xs) - @test !host_accessible(xs) - @test_throws ArgumentError Base.unsafe_convert(Ptr{Int}, xs) - @test_throws ArgumentError Base.unsafe_convert(Ptr{Float32}, xs) - - @test collect(OpenCL.zeros(Float32, 2, 2)) == zeros(Float32, 2, 2) - @test collect(OpenCL.ones(Float32, 2, 2)) == ones(Float32, 2, 2) - - @test collect(OpenCL.fill(0, 2, 2)) == zeros(Int, 2, 2) - @test collect(OpenCL.fill(1, 2, 2)) == ones(Int, 2, 2) - end +@testset "constructors" begin + xs = CLArray{Int, 2, cl.Buffer}(undef, 2, 3) + @test collect(CLArray([1 2; 3 4])) == [1 2; 3 4] + @test testf(vec, rand(Float32, 5, 3)) + @test Base.elsize(xs) == sizeof(Int) + @test CLArray{Int, 
2}(xs) === xs + + @test device_accessible(xs) + @test !host_accessible(xs) + @test_throws ArgumentError Base.unsafe_convert(Ptr{Int}, xs) + @test_throws ArgumentError Base.unsafe_convert(Ptr{Float32}, xs) + + @test collect(OpenCL.zeros(Float32, 2, 2)) == zeros(Float32, 2, 2) + @test collect(OpenCL.ones(Float32, 2, 2)) == ones(Float32, 2, 2) + + @test collect(OpenCL.fill(0, 2, 2)) == zeros(Int, 2, 2) + @test collect(OpenCL.fill(1, 2, 2)) == ones(Int, 2, 2) +end - @testset "adapt" begin - A = rand(Float32, 3, 3) - dA = CLArray(A) - @test Adapt.adapt(Array, dA) == A - @test Adapt.adapt(CLArray, A) isa CLArray - @test Array(Adapt.adapt(CLArray, A)) == A - end +@testset "adapt" begin + A = rand(Float32, 3, 3) + dA = CLArray(A) + @test Adapt.adapt(Array, dA) == A + @test Adapt.adapt(CLArray, A) isa CLArray + @test Array(Adapt.adapt(CLArray, A)) == A +end - @testset "reshape" begin - A = [ - 1 2 3 4 - 5 6 7 8 - ] - gA = reshape(CLArray(A), 1, 8) - _A = reshape(A, 1, 8) - _gA = Array(gA) - @test all(_A .== _gA) - A = [1, 2, 3, 4] - gA = reshape(CLArray(A), 4) - end +@testset "reshape" begin + A = [ + 1 2 3 4 + 5 6 7 8 + ] + gA = reshape(CLArray(A), 1, 8) + _A = reshape(A, 1, 8) + _gA = Array(gA) + @test all(_A .== _gA) + A = [1, 2, 3, 4] + gA = reshape(CLArray(A), 4) +end - @testset "fill(::SubArray)" begin - xs = OpenCL.zeros(Float32, 3) - fill!(view(xs, 2:2), 1) - @test Array(xs) == [0, 1, 0] +@testset "fill(::SubArray)" begin + xs = OpenCL.zeros(Float32, 3) + fill!(view(xs, 2:2), 1) + @test Array(xs) == [0, 1, 0] +end +# TODO: Look into how to port the @sync + +if cl.memory_backend() isa cl.USMBackend + @testset "shared buffers & unsafe_wrap" begin + a = CLVector{Int, cl.UnifiedSharedMemory}(undef, 2) + + # check that basic operations work on arrays backed by shared memory + fill!(a, 40) + a .+= 2 + @test Array(a) == [42, 42] + + # derive an Array object and test that the memory keeps in sync + b = unsafe_wrap(Array, a) + b[1] = 100 + @test Array(a) == [100, 42] + 
copyto!(a, 2, [200], 1, 1) + cl.finish(cl.queue()) + @test b == [100, 200] end - # TODO: Look into how to port the @sync - - if cl.memory_backend() isa cl.USMBackend - @testset "shared buffers & unsafe_wrap" begin - a = CLVector{Int, cl.UnifiedSharedMemory}(undef, 2) - - # check that basic operations work on arrays backed by shared memory - fill!(a, 40) - a .+= 2 - @test Array(a) == [42, 42] - - # derive an Array object and test that the memory keeps in sync - b = unsafe_wrap(Array, a) - b[1] = 100 - @test Array(a) == [100, 42] - copyto!(a, 2, [200], 1, 1) - cl.finish(cl.queue()) - @test b == [100, 200] - end - - # https://github.com/JuliaGPU/CUDA.jl/issues/2191 - @testset "preserving memory types" begin - a = CLVector{Int, cl.UnifiedSharedMemory}([1]) - @test OpenCL.memtype(a) == cl.UnifiedSharedMemory - - # unified-ness should be preserved - b = a .+ 1 - @test OpenCL.memtype(b) == cl.UnifiedSharedMemory - - # when there's a conflict, we should defer to unified memory - c = CLVector{Int, cl.UnifiedSharedMemory}([1]) - d = CLVector{Int, cl.UnifiedDeviceMemory}([1]) - e = c .+ d - @test OpenCL.memtype(e) == cl.UnifiedSharedMemory - end + + # https://github.com/JuliaGPU/CUDA.jl/issues/2191 + @testset "preserving memory types" begin + a = CLVector{Int, cl.UnifiedSharedMemory}([1]) + @test OpenCL.memtype(a) == cl.UnifiedSharedMemory + + # unified-ness should be preserved + b = a .+ 1 + @test OpenCL.memtype(b) == cl.UnifiedSharedMemory + + # when there's a conflict, we should defer to unified memory + c = CLVector{Int, cl.UnifiedSharedMemory}([1]) + d = CLVector{Int, cl.UnifiedDeviceMemory}([1]) + e = c .+ d + @test OpenCL.memtype(e) == cl.UnifiedSharedMemory end +end - @testset "resizing" begin - a = CLArray([1, 2, 3]) +@testset "resizing" begin + a = CLArray([1, 2, 3]) - resize!(a, 3) - @test length(a) == 3 - @test Array(a) == [1, 2, 3] + resize!(a, 3) + @test length(a) == 3 + @test Array(a) == [1, 2, 3] - resize!(a, 5) - @test length(a) == 5 - @test Array(a)[1:3] == 
[1, 2, 3] + resize!(a, 5) + @test length(a) == 5 + @test Array(a)[1:3] == [1, 2, 3] - resize!(a, 2) - @test length(a) == 2 - @test Array(a)[1:2] == [1, 2] + resize!(a, 2) + @test length(a) == 2 + @test Array(a)[1:2] == [1, 2] - b = CLArray{Int}(undef, 0) - @test length(b) == 0 - resize!(b, 1) - @test length(b) == 1 - end + b = CLArray{Int}(undef, 0) + @test length(b) == 0 + resize!(b, 1) + @test length(b) == 1 end diff --git a/test/atomics.jl b/test/atomics.jl index ce69ca30..e29f0b29 100644 --- a/test/atomics.jl +++ b/test/atomics.jl @@ -1,7 +1,5 @@ using SPIRVIntrinsics: @builtin_ccall, @typed_ccall, LLVMPtr -@testset "atomics" begin - function atomic_count(counter) OpenCL.@atomic counter[] += 1 return @@ -15,27 +13,25 @@ end end end - if "cl_ext_float_atomics" in cl.device().extensions - function atomic_float_add(counter, val) - @builtin_ccall( - "atomic_add", Float32, - (LLVMPtr{Float32, AS.CrossWorkgroup}, Float32), - pointer(counter), val, - ) - return - end +if "cl_ext_float_atomics" in cl.device().extensions + function atomic_float_add(counter, val) + @builtin_ccall( + "atomic_add", Float32, + (LLVMPtr{Float32, AS.CrossWorkgroup}, Float32), + pointer(counter), val, + ) + return + end - @testset "SPV_EXT_shader_atomic_float_add extension" begin - a = OpenCL.zeros(Float32) - @opencl global_size = 1000 extensions = ["SPV_EXT_shader_atomic_float_add"] atomic_float_add(a, 1.0f0) - @test OpenCL.@allowscalar a[] == 1000.0f0 + @testset "SPV_EXT_shader_atomic_float_add extension" begin + a = OpenCL.zeros(Float32) + @opencl global_size = 1000 extensions = ["SPV_EXT_shader_atomic_float_add"] atomic_float_add(a, 1.0f0) + @test OpenCL.@allowscalar a[] == 1000.0f0 - spv = sprint() do io - OpenCL.code_native(io, atomic_float_add, Tuple{CLDeviceArray{Float32, 0, 1}, Float32}; extensions = ["SPV_EXT_shader_atomic_float_add"]) - end - @test occursin("OpExtension \"SPV_EXT_shader_atomic_float_add\"", spv) - @test occursin("OpAtomicFAddEXT", spv) + spv = sprint() do io + 
OpenCL.code_native(io, atomic_float_add, Tuple{CLDeviceArray{Float32, 0, 1}, Float32}; extensions = ["SPV_EXT_shader_atomic_float_add"]) end + @test occursin("OpExtension \"SPV_EXT_shader_atomic_float_add\"", spv) + @test occursin("OpAtomicFAddEXT", spv) end - end diff --git a/test/cmdqueue.jl b/test/cmdqueue.jl index f31ed9b4..f2ff64b1 100644 --- a/test/cmdqueue.jl +++ b/test/cmdqueue.jl @@ -1,27 +1,25 @@ -@testset "CmdQueue" begin - @testset "constructor" begin - @test cl.CmdQueue() != nothing - @test cl.CmdQueue(:profile) != nothing - try - cl.CmdQueue(:out_of_order) - cl.CmdQueue((:profile, :out_of_order)) - catch err - @warn("Platform $(cl.device().platform.name) does not seem to " * - "suport out of order queues: \n$err",maxlog=1, - exception=(err, catch_backtrace())) - end - @test_throws ArgumentError cl.CmdQueue(:unrecognized_flag) - for flag in [:profile, :out_of_order] - @test_throws ArgumentError cl.CmdQueue((flag, :unrecognized_flag)) - @test_throws ArgumentError cl.CmdQueue((flag, flag)) - end +@testset "constructor" begin + @test cl.CmdQueue() != nothing + @test cl.CmdQueue(:profile) != nothing + try + cl.CmdQueue(:out_of_order) + cl.CmdQueue((:profile, :out_of_order)) + catch err + @warn("Platform $(cl.device().platform.name) does not seem to " * + "suport out of order queues: \n$err",maxlog=1, + exception=(err, catch_backtrace())) end - - @testset "info" begin - q = cl.CmdQueue() - @test q.context == cl.context() - @test q.device == cl.device() - @test q.reference_count > 0 - @test typeof(q.properties) == cl.cl_command_queue_properties + @test_throws ArgumentError cl.CmdQueue(:unrecognized_flag) + for flag in [:profile, :out_of_order] + @test_throws ArgumentError cl.CmdQueue((flag, :unrecognized_flag)) + @test_throws ArgumentError cl.CmdQueue((flag, flag)) end end + +@testset "info" begin + q = cl.CmdQueue() + @test q.context == cl.context() + @test q.device == cl.device() + @test q.reference_count > 0 + @test typeof(q.properties) == 
cl.cl_command_queue_properties +end diff --git a/test/context.jl b/test/context.jl index bc5529cc..8d6bc620 100644 --- a/test/context.jl +++ b/test/context.jl @@ -1,87 +1,85 @@ -@testset "Context" begin - @testset "constructor" begin - @test_throws MethodError (cl.Context([])) +@testset "constructor" begin + @test_throws MethodError (cl.Context([])) - ctx = cl.Context(cl.device()) - @test ctx != nothing - @test ctx.reference_count == 1 - ctx_id = pointer(ctx) + ctx = cl.Context(cl.device()) + @test ctx != nothing + @test ctx.reference_count == 1 + ctx_id = pointer(ctx) - ctx2 = cl.Context(ctx_id; retain=true) - @test ctx.reference_count == 2 - finalize(ctx2) - @test ctx.reference_count == 1 + ctx2 = cl.Context(ctx_id; retain=true) + @test ctx.reference_count == 2 + finalize(ctx2) + @test ctx.reference_count == 1 - # TODO: support switching contexts - #@testset "Context callback" begin - # function context_test_callback(arg1, arg2, arg3) - # # We're not really testing it because, nvidia doesn't seem to care about this functionality: - # # https://devtalk.nvidia.com/default/topic/497433/context-callback-never-called/ - # OpenCL.cl.log_error("Callback works") - # return - # end - # - # function create_context_error() - # empty_kernel = " - # __kernel void test() { - # int c = 1 + 1; - # };" - # try - # p = cl.Program(source = empty_kernel) |> cl.build! 
- # k = cl.Kernel(p, "test") - # cl.call(k; global_size=1, local_size=10000000) - # catch - # end - # end - # - # ctx = cl.Context(cl.device(), callback = context_test_callback) - # context!(ctx) do - # create_context_error() - # end - #end - end + # TODO: support switching contexts + #@testset "Context callback" begin + # function context_test_callback(arg1, arg2, arg3) + # # We're not really testing it because, nvidia doesn't seem to care about this functionality: + # # https://devtalk.nvidia.com/default/topic/497433/context-callback-never-called/ + # OpenCL.cl.log_error("Callback works") + # return + # end + # + # function create_context_error() + # empty_kernel = " + # __kernel void test() { + # int c = 1 + 1; + # };" + # try + # p = cl.Program(source = empty_kernel) |> cl.build! + # k = cl.Kernel(p, "test") + # cl.call(k; global_size=1, local_size=10000000) + # catch + # end + # end + # + # ctx = cl.Context(cl.device(), callback = context_test_callback) + # context!(ctx) do + # create_context_error() + # end + #end +end - @testset "platform properties" begin - try - cl.Context(cl.CL_DEVICE_TYPE_CPU) - catch err - @test typeof(err) == cl.CLError - # CL_DEVICE_NOT_FOUND could be throw for GPU only drivers - @test err.desc in (:CL_INVALID_PLATFORM, - :CL_DEVICE_NOT_FOUND) - end +@testset "platform properties" begin + try + cl.Context(cl.CL_DEVICE_TYPE_CPU) + catch err + @test typeof(err) == cl.CLError + # CL_DEVICE_NOT_FOUND could be throw for GPU only drivers + @test err.desc in (:CL_INVALID_PLATFORM, + :CL_DEVICE_NOT_FOUND) + end - properties = [(cl.CL_CONTEXT_PLATFORM, cl.platform())] - for (cl_dev_type, sym_dev_type) in [(cl.CL_DEVICE_TYPE_CPU, :cpu), - (cl.CL_DEVICE_TYPE_GPU, :gpu)] - if !cl.has_device_type(cl.platform(), sym_dev_type) - continue - end - @test cl.Context(sym_dev_type; properties) != nothing - @test cl.Context(cl_dev_type; properties) != nothing - ctx = cl.Context(cl_dev_type; properties) - @test !isempty(ctx.properties) - test_properties = 
ctx.properties + properties = [(cl.CL_CONTEXT_PLATFORM, cl.platform())] + for (cl_dev_type, sym_dev_type) in [(cl.CL_DEVICE_TYPE_CPU, :cpu), + (cl.CL_DEVICE_TYPE_GPU, :gpu)] + if !cl.has_device_type(cl.platform(), sym_dev_type) + continue + end + @test cl.Context(sym_dev_type; properties) != nothing + @test cl.Context(cl_dev_type; properties) != nothing + ctx = cl.Context(cl_dev_type; properties) + @test !isempty(ctx.properties) + test_properties = ctx.properties - @test test_properties == properties + @test test_properties == properties - platform_in_properties = false - for (t, v) in test_properties - if t == cl.CL_CONTEXT_PLATFORM - @test v.name == cl.platform().name - @test v == cl.platform() - platform_in_properties = true - break - end + platform_in_properties = false + for (t, v) in test_properties + if t == cl.CL_CONTEXT_PLATFORM + @test v.name == cl.platform().name + @test v == cl.platform() + platform_in_properties = true + break end - @test platform_in_properties - end - try - ctx2 = cl.Context(cl.CL_DEVICE_TYPE_ACCELERATOR; properties) - catch err - @test typeof(err) == cl.CLError - @test err.desc == :CL_DEVICE_NOT_FOUND end + @test platform_in_properties + end + try + ctx2 = cl.Context(cl.CL_DEVICE_TYPE_ACCELERATOR; properties) + catch err + @test typeof(err) == cl.CLError + @test err.desc == :CL_DEVICE_NOT_FOUND end end diff --git a/test/device.jl b/test/device.jl index 94441053..ff726a57 100644 --- a/test/device.jl +++ b/test/device.jl @@ -1,91 +1,89 @@ -@testset "Device" begin - @testset "Type" begin - for (t, k) in zip((cl.CL_DEVICE_TYPE_GPU, cl.CL_DEVICE_TYPE_CPU, - cl.CL_DEVICE_TYPE_ACCELERATOR, cl.CL_DEVICE_TYPE_ALL), - (:gpu, :cpu, :accelerator, :all)) +@testset "Type" begin + for (t, k) in zip((cl.CL_DEVICE_TYPE_GPU, cl.CL_DEVICE_TYPE_CPU, + cl.CL_DEVICE_TYPE_ACCELERATOR, cl.CL_DEVICE_TYPE_ALL), + (:gpu, :cpu, :accelerator, :all)) - #for (dk, dt) in zip(cl.devices(cl.platform(), k), cl.devices(cl.platform(), t)) - # @fact dk == dt --> true - 
#end - #devices = cl.devices(cl.platform(), k) - #for device in devices - # @fact device.device_type == t --> true - #end - end + #for (dk, dt) in zip(cl.devices(cl.platform(), k), cl.devices(cl.platform(), t)) + # @fact dk == dt --> true + #end + #devices = cl.devices(cl.platform(), k) + #for device in devices + # @fact device.device_type == t --> true + #end end +end - @testset "Equality" begin - devices = cl.devices(cl.platform()) +@testset "Equality" begin + devices = cl.devices(cl.platform()) - if length(devices) > 1 - d1 = devices[1] - for d2 in devices[2:end] - @test pointer(d2) != pointer(d1) - @test hash(d2) != hash(d1) - @test isequal(d2, d1) == false - end + if length(devices) > 1 + d1 = devices[1] + for d2 in devices[2:end] + @test pointer(d2) != pointer(d1) + @test hash(d2) != hash(d1) + @test isequal(d2, d1) == false end - end + end +end - @testset "Info" begin - device_info_keys = Symbol[ - :driver_version, - :version, - :extensions, - :platform, - :name, - :device_type, - :has_image_support, - :vendor_id, - :max_compute_units, - :max_work_item_size, - :max_clock_frequency, - :address_bits, - :max_read_image_args, - :max_write_image_args, - :global_mem_size, - :max_mem_alloc_size, - :max_const_buffer_size, - :local_mem_size, - :has_local_mem, - :host_unified_memory, - :available, - :compiler_available, - :max_work_group_size, - :max_parameter_size, - :profiling_timer_resolution, - :max_image2d_shape, - :max_image3d_shape, - ] - @test isa(cl.platform(), cl.Platform) - @test_throws ErrorException cl.platform().zjdlkf +@testset "Info" begin + device_info_keys = Symbol[ + :driver_version, + :version, + :extensions, + :platform, + :name, + :device_type, + :has_image_support, + :vendor_id, + :max_compute_units, + :max_work_item_size, + :max_clock_frequency, + :address_bits, + :max_read_image_args, + :max_write_image_args, + :global_mem_size, + :max_mem_alloc_size, + :max_const_buffer_size, + :local_mem_size, + :has_local_mem, + :host_unified_memory, + 
:available, + :compiler_available, + :max_work_group_size, + :max_parameter_size, + :profiling_timer_resolution, + :max_image2d_shape, + :max_image3d_shape, + ] + @test isa(cl.platform(), cl.Platform) + @test_throws ErrorException cl.platform().zjdlkf - device = cl.device() - @test isa(device, cl.Device) - @test_throws ErrorException device.zjdlkf - for k in device_info_keys - v = getproperty(device, k) - if k == :extensions - @test isa(v, Array) - if length(v) > 0 - @test isa(v, Array{String, 1}) - end - elseif k == :platform - @test v == cl.platform() - elseif k == :max_work_item_sizes - @test length(v) == 3 - elseif k == :max_image2d_shape - @test length(v) == 2 - elseif k == :max_image3d_shape - @test length(v) == 3 + device = cl.device() + @test isa(device, cl.Device) + @test_throws ErrorException device.zjdlkf + for k in device_info_keys + v = getproperty(device, k) + if k == :extensions + @test isa(v, Array) + if length(v) > 0 + @test isa(v, Array{String, 1}) end + elseif k == :platform + @test v == cl.platform() + elseif k == :max_work_item_sizes + @test length(v) == 3 + elseif k == :max_image2d_shape + @test length(v) == 2 + elseif k == :max_image3d_shape + @test length(v) == 3 end + end - @test cl.queue_properties(cl.device()).profiling isa Bool - @test cl.queue_properties(cl.device()).out_of_order_exec isa Bool + @test cl.queue_properties(cl.device()).profiling isa Bool + @test cl.queue_properties(cl.device()).out_of_order_exec isa Bool - @test cl.exec_capabilities(cl.device()).native_kernel isa Bool + @test cl.exec_capabilities(cl.device()).native_kernel isa Bool - @test cl.svm_capabilities(cl.device()).fine_grain_buffer isa Bool - end + @test cl.svm_capabilities(cl.device()).fine_grain_buffer isa Bool end diff --git a/test/event.jl b/test/event.jl index d3c437cb..55796fac 100644 --- a/test/event.jl +++ b/test/event.jl @@ -3,68 +3,68 @@ if contains(cl.platform().vendor, "Intel") || contains(cl.platform().vendor, "po # hangs on Intel @warn "Skipping 
event tests on $(cl.platform().name)" else -@testset "Event" begin - @testset "status" begin - evt = cl.UserEvent() - evt.status - @test evt.status == :submitted - cl.complete(evt) - @test evt.status == :complete - finalize(evt) - end - @testset "wait" begin - # create user event - usr_evt = cl.UserEvent() - cl.enqueue_wait_for_events(usr_evt) +@testset "status" begin + evt = cl.UserEvent() + evt.status + @test evt.status == :submitted + cl.complete(evt) + @test evt.status == :complete + finalize(evt) +end + +@testset "wait" begin + # create user event + usr_evt = cl.UserEvent() + cl.enqueue_wait_for_events(usr_evt) - # create marker event - mkr_evt = cl.enqueue_marker() + # create marker event + mkr_evt = cl.enqueue_marker() - @test usr_evt.status == :submitted - @test mkr_evt.status in (:queued, :submitted) + @test usr_evt.status == :submitted + @test mkr_evt.status in (:queued, :submitted) - cl.complete(usr_evt) - @test usr_evt.status == :complete + cl.complete(usr_evt) + @test usr_evt.status == :complete - wait(mkr_evt) - @test mkr_evt.status == :complete + wait(mkr_evt) + @test mkr_evt.status == :complete - @test cl.cl_event_status(:running) == cl.CL_RUNNING - @test cl.cl_event_status(:submitted) == cl.CL_SUBMITTED - @test cl.cl_event_status(:queued) == cl.CL_QUEUED - @test cl.cl_event_status(:complete) == cl.CL_COMPLETE - end + @test cl.cl_event_status(:running) == cl.CL_RUNNING + @test cl.cl_event_status(:submitted) == cl.CL_SUBMITTED + @test cl.cl_event_status(:queued) == cl.CL_QUEUED + @test cl.cl_event_status(:complete) == cl.CL_COMPLETE +end - @testset "callback" begin - global callback_called = Ref(false) +@testset "callback" begin + global callback_called = Ref(false) - function test_callback(evt, status) - callback_called[] = true - end + function test_callback(evt, status) + callback_called[] = true + end - usr_evt = cl.UserEvent() + usr_evt = cl.UserEvent() - cl.enqueue_wait_for_events(usr_evt) + cl.enqueue_wait_for_events(usr_evt) - mkr_evt = 
cl.enqueue_marker() - cl.add_callback(mkr_evt, test_callback) + mkr_evt = cl.enqueue_marker() + cl.add_callback(mkr_evt, test_callback) - @test usr_evt.status == :submitted - @test mkr_evt.status in (:queued, :submitted) - @test !callback_called[] + @test usr_evt.status == :submitted + @test mkr_evt.status in (:queued, :submitted) + @test !callback_called[] - cl.complete(usr_evt) - @test usr_evt.status == :complete + cl.complete(usr_evt) + @test usr_evt.status == :complete - wait(mkr_evt) + wait(mkr_evt) - # Give callback some time to finish - yield() - sleep(0.5) + # Give callback some time to finish + yield() + sleep(0.5) - @test mkr_evt.status == :complete - @test callback_called[] - end + @test mkr_evt.status == :complete + @test callback_called[] end + end diff --git a/test/execution.jl b/test/execution.jl index e15a4349..1fd69d7a 100644 --- a/test/execution.jl +++ b/test/execution.jl @@ -1,6 +1,5 @@ using SPIRV_LLVM_Translator_jll - -@testset "execution" begin +using IOCapture @testset "@opencl" begin @@ -150,5 +149,3 @@ end @test occursin("target triple = \"spir64-unknown-unknown\"", llvm_backend_khronos) end end - -end diff --git a/test/intrinsics.jl b/test/intrinsics.jl index ad970c05..2f408fcb 100644 --- a/test/intrinsics.jl +++ b/test/intrinsics.jl @@ -12,8 +12,6 @@ end const float_types = filter(x -> x <: Base.IEEEFloat, GPUArraysTestSuite.supported_eltypes(CLArray)) const ispocl = cl.platform().name == "Portable Computing Language" -@testset "intrinsics" begin - @testset "barrier" begin @on_device barrier(OpenCL.LOCAL_MEM_FENCE) @@ -163,5 +161,3 @@ end end end - -end diff --git a/test/kernel.jl b/test/kernel.jl index e7033c8b..a938305b 100644 --- a/test/kernel.jl +++ b/test/kernel.jl @@ -1,163 +1,161 @@ -@testset "Kernel" begin - test_source = " - __kernel void sum(__global const float *a, - __global const float *b, - __global float *c, - const unsigned int count) - { - unsigned int gid = get_global_id(0); - if (gid < count) { - c[gid] = a[gid] + 
b[gid]; - } - } - " - - #TODO: tests for invalid kernel build error && logs... - - @testset "constructor" begin - prg = cl.Program(source=test_source) - @test_throws ArgumentError cl.Kernel(prg, "sum") - cl.build!(prg) - @test cl.Kernel(prg, "sum") != nothing - end +test_source = " +__kernel void sum(__global const float *a, + __global const float *b, + __global float *c, + const unsigned int count) +{ + unsigned int gid = get_global_id(0); + if (gid < count) { + c[gid] = a[gid] + b[gid]; + } +} +" + +#TODO: tests for invalid kernel build error && logs... + +@testset "constructor" begin + prg = cl.Program(source=test_source) + @test_throws ArgumentError cl.Kernel(prg, "sum") + cl.build!(prg) + @test cl.Kernel(prg, "sum") != nothing +end - @testset "info" begin - prg = cl.Program(source=test_source) - cl.build!(prg) - k = cl.Kernel(prg, "sum") - @test k.function_name == "sum" - @test k.num_args == 4 - @test k.reference_count > 0 - @test k.program == prg - @test typeof(k.attributes) == String - end +@testset "info" begin + prg = cl.Program(source=test_source) + cl.build!(prg) + k = cl.Kernel(prg, "sum") + @test k.function_name == "sum" + @test k.num_args == 4 + @test k.reference_count > 0 + @test k.program == prg + @test typeof(k.attributes) == String +end - @testset "mem/workgroup size" begin - prg = cl.Program(source=test_source) - cl.build!(prg) - k = cl.Kernel(prg, "sum") - wginfo = cl.work_group_info(k, cl.device()) - for sf in [:size, :compile_size, :local_mem_size, :private_mem_size, :prefered_size_multiple] - @test getproperty(wginfo, sf) != nothing - end +@testset "mem/workgroup size" begin + prg = cl.Program(source=test_source) + cl.build!(prg) + k = cl.Kernel(prg, "sum") + wginfo = cl.work_group_info(k, cl.device()) + for sf in [:size, :compile_size, :local_mem_size, :private_mem_size, :prefered_size_multiple] + @test getproperty(wginfo, sf) != nothing end +end - @testset "set_arg!/set_args!" begin - prg = cl.Program(source=test_source) |> cl.build! 
- k = cl.Kernel(prg, "sum") +@testset "set_arg!/set_args!" begin + prg = cl.Program(source=test_source) |> cl.build! + k = cl.Kernel(prg, "sum") - count = 1024 - nbytes = count * sizeof(Float32) + count = 1024 + nbytes = count * sizeof(Float32) - h_ones = ones(Float32, count) + h_ones = ones(Float32, count) - A = CLArray(h_ones) - B = CLArray(h_ones) - C = CLArray{Float32}(undef, count) + A = CLArray(h_ones) + B = CLArray(h_ones) + C = CLArray{Float32}(undef, count) - # we use julia's index by one convention - cl.set_arg!(k, 1, A.data[].mem) - cl.set_arg!(k, 2, B.data[].mem) - cl.set_arg!(k, 3, C.data[].mem) - cl.set_arg!(k, 4, UInt32(count)) + # we use julia's index by one convention + cl.set_arg!(k, 1, A.data[].mem) + cl.set_arg!(k, 2, B.data[].mem) + cl.set_arg!(k, 3, C.data[].mem) + cl.set_arg!(k, 4, UInt32(count)) - cl.enqueue_kernel(k, count) |> wait - r = Array(C) + cl.enqueue_kernel(k, count) |> wait + r = Array(C) - @test all(x -> x == 2.0, r) - cl.flush(cl.queue()) + @test all(x -> x == 2.0, r) + cl.flush(cl.queue()) - # test set_args with new kernel - k2 = cl.Kernel(prg, "sum") - cl.set_args!(k2, A.data[].mem, B.data[].mem, C.data[].mem, UInt32(count)) + # test set_args with new kernel + k2 = cl.Kernel(prg, "sum") + cl.set_args!(k2, A.data[].mem, B.data[].mem, C.data[].mem, UInt32(count)) - h_twos = fill(2f0, count) - copyto!(A, h_twos) - copyto!(B, h_twos) + h_twos = fill(2f0, count) + copyto!(A, h_twos) + copyto!(B, h_twos) - #TODO: check for ocl version, fill is opencl v1.2 - #cl.enqueue_fill(A, 2f0) - #cl.enqueue_fill(B, 2f0) + #TODO: check for ocl version, fill is opencl v1.2 + #cl.enqueue_fill(A, 2f0) + #cl.enqueue_fill(B, 2f0) - cl.enqueue_kernel(k, count) + cl.enqueue_kernel(k, count) - @test all(x -> x == 4.0, Array(C)) - end + @test all(x -> x == 4.0, Array(C)) +end - @testset "clcall" begin - simple_kernel = " - __kernel void test(__global float *i) { - *i += 1; - };" +@testset "clcall" begin + simple_kernel = " + __kernel void test(__global 
float *i) { + *i += 1; + };" - h_buff = Float32[1,] - d_arr = CLArray(h_buff) + h_buff = Float32[1,] + d_arr = CLArray(h_buff) - p = cl.Program(source=simple_kernel) |> cl.build! - k = cl.Kernel(p, "test") + p = cl.Program(source=simple_kernel) |> cl.build! + k = cl.Kernel(p, "test") - # dimensions must be the same size - @test_throws ArgumentError clcall(k, Tuple{CLPtr{Float32}}, d_arr; - global_size=(1,), local_size=(1,1)) - @test_throws ArgumentError clcall(k, Tuple{CLPtr{Float32}}, d_arr; - global_size=(1,1), local_size=(1,)) + # dimensions must be the same size + @test_throws ArgumentError clcall(k, Tuple{CLPtr{Float32}}, d_arr; + global_size=(1,), local_size=(1,1)) + @test_throws ArgumentError clcall(k, Tuple{CLPtr{Float32}}, d_arr; + global_size=(1,1), local_size=(1,)) - # dimensions are bounded - max_work_dim = cl.device().max_work_item_dims - bad = tuple([1 for _ in 1:(max_work_dim + 1)]) + # dimensions are bounded + max_work_dim = cl.device().max_work_item_dims + bad = tuple([1 for _ in 1:(max_work_dim + 1)]) - # calls are asynchronous, but cl.read blocks - clcall(k, Tuple{CLPtr{Float32}}, d_arr) - @test Array(d_arr) == [2f0] + # calls are asynchronous, but cl.read blocks + clcall(k, Tuple{CLPtr{Float32}}, d_arr) + @test Array(d_arr) == [2f0] - # enqueue task is an alias for calling - # a kernel with a global/local size of 1 - evt = cl.enqueue_task(k) - @test Array(d_arr) == [3f0] - end + # enqueue task is an alias for calling + # a kernel with a global/local size of 1 + evt = cl.enqueue_task(k) + @test Array(d_arr) == [3f0] +end - @testset "packed structures" begin - test_source = " - struct __attribute__((packed)) Test2{ - long f1; - int __attribute__((aligned (8))) f2; - }; - __kernel void structest(__global float *out, struct Test2 b){ - out[0] = b.f1; - out[1] = b.f2; - } - " - prg = cl.Program(source = test_source) - cl.build!(prg) - structkernel = cl.Kernel(prg, "structest") - out = CLArray{Float32}(undef, 2) - bstruct = (1, Int32(4)) - 
clcall(structkernel, Tuple{CLPtr{Float32}, Tuple{Int64, Cint}}, out, bstruct) - @test Array(out) == [1f0, 4f0] - end +@testset "packed structures" begin + test_source = " + struct __attribute__((packed)) Test2{ + long f1; + int __attribute__((aligned (8))) f2; + }; + __kernel void structest(__global float *out, struct Test2 b){ + out[0] = b.f1; + out[1] = b.f2; + } + " + prg = cl.Program(source = test_source) + cl.build!(prg) + structkernel = cl.Kernel(prg, "structest") + out = CLArray{Float32}(undef, 2) + bstruct = (1, Int32(4)) + clcall(structkernel, Tuple{CLPtr{Float32}, Tuple{Int64, Cint}}, out, bstruct) + @test Array(out) == [1f0, 4f0] +end - @testset "vector arguments" begin - test_source = " - __kernel void vec3_unpack(__global float *out, float3 a, float3 b) { - out[0] = a.x; - out[1] = a.y; - out[2] = a.z; - out[3] = b.x; - out[4] = b.y; - out[5] = b.z; - } - " - prg = cl.Program(source = test_source) - cl.build!(prg) - vec3kernel = cl.Kernel(prg, "vec3_unpack") - out = CLArray{Float32}(undef, 6) - # NOTE: the user is responsible for padding the vector to 4 elements - # (only on some platforms) - vec3_a = (1f0, 2f0, 3f0, 0f0) - vec3_b = (4f0, 5f0, 6f0, 0f0) - clcall( - vec3kernel, Tuple{CLPtr{Float32}, NTuple{4, Float32}, NTuple{4, Float32}}, - out, vec3_a, vec3_b) - @test Array(out) == [1f0, 2f0, 3f0, 4f0, 5f0, 6f0] - end +@testset "vector arguments" begin + test_source = " + __kernel void vec3_unpack(__global float *out, float3 a, float3 b) { + out[0] = a.x; + out[1] = a.y; + out[2] = a.z; + out[3] = b.x; + out[4] = b.y; + out[5] = b.z; + } + " + prg = cl.Program(source = test_source) + cl.build!(prg) + vec3kernel = cl.Kernel(prg, "vec3_unpack") + out = CLArray{Float32}(undef, 6) + # NOTE: the user is responsible for padding the vector to 4 elements + # (only on some platforms) + vec3_a = (1f0, 2f0, 3f0, 0f0) + vec3_b = (4f0, 5f0, 6f0, 0f0) + clcall( + vec3kernel, Tuple{CLPtr{Float32}, NTuple{4, Float32}, NTuple{4, Float32}}, + out, vec3_a, vec3_b) + 
@test Array(out) == [1f0, 2f0, 3f0, 4f0, 5f0, 6f0] end diff --git a/test/kernelabstractions.jl b/test/kernelabstractions.jl index debf5dec..97e54154 100644 --- a/test/kernelabstractions.jl +++ b/test/kernelabstractions.jl @@ -1,3 +1,17 @@ +# KernelAbstractions has a testsuite that isn't part of the main package. +# Include it directly. + +const KATestSuite = let + mod = @eval module $(gensym()) + using ..Test + import KernelAbstractions + kernelabstractions = pathof(KernelAbstractions) + kernelabstractions_root = dirname(dirname(kernelabstractions)) + include(joinpath(kernelabstractions_root, "test", "testsuite.jl")) + end + mod.Testsuite +end + skip_tests=Set([ "sparse", "Convert", # Need to opt out of i128 diff --git a/test/memory.jl b/test/memory.jl index 6adc081c..f352fd73 100644 --- a/test/memory.jl +++ b/test/memory.jl @@ -1,32 +1,30 @@ -@testset "Memory" begin - function create_test_buffer() - testarray = zeros(Float32, 1000) - cl.Buffer(testarray) - end +function create_test_buffer() + testarray = zeros(Float32, 1000) + cl.Buffer(testarray) +end - @testset "context" begin - buf = create_test_buffer() +@testset "context" begin + buf = create_test_buffer() - ctx = cl.context(buf) + ctx = cl.context(buf) - @test ctx != nothing - @test isequal(ctx, cl.context()) != nothing - end + @test ctx != nothing + @test isequal(ctx, cl.context()) != nothing +end - @testset "properties" begin - buf = create_test_buffer() +@testset "properties" begin + buf = create_test_buffer() - expectations = [ - (:type, cl.CL_MEM_OBJECT_BUFFER), - (:flags, (:rw, :copy)), - (:size, sizeof(buf)), - (:reference_count, 1), - (:map_count, 0) - ] + expectations = [ + (:type, cl.CL_MEM_OBJECT_BUFFER), + (:flags, (:rw, :copy)), + (:size, sizeof(buf)), + (:reference_count, 1), + (:map_count, 0) + ] - for expectation in expectations - prop, value = expectation - @test getproperty(buf, prop) == value - end + for expectation in expectations + prop, value = expectation + @test getproperty(buf, prop) 
== value end end diff --git a/test/platform.jl b/test/platform.jl index 5e1c95f4..b6f410d6 100644 --- a/test/platform.jl +++ b/test/platform.jl @@ -1,27 +1,25 @@ -@testset "Platform" begin - @testset "Info" begin - @test length(cl.platforms()) == cl.num_platforms() +@testset "Info" begin + @test length(cl.platforms()) == cl.num_platforms() - @test cl.platform() != nothing - @test pointer(cl.platform()) != C_NULL - @test cl.platform().opencl_version isa VersionNumber - end + @test cl.platform() != nothing + @test pointer(cl.platform()) != C_NULL + @test cl.platform().opencl_version isa VersionNumber +end - @testset "Equality" begin - platform = cl.platforms()[1] - platform_copy = cl.platforms()[1] +@testset "Equality" begin + platform = cl.platforms()[1] + platform_copy = cl.platforms()[1] - @test pointer(platform) == pointer(platform_copy) - @test hash(platform) == hash(platform_copy) - @test isequal(platform, platform) + @test pointer(platform) == pointer(platform_copy) + @test hash(platform) == hash(platform_copy) + @test isequal(platform, platform) - if length(cl.platforms()) > 1 - p1 = cl.platforms()[1] - for p2 in cl.platforms()[2:end] - @test pointer(p2) != pointer(p1) - @test hash(p2) != hash(p1) - @test !isequal(p2, p1) - end + if length(cl.platforms()) > 1 + p1 = cl.platforms()[1] + for p2 in cl.platforms()[2:end] + @test pointer(p2) != pointer(p1) + @test hash(p2) != hash(p1) + @test !isequal(p2, p1) end end end diff --git a/test/program.jl b/test/program.jl index 01c62839..b7d433b4 100644 --- a/test/program.jl +++ b/test/program.jl @@ -1,70 +1,68 @@ -@testset "Program" begin - let - @test_throws ArgumentError cl.Program() - @test_throws ArgumentError cl.Program(source="", il="") - end +let + @test_throws ArgumentError cl.Program() + @test_throws ArgumentError cl.Program(source="", il="") +end - test_source = " - __kernel void sum(__global const float *a, - __global const float *b, - __global float *c) - { - uint gid = get_global_id(0); - c[gid] = a[gid] 
+ b[gid]; - } - " +test_source = " +__kernel void sum(__global const float *a, + __global const float *b, + __global float *c) +{ + uint gid = get_global_id(0); + c[gid] = a[gid] + b[gid]; +} +" - function create_test_program() - cl.Program(source=test_source) - end +function create_test_program() + cl.Program(source=test_source) +end - @testset "source constructor" begin - prg = cl.Program(source=test_source) - @test prg != nothing - end - @testset "info" begin - prg = cl.Program(source=test_source) +@testset "source constructor" begin + prg = cl.Program(source=test_source) + @test prg != nothing +end +@testset "info" begin + prg = cl.Program(source=test_source) - @test prg.context == cl.context() + @test prg.context == cl.context() - @test typeof(prg.devices) == Vector{cl.Device} - @test length(prg.devices) > 0 - @test cl.device() in prg.devices + @test typeof(prg.devices) == Vector{cl.Device} + @test length(prg.devices) > 0 + @test cl.device() in prg.devices - @test typeof(prg.source) == String - @test prg.source == test_source + @test typeof(prg.source) == String + @test prg.source == test_source - @test prg.reference_count > 0 - @test isempty(strip(prg.build_log[cl.device()])) - end + @test prg.reference_count > 0 + @test isempty(strip(prg.build_log[cl.device()])) +end - @testset "build" begin - prg = cl.Program(source=test_source) - @test cl.build!(prg) != nothing +@testset "build" begin + prg = cl.Program(source=test_source) + @test cl.build!(prg) != nothing - @test prg.build_status[cl.device()] == cl.CL_BUILD_SUCCESS - @test prg.build_log[cl.device()] isa String - end + @test prg.build_status[cl.device()] == cl.CL_BUILD_SUCCESS + @test prg.build_log[cl.device()] isa String +end - @testset "source code" begin - prg = cl.Program(source=test_source) - @test prg.source == test_source - end +@testset "source code" begin + prg = cl.Program(source=test_source) + @test prg.source == test_source +end - if contains(cl.platform().vendor, "pocl") - @warn "Skipping 
binary program tests on $(cl.platform().name)" - else - @testset "binaries" begin - prg = cl.Program(source=test_source) |> cl.build! +if contains(cl.platform().vendor, "pocl") + @warn "Skipping binary program tests on $(cl.platform().name)" +else + @testset "binaries" begin + prg = cl.Program(source=test_source) |> cl.build! - @test cl.device() in collect(keys(prg.binaries)) - binaries = prg.binaries - @test cl.device() in collect(keys(binaries)) - @test binaries[cl.device()] != nothing - @test length(binaries[cl.device()]) > 0 - prg2 = cl.Program(binaries=binaries) - @test prg2.binaries == binaries - @test prg2.source === nothing - end + @test cl.device() in collect(keys(prg.binaries)) + binaries = prg.binaries + @test cl.device() in collect(keys(binaries)) + @test binaries[cl.device()] != nothing + @test length(binaries[cl.device()]) > 0 + prg2 = cl.Program(binaries=binaries) + @test prg2.binaries == binaries + @test prg2.source === nothing end end diff --git a/test/runtests.jl b/test/runtests.jl index 6fd437f2..2e04653d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,413 +1,152 @@ -using Distributed -using Dates -import REPL -using Printf: @sprintf -using Base.Filesystem: path_separator +using ParallelTestRunner using Preferences +import OpenCL, pocl_jll +import Test -# parse some command-line arguments -function extract_flag!(args, flag, default=nothing) - for f in args - if startswith(f, flag) - # Check if it's just `--flag` or if it's `--flag=foo` - if f != flag - val = split(f, '=')[2] - if default !== nothing && !(typeof(default) <: AbstractString) - val = parse(typeof(default), val) - end - else - val = default - end - - # Drop this value from our args - filter!(x -> x != f, args) - return (true, val) - end - end - return (false, default) -end -do_help, _ = extract_flag!(ARGS, "--help") -if do_help - println(""" - Usage: runtests.jl [--help] [--list] [--jobs=N] [TESTS...] - - --help Show this text. - --list List all available tests. 
- --verbose Print more information during testing. - --quickfail Fail the entire run as soon as a single test errored. - --jobs=N Launch `N` processes to perform tests (default: Sys.CPU_THREADS). - --platform=NAME Run tests on the platform named `NAME` (default: all platforms). - - Remaining arguments filter the tests that will be executed.""") - exit(0) -end -_, jobs = extract_flag!(ARGS, "--jobs", Sys.CPU_THREADS) -do_verbose, _ = extract_flag!(ARGS, "--verbose") -do_quickfail, _ = extract_flag!(ARGS, "--quickfail") - -include("setup.jl") # make sure everything is precompiled @info "System information:\n" * sprint(io->OpenCL.versioninfo(io)) -@info "Running $jobs tests in parallel. If this is too many, specify the `--jobs` argument to the tests, or set the JULIA_CPU_THREADS environment variable." - -# choose tests -const tests = [] -const test_runners = Dict() -## files in the test folder -for (rootpath, dirs, files) in walkdir(@__DIR__) - # find Julia files - filter!(files) do file - endswith(file, ".jl") && file !== "setup.jl" && file !== "runtests.jl" - end - isempty(files) && continue - - # strip extension - files = map(files) do file - file[1:end-3] - end - - # prepend subdir - subdir = relpath(rootpath, @__DIR__) - if subdir != "." 
- files = map(files) do file - joinpath(subdir, file) +## custom arguments +args = parse_args(ARGS; custom=["platform"]) + +# determine tests to run +const testsuite = find_tests(pwd()) +## GPUArrays test suite: not part of the main package +const GPUArraysTestSuite = let + mod = @eval module $(gensym()) + using ..Test + import GPUArrays + gpuarrays = pathof(GPUArrays) + gpuarrays_root = dirname(dirname(gpuarrays)) + include(joinpath(gpuarrays_root, "test", "testsuite.jl")) end - end - - # unify path separators - files = map(files) do file - replace(file, path_separator => '/') - end - - append!(tests, files) - for file in files - test_runners[file] = ()->include("$(@__DIR__)/$file.jl") - end + mod.TestSuite end -sort!(tests; by=(file)->stat("$(@__DIR__)/$file.jl").size, rev=true) -## GPUArrays testsuite for name in keys(GPUArraysTestSuite.tests) - push!(tests, "gpuarrays/$name") - test_runners["gpuarrays/$name"] = ()->GPUArraysTestSuite.tests[name](CLArray) -end -## finalize -unique!(tests) - -# parse some more command-line arguments -## --list to list all available tests -do_list, _ = extract_flag!(ARGS, "--list") -if do_list - println("Available tests:") - for test in sort(tests) - println(" - $test") - end - exit(0) + test = "gpuarrays/$name" + testsuite[test] = :(GPUArraysTestSuite.tests[$name](CLArray)) end -## --platform selector -do_platform, platform = extract_flag!(ARGS, "--platform", nothing) -## no options should remain -optlike_args = filter(startswith("-"), ARGS) -if !isempty(optlike_args) - error("Unknown test options `$(join(optlike_args, " "))` (try `--help` for usage instructions)") -end -## the remaining args filter tests -if isempty(ARGS) - # default to running all tests, except: - filter!(tests) do test - if load_preference(OpenCL, "default_memory_backend") == "svm" && - test == "gpuarrays/indexing scalar" +## filter +if filter_tests!(testsuite, args) + if load_preference(OpenCL, "default_memory_backend") == "svm" # GPUArrays' scalar indexing 
tests assume that indexing is not supported
+            delete!(testsuite, "gpuarrays/indexing scalar")
            return false
        end
-
-        return true
-    end
-else
-    filter!(tests) do test
-        any(arg->startswith(test, arg), ARGS)
-    end
end
-# add workers
-const test_exeflags = Base.julia_cmd()
-filter!(test_exeflags.exec) do c
-    return !(startswith(c, "--depwarn") || startswith(c, "--check-bounds"))
-end
-push!(test_exeflags.exec, "--check-bounds=yes")
-push!(test_exeflags.exec, "--startup-file=no")
-push!(test_exeflags.exec, "--depwarn=yes")
-push!(test_exeflags.exec, "--project=$(Base.active_project())")
-const test_exename = popfirst!(test_exeflags.exec)
-function addworker(X; kwargs...)
-    withenv("JULIA_NUM_THREADS" => 1, "OPENBLAS_NUM_THREADS" => 1) do
-        procs = addprocs(X; exename=test_exename, exeflags=test_exeflags, kwargs...)
-        @everywhere procs include($(joinpath(@__DIR__, "setup.jl")))
-        procs
+# wrap tests in device loops
+function generate_test(test, expr)
+    # some tests require native execution capabilities
+    requires_il = test in ["atomics", "execution", "intrinsics", "kernelabstractions",
+                           "statistics", "linalg", ] ||
+                  startswith(test, "gpuarrays/")
+
+    # targets is a global variable that is defined in init_code
+    return quote
+        if isempty(targets)
+            platform_filter = $(args.custom["platform"])
+            for platform in cl.platforms(),
+                device in cl.devices(platform)
+                if platform_filter !== nothing
+                    # filter on the name or vendor
+                    names = lowercase.([platform.name, platform.vendor])
+                    if !any(contains(platform_filter.value), names)
+                        continue
+                    end
+                end
+                push!(targets, (; platform, device))
+            end
+            if isempty(targets)
+                if platform_filter !== nothing
+                    throw(ArgumentError("No OpenCL platforms found matching $(platform_filter.value)"))
+                else
+                    throw(ArgumentError("No OpenCL platforms found"))
+                end
+            end
+        end
+
+        @testset "$(device.name)" for (; platform, device) in targets
+            cl.platform!(platform)
+            cl.device!(device)
+
+            if !$(requires_il) || "cl_khr_il_program" in 
device.extensions + $(expr) + end + end end end -addworker(min(jobs, length(tests))) - -# pretty print information about gc and mem usage -testgroupheader = "Test" -workerheader = "(Worker)" -name_align = maximum([textwidth(testgroupheader) + textwidth(" ") + - textwidth(workerheader); map(x -> textwidth(x) + - 3 + ndigits(nworkers()), tests)]) -elapsed_align = textwidth("Time (s)") -gc_align = textwidth("GC (s)") -percent_align = textwidth("GC %") -alloc_align = textwidth("Alloc (MB)") -rss_align = textwidth("RSS (MB)") -printstyled(" "^(name_align + textwidth(testgroupheader) - 3), " | ") -printstyled(" | ---------------- CPU ---------------- |\n", color=:white) -printstyled(testgroupheader, color=:white) -printstyled(lpad(workerheader, name_align - textwidth(testgroupheader) + 1), " | ", color=:white) -printstyled("Time (s) | GC (s) | GC % | Alloc (MB) | RSS (MB) |\n", color=:white) -print_lock = stdout isa Base.LibuvStream ? stdout.lock : ReentrantLock() -if stderr isa Base.LibuvStream - stderr.lock = print_lock +for test in keys(testsuite) + testsuite[test] = generate_test(test, testsuite[test]) end -function print_testworker_stats(test, wrkr, resp) - @nospecialize resp - lock(print_lock) - try - printstyled(test, color=:white) - printstyled(lpad("($wrkr)", name_align - textwidth(test) + 1, " "), " | ", color=:white) - time_str = @sprintf("%7.2f",resp[2]) - printstyled(lpad(time_str, elapsed_align, " "), " | ", color=:white) - cpu_gc_str = @sprintf("%5.2f", resp[4]) - printstyled(lpad(cpu_gc_str, gc_align, " "), " | ", color=:white) - # since there may be quite a few digits in the percentage, - # the left-padding here is less to make sure everything fits - cpu_percent_str = @sprintf("%4.1f", 100 * resp[4] / resp[2]) - printstyled(lpad(cpu_percent_str, percent_align, " "), " | ", color=:white) - cpu_alloc_str = @sprintf("%5.2f", resp[3] / 2^20) - printstyled(lpad(cpu_alloc_str, alloc_align, " "), " | ", color=:white) +const init_code = quote + using OpenCL, 
pocl_jll - cpu_rss_str = @sprintf("%5.2f", resp[6] / 2^20) - printstyled(lpad(cpu_rss_str, rss_align, " "), " |\n", color=:white) - finally - unlock(print_lock) - end -end -global print_testworker_started = (name, wrkr)->begin - if do_verbose - lock(print_lock) - try - printstyled(name, color=:white) - printstyled(lpad("($wrkr)", name_align - textwidth(name) + 1, " "), " |", - " "^elapsed_align, "started at $(now())\n", color=:white) - finally - unlock(print_lock) + OpenCL.allowscalar(false) + const targets = [] + + # GPUArrays has a testsuite that isn't part of the main package. + # Include it directly. + const GPUArraysTestSuite = let + mod = @eval module $(gensym()) + using ..Test + import GPUArrays + gpuarrays = pathof(GPUArrays) + gpuarrays_root = dirname(dirname(gpuarrays)) + include(joinpath(gpuarrays_root, "test", "testsuite.jl")) end + mod.TestSuite end -end -function print_testworker_errored(name, wrkr) - lock(print_lock) - try - printstyled(name, color=:red) - printstyled(lpad("($wrkr)", name_align - textwidth(name) + 1, " "), " |", - " "^elapsed_align, " failed at $(now())\n", color=:red) - finally - unlock(print_lock) - end -end -# run tasks -t0 = now() -results = [] -all_tasks = Task[] -all_tests = copy(tests) -try - # Monitor stdin and kill this task on ^C - # but don't do this on Windows, because it may deadlock in the kernel - t = current_task() - running_tests = Dict{String, DateTime}() - if !Sys.iswindows() && isa(stdin, Base.TTY) - stdin_monitor = @async begin - term = REPL.Terminals.TTYTerminal("xterm", stdin, stdout, stderr) - try - REPL.Terminals.raw!(term, true) - while true - c = read(term, Char) - if c == '\x3' - Base.throwto(t, InterruptException()) - break - elseif c == '?' 
- println("Currently running: ") - tests = sort(collect(running_tests), by=x->x[2]) - foreach(tests) do (test, date) - println(test, " (running for ", round(now()-date, Minute), ")") - end - end - end - catch e - isa(e, InterruptException) || rethrow() - finally - REPL.Terminals.raw!(term, false) + const device_eltypes = Dict() + function GPUArraysTestSuite.supported_eltypes(::Type{<:CLArray}) + get!(device_eltypes, cl.device()) do + types = [Int16, Int32, Int64, + Complex{Int16}, Complex{Int32}, Complex{Int64}, + Float32, ComplexF32] + if "cl_khr_fp64" in cl.device().extensions + push!(types, Float64) + push!(types, ComplexF64) end + if "cl_khr_fp16" in cl.device().extensions + push!(types, Float16) + push!(types, ComplexF16) + end + return types end end - @sync begin - function recycle_worker(p) - rmprocs(p, waitfor=30) - return nothing - end - - for p in workers() - @async begin - push!(all_tasks, current_task()) - while length(tests) > 0 - test = popfirst!(tests) - - # sometimes a worker failed, and we need to spawn a new one - if p === nothing - p = addworker(1)[1] - end - wrkr = p - - local resp - # run the test - running_tests[test] = now() - try - resp = remotecall_fetch(runtests, wrkr, - test_runners[test], test, - platform) - catch e - isa(e, InterruptException) && return - resp = Any[e] - end - delete!(running_tests, test) - push!(results, (test, resp)) + testf(f, xs...; kwargs...) = GPUArraysTestSuite.compare(f, CLArray, xs...; kwargs...) - # act on the results - if resp[1] isa Exception - print_testworker_errored(test, wrkr) - do_quickfail && Base.throwto(t, InterruptException()) + ## auxiliary stuff - # the worker encountered some failure, recycle it - # so future tests get a fresh environment - p = recycle_worker(p) - else - print_testworker_stats(test, wrkr, resp) + # Run some code on-device + macro on_device(ex...) 
+ code = ex[end] + kwargs = ex[1:end-1] - compilations = resp[7] - if Sys.iswindows() && compilations > 100 - # XXX: restart to avoid handle exhaustion - # (see pocl/pocl#1941) - @warn "Restarting worker $wrkr to avoid handle exhaustion" - p = recycle_worker(p) - end - end + @gensym kernel + esc(quote + let + function $kernel() + $code + return end - if p !== nothing - recycle_worker(p) - end - end - end - end -catch e - isa(e, InterruptException) || rethrow() - # If the test suite was merely interrupted, still print the - # summary, which can be useful to diagnose what's going on - foreach(task -> begin - istaskstarted(task) || return - istaskdone(task) && return - try - schedule(task, InterruptException(); error=true) - catch ex - @error "InterruptException" exception=ex,catch_backtrace() + @opencl $(kwargs...) $kernel() + cl.finish(cl.queue()) end - end, all_tasks) - for t in all_tasks - # NOTE: we can't just wait, but need to discard the exception, - # because the throwto for --quickfail also kills the worker. 
- try - wait(t) - catch e - showerror(stderr, e) - end - end -finally - if @isdefined stdin_monitor - schedule(stdin_monitor, InterruptException(); error=true) + end) end end -t1 = now() -elapsed = canonicalize(Dates.CompoundPeriod(t1-t0)) -println("Testing finished in $elapsed") -# construct a testset to render the test results -o_ts = Test.DefaultTestSet("Overall") -Test.push_testset(o_ts) -completed_tests = Set{String}() -for (testname, (resp,)) in results - push!(completed_tests, testname) - if isa(resp, Test.DefaultTestSet) - Test.push_testset(resp) - Test.record(o_ts, resp) - Test.pop_testset() - elseif isa(resp, Tuple{Int,Int}) - fake = Test.DefaultTestSet(testname) - for i in 1:resp[1] - Test.record(fake, Test.Pass(:test, nothing, nothing, nothing, nothing)) - end - for i in 1:resp[2] - Test.record(fake, Test.Broken(:test, nothing)) - end - Test.push_testset(fake) - Test.record(o_ts, fake) - Test.pop_testset() - elseif isa(resp, RemoteException) && isa(resp.captured.ex, Test.TestSetException) - println("Worker $(resp.pid) failed running test $(testname):") - Base.showerror(stdout, resp.captured) - println() - fake = Test.DefaultTestSet(testname) - for i in 1:resp.captured.ex.pass - Test.record(fake, Test.Pass(:test, nothing, nothing, nothing, nothing)) - end - for i in 1:resp.captured.ex.broken - Test.record(fake, Test.Broken(:test, nothing)) - end - for t in resp.captured.ex.errors_and_fails - Test.record(fake, t) - end - Test.push_testset(fake) - Test.record(o_ts, fake) - Test.pop_testset() +# avoid handle exhaustion on Windows by running each test in a separate process (pocl/pocl#1941) +function test_worker(test) + if Sys.iswindows() + addworker() else - if !isa(resp, Exception) - resp = ErrorException(string("Unknown result type : ", typeof(resp))) - end - # If this test raised an exception that is not a remote testset exception, - # i.e. 
not a RemoteException capturing a TestSetException that means - # the test runner itself had some problem, so we may have hit a segfault, - # deserialization errors or something similar. Record this testset as Errored. - fake = Test.DefaultTestSet(testname) - Test.record(fake, Test.Error(:nontest_error, testname, nothing, Any[(resp, [])], LineNumberNode(1))) - Test.push_testset(fake) - Test.record(o_ts, fake) - Test.pop_testset() + nothing end end -for test in all_tests - (test in completed_tests) && continue - fake = Test.DefaultTestSet(test) - Test.record(fake, Test.Error(:test_interrupted, test, nothing, - [("skipped", [])], LineNumberNode(1))) - Test.push_testset(fake) - Test.record(o_ts, fake) - Test.pop_testset() -end -println() -Test.print_test_results(o_ts, 1) -if !o_ts.anynonpass - println(" \033[32;1mSUCCESS\033[0m") -else - println(" \033[31;1mFAILURE\033[0m\n") - Test.print_test_errors(o_ts) - throw(Test.FallbackTestSetException("Test run finished with errors")) -end + +runtests(OpenCL, args; testsuite, init_code, test_worker) diff --git a/test/setup.jl b/test/setup.jl deleted file mode 100644 index 066424c6..00000000 --- a/test/setup.jl +++ /dev/null @@ -1,166 +0,0 @@ -using Distributed, Test -using OpenCL, pocl_jll -using IOCapture - -# KernelAbstractions has a testsuite that isn't part of the main package. -# Include it directly. -const KATestSuite = let - mod = @eval module $(gensym()) - using ..Test - import KernelAbstractions - kernelabstractions = pathof(KernelAbstractions) - kernelabstractions_root = dirname(dirname(kernelabstractions)) - include(joinpath(kernelabstractions_root, "test", "testsuite.jl")) - end - mod.Testsuite -end - -# GPUArrays has a testsuite that isn't part of the main package. -# Include it directly. 
-const GPUArraysTestSuite = let - mod = @eval module $(gensym()) - using ..Test - import GPUArrays - gpuarrays = pathof(GPUArrays) - gpuarrays_root = dirname(dirname(gpuarrays)) - include(joinpath(gpuarrays_root, "test", "testsuite.jl")) - end - mod.TestSuite -end -testf(f, xs...; kwargs...) = GPUArraysTestSuite.compare(f, CLArray, xs...; kwargs...) - -const device_eltypes = Dict() -function GPUArraysTestSuite.supported_eltypes(::Type{<:CLArray}) - get!(device_eltypes, cl.device()) do - types = [Int16, Int32, Int64, - Complex{Int16}, Complex{Int32}, Complex{Int64}, - Float32, ComplexF32] - if "cl_khr_fp64" in cl.device().extensions - push!(types, Float64) - push!(types, ComplexF64) - end - if "cl_khr_fp16" in cl.device().extensions - push!(types, Float16) - push!(types, ComplexF16) - end - return types - end -end - -using Random - - -## entry point - -const targets = [] - -function runtests(f, name, platform_filter) - old_print_setting = Test.TESTSET_PRINT_ENABLE[] - Test.TESTSET_PRINT_ENABLE[] = false - - if isempty(targets) - for platform in cl.platforms(), - device in cl.devices(platform) - if platform_filter !== nothing - # filter on the name or vendor - names = lowercase.([platform.name, platform.vendor]) - if !any(contains(platform_filter), names) - continue - end - end - push!(targets, (; platform, device)) - end - if isempty(targets) - if platform_filter === nothing - throw(ArgumentError("No OpenCL platforms found")) - else - throw(ArgumentError("No OpenCL platforms found matching $platform_filter")) - end - end - end - - try - # generate a temporary module to execute the tests in - mod_name = Symbol("Test", rand(1:100), "Main_", replace(name, '/' => '_')) - mod = @eval(Main, module $mod_name end) - @eval(mod, using Test, Random, OpenCL) - - let id = myid() - wait(@spawnat 1 print_testworker_started(name, id)) - end - - # some tests require native execution capabilities - requires_il = name in ["atomics", "execution", "intrinsics", "kernelabstractions"] || 
- startswith(name, "gpuarrays/") - - ex = quote - GC.gc(true) - Random.seed!(1) - OpenCL.allowscalar(false) - - @timed @testset $"$name" begin - @testset "\$(device.name)" for (; platform, device) in $targets - cl.platform!(platform) - cl.device!(device) - - if !$requires_il || "cl_khr_il_program" in device.extensions - $f() - end - end - end - end - data = Core.eval(mod, ex) - #data[1] is the testset - - # process results - cpu_rss = Sys.maxrss() - compilations = OpenCL.compilations[] - if VERSION >= v"1.11.0-DEV.1529" - tc = Test.get_test_counts(data[1]) - passes,fails,error,broken,c_passes,c_fails,c_errors,c_broken = - tc.passes, tc.fails, tc.errors, tc.broken, tc.cumulative_passes, - tc.cumulative_fails, tc.cumulative_errors, tc.cumulative_broken - else - passes,fails,errors,broken,c_passes,c_fails,c_errors,c_broken = - Test.get_test_counts(data[1]) - end - if data[1].anynonpass == false - data = ((passes+c_passes,broken+c_broken), - data[2], - data[3], - data[4], - data[5]) - end - res = vcat(collect(data), cpu_rss, compilations) - - GC.gc(true) - res - finally - Test.TESTSET_PRINT_ENABLE[] = old_print_setting - end -end - - -## auxiliary stuff - -# Run some code on-device -macro on_device(ex...) - code = ex[end] - kwargs = ex[1:end-1] - - @gensym kernel - esc(quote - let - function $kernel() - $code - return - end - - @opencl $(kwargs...) $kernel() - cl.finish(cl.queue()) - end - end) -end - - -nothing # File is loaded via a remotecall to "include". Ensure it returns "nothing".