diff --git a/.github/workflows/Test.yml b/.github/workflows/Test.yml
index 6ef443b6..1e6fcd9b 100644
--- a/.github/workflows/Test.yml
+++ b/.github/workflows/Test.yml
@@ -38,6 +38,36 @@ jobs:
           token: ${{ secrets.CODECOV_TOKEN }}
           fail_ci_if_error: false
           files: lcov.info
+  ka:
+    name: KA 0.10 Julia latest - ${{ matrix.os }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest, macOS-latest, windows-latest]
+    steps:
+      - uses: actions/checkout@v5
+      - uses: julia-actions/setup-julia@v2
+        with:
+          version: '1'
+      - uses: julia-actions/cache@v2
+      - name: Develop subpackages
+        run: |
+          julia --project -e '
+            using Pkg
+            Pkg.develop([PackageSpec(; name=basename(path), path) for path in ARGS])
+            Pkg.add(url="https://github.com/JuliaGPU/KernelAbstractions.jl", rev="main")
+          ' lib/GPUArraysCore lib/JLArrays
+      - uses: julia-actions/julia-runtest@v1
+        continue-on-error: true
+      - uses: julia-actions/julia-processcoverage@v1
+        with:
+          directories: src,lib
+      - uses: codecov/codecov-action@v5
+        with:
+          token: ${{ secrets.CODECOV_TOKEN }}
+          fail_ci_if_error: false
+          files: lcov.info
   opencl:
     name: OpenCL.jl
     runs-on: ubuntu-latest
diff --git a/Project.toml b/Project.toml
index ed415513..bbbb3db4 100644
--- a/Project.toml
+++ b/Project.toml
@@ -25,7 +25,7 @@ JLD2Ext = "JLD2"
 Adapt = "4.0"
 GPUArraysCore = "= 0.2.0"
 JLD2 = "0.4, 0.5, 0.6"
-KernelAbstractions = "0.9.28"
+KernelAbstractions = "0.9.28, 0.10"
 LLVM = "3.9, 4, 5, 6, 7, 8, 9"
 LinearAlgebra = "1"
 Printf = "1"
diff --git a/lib/JLArrays/Project.toml b/lib/JLArrays/Project.toml
index 55d9eb90..c1e71569 100644
--- a/lib/JLArrays/Project.toml
+++ b/lib/JLArrays/Project.toml
@@ -1,7 +1,7 @@
 name = "JLArrays"
 uuid = "27aeb0d3-9eb9-45fb-866b-73c2ecf80fcb"
 authors = ["Tim Besard <tim.besard@gmail.com>"]
-version = "0.2.0"
+version = "0.2.1"
 
 [deps]
 Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
@@ -12,6 +12,6 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 [compat]
 Adapt = "2.0, 3.0, 4.0"
 GPUArrays = "11.1"
-KernelAbstractions = "0.9"
+KernelAbstractions = "0.9, 0.10"
 Random = "1"
 julia = "1.8"
diff --git a/lib/JLArrays/src/JLArrays.jl b/lib/JLArrays/src/JLArrays.jl
index 6e5d38df..4b238fa0 100644
--- a/lib/JLArrays/src/JLArrays.jl
+++ b/lib/JLArrays/src/JLArrays.jl
@@ -15,6 +15,10 @@ using Adapt
 import KernelAbstractions
 import KernelAbstractions: Adapt, StaticArrays, Backend, Kernel, StaticSize, DynamicSize, partition, blocks, workitems, launch_config
 
+@static if isdefined(JLArrays.KernelAbstractions, :POCL) # KA v0.10
+    import KernelAbstractions: POCL
+end
+
 
 #
 # Device functionality
@@ -40,30 +44,30 @@ Adapt.adapt_structure(to::Adaptor, r::Base.RefValue) = JlRefValue(adapt(to, r[])
 ## executed on-device
 
 # array type
+@static if !isdefined(JLArrays.KernelAbstractions, :POCL) # KA v0.9
+    struct JLDeviceArray{T, N} <: AbstractDeviceArray{T, N}
+        data::Vector{UInt8}
+        offset::Int
+        dims::Dims{N}
+    end
 
-struct JLDeviceArray{T, N} <: AbstractDeviceArray{T, N}
-    data::Vector{UInt8}
-    offset::Int
-    dims::Dims{N}
-end
+    Base.elsize(::Type{<:JLDeviceArray{T}}) where {T} = sizeof(T)
 
-Base.elsize(::Type{<:JLDeviceArray{T}}) where {T} = sizeof(T)
+    Base.size(x::JLDeviceArray) = x.dims
+    Base.sizeof(x::JLDeviceArray) = Base.elsize(x) * length(x)
 
-Base.size(x::JLDeviceArray) = x.dims
-Base.sizeof(x::JLDeviceArray) = Base.elsize(x) * length(x)
+    Base.unsafe_convert(::Type{Ptr{T}}, x::JLDeviceArray{T}) where {T} =
+        convert(Ptr{T}, pointer(x.data)) + x.offset*Base.elsize(x)
 
-Base.unsafe_convert(::Type{Ptr{T}}, x::JLDeviceArray{T}) where {T} =
-    convert(Ptr{T}, pointer(x.data)) + x.offset*Base.elsize(x)
+    # conversion of untyped data to a typed Array
+    function typed_data(x::JLDeviceArray{T}) where {T}
+        unsafe_wrap(Array, pointer(x), x.dims)
+    end
 
-# conversion of untyped data to a typed Array
-function typed_data(x::JLDeviceArray{T}) where {T}
-    unsafe_wrap(Array, pointer(x), x.dims)
+    @inline Base.getindex(A::JLDeviceArray, index::Integer) = getindex(typed_data(A), index)
+    @inline Base.setindex!(A::JLDeviceArray, x, index::Integer) = setindex!(typed_data(A), x, index)
 end
 
-@inline Base.getindex(A::JLDeviceArray, index::Integer) = getindex(typed_data(A), index)
-@inline Base.setindex!(A::JLDeviceArray, x, index::Integer) = setindex!(typed_data(A), x, index)
-
-
 #
 # Host abstractions
 #
@@ -236,7 +240,7 @@ Base.convert(::Type{T}, x::T) where T <: JLArray = x
 
 ## broadcast
 
-using Base.Broadcast: BroadcastStyle, Broadcasted
+import Base.Broadcast: BroadcastStyle, Broadcasted
 
 struct JLArrayStyle{N} <: AbstractGPUArrayStyle{N} end
 JLArrayStyle{M}(::Val{N}) where {N,M} = JLArrayStyle{N}()
@@ -335,8 +339,15 @@ end
 
 ## GPUArrays interfaces
 
-Adapt.adapt_storage(::Adaptor, x::JLArray{T,N}) where {T,N} =
-    JLDeviceArray{T,N}(x.data[], x.offset, x.dims)
+@static if !isdefined(JLArrays.KernelAbstractions, :POCL) # KA v0.9
+    Adapt.adapt_storage(::Adaptor, x::JLArray{T,N}) where {T,N} =
+        JLDeviceArray{T,N}(x.data[], x.offset, x.dims)
+else
+    function Adapt.adapt_storage(::Adaptor, x::JLArray{T,N}) where {T,N}
+        arr = typed_data(x)
+        Adapt.adapt_storage(POCL.KernelAdaptor([pointer(arr)]), arr)
+    end
+end
 
 function GPUArrays.mapreducedim!(f, op, R::AnyJLArray, A::Union{AbstractArray,Broadcast.Broadcasted};
                                  init=nothing)
@@ -377,10 +388,18 @@ KernelAbstractions.allocate(::JLBackend, ::Type{T}, dims::Tuple) where T = JLArr
     return ndrange, workgroupsize, iterspace, dynamic
 end
 
-KernelAbstractions.isgpu(b::JLBackend) = false
+@static if isdefined(JLArrays.KernelAbstractions, :isgpu) # KA v0.9
+    KernelAbstractions.isgpu(b::JLBackend) = false
+end
 
-function convert_to_cpu(obj::Kernel{JLBackend, W, N, F}) where {W, N, F}
-    return Kernel{typeof(KernelAbstractions.CPU(; static = obj.backend.static)), W, N, F}(KernelAbstractions.CPU(; static = obj.backend.static), obj.f)
+@static if !isdefined(JLArrays.KernelAbstractions, :POCL) # KA v0.9
+    function convert_to_cpu(obj::Kernel{JLBackend, W, N, F}) where {W, N, F}
+        return Kernel{typeof(KernelAbstractions.CPU(; static = obj.backend.static)), W, N, F}(KernelAbstractions.CPU(; static = obj.backend.static), obj.f)
+    end
+else
+    function convert_to_cpu(obj::Kernel{JLBackend, W, N, F}) where {W, N, F}
+        return Kernel{typeof(KernelAbstractions.POCLBackend()), W, N, F}(KernelAbstractions.POCLBackend(), obj.f)
+    end
 end
 
 function (obj::Kernel{JLBackend})(args...; ndrange=nothing, workgroupsize=nothing)
@@ -391,6 +410,11 @@ end
 
 Adapt.adapt_storage(::JLBackend, a::Array) = Adapt.adapt(JLArrays.JLArray, a)
 Adapt.adapt_storage(::JLBackend, a::JLArrays.JLArray) = a
-Adapt.adapt_storage(::KernelAbstractions.CPU, a::JLArrays.JLArray) = convert(Array, a)
+
+@static if !isdefined(JLArrays.KernelAbstractions, :POCL) # KA v0.9
+    Adapt.adapt_storage(::KernelAbstractions.CPU, a::JLArrays.JLArray) = convert(Array, a)
+else
+    Adapt.adapt_storage(::KernelAbstractions.POCLBackend, a::JLArrays.JLArray) = convert(Array, a)
+end
 
 end
diff --git a/test/setup.jl b/test/setup.jl
index e0f233d6..aa1c12e5 100644
--- a/test/setup.jl
+++ b/test/setup.jl
@@ -2,6 +2,12 @@ using Distributed, Test, JLArrays
 
 include("testsuite.jl")
 
+# Disable Float16-related tests until JuliaGPU/KernelAbstractions#600 is resolved
+@static if isdefined(JLArrays.KernelAbstractions, :POCL)
+    TestSuite.supported_eltypes(::Type{<:JLArray}) =
+        setdiff(TestSuite.supported_eltypes(), [Float16, ComplexF16])
+end
+
 using Random
 
 if VERSION >= v"1.13.0-DEV.1044"
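For reference, a minimal smoke test of the compatibility shim above, runnable against either KA release (the kernel name, element type, and array size are illustrative, not part of the patch). Per convert_to_cpu above, JLBackend forwards the launch to the CPU backend on KA 0.9 and to POCLBackend on KA 0.10, so the same kernel should produce the same result on both:

    using JLArrays, KernelAbstractions

    # Trivial kernel: add one to every element.
    @kernel function plus_one!(a)
        i = @index(Global)
        @inbounds a[i] += 1
    end

    a = JLArray(zeros(Float32, 16))
    backend = KernelAbstractions.get_backend(a)  # JLBackend()
    plus_one!(backend)(a; ndrange = length(a))   # instantiate the kernel, then launch it
    @assert all(Array(a) .== 1)

Note that a Float32 array is used deliberately: on KA 0.10, Float16 and ComplexF16 are excluded from the test suite by the supported_eltypes override in test/setup.jl above.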