Merge pull request #237 from JuliaGPU/tb/from_cuarrays_with_love

maleadt · web-flow · commit a89fa3c84e97 · 2020-01-29T13:24:32.000+01:00
Port functionality from CuArrays
diff --git a/src/host/base.jl b/src/host/base.jl
@@ -53,3 +53,13 @@ function Base.repeat(a::AbstractGPUVector, m::Int)
     end
     return b
 end
+
+## PermutedDimsArrays
+
+using Base: PermutedDimsArrays
+
+# PermutedDimsArrays' custom copyto! doesn't know how to deal with GPU arrays
+function PermutedDimsArrays._copy!(dest::PermutedDimsArray{T,N,<:Any,<:Any,<:AbstractGPUArray}, src) where {T,N}
+    dest .= src
+    dest
+end
diff --git a/src/host/indexing.jl b/src/host/indexing.jl
@@ -5,39 +5,72 @@ export allowscalar, @allowscalar, assertscalar
 
 # mechanism to disallow scalar operations
 
-const scalar_allowed = Ref(true)
+@enum ScalarIndexing ScalarAllowed ScalarWarned ScalarDisallowed
+
+const scalar_allowed = Ref(ScalarWarned)
 const scalar_warned = Ref(false)
 
-function allowscalar(flag = true)
-    scalar_allowed[] = flag
+"""
+    allowscalar(allow=true, warn=true)
+
+Configure whether scalar indexing is allowed depending on the value of `allow`.
+
+If allowed, setting `warn` emits a single warning the first time scalar indexing is performed.
+Calling this function resets the warning state, so the warning is emitted again afterwards.
+"""
+function allowscalar(allow::Bool=true, warn::Bool=true)
     scalar_warned[] = false
+    scalar_allowed[] = if allow && !warn
+        ScalarAllowed
+    elseif allow
+        ScalarWarned
+    else
+        ScalarDisallowed
+    end
     return
 end
 
+"""
+    assertscalar(op::String)
+
+Assert that a certain operation `op` performs scalar indexing. If this is not allowed, an
+error will be thrown (see [`allowscalar`](@ref)).
+"""
 function assertscalar(op = "operation")
-    if !scalar_allowed[]
+    if scalar_allowed[] == ScalarDisallowed
         error("$op is disallowed")
-    elseif !scalar_warned[]
+    elseif scalar_allowed[] == ScalarWarned && !scalar_warned[]
         @warn "Performing scalar operations on GPU arrays: This is very slow, consider disallowing these operations with `allowscalar(false)`"
         scalar_warned[] = true
     end
     return
 end
 
+"""
+    @allowscalar ex...
+    @disallowscalar ex...
+
+Temporarily allow or disallow scalar iteration.
+
+Note that these macros are intended for code that is known and allowed to use
+scalar iteration (or not), i.e., there is no option to throw a warning. Only use this on
+fine-grained expressions.
+"""
 macro allowscalar(ex)
     quote
         local prev = scalar_allowed[]
-        scalar_allowed[] = true
+        scalar_allowed[] = ScalarAllowed
         local ret = $(esc(ex))
         scalar_allowed[] = prev
         ret
     end
 end
 
+@doc (@doc @allowscalar) ->
 macro disallowscalar(ex)
     quote
         local prev = scalar_allowed[]
-        scalar_allowed[] = false
+        scalar_allowed[] = ScalarDisallowed
         local ret = $(esc(ex))
         scalar_allowed[] = prev
         ret
diff --git a/src/host/linalg.jl b/src/host/linalg.jl
@@ -1,26 +1,25 @@
 # integration with LinearAlgebra stdlib
 
-function LinearAlgebra.transpose!(At::AbstractGPUArray{T, 2}, A::AbstractGPUArray{T, 2}) where T
+## transpose and adjoint
+
+function transpose_f!(f, At::AbstractGPUArray{T, 2}, A::AbstractGPUArray{T, 2}) where T
     gpu_call(At, A) do ctx, At, A
         idx = @cartesianidx A ctx
-        @inbounds At[idx[2], idx[1]] = A[idx[1], idx[2]]
+        @inbounds At[idx[2], idx[1]] = f(A[idx[1], idx[2]])
         return
     end
     At
 end
 
-function genperm(I::CartesianIndex{N}, perm::NTuple{N}) where N
-    CartesianIndex(ntuple(d-> (@inbounds return I[perm[d]]), Val(N)))
+LinearAlgebra.transpose!(At::AbstractGPUArray, A::AbstractGPUArray) = transpose_f!(transpose, At, A)
+LinearAlgebra.adjoint!(At::AbstractGPUArray, A::AbstractGPUArray) = transpose_f!(adjoint, At, A)
+
+function Base.copyto!(A::AbstractGPUArray, B::Adjoint{T, <: AbstractGPUArray}) where T
+    adjoint!(A, B.parent)
 end
 
-function LinearAlgebra.permutedims!(dest::AbstractGPUArray, src::AbstractGPUArray, perm) where N
-    perm isa Tuple || (perm = Tuple(perm))
-    gpu_call(dest, src, perm) do ctx, dest, src, perm
-        I = @cartesianidx src ctx
-        @inbounds dest[genperm(I, perm)] = src[I]
-        return
-    end
-    return dest
+function Base.copyto!(A::AbstractGPUArray, B::Transpose{T, <: AbstractGPUArray}) where T
+    transpose!(A, B.parent)
 end
 
 function Base.copyto!(A::AbstractArray, B::Adjoint{<:Any, <:AbstractGPUArray})
@@ -29,17 +28,17 @@ end
 function Base.copyto!(A::AbstractArray, B::Transpose{<:Any, <:AbstractGPUArray})
     copyto!(A, Transpose(Array(parent(B))))
 end
+
+
+## triangular
+
 function Base.copyto!(A::AbstractArray, B::UpperTriangular{<:Any, <:AbstractGPUArray})
     copyto!(A, UpperTriangular(Array(parent(B))))
 end
 function Base.copyto!(A::AbstractArray, B::LowerTriangular{<:Any, <:AbstractGPUArray})
     copyto!(A, LowerTriangular(Array(parent(B))))
 end
 
-function Base.copyto!(A::AbstractGPUArray, B::Adjoint{T, <: AbstractGPUArray}) where T
-    transpose!(A, B.parent)
-end
-
 function LinearAlgebra.tril!(A::AbstractGPUMatrix{T}, d::Integer = 0) where T
   gpu_call(A, d) do ctx, _A, _d
     I = @cartesianidx _A
@@ -64,17 +63,8 @@ function LinearAlgebra.triu!(A::AbstractGPUMatrix{T}, d::Integer = 0) where T
   return A
 end
 
-function LinearAlgebra.copy_transpose!(dst::AbstractGPUArray, src::AbstractGPUArray)
-  gpu_call(st, src) do ctx, dst, src
-    I = @cartesianidx dst
-    dst[I...] = src[reverse(I)...]
-    return
-  end
-  return dst
-end
 
-
-# matrix multiplication
+## matrix multiplication
 
 function generic_matmatmul!(C::AbstractVecOrMat{R}, A::AbstractVecOrMat{T}, B::AbstractVecOrMat{S}) where {T,S,R}
     if size(A,2) != size(B,1)
@@ -137,3 +127,20 @@ function generic_lmul!(s::Number, X::AbstractGPUArray)
 end
 
 LinearAlgebra.lmul!(a::Number, B::AbstractGPUArray) = generic_lmul!(a, B)
+
+
+## permutedims
+
+function genperm(I::CartesianIndex{N}, perm::NTuple{N}) where N
+    CartesianIndex(ntuple(d-> (@inbounds return I[perm[d]]), Val(N)))
+end
+
+function LinearAlgebra.permutedims!(dest::AbstractGPUArray, src::AbstractGPUArray, perm) where N
+    perm isa Tuple || (perm = Tuple(perm))
+    gpu_call(dest, src, perm) do ctx, dest, src, perm
+        I = @cartesianidx src ctx
+        @inbounds dest[genperm(I, perm)] = src[I]
+        return
+    end
+    return dest
+end
diff --git a/src/host/mapreduce.jl b/src/host/mapreduce.jl
@@ -178,14 +178,3 @@ function acc_mapreduce(f, op, v0::OT, A::GPUSrcArray, rest...) where {OT}
              target=out, threads=threads, blocks=blocks)
     reduce(op, Array(out))
 end
-
-"""
-Same as Base.isapprox, but without keyword args and without nans
-"""
-function fast_isapprox(x::Number, y::Number, rtol::Real = Base.rtoldefault(x, y), atol::Real=0)
-    x == y || (isfinite(x) && isfinite(y) && abs(x-y) <= max(atol, rtol*max(abs(x), abs(y))))
-end
-
-Base.isapprox(A::AbstractGPUArray{T1}, B::AbstractGPUArray{T2}, rtol::Real = Base.rtoldefault(T1, T2, 0), atol::Real=0) where {T1, T2} = all(fast_isapprox.(A, B, T1(rtol)|>real, T1(atol)|>real))
-Base.isapprox(A::AbstractArray{T1}, B::AbstractGPUArray{T2}, rtol::Real = Base.rtoldefault(T1, T2, 0), atol::Real=0) where {T1, T2} = all(fast_isapprox.(A, Array(B), T1(rtol)|>real, T1(atol)|>real))
-Base.isapprox(A::AbstractGPUArray{T1}, B::AbstractArray{T2}, rtol::Real = Base.rtoldefault(T1, T2, 0), atol::Real=0) where {T1, T2} = all(fast_isapprox.(Array(A), B, T1(rtol)|>real, T1(atol)|>real))
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -4,5 +4,6 @@ include("testsuite.jl")
 
 @testset "JLArray" begin
     using GPUArrays.JLArrays
+    JLArrays.allowscalar(false)
     TestSuite.test(JLArray)
 end
diff --git a/test/testsuite/base.jl b/test/testsuite/base.jl
@@ -148,5 +148,12 @@ function test_base(AT)
             @test blocks == 1
             @test threads == 1
         end
+
+        @testset "permutedims" begin
+            @test compare(x->permutedims(x, [1, 2]), AT, rand(4, 4))
+
+            inds = rand(1:100, 150, 150)
+            @test compare(x->permutedims(view(x, inds, :), (3, 2, 1)), AT, rand(100, 100))
+        end
     end
 end
diff --git a/test/testsuite/linalg.jl b/test/testsuite/linalg.jl
@@ -1,8 +1,12 @@
 function test_linalg(AT)
     @testset "linear algebra" begin
-        @testset "transpose" begin
+        @testset "adjoint and transpose" begin
             @test compare(adjoint, AT, rand(Float32, 32, 32))
+            @test compare(adjoint!, AT, rand(Float32, 32, 32), rand(Float32, 32, 32))
             @test compare(transpose, AT, rand(Float32, 32, 32))
+            @test compare(transpose!, AT, rand(Float32, 32, 32), rand(Float32, 32, 32))
+            @test compare((x,y)->copyto!(x, adjoint(y)), AT, rand(Float32, 32, 32), rand(Float32, 32, 32))
+            @test compare((x,y)->copyto!(x, transpose(y)), AT, rand(Float32, 32, 32), rand(Float32, 32, 32))
             @test compare(transpose!, AT, Array{Float32}(undef, 32, 32), rand(Float32, 32, 32))
             @test compare(transpose!, AT, Array{Float32}(undef, 128, 32), rand(Float32, 32, 128))
         end
@@ -19,6 +23,7 @@ function test_linalg(AT)
             copyto!(ga, LowerTriangular(gb))
             @test ga == Array(collect(LowerTriangular(gb)))
         end
+
         @testset "permutedims" begin
             @test compare(x -> permutedims(x, (2, 1)), AT, rand(Float32, 2, 3))
             @test compare(x -> permutedims(x, (2, 1, 3)), AT, rand(Float32, 4, 5, 6))
diff --git a/test/testsuite/mapreduce.jl b/test/testsuite/mapreduce.jl
@@ -58,23 +58,5 @@ function test_mapreduce(AT)
                 @test A !== deepcopy(A)
             end
         end
-
-        @testset "isapprox" begin
-            for ET in supported_eltypes()
-                ET <: Complex && continue
-                A = fill(AT{ET}, ET(0), (100,))
-                B = ones(AT{ET}, 100)
-                @test !(A ≈ B)
-                @test !(A ≈ Array(B))
-                @test !(Array(A) ≈ B)
-
-
-                ca = AT(randn(ComplexF64,3,3))
-                cb = copy(ca)
-                cb[1:1, 1:1] .+= 1e-7im
-                @test isapprox(ca, cb, atol=1e-5)
-                @test !isapprox(ca, cb, atol=1e-9)
-            end
-        end
     end
 end