diff --git a/Project.toml b/Project.toml index 8ca217db8..dd10c3c8d 100644 --- a/Project.toml +++ b/Project.toml @@ -19,6 +19,7 @@ VectorInterface = "409d34a3-91d5-4945-b6ec-7529ddf182d8" [weakdeps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" +AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" @@ -26,12 +27,14 @@ cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1" [extensions] TensorKitAdaptExt = "Adapt" +TensorKitAMDGPUExt = "AMDGPU" TensorKitCUDAExt = ["CUDA", "cuTENSOR"] TensorKitChainRulesCoreExt = "ChainRulesCore" TensorKitFiniteDifferencesExt = "FiniteDifferences" [compat] Adapt = "4" +AMDGPU = "2" Aqua = "0.6, 0.7, 0.8" ArgParse = "1.2.0" CUDA = "5.9" @@ -61,6 +64,7 @@ julia = "1.10" [extras] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" +AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" @@ -78,4 +82,4 @@ Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1" [targets] -test = ["ArgParse", "Adapt", "Aqua", "Combinatorics", "CUDA", "cuTENSOR", "GPUArrays", "LinearAlgebra", "SafeTestsets", "TensorOperations", "Test", "TestExtras", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote"] +test = ["ArgParse", "Adapt", "Aqua", "Combinatorics", "AMDGPU", "CUDA", "cuTENSOR", "GPUArrays", "LinearAlgebra", "SafeTestsets", "TensorOperations", "Test", "TestExtras", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote"] diff --git a/ext/TensorKitAMDGPUExt/TensorKitAMDGPUExt.jl b/ext/TensorKitAMDGPUExt/TensorKitAMDGPUExt.jl new file mode 100644 index 000000000..9fa2dc2f8 --- /dev/null +++ b/ext/TensorKitAMDGPUExt/TensorKitAMDGPUExt.jl @@ -0,0 +1,20 @@ +module TensorKitAMDGPUExt + +using AMDGPU, AMDGPU.rocBLAS, AMDGPU.rocSOLVER, LinearAlgebra +using AMDGPU: @allowscalar +import AMDGPU: rand as rocrand, rand! as rocrand!, randn as rocrandn, randn! as rocrandn! + +using TensorKit +using TensorKit.Factorizations +using TensorKit.Strided +using TensorKit.Factorizations: AbstractAlgorithm +using TensorKit: SectorDict, tensormaptype, scalar, similarstoragetype, AdjointTensorMap, scalartype, project_symmetric_and_check +import TensorKit: randisometry, rand, randn + +using TensorKit: MatrixAlgebraKit + +using Random + +include("roctensormap.jl") + +end diff --git a/ext/TensorKitAMDGPUExt/roctensormap.jl b/ext/TensorKitAMDGPUExt/roctensormap.jl new file mode 100644 index 000000000..a9a937912 --- /dev/null +++ b/ext/TensorKitAMDGPUExt/roctensormap.jl @@ -0,0 +1,166 @@ +const ROCTensorMap{T, S, N₁, N₂} = TensorMap{T, S, N₁, N₂, ROCVector{T, AMDGPU.Mem.HIPBuffer}} +const ROCTensor{T, S, N} = ROCTensorMap{T, S, N, 0} + +const AdjointROCTensorMap{T, S, N₁, N₂} = AdjointTensorMap{T, S, N₁, N₂, ROCTensorMap{T, S, N₁, N₂}} + +function ROCTensorMap(t::TensorMap{T, S, N₁, N₂, A}) where {T, S, N₁, N₂, A} + return ROCTensorMap{T, S, N₁, N₂}(ROCArray{T}(t.data), space(t)) +end + +# project_symmetric! 
doesn't yet work for GPU types, so do this on the host, then copy +function TensorKit.project_symmetric_and_check(::Type{T}, ::Type{A}, data::AbstractArray, V::TensorMapSpace; tol = sqrt(eps(real(float(eltype(data)))))) where {T, A <: ROCVector{T}} + h_t = TensorKit.TensorMapWithStorage{T, Vector{T}}(undef, V) + h_t = TensorKit.project_symmetric!(h_t, Array(data)) + # verify result + isapprox(Array(reshape(data, dims(h_t))), convert(Array, h_t); atol = tol) || + throw(ArgumentError("Data has non-zero elements at incompatible positions")) + return TensorKit.TensorMapWithStorage{T, A}(A(h_t.data), V) +end + +for (fname, felt) in ((:zeros, :zero), (:ones, :one)) + @eval begin + function AMDGPU.$fname( + codomain::TensorSpace{S}, + domain::TensorSpace{S} = one(codomain) + ) where {S <: IndexSpace} + return AMDGPU.$fname(codomain ← domain) + end + function AMDGPU.$fname( + ::Type{T}, codomain::TensorSpace{S}, + domain::TensorSpace{S} = one(codomain) + ) where {T, S <: IndexSpace} + return AMDGPU.$fname(T, codomain ← domain) + end + AMDGPU.$fname(V::TensorMapSpace) = AMDGPU.$fname(Float64, V) + function AMDGPU.$fname(::Type{T}, V::TensorMapSpace) where {T} + t = ROCTensorMap{T}(undef, V) + fill!(t, $felt(T)) + return t + end + end +end + +for randfun in (:rocrand, :rocrandn) + randfun! = Symbol(randfun, :!) + @eval begin + # converting `codomain` and `domain` into `HomSpace` + function $randfun( + codomain::TensorSpace{S}, + domain::TensorSpace{S} = one(codomain), + ) where {S <: IndexSpace} + return $randfun(codomain ← domain) + end + function $randfun( + ::Type{T}, codomain::TensorSpace{S}, + domain::TensorSpace{S} = one(codomain), + ) where {T, S <: IndexSpace} + return $randfun(T, codomain ← domain) + end + function $randfun( + rng::Random.AbstractRNG, ::Type{T}, + codomain::TensorSpace{S}, + domain::TensorSpace{S} = one(codomain), + ) where {T, S <: IndexSpace} + return $randfun(rng, T, codomain ← domain) + end + + # filling in default eltype + $randfun(V::TensorMapSpace) = $randfun(Float64, V) + function $randfun(rng::Random.AbstractRNG, V::TensorMapSpace) + return $randfun(rng, Float64, V) + end + + # filling in default rng + function $randfun(::Type{T}, V::TensorMapSpace) where {T} + return $randfun(Random.default_rng(), T, V) + end + + # implementation + function $randfun( + rng::Random.AbstractRNG, ::Type{T}, + V::TensorMapSpace + ) where {T} + t = ROCTensorMap{T}(undef, V) + $randfun!(rng, t) + return t + end + + function $randfun!(rng::Random.AbstractRNG, t::ROCTensorMap) + for (_, b) in blocks(t) + $randfun!(rng, b) + end + return t + end + end +end + +# Scalar implementation +#----------------------- +function TensorKit.scalar(t::ROCTensorMap{T, S, 0, 0}) where {T, S} + inds = findall(!iszero, t.data) + return isempty(inds) ? 
zero(scalartype(t)) : @allowscalar @inbounds t.data[only(inds)]
+end
+
+function Base.convert(
+        TT::Type{ROCTensorMap{T, S, N₁, N₂}},
+        t::AbstractTensorMap{<:Any, S, N₁, N₂}
+    ) where {T, S, N₁, N₂}
+    if typeof(t) === TT
+        return t
+    else
+        tnew = TT(undef, space(t))
+        return copy!(tnew, t)
+    end
+end
+
+function LinearAlgebra.isposdef(t::ROCTensorMap)
+    domain(t) == codomain(t) ||
+        throw(SpaceMismatch("`isposdef` requires domain and codomain to be the same"))
+    InnerProductStyle(spacetype(t)) === EuclideanInnerProduct() || return false
+    for (c, b) in blocks(t)
+        # do our own hermitian check, with explicit tolerances
+        isherm = MatrixAlgebraKit.ishermitian(b; atol = eps(real(eltype(b))), rtol = eps(real(eltype(b))))
+        isherm || return false
+        isposdef(Hermitian(b)) || return false
+    end
+    return true
+end
+
+function Base.promote_rule(
+        ::Type{<:TT₁},
+        ::Type{<:TT₂}
+    ) where {
+        S, N₁, N₂, TTT₁, TTT₂,
+        TT₁ <: ROCTensorMap{TTT₁, S, N₁, N₂},
+        TT₂ <: ROCTensorMap{TTT₂, S, N₁, N₂},
+    }
+    T = TensorKit.VectorInterface.promote_add(TTT₁, TTT₂)
+    return ROCTensorMap{T, S, N₁, N₂}
+end
+
+# ROCTensorMap exponentiation:
+function TensorKit.exp!(t::ROCTensorMap)
+    domain(t) == codomain(t) ||
+        error("Exponential of a tensor only exists when domain == codomain.")
+    !MatrixAlgebraKit.ishermitian(t) && throw(ArgumentError("`exp!` is currently only supported on hermitian AMDGPU tensors"))
+    for (c, b) in blocks(t)
+        copy!(b, parent(Base.exp(Hermitian(b))))
+    end
+    return t
+end
+
+# functions that don't map ℝ to (a subset of) ℝ
+for f in (:sqrt, :log, :asin, :acos, :acosh, :atanh, :acoth)
+    sf = string(f)
+    @eval function Base.$f(t::ROCTensorMap)
+        domain(t) == codomain(t) ||
+            throw(SpaceMismatch("`$($sf)` of a tensor only exists when domain == codomain"))
+        !MatrixAlgebraKit.ishermitian(t) && throw(ArgumentError("`$($sf)` is currently only supported on hermitian AMDGPU tensors"))
+        T = complex(float(scalartype(t)))
+        tf = similar(t, T)
+        for (c, b) in blocks(t)
+            copy!(block(tf, c), parent($f(Hermitian(b))))
+        end
+        return tf
+    end
+end
diff --git a/test/amd/tensors.jl b/test/amd/tensors.jl
new file mode 100644
index 000000000..459212fa5
--- /dev/null
+++ b/test/amd/tensors.jl
@@ -0,0 +1,606 @@
+using Adapt, AMDGPU
+using Test, TestExtras
+using TensorKit, Combinatorics
+ad = adapt(Array)
+const AMDGPUExt = Base.get_extension(TensorKit, :TensorKitAMDGPUExt)
+@assert !isnothing(AMDGPUExt)
+const ROCTensorMap = getglobal(AMDGPUExt, :ROCTensorMap)
+using AMDGPU: rand as rocrand, rand! as rocrand!, randn as rocrandn, randn! as rocrandn!
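+# NOTE: `Base.get_extension`/`getglobal` reaches names the extension defines without
+# TensorKit re-exporting them; the `rand as rocrand, ...` aliases above resolve to the
+# same functions, since the extension itself imports them from AMDGPU under these names.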
+ +@isdefined(TestSetup) || include("../setup.jl") +using .TestSetup + +for V in (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂, VfSU₂) #, VSU₃) + V1, V2, V3, V4, V5 = V + @assert V3 * V4 * V2 ≿ V1' * V5' # necessary for leftorth tests + @assert V3 * V4 ≾ V1' * V2' * V5' # necessary for rightorth tests +end + +spacelist = try + if ENV["CI"] == "true" + println("Detected running on CI") + if Sys.iswindows() + (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂) + elseif Sys.isapple() + (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VfU₁, VfSU₂) #, VSU₃) + else + (Vtr, Vℤ₂, Vfℤ₂, VU₁, VCU₁, VSU₂, VfSU₂) #, VSU₃) + end + else + (Vtr, VU₁, VSU₂, Vfℤ₂) + end +catch + (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂, VfSU₂) #, VSU₃) +end + +for V in spacelist + I = sectortype(first(V)) + Istr = TensorKit.type_repr(I) + println("---------------------------------------") + println("AMDGPU Tensors with symmetry: $Istr") + println("---------------------------------------") + @timedtestset "Tensors with symmetry: $Istr" verbose = true begin + V1, V2, V3, V4, V5 = V + @timedtestset "Basic tensor properties" begin + W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 + # test default pass-throughs + for f in (AMDGPU.zeros, AMDGPU.ones, rocrand, rocrandn) + t = @constinferred f(W) + @test scalartype(t) == Float64 + @test codomain(t) == W + @test space(t) == (W ← one(W)) + @test domain(t) == one(W) + @test typeof(t) == TensorMap{Float64, spacetype(t), 5, 0, ROCVector{Float64, AMDGPU.Mem.HIPBuffer}} + end + for f in (rand, randn) + t = @constinferred f(ROCVector{Float64, AMDGPU.Mem.HIPBuffer}, W) + @test scalartype(t) == Float64 + @test codomain(t) == W + @test space(t) == (W ← one(W)) + @test domain(t) == one(W) + @test typeof(t) == TensorMap{Float64, spacetype(t), 5, 0, ROCVector{Float64, AMDGPU.Mem.HIPBuffer}} + end + for f! in (rocrand!, rocrandn!) 
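+                # in-place fill of a freshly allocated tensor: eltype, spaces and the
+                # ROC storage type must be preserved, only the data changes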
+ t = @constinferred AMDGPU.zeros(W) + f!(t) + @test scalartype(t) == Float64 + @test codomain(t) == W + @test space(t) == (W ← one(W)) + @test domain(t) == one(W) + @test typeof(t) == TensorMap{Float64, spacetype(t), 5, 0, ROCVector{Float64, AMDGPU.Mem.HIPBuffer}} + end + for T in (Int, Float32, Float64, ComplexF32, ComplexF64) + t = @constinferred AMDGPU.zeros(T, W) + AMDGPU.@allowscalar begin + @test @constinferred(hash(t)) == hash(deepcopy(t)) + end + @test scalartype(t) == T + @test norm(t) == 0 + @test codomain(t) == W + @test space(t) == (W ← one(W)) + @test domain(t) == one(W) + @test typeof(t) == TensorMap{T, spacetype(t), 5, 0, ROCVector{T, AMDGPU.Mem.HIPBuffer}} + # blocks + bs = @constinferred blocks(t) + (c, b1), state = @constinferred Nothing iterate(bs) + @test c == first(blocksectors(W)) + next = @constinferred Nothing iterate(bs, state) + b2 = @constinferred block(t, first(blocksectors(t))) + @test b1 == b2 + @test_broken eltype(bs) === Pair{typeof(c), typeof(b1)} + @test_broken typeof(b1) === TensorKit.blocktype(t) + @test typeof(c) === sectortype(t) + end + end + @timedtestset "Conversion to/from host" begin + W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 + for T in (Int, Float32, ComplexF64) + h_t = @constinferred rand(T, W) + t1 = convert(ROCTensorMap{T}, h_t) + @test collect(t1.data) == h_t.data + @test space(t1) == space(h_t) + @test scalartype(t1) == T + @test codomain(t1) == W + @test space(t1) == (W ← one(W)) + @test domain(t1) == one(W) + t2 = ROCTensorMap(h_t) + @test collect(t2.data) == h_t.data + @test space(t2) == space(h_t) + @test scalartype(t2) == T + @test codomain(t2) == W + @test space(t2) == (W ← one(W)) + @test domain(t2) == one(W) + end + end + @timedtestset "Tensor Dict conversion" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for T in (Int, Float32, ComplexF64) + t = @constinferred AMDGPU.rand(T, W) + d = convert(Dict, t) + @test TensorKit.to_cpu(t) == convert(TensorMap, d) + end + end + @timedtestset "Basic linear algebra" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for T in (Float32, ComplexF64) + t = @constinferred AMDGPU.rand(T, W) + @test scalartype(t) == T + @test space(t) == W + @test space(t') == W' + @test dim(t) == dim(space(t)) + @test codomain(t) == codomain(W) + @test domain(t) == domain(W) + # blocks for adjoint + bs = @constinferred blocks(t') + (c, b1), state = @constinferred Nothing iterate(bs) + @test c == first(blocksectors(W')) + next = @constinferred Nothing iterate(bs, state) + b2 = @constinferred block(t', first(blocksectors(t'))) + @test b1 == b2 + @test_broken eltype(bs) === Pair{typeof(c), typeof(b1)} + @test_broken typeof(b1) === TensorKit.blocktype(t') + @test typeof(c) === sectortype(t) + # linear algebra + @test isa(@constinferred(norm(t)), real(T)) + @test norm(t)^2 ≈ dot(t, t) + α = rand(T) + @test norm(α * t) ≈ abs(α) * norm(t) + @test norm(t + t, 2) ≈ 2 * norm(t, 2) + @test norm(t + t, 1) ≈ 2 * norm(t, 1) + @test norm(t + t, Inf) ≈ 2 * norm(t, Inf) + p = 3 * rand(Float64) + @test norm(t + t, p) ≈ 2 * norm(t, p) + @test norm(t) ≈ norm(t') + + t2 = @constinferred rand!(similar(t)) + β = rand(T) + #@test @constinferred(dot(β * t2, α * t)) ≈ conj(β) * α * conj(dot(t, t2)) # broken for Irrep[CU₁] + @test dot(β * t2, α * t) ≈ conj(β) * α * conj(dot(t, t2)) + @test dot(t2, t) ≈ conj(dot(t, t2)) + @test dot(t2, t) ≈ conj(dot(t2', t')) + @test dot(t2, t) ≈ dot(t', t2') + + i1 = @constinferred(isomorphism(ROCVector{T, AMDGPU.Mem.HIPBuffer}, V1 ⊗ V2, V2 ⊗ V1)) + i2 = @constinferred(isomorphism(ROCVector{T, AMDGPU.Mem.HIPBuffer}, V2 ⊗ V1, V1 ⊗ V2)) + @test i1 * 
i2 == @constinferred(id(ROCVector{T, AMDGPU.Mem.HIPBuffer}, V1 ⊗ V2)) + @test i2 * i1 == @constinferred(id(ROCVector{T, AMDGPU.Mem.HIPBuffer}, V2 ⊗ V1)) + w = @constinferred(isometry(ROCVector{T, AMDGPU.Mem.HIPBuffer}, V1 ⊗ (oneunit(V1) ⊕ oneunit(V1)), V1)) + @test dim(w) == 2 * dim(V1 ← V1) + @test w' * w == id(ROCVector{T, AMDGPU.Mem.HIPBuffer}, V1) + @test w * w' == (w * w')^2 + end + end + @timedtestset "Trivial space insertion and removal" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for T in (Float32, ComplexF64) + t = @constinferred AMDGPU.rand(T, W) + t2 = @constinferred insertleftunit(t) + @test t2 == @constinferred insertrightunit(t) + @test numind(t2) == numind(t) + 1 + @test space(t2) == insertleftunit(space(t)) + @test scalartype(t2) === T + @test t.data === t2.data + @test @constinferred(removeunit(t2, $(numind(t2)))) == t + t3 = @constinferred insertleftunit(t; copy = true) + @test t3 == @constinferred insertrightunit(t; copy = true) + @test t.data !== t3.data + for (c, b) in blocks(t) + @test b == block(t3, c) + end + @test @constinferred(removeunit(t3, $(numind(t3)))) == t + t4 = @constinferred insertrightunit(t, 3; dual = true) + @test numin(t4) == numin(t) && numout(t4) == numout(t) + 1 + for (c, b) in blocks(t) + @test b == block(t4, c) + end + @test @constinferred(removeunit(t4, 4)) == t + t5 = @constinferred insertleftunit(t, 4; dual = true) + @test numin(t5) == numin(t) + 1 && numout(t5) == numout(t) + for (c, b) in blocks(t) + @test b == block(t5, c) + end + @test @constinferred(removeunit(t5, 4)) == t + end + end + if hasfusiontensor(I) + @timedtestset "Basic linear algebra: test via CPU" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for T in (Float32, ComplexF64) + t = AMDGPU.rand(T, W) + t2 = @constinferred AMDGPU.rand!(similar(t)) + α = rand(T) + @test norm(t, 2) ≈ norm(TensorKit.to_cpu(t), 2) + @test dot(t2, t) ≈ dot(TensorKit.to_cpu(t2), TensorKit.to_cpu(t)) + @test TensorKit.to_cpu(α * t) ≈ α * TensorKit.to_cpu(t) + @test TensorKit.to_cpu(t + t) ≈ 2 * TensorKit.to_cpu(t) + end + end + @timedtestset "Real and imaginary parts" begin + W = V1 ⊗ V2 + for T in (Float64, ComplexF64, ComplexF32) + t = @constinferred AMDGPU.randn(T, W, W) + + tr = @constinferred real(t) + @test scalartype(tr) <: Real + @test real(TensorKit.to_cpu(t)) == TensorKit.to_cpu(tr) + @test storagetype(tr) == ROCVector{real(T), AMDGPU.Mem.HIPBuffer} + + ti = @constinferred imag(t) + @test scalartype(ti) <: Real + @test imag(TensorKit.to_cpu(t)) == TensorKit.to_cpu(ti) + @test storagetype(ti) == ROCVector{real(T), AMDGPU.Mem.HIPBuffer} + + tc = @inferred complex(t) + @test scalartype(tc) <: Complex + @test complex(TensorKit.to_cpu(t)) == TensorKit.to_cpu(tc) + @test storagetype(tc) == ROCVector{complex(T), AMDGPU.Mem.HIPBuffer} + + tc2 = @inferred complex(tr, ti) + @test tc2 ≈ tc + @test storagetype(tc2) == ROCVector{complex(T), AMDGPU.Mem.HIPBuffer} + end + end + end + @timedtestset "Tensor conversion" begin # TODO adjoint conversion methods don't work yet + W = V1 ⊗ V2 + t = @constinferred AMDGPU.randn(W ← W) + #@test typeof(convert(TensorMap, t')) == typeof(t) # TODO Adjoint not supported yet + tc = complex(t) + @test convert(typeof(tc), t) == tc + @test typeof(convert(typeof(tc), t)) == typeof(tc) + # @test typeof(convert(typeof(tc), t')) == typeof(tc) # TODO Adjoint not supported yet + @test Base.promote_typeof(t, tc) == typeof(tc) + @test Base.promote_typeof(tc, t) == typeof(tc + t) + end + #=@timedtestset "diag/diagm" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + t = AMDGPU.randn(ComplexF64, W) + d = 
LinearAlgebra.diag(t) + # TODO find a way to use AMDGPU here + D = LinearAlgebra.diagm(codomain(t), domain(t), d) + @test LinearAlgebra.isdiag(D) + @test LinearAlgebra.diag(D) == d + end=# + @timedtestset "Permutations: test via inner product invariance" begin + W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 + t = AMDGPU.rand(ComplexF64, W) + t′ = AMDGPU.randn!(similar(t)) + for k in 0:5 + for p in permutations(1:5) + p1 = ntuple(n -> p[n], k) + p2 = ntuple(n -> p[k + n], 5 - k) + AMDGPU.@allowscalar begin + t2 = @constinferred permute(t, (p1, p2)) + t2 = permute(t, (p1, p2)) + @test norm(t2) ≈ norm(t) + t2′ = permute(t′, (p1, p2)) + @test dot(t2′, t2) ≈ dot(t′, t) ≈ dot(transpose(t2′), transpose(t2)) + end + end + + AMDGPU.@allowscalar begin + t3 = @constinferred repartition(t, $k) + t3 = repartition(t, k) + @test norm(t3) ≈ norm(t) + t3′ = @constinferred repartition!(similar(t3), t′) + @test norm(t3′) ≈ norm(t′) + @test dot(t′, t) ≈ dot(t3′, t3) + end + end + end + if BraidingStyle(I) isa SymmetricBraiding + @timedtestset "Permutations: test via CPU" begin + W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 + t = AMDGPU.rand(ComplexF64, W) + for k in 0:5 + for p in permutations(1:5) + p1 = ntuple(n -> p[n], k) + p2 = ntuple(n -> p[k + n], 5 - k) + dt2 = AMDGPU.@allowscalar permute(t, (p1, p2)) + ht2 = permute(TensorKit.to_cpu(t), (p1, p2)) + @test ht2 == TensorKit.to_cpu(dt2) + end + + dt3 = AMDGPU.@allowscalar repartition(t, k) + ht3 = repartition(TensorKit.to_cpu(t), k) + @test ht3 == TensorKit.to_cpu(dt3) + end + end + end + @timedtestset "Full trace: test self-consistency" begin + t = AMDGPU.rand(ComplexF64, V1 ⊗ V2' ⊗ V2 ⊗ V1') + AMDGPU.@allowscalar begin + t2 = permute(t, ((1, 2), (4, 3))) + s = @constinferred tr(t2) + @test conj(s) ≈ tr(t2') + if !isdual(V1) + t2 = twist!(t2, 1) + end + if isdual(V2) + t2 = twist!(t2, 2) + end + ss = tr(t2) + @tensor s2 = t[a, b, b, a] + @tensor t3[a, b] := t[a, c, c, b] + @tensor s3 = t3[a, a] + end + @test ss ≈ s2 + @test ss ≈ s3 + end + #=@timedtestset "Partial trace: test self-consistency" begin + t = AMDGPU.rand(ComplexF64, V1 ⊗ V2' ⊗ V3 ⊗ V2 ⊗ V1' ⊗ V3') + @tensor t2[a, b] := t[c, d, b, d, c, a] + @tensor t4[a, b, c, d] := t[d, e, b, e, c, a] + @tensor t5[a, b] := t4[a, b, c, c] + @test t2 ≈ t5 + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Trace: test via conversion" begin + t = AMDGPU.rand(ComplexF64, V1 ⊗ V2' ⊗ V3 ⊗ V2 ⊗ V1' ⊗ V3') + AMDGPU.@allowscalar begin + @tensor t2[a, b] := t[c, d, b, d, c, a] + @tensor t3[a, b] := ad(t)[c, d, b, d, c, a] + end + @test t3 ≈ ad(t2) + end + end + @timedtestset "Trace and contraction" begin + t1 = AMDGPU.rand(ComplexF64, V1 ⊗ V2 ⊗ V3) + t2 = AMDGPU.rand(ComplexF64, V2' ⊗ V4 ⊗ V1') + AMDGPU.@allowscalar begin + t3 = t1 ⊗ t2 + @tensor ta[a, b] := t1[x, y, a] * t2[y, b, x] + @tensor tb[a, b] := t3[x, y, a, y, b, x] + end + @test ta ≈ tb + end + #=if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Tensor contraction: test via CPU" begin + dA1 = AMDGPU.randn(ComplexF64, V1' * V2', V3') + dA2 = AMDGPU.randn(ComplexF64, V3 * V4, V5) + drhoL = AMDGPU.randn(ComplexF64, V1, V1) + drhoR = AMDGPU.randn(ComplexF64, V5, V5)' # test adjoint tensor + dH = AMDGPU.randn(ComplexF64, V2 * V4, V2 * V4) + @tensor dHrA12[a, s1, s2, c] := drhoL[a, a'] * conj(dA1[a', t1, b]) * + dA2[b, t2, c'] * drhoR[c', c] * + dH[s1, s2, t1, t2] + @tensor hHrA12[a, s1, s2, c] := TensorKit.to_cpu(drhoL)[a, a'] * conj(TensorKit.to_cpu(dA1)[a', t1, b]) * + TensorKit.to_cpu(dA2)[b, t2, c'] * TensorKit.to_cpu(drhoR)[c', c] * + 
TensorKit.to_cpu(dH)[s1, s2, t1, t2] + @test TensorKit.to_cpu(dHrA12) ≈ hHrA12 + end + end=# # doesn't yet work because of AdjointTensor + @timedtestset "Index flipping: test flipping inverse" begin + t = AMDGPU.rand(ComplexF64, V1 ⊗ V1' ← V1' ⊗ V1) + for i in 1:4 + AMDGPU.@allowscalar begin + @test t ≈ flip(flip(t, i), i; inv = true) + @test t ≈ flip(flip(t, i; inv = true), i) + end + end + end + #=@timedtestset "Index flipping: test via explicit flip" begin + t = AMDGPU.rand(ComplexF64, V1 ⊗ V1' ← V1' ⊗ V1) + F1 = unitary(flip(V1), V1) + + AMDGPU.@allowscalar begin + @tensor tf[a, b; c, d] := F1[a, a'] * t[a', b; c, d] + @test flip(t, 1) ≈ tf + @tensor tf[a, b; c, d] := conj(F1[b, b']) * t[a, b'; c, d] + @test twist!(flip(t, 2), 2) ≈ tf + @tensor tf[a, b; c, d] := F1[c, c'] * t[a, b; c', d] + @test flip(t, 3) ≈ tf + @tensor tf[a, b; c, d] := conj(F1[d, d']) * t[a, b; c, d'] + @test twist!(flip(t, 4), 4) ≈ tf + end + end + @timedtestset "Index flipping: test via contraction" begin + t1 = AMDGPU.rand(ComplexF64, V1 ⊗ V2 ⊗ V3 ← V4) + t2 = AMDGPU.rand(ComplexF64, V2' ⊗ V5 ← V4' ⊗ V1) + AMDGPU.@allowscalar begin + @tensor ta[a, b] := t1[x, y, a, z] * t2[y, b, z, x] + @tensor tb[a, b] := flip(t1, 1)[x, y, a, z] * flip(t2, 4)[y, b, z, x] + @test ta ≈ tb + @tensor tb[a, b] := flip(t1, (2, 4))[x, y, a, z] * flip(t2, (1, 3))[y, b, z, x] + @test ta ≈ tb + @tensor tb[a, b] := flip(t1, (1, 2, 4))[x, y, a, z] * flip(t2, (1, 3, 4))[y, b, z, x] + @tensor tb[a, b] := flip(t1, (1, 3))[x, y, a, z] * flip(t2, (2, 4))[y, b, z, x] + @test flip(ta, (1, 2)) ≈ tb + end + end=# # TODO =# # None of this works due to lack of HIPTensor support + @timedtestset "Multiplication of isometries: test properties" begin + W2 = V4 ⊗ V5 + W1 = W2 ⊗ (oneunit(V1) ⊕ oneunit(V1)) + for T in (Float64, ComplexF64) + t1 = randisometry(ROCMatrix{T}, W1, W2) + t2 = randisometry(ROCMatrix{T}, W2 ← W2) + @test isisometric(t1) + @test isunitary(t2) + P = t1 * t1' + @test P * P ≈ P + end + end + @timedtestset "Multiplication and inverse: test compatibility" begin + W1 = V1 ⊗ V2 ⊗ V3 + W2 = V4 ⊗ V5 + for T in (Float64, ComplexF64) + t1 = AMDGPU.rand(T, W1, W1) + t2 = AMDGPU.rand(T, W2, W2) + t = AMDGPU.rand(T, W1, W2) + @test t1 * (t1 \ t) ≈ t + @test (t / t2) * t2 ≈ t + @test t1 \ one(t1) ≈ inv(t1) + @test one(t1) / t1 ≈ pinv(t1) + @test_throws SpaceMismatch inv(t) + @test_throws SpaceMismatch t2 \ t + @test_throws SpaceMismatch t / t1 + tp = pinv(t) * t + @test tp ≈ tp * tp + end + end + @timedtestset "Multiplication and inverse: test via CPU" begin + W1 = V1 ⊗ V2 ⊗ V3 + W2 = V4 ⊗ V5 + for T in (Float32, Float64, ComplexF32, ComplexF64) + t1 = AMDGPU.rand(T, W1, W1) + t2 = AMDGPU.rand(T, W2, W2) + t = AMDGPU.rand(T, W1, W2) + ht1 = TensorKit.to_cpu(t1) + ht2 = TensorKit.to_cpu(t2) + ht = TensorKit.to_cpu(t) + @test TensorKit.to_cpu(t1 * t) ≈ ht1 * ht + @test TensorKit.to_cpu(t1' * t) ≈ ht1' * ht + @test TensorKit.to_cpu(t2 * t') ≈ ht2 * ht' + @test TensorKit.to_cpu(t2' * t') ≈ ht2' * ht' + + @test TensorKit.to_cpu(inv(t1)) ≈ inv(ht1) + @test TensorKit.to_cpu(pinv(t)) ≈ pinv(ht) + + if T == Float32 || T == ComplexF32 + continue + end + + @test TensorKit.to_cpu(t1 \ t) ≈ ht1 \ ht + @test TensorKit.to_cpu(t1' \ t) ≈ ht1' \ ht + @test TensorKit.to_cpu(t2 \ t') ≈ ht2 \ ht' + @test TensorKit.to_cpu(t2' \ t') ≈ ht2' \ ht' + + @test TensorKit.to_cpu(t2 / t) ≈ ht2 / ht + @test TensorKit.to_cpu(t2' / t) ≈ ht2' / ht + @test TensorKit.to_cpu(t1 / t') ≈ ht1 / ht' + @test TensorKit.to_cpu(t1' / t') ≈ ht1' / ht' + end + end + if BraidingStyle(I) isa 
Bosonic && hasfusiontensor(I) + @timedtestset "Tensor functions" begin + W = V1 ⊗ V2 + for T in (Float64, ComplexF64) + t = project_hermitian!(AMDGPU.randn(T, W, W)) + s = dim(W) + #@test (@constinferred sqrt(t))^2 ≈ t + #@test TensorKit.to_cpu(sqrt(t)) ≈ sqrt(TensorKit.to_cpu(t)) + + expt = @constinferred exp(t) + @test TensorKit.to_cpu(expt) ≈ exp(TensorKit.to_cpu(t)) + + # log doesn't work on AMDGPU yet (scalar indexing) + #@test exp(@constinferred log(project_hermitian!(expt))) ≈ expt + #@test TensorKit.to_cpu(log(project_hermitian!(expt))) ≈ log(TensorKit.to_cpu(expt)) + + #=@test (@constinferred cos(t))^2 + (@constinferred sin(t))^2 ≈ + id(storagetype(t), W) + @test (@constinferred tan(t)) ≈ sin(t) / cos(t) + @test (@constinferred cot(t)) ≈ cos(t) / sin(t) + @test (@constinferred cosh(t))^2 - (@constinferred sinh(t))^2 ≈ + id(storagetype(t), W) + @test (@constinferred tanh(t)) ≈ sinh(t) / cosh(t) + @test (@constinferred coth(t)) ≈ cosh(t) / sinh(t)=# # TODO in AMDGPU + + #=t1 = sin(t) + @test sin(@constinferred asin(t1)) ≈ t1 + t2 = cos(t) + @test cos(@constinferred acos(t2)) ≈ t2 + t3 = sinh(t) + @test sinh(@constinferred asinh(t3)) ≈ t3 + t4 = cosh(t) + @test cosh(@constinferred acosh(t4)) ≈ t4 + t5 = tan(t) + @test tan(@constinferred atan(t5)) ≈ t5 + t6 = cot(t) + @test cot(@constinferred acot(t6)) ≈ t6 + t7 = tanh(t) + @test tanh(@constinferred atanh(t7)) ≈ t7 + t8 = coth(t) + @test coth(@constinferred acoth(t8)) ≈ t8=# + # TODO in AMDGPU + end + end + end + # Sylvester not defined for AMDGPU + # @timedtestset "Sylvester equation" begin + # for T in (Float32, ComplexF64) + # tA = AMDGPU.rand(T, V1 ⊗ V3, V1 ⊗ V3) + # tB = AMDGPU.rand(T, V2 ⊗ V4, V2 ⊗ V4) + # tA = 3 // 2 * leftorth(tA; alg=Polar())[1] + # tB = 1 // 5 * leftorth(tB; alg=Polar())[1] + # tC = AMDGPU.rand(T, V1 ⊗ V3, V2 ⊗ V4) + # t = @constinferred sylvester(tA, tB, tC) + # @test codomain(t) == V1 ⊗ V3 + # @test domain(t) == V2 ⊗ V4 + # @test norm(tA * t + t * tB + tC) < + # (norm(tA) + norm(tB) + norm(tC)) * eps(real(T))^(2 / 3) + # if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + # matrix(x) = reshape(convert(Array, x), dim(codomain(x)), dim(domain(x))) + # @test matrix(t) ≈ sylvester(matrix(tA), matrix(tB), matrix(tC)) + # end + # end + # end + # + # TODO + @timedtestset "Tensor product: test via norm preservation" begin + for T in (Float32, ComplexF64) + t1 = AMDGPU.rand(T, V2 ⊗ V3 ⊗ V1, V1 ⊗ V2) + t2 = AMDGPU.rand(T, V2 ⊗ V1 ⊗ V3, V1 ⊗ V1) + AMDGPU.@allowscalar begin + t = @constinferred (t1 ⊗ t2) + end + @test norm(t) ≈ norm(t1) * norm(t2) + end + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Tensor product: test via conversion" begin + for T in (Float32, ComplexF64) + t1 = AMDGPU.rand(T, V2 ⊗ V3 ⊗ V1, V1) + t2 = AMDGPU.rand(T, V2 ⊗ V1 ⊗ V3, V2) + d1 = dim(codomain(t1)) + d2 = dim(codomain(t2)) + d3 = dim(domain(t1)) + d4 = dim(domain(t2)) + AMDGPU.@allowscalar begin + t = @constinferred (t1 ⊗ t2) + At = ad(t) + @test ad(t) ≈ ad(t1) ⊗ ad(t2) + end + end + end + end + #=@timedtestset "Tensor product: test via tensor contraction" begin + for T in (Float32, ComplexF64) + t1 = AMDGPU.rand(T, V2 ⊗ V3 ⊗ V1) + t2 = AMDGPU.rand(T, V2 ⊗ V1 ⊗ V3) + AMDGPU.@allowscalar begin + t = @constinferred (t1 ⊗ t2) + @tensor t′[1, 2, 3, 4, 5, 6] := t1[1, 2, 3] * t2[4, 5, 6] + # @test t ≈ t′ # TODO broken for symmetry: Irrep[ℤ₃] + end + end + end=# # broken due to no HIPTensor + end + TensorKit.empty_globalcaches!() +end + +@timedtestset "Deligne tensor product: test via conversion" begin + 
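+    # NOTE: only trivial-symmetry spaces are enabled here for now; the commented
+    # candidates (VSU₂, Vℤ₂) are presumably blocked on device support for nontrivial sectors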
Vlists1 = (Vtr,) # VSU₂) + Vlists2 = (Vtr,) # Vℤ₂) + @testset for Vlist1 in Vlists1, Vlist2 in Vlists2 + V1, V2, V3, V4, V5 = Vlist1 + W1, W2, W3, W4, W5 = Vlist2 + for T in (Float32, ComplexF64) + t1 = AMDGPU.rand(T, V1 ⊗ V2, V3' ⊗ V4) + t2 = AMDGPU.rand(T, W2, W1 ⊗ W1') + AMDGPU.@allowscalar begin + t = @constinferred (t1 ⊠ t2) + end + d1 = dim(codomain(t1)) + d2 = dim(codomain(t2)) + d3 = dim(domain(t1)) + d4 = dim(domain(t2)) + AMDGPU.@allowscalar begin + @test ad(t1) ⊠ ad(t2) ≈ ad(t1 ⊠ t2) + end + end + end +end diff --git a/test/runtests.jl b/test/runtests.jl index 3b0bfe8b0..3de200d3d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -51,6 +51,10 @@ istestfile(fn) = endswith(fn, ".jl") && !contains(fn, "setup") CUDA.functional() || continue @time include("cuda/tensors.jl") @time include("cuda/factorizations.jl") + elseif group == "amd" + using AMDGPU + AMDGPU.functional() || continue + @time include("amd/tensors.jl") elseif is_buildkite continue end
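For reference, a minimal usage sketch of what this diff enables (a sketch, not part of the diff: it assumes a functional ROCm device, and uses plain `ℂ^n` spaces plus the `TensorKit.to_cpu` round-trip seen in the tests above):

```julia
using TensorKit, AMDGPU

V = ℂ^2 ⊗ ℂ^3                       # trivially-symmetric product space
t = AMDGPU.rand(Float64, V, V)      # ROC-backed TensorMap via the new extension methods
u = AMDGPU.zeros(ComplexF64, V ← V) # same, with an explicit HomSpace
@assert norm(u) == 0                # freshly zeroed blocks
h = TensorKit.to_cpu(t)             # copy back to host storage
@assert norm(h) ≈ norm(t)           # norms agree across device and host
```

As wired up in test/runtests.jl, the new tests run only when the "amd" test group is selected and `AMDGPU.functional()` returns true, mirroring the existing cuda group.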