diff --git a/Project.toml b/Project.toml
index a1847b6dd..132a19ae1 100644
--- a/Project.toml
+++ b/Project.toml
@@ -18,15 +18,24 @@ TupleTools = "9d95972d-f1c8-5527-a6e0-b4b365fa01f6"
 VectorInterface = "409d34a3-91d5-4945-b6ec-7529ddf182d8"
 
 [weakdeps]
+AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
 ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
+cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1"
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
 
 [extensions]
+TensorKitAMDGPUExt = "AMDGPU"
+TensorKitCUDAExt = ["CUDA", "cuTENSOR"]
 TensorKitChainRulesCoreExt = "ChainRulesCore"
 TensorKitFiniteDifferencesExt = "FiniteDifferences"
 
 [compat]
+AMDGPU = "2"
+Adapt = "4"
 Aqua = "0.6, 0.7, 0.8"
+CUDA = "5"
+cuTENSOR = "2"
 ChainRulesCore = "1"
 ChainRulesTestUtils = "1"
 Combinatorics = "1"
@@ -49,7 +58,10 @@ Zygote = "0.7"
 julia = "1.10"
 
 [extras]
+Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
 Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1"
 ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
 ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a"
 Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa"
@@ -61,4 +73,10 @@ TestExtras = "5ed8adda-3752-4e41-b88a-e8b09835ee3a"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
 [targets]
-test = ["Aqua", "Combinatorics", "LinearAlgebra", "TensorOperations", "Test", "TestExtras", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote"]
+test = ["Adapt", "Aqua", "Combinatorics", "CUDA", "cuTENSOR", "LinearAlgebra", "TensorOperations", "Test", "TestExtras", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote"]
+
+[sources]
+CUDA = {url = "https://github.com/JuliaGPU/CUDA.jl", rev = "master"}
+cuTENSOR = {url = "https://github.com/JuliaGPU/CUDA.jl", subdir = "lib/cutensor", rev = "ksh/cutensor_bump"}
+MatrixAlgebraKit = {url = "https://github.com/QuantumKitHub/MatrixAlgebraKit.jl", rev = "ksh/tk"}
+TensorOperations = {url = "https://github.com/QuantumKitHub/TensorOperations.jl", rev = "ksh/cutensor_bump"}
diff --git a/ext/TensorKitAMDGPUExt/TensorKitAMDGPUExt.jl b/ext/TensorKitAMDGPUExt/TensorKitAMDGPUExt.jl
new file mode 100644
index 000000000..db5a5c733
--- /dev/null
+++ b/ext/TensorKitAMDGPUExt/TensorKitAMDGPUExt.jl
@@ -0,0 +1,10 @@
+module TensorKitAMDGPUExt
+
+using TensorKit
+using TensorKit: SectorDict
+using AMDGPU
+using Random
+
+include("roctensormap.jl")
+
+end
diff --git a/ext/TensorKitAMDGPUExt/roctensormap.jl b/ext/TensorKitAMDGPUExt/roctensormap.jl
new file mode 100644
index 000000000..38a3e1355
--- /dev/null
+++ b/ext/TensorKitAMDGPUExt/roctensormap.jl
@@ -0,0 +1,103 @@
+const _ROCMatOrDict{I,T} = Union{ROCMatrix{T},SectorDict{I,ROCMatrix{T}}}
+const ROCTensorMap{T,S,N₁,N₂,I,A<:_ROCMatOrDict{I,T}} = TensorMap{T,S,N₁,N₂,A}
+const ROCTensor{T,S,N,I,A<:_ROCMatOrDict{I,T}} = ROCTensorMap{T,S,N,0,I,A}
+
+function ROCTensorMap{T}(::UndefInitializer, V::TensorMapSpace{S,N₁,N₂}) where {T,S,N₁,N₂}
+    A = ROCMatrix{T,AMDGPU.default_memory}
+    TT = tensormaptype(S, N₁, N₂, A)
+    return TT(undef, codomain(V), domain(V))
+end
+
+function ROCTensorMap{T}(::UndefInitializer, codomain::TensorSpace{S},
+                         domain::TensorSpace{S}) where {T,S}
+    return ROCTensorMap{T}(undef, codomain ← domain)
+end
+function ROCTensor{T}(::UndefInitializer, V::TensorSpace{S}) where {T,S}
+    return ROCTensorMap{T}(undef, V ← one(V))
+end
+
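+# The generated methods below mirror TensorKit's dense-array constructors for the ROC
+# backend: `AMDGPU.zeros`/`AMDGPU.ones` and `AMDGPU.rand`/`AMDGPU.randn` accept tensor
+# spaces. A usage sketch (assuming `V` and `W` are `ElementarySpace`s of the same type):
+#     AMDGPU.zeros(ComplexF64, V ⊗ W, V)    # zero-filled ROCTensorMap
+#     AMDGPU.randn(V ⊗ W ← V)               # eltype defaults to Float64
+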
+for (fname, felt) in ((:zeros, :zero), (:ones, :one))
+    @eval begin
+        function AMDGPU.$fname(codomain::TensorSpace{S},
+                               domain::TensorSpace{S}=one(codomain)) where {S<:IndexSpace}
+            return AMDGPU.$fname(codomain ← domain)
+        end
+        function AMDGPU.$fname(::Type{T}, codomain::TensorSpace{S},
+                               domain::TensorSpace{S}=one(codomain)) where {T,S<:IndexSpace}
+            return AMDGPU.$fname(T, codomain ← domain)
+        end
+        AMDGPU.$fname(V::TensorMapSpace) = AMDGPU.$fname(Float64, V)
+        function AMDGPU.$fname(::Type{T}, V::TensorMapSpace) where {T}
+            t = ROCTensorMap{T}(undef, V)
+            fill!(t, $felt(T))
+            return t
+        end
+    end
+end
+
+for randfun in (:rand, :randn)
+    randfun! = Symbol(randfun, :!)
+    @eval begin
+        # converting `codomain` and `domain` into `HomSpace`
+        function AMDGPU.$randfun(codomain::TensorSpace{S},
+                                 domain::TensorSpace{S}) where {S<:IndexSpace}
+            return AMDGPU.$randfun(codomain ← domain)
+        end
+        function AMDGPU.$randfun(::Type{T}, codomain::TensorSpace{S},
+                                 domain::TensorSpace{S}) where {T,S<:IndexSpace}
+            return AMDGPU.$randfun(T, codomain ← domain)
+        end
+        function AMDGPU.$randfun(rng::Random.AbstractRNG, ::Type{T},
+                                 codomain::TensorSpace{S},
+                                 domain::TensorSpace{S}) where {T,S<:IndexSpace}
+            return AMDGPU.$randfun(rng, T, codomain ← domain)
+        end
+
+        # accepting single `TensorSpace`
+        AMDGPU.$randfun(codomain::TensorSpace) = AMDGPU.$randfun(codomain ← one(codomain))
+        function AMDGPU.$randfun(::Type{T}, codomain::TensorSpace) where {T}
+            return AMDGPU.$randfun(T, codomain ← one(codomain))
+        end
+        function AMDGPU.$randfun(rng::Random.AbstractRNG, ::Type{T},
+                                 codomain::TensorSpace) where {T}
+            return AMDGPU.$randfun(rng, T, codomain ← one(codomain))
+        end
+
+        # filling in default eltype
+        AMDGPU.$randfun(V::TensorMapSpace) = AMDGPU.$randfun(Float64, V)
+        function AMDGPU.$randfun(rng::Random.AbstractRNG, V::TensorMapSpace)
+            return AMDGPU.$randfun(rng, Float64, V)
+        end
+
+        # filling in default rng
+        function AMDGPU.$randfun(::Type{T}, V::TensorMapSpace) where {T}
+            return AMDGPU.$randfun(Random.default_rng(), T, V)
+        end
+
+        # implementation
+        function AMDGPU.$randfun(rng::Random.AbstractRNG, ::Type{T},
+                                 V::TensorMapSpace) where {T}
+            t = ROCTensorMap{T}(undef, V)
+            AMDGPU.$randfun!(rng, t)
+            return t
+        end
+    end
+end
+
+# converters
+# ----------
+function Base.convert(::Type{ROCTensorMap}, d::Dict{Symbol,Any})
+    try
+        codomain = eval(Meta.parse(d[:codomain]))
+        domain = eval(Meta.parse(d[:domain]))
+        data = SectorDict(eval(Meta.parse(c)) => ROCArray(b) for (c, b) in d[:data])
+        return TensorMap(data, codomain, domain)
+    catch e # sector unknown in TensorKit.jl; user-defined, hopefully accessible in Main
+        codomain = Base.eval(Main, Meta.parse(d[:codomain]))
+        domain = Base.eval(Main, Meta.parse(d[:domain]))
+        data = SectorDict(Base.eval(Main, Meta.parse(c)) => ROCArray(b)
+                          for (c, b) in d[:data])
+        return TensorMap(data, codomain, domain)
+    end
+end
+
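+# Round-trip sketch for the `Dict` converter above (assuming `t` is a `ROCTensorMap`):
+#     d = convert(Dict, t)
+#     t′ = convert(ROCTensorMap, d)
+#     t′ == t    # expected to hold; exercised in test/amdgpu.jl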
diff --git a/ext/TensorKitCUDAExt/TensorKitCUDAExt.jl b/ext/TensorKitCUDAExt/TensorKitCUDAExt.jl
new file mode 100644
index 000000000..ae776673c
--- /dev/null
+++ b/ext/TensorKitCUDAExt/TensorKitCUDAExt.jl
@@ -0,0 +1,90 @@
+module TensorKitCUDAExt
+
+using CUDA, CUDA.CUBLAS, LinearAlgebra
+using CUDA: @allowscalar
+using cuTENSOR: cuTENSOR
+
+using TensorKit
+import TensorKit.VectorInterface: scalartype as vi_scalartype
+using TensorKit.Factorizations
+using TensorKit.Factorizations: select_svd_algorithm, OFA, initialize_output, AbstractAlgorithm
+using TensorKit: SectorDict, tensormaptype, scalar, similarstoragetype, AdjointTensorMap
+
+using TensorKit.MatrixAlgebraKit
+
+using Random
+
+include("cutensormap.jl")
+
+TensorKit.Factorizations.select_svd_algorithm(::CuTensorMap, ::TensorKit.Factorizations.SVD) = CUSOLVER_QRIteration()
+TensorKit.Factorizations.select_svd_algorithm(::CuTensorMap, ::TensorKit.Factorizations.SDD) = throw(ArgumentError("DivideAndConquer unavailable on CUDA"))
+TensorKit.Factorizations.select_svd_algorithm(::CuTensorMap, alg::OFA) = throw(ArgumentError(lazy"Unknown algorithm $alg"))
+
+const CuDiagonalTensorMap{T,S} = DiagonalTensorMap{T,S,CuVector{T,CUDA.DeviceMemory}}
+
+"""
+    CuDiagonalTensorMap{T}(undef, domain::S) where {T,S<:IndexSpace}
+    # expert mode: select storage type `A`
+    DiagonalTensorMap{T,S,A}(undef, domain::S) where {T,S<:IndexSpace,A<:DenseVector{T}}
+
+Construct a `CuDiagonalTensorMap` (a `DiagonalTensorMap` with `CuVector` storage) with
+uninitialized data.
+"""
+function CuDiagonalTensorMap{T}(::UndefInitializer, V::TensorMapSpace) where {T}
+    (numin(V) == numout(V) == 1 && domain(V) == codomain(V)) ||
+        throw(ArgumentError("DiagonalTensorMap requires a space with equal domain and codomain and 2 indices"))
+    return CuDiagonalTensorMap{T}(undef, domain(V))
+end
+function CuDiagonalTensorMap{T}(::UndefInitializer, V::ProductSpace) where {T}
+    length(V) == 1 ||
+        throw(ArgumentError("DiagonalTensorMap requires `numin(d) == numout(d) == 1`"))
+    return CuDiagonalTensorMap{T}(undef, only(V))
+end
+function CuDiagonalTensorMap{T}(::UndefInitializer, V::S) where {T,S<:IndexSpace}
+    return CuDiagonalTensorMap{T,S}(undef, V)
+end
+CuDiagonalTensorMap(::UndefInitializer, V::IndexSpace) = CuDiagonalTensorMap{Float64}(undef, V)
+
+function TensorKit.Factorizations.initialize_output(::typeof(svd_compact!), t::CuTensorMap, ::AbstractAlgorithm)
+    V_cod = V_dom = infimum(fuse(codomain(t)), fuse(domain(t)))
+    U = similar(t, codomain(t) ← V_cod)
+    S = CuDiagonalTensorMap{real(scalartype(t))}(undef, V_cod)
+    Vᴴ = similar(t, V_dom ← domain(t))
+    return U, S, Vᴴ
+end
+
+function TensorKit.Factorizations.initialize_output(::typeof(eigh_full!), t::CuTensorMap, ::AbstractAlgorithm)
+    V_D = fuse(domain(t))
+    T = real(scalartype(t))
+    D = CuDiagonalTensorMap{T}(undef, V_D)
+    V = similar(t, codomain(t) ← V_D)
+    return D, V
+end
+
+function TensorKit.Factorizations.initialize_output(::typeof(eig_full!), t::CuTensorMap, ::AbstractAlgorithm)
+    V_D = fuse(domain(t))
+    Tc = complex(scalartype(t))
+    D = CuDiagonalTensorMap{Tc}(undef, V_D)
+    V = similar(t, Tc, codomain(t) ← V_D)
+    return D, V
+end
+
+function TensorKit.Factorizations.initialize_output(::typeof(eigh_vals!), t::CuTensorMap, alg::AbstractAlgorithm)
+    V_D = fuse(domain(t))
+    T = real(scalartype(t))
+    return CuDiagonalTensorMap{T}(undef, V_D)
+end
+
+function TensorKit.Factorizations.initialize_output(::typeof(eig_vals!), t::CuTensorMap, alg::AbstractAlgorithm)
+    V_D = fuse(domain(t))
+    Tc = complex(scalartype(t))
+    return CuDiagonalTensorMap{Tc}(undef, V_D)
+end
+
+
+# TODO
+# add VectorInterface extensions for proper CUDA promotion
+function TensorKit.VectorInterface.promote_add(TA::Type{<:CUDA.StridedCuMatrix{Tx}}, TB::Type{<:CUDA.StridedCuMatrix{Ty}}, α::Tα = TensorKit.VectorInterface.One(), β::Tβ = TensorKit.VectorInterface.One()) where {Tx, Ty, Tα, Tβ}
+    return Base.promote_op(TensorKit.VectorInterface.add, Tx, Ty, Tα, Tβ)
+end
+
+end
diff --git a/ext/TensorKitCUDAExt/cutensormap.jl b/ext/TensorKitCUDAExt/cutensormap.jl
new file mode 100644
index 000000000..e3f6cf203
--- /dev/null
+++ b/ext/TensorKitCUDAExt/cutensormap.jl
@@ -0,0 +1,207 @@
+const CuTensorMap{T,S,N₁,N₂} = TensorMap{T,S,N₁,N₂, CuVector{T,CUDA.DeviceMemory}}
+const CuTensor{T, S, N} 
= CuTensorMap{T, S, N, 0} + +function TensorKit.tensormaptype(S::Type{<:IndexSpace}, N₁, N₂, TorA::Type{<:StridedCuArray}) + if TorA <: CuArray + return TensorMap{eltype(TorA),S,N₁,N₂,CuVector{eltype(TorA), CUDA.DeviceMemory}} + else + throw(ArgumentError("argument $TorA should specify a scalar type (`<:Number`) or a storage type `<:CuVector{<:Number}`")) + end +end + +function CuTensorMap{T}(::UndefInitializer, V::TensorMapSpace{S, N₁, N₂}) where {T, S, N₁, N₂} + return CuTensorMap{T,S,N₁,N₂}(undef, V) +end + +function CuTensorMap{T}(::UndefInitializer, codomain::TensorSpace{S}, + domain::TensorSpace{S}) where {T,S} + return CuTensorMap{T}(undef, codomain ← domain) +end +function CuTensor{T}(::UndefInitializer, V::TensorSpace{S}) where {T,S} + return CuTensorMap{T}(undef, V ← one(V)) +end +# constructor starting from block data +""" + CuTensorMap(data::AbstractDict{<:Sector,<:CuMatrix}, codomain::ProductSpace{S,N₁}, + domain::ProductSpace{S,N₂}) where {S<:ElementarySpace,N₁,N₂} + CuTensorMap(data, codomain ← domain) + CuTensorMap(data, domain → codomain) + +Construct a `CuTensorMap` by explicitly specifying its block data. + +## Arguments +- `data::AbstractDict{<:Sector,<:CuMatrix}`: dictionary containing the block data for + each coupled sector `c` as a matrix of size `(blockdim(codomain, c), blockdim(domain, c))`. +- `codomain::ProductSpace{S,N₁}`: the codomain as a `ProductSpace` of `N₁` spaces of type + `S<:ElementarySpace`. +- `domain::ProductSpace{S,N₂}`: the domain as a `ProductSpace` of `N₂` spaces of type + `S<:ElementarySpace`. + +Alternatively, the domain and codomain can be specified by passing a [`HomSpace`](@ref) +using the syntax `codomain ← domain` or `domain → codomain`. +""" +function CuTensorMap(data::AbstractDict{<:Sector,<:CuArray}, + V::TensorMapSpace{S,N₁,N₂}) where {S,N₁,N₂} + T = eltype(valtype(data)) + t = CuTensorMap{T}(undef, V) + for (c, b) in blocks(t) + haskey(data, c) || throw(SectorMismatch("no data for block sector $c")) + datac = data[c] + size(datac) == size(b) || + throw(DimensionMismatch("wrong size of block for sector $c")) + copy!(b, datac) + end + for (c, b) in data + c ∈ blocksectors(t) || isempty(b) || + throw(SectorMismatch("data for block sector $c not expected")) + end + return t +end +function CuTensorMap{T}(data::DenseVector{T}, codomain::TensorSpace{S}, + domain::TensorSpace{S}) where {T,S} + return CuTensorMap(data, codomain ← domain) +end +function CuTensorMap(data::AbstractDict{<:Sector,<:CuMatrix}, codom::TensorSpace{S}, + dom::TensorSpace{S}) where {S} + return CuTensorMap(data, codom ← dom) +end + +for (fname, felt) in ((:zeros, :zero), (:ones, :one)) + @eval begin + function CUDA.$fname(codomain::TensorSpace{S}, + domain::TensorSpace{S}=one(codomain)) where {S<:IndexSpace} + return CUDA.$fname(codomain ← domain) + end + function CUDA.$fname(::Type{T}, codomain::TensorSpace{S}, + domain::TensorSpace{S}=one(codomain)) where {T,S<:IndexSpace} + return CUDA.$fname(T, codomain ← domain) + end + CUDA.$fname(V::TensorMapSpace) = CUDA.$fname(Float64, V) + function CUDA.$fname(::Type{T}, V::TensorMapSpace) where {T} + t = CuTensorMap{T}(undef, V) + fill!(t, $felt(T)) + return t + end + end +end + +for randfun in (:rand, :randn) + randfun! = Symbol(randfun, :!) 
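+    # The `@eval` block below generates the usual ladder of convenience methods for
+    # `CUDA.rand`/`CUDA.randn` on spaces: converting `(codomain, domain)` pairs into a
+    # `HomSpace`, and filling in the default eltype (`Float64`) and default RNG. A usage
+    # sketch (assuming `V` and `W` are `ElementarySpace`s of the same type):
+    #     CUDA.rand(V ⊗ W)                            # Float64, default RNG
+    #     CUDA.randn(ComplexF32, V ⊗ W ← V)           # chosen eltype
+    #     CUDA.rand(Random.default_rng(), Float32, V ← V)
+    # All of these funnel into `CuTensorMap{T}(undef, V)` followed by `CUDA.$randfun!`.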
+    @eval begin
+        # converting `codomain` and `domain` into `HomSpace`
+        function CUDA.$randfun(codomain::TensorSpace{S},
+                               domain::TensorSpace{S}) where {S<:IndexSpace}
+            return CUDA.$randfun(codomain ← domain)
+        end
+        function CUDA.$randfun(::Type{T}, codomain::TensorSpace{S},
+                               domain::TensorSpace{S}) where {T,S<:IndexSpace}
+            return CUDA.$randfun(T, codomain ← domain)
+        end
+        function CUDA.$randfun(rng::Random.AbstractRNG, ::Type{T},
+                               codomain::TensorSpace{S},
+                               domain::TensorSpace{S}) where {T,S<:IndexSpace}
+            return CUDA.$randfun(rng, T, codomain ← domain)
+        end
+
+        # accepting single `TensorSpace`
+        CUDA.$randfun(codomain::TensorSpace) = CUDA.$randfun(codomain ← one(codomain))
+        function CUDA.$randfun(::Type{T}, codomain::TensorSpace) where {T}
+            return CUDA.$randfun(T, codomain ← one(codomain))
+        end
+        function CUDA.$randfun(rng::Random.AbstractRNG, ::Type{T},
+                               codomain::TensorSpace) where {T}
+            return CUDA.$randfun(rng, T, codomain ← one(codomain))
+        end
+
+        # filling in default eltype
+        CUDA.$randfun(V::TensorMapSpace) = CUDA.$randfun(Float64, V)
+        function CUDA.$randfun(rng::Random.AbstractRNG, V::TensorMapSpace)
+            return CUDA.$randfun(rng, Float64, V)
+        end
+
+        # filling in default rng
+        function CUDA.$randfun(::Type{T}, V::TensorMapSpace) where {T}
+            return CUDA.$randfun(Random.default_rng(), T, V)
+        end
+
+        # implementation
+        function CUDA.$randfun(rng::Random.AbstractRNG, ::Type{T},
+                               V::TensorMapSpace) where {T}
+            t = CuTensorMap{T}(undef, V)
+            CUDA.$randfun!(rng, t)
+            return t
+        end
+    end
+end
+
+# converters
+# ----------
+function Base.convert(::Type{CuTensorMap}, d::Dict{Symbol,Any})
+    try
+        codomain = eval(Meta.parse(d[:codomain]))
+        domain = eval(Meta.parse(d[:domain]))
+        data = SectorDict(eval(Meta.parse(c)) => CuArray(b) for (c, b) in d[:data])
+        return CuTensorMap(data, codomain, domain)
+    catch e # sector unknown in TensorKit.jl; user-defined, hopefully accessible in Main
+        codomain = Base.eval(Main, Meta.parse(d[:codomain]))
+        domain = Base.eval(Main, Meta.parse(d[:domain]))
+        data = SectorDict(Base.eval(Main, Meta.parse(c)) => CuArray(b)
+                          for (c, b) in d[:data])
+        return CuTensorMap(data, codomain, domain)
+    end
+end
+
+function Base.convert(::Type{CuTensorMap}, t::AbstractTensorMap)
+    return copy!(CuTensorMap{scalartype(t)}(undef, space(t)), t)
+end
+
+# Scalar implementation
+#-----------------------
+function TensorKit.scalar(t::CuTensorMap)
+    # TODO: should scalar only work if N₁ == N₂ == 0?
+    return @allowscalar dim(codomain(t)) == dim(domain(t)) == 1 ? 
+ first(blocks(t))[2][1, 1] : throw(DimensionMismatch()) +end + +TensorKit.scalartype(A::StridedCuArray{T}) where {T} = T +vi_scalartype(::Type{<:CuTensorMap{T}}) where {T} = T +vi_scalartype(::Type{<:CuArray{T}}) where {T} = T + +function TensorKit.similarstoragetype(TT::Type{<:CuTensorMap{TTT,S,N₁,N₂}}, ::Type{T}) where {TTT,T,S,N₁,N₂} + return CuVector{T, CUDA.DeviceMemory} +end + +function Base.convert(TT::Type{CuTensorMap{T,S,N₁,N₂}}, + t::AbstractTensorMap{<:Any,S,N₁,N₂}) where {T,S,N₁,N₂} + if typeof(t) === TT + return t + else + tnew = TT(undef, space(t)) + return copy!(tnew, t) + end +end + +function Base.copy!(tdst::CuTensorMap{T, S, N₁, N₂}, tsrc::CuTensorMap{T, S, N₁, N₂}) where {T, S, N₁, N₂} + space(tdst) == space(tsrc) || throw(SpaceMismatch("$(space(tdst)) ≠ $(space(tsrc))")) + for ((c, bdst), (_, bsrc)) in zip(blocks(tdst), blocks(tsrc)) + copy!(bdst, bsrc) + end + return tdst +end + +function Base.copy!(tdst::CuTensorMap, tsrc::TensorKit.AdjointTensorMap) + space(tdst) == space(tsrc) || throw(SpaceMismatch("$(space(tdst)) ≠ $(space(tsrc))")) + for ((c, bdst), (_, bsrc)) in zip(blocks(tdst), blocks(tsrc)) + copy!(bdst, bsrc) + end + return tdst +end + +function Base.promote_rule(::Type{<:TT₁}, + ::Type{<:TT₂}) where {S,N₁,N₂, TTT₁, TTT₂, + TT₁<:CuTensorMap{TTT₁,S,N₁,N₂}, + TT₂<:CuTensorMap{TTT₂,S,N₁,N₂}} + T = TensorKit.VectorInterface.promote_add(TTT₁, TTT₂) + return CuTensorMap{T,S,N₁,N₂} +end diff --git a/src/tensors/factorizations/factorizations.jl b/src/tensors/factorizations/factorizations.jl index db1b1c3c7..f56e44a1b 100644 --- a/src/tensors/factorizations/factorizations.jl +++ b/src/tensors/factorizations/factorizations.jl @@ -99,10 +99,10 @@ eigh!(d::DiagonalTensorMap{<:Real}) = d, one(d) eigh!(d::DiagonalTensorMap{<:Complex}) = DiagonalTensorMap(real(d.data), d.domain), one(d) function LinearAlgebra.svdvals(d::DiagonalTensorMap) - return SectorDict(c => LinearAlgebra.svdvals(b) for (c, b) in blocks(d)) + return SectorDict(c => svd_vals(b) for (c, b) in blocks(d)) end function LinearAlgebra.eigvals(d::DiagonalTensorMap) - return SectorDict(c => LinearAlgebra.eigvals(b) for (c, b) in blocks(d)) + return SectorDict(c => eig_vals(b) for (c, b) in blocks(d)) end function LinearAlgebra.cond(d::DiagonalTensorMap, p::Real=2) @@ -121,11 +121,11 @@ LinearAlgebra.svdvals!(t::AdjointTensorMap) = svdvals!(adjoint(t)) #--------------------------# function LinearAlgebra.eigvals!(t::TensorMap{<:RealOrComplexFloat}; kwargs...) - return SectorDict(c => complex(LinearAlgebra.eigvals!(b; kwargs...)) + return SectorDict(c => complex(eig_vals!(b; kwargs...)) for (c, b) in blocks(t)) end function LinearAlgebra.eigvals!(t::AdjointTensorMap{<:RealOrComplexFloat}; kwargs...) 
- return SectorDict(c => conj!(complex(LinearAlgebra.eigvals!(b; kwargs...))) + return SectorDict(c => conj!(complex(eig_vals!(b; kwargs...))) for (c, b) in blocks(t)) end diff --git a/src/tensors/factorizations/implementations.jl b/src/tensors/factorizations/implementations.jl index d00703549..f341b4e70 100644 --- a/src/tensors/factorizations/implementations.jl +++ b/src/tensors/factorizations/implementations.jl @@ -3,6 +3,11 @@ _kindof(::Union{QR,QRpos}) = :qr _kindof(::Union{LQ,LQpos}) = :lq _kindof(::Polar) = :polar +select_svd_algorithm(t, ::SVD) = LAPACK_QRIteration() +select_svd_algorithm(t, ::SDD) = LAPACK_DivideAndConquer() +select_svd_algorithm(t, alg::OFA) = throw(ArgumentError(lazy"Unknown algorithm $alg")) +select_svd_algorithm(t::Base.ReshapedArray, alg) = select_svd_algorithm(parent(parent(t)), alg) + leftorth!(t; alg=nothing, kwargs...) = _leftorth!(t, alg; kwargs...) function _leftorth!(t::AbstractTensorMap, alg::Nothing, ; kwargs...) @@ -26,9 +31,7 @@ function _leftorth!(t, alg::OFA; kwargs...) kind = _kindof(alg) if kind == :svd - alg_svd = alg === SVD() ? LAPACK_QRIteration() : - alg === SDD() ? LAPACK_DivideAndConquer() : - throw(ArgumentError(lazy"Unknown algorithm $alg")) + alg_svd = select_svd_algorithm(t, alg) return left_orth!(t; kind, alg_svd, trunc) elseif kind == :qr alg_qr = (; positive=(alg == QRpos())) @@ -52,9 +55,7 @@ function leftnull!(t::AbstractTensorMap; kind = _kindof(alg) if kind == :svd - alg_svd = alg === SVD() ? LAPACK_QRIteration() : - alg === SDD() ? LAPACK_DivideAndConquer() : - throw(ArgumentError(lazy"Unknown algorithm $alg")) + alg_svd = select_svd_algorithm(t, alg) return left_null!(t; kind, alg_svd, trunc) elseif kind == :qr alg_qr = (; positive=(alg == QRpos())) @@ -82,9 +83,7 @@ function rightorth!(t::AbstractTensorMap; kind = _kindof(alg) if kind == :svd - alg_svd = alg === SVD() ? LAPACK_QRIteration() : - alg === SDD() ? LAPACK_DivideAndConquer() : - throw(ArgumentError(lazy"Unknown algorithm $alg")) + alg_svd = select_svd_algorithm(t, alg) return right_orth!(t; kind, alg_svd, trunc) elseif kind == :lq alg_lq = (; positive=(alg == LQpos())) @@ -106,9 +105,7 @@ function rightnull!(t::AbstractTensorMap; kind = _kindof(alg) if kind == :svd - alg_svd = alg === SVD() ? LAPACK_QRIteration() : - alg === SDD() ? LAPACK_DivideAndConquer() : - throw(ArgumentError(lazy"Unknown algorithm $alg")) + alg_svd = select_svd_algorithm(t, alg) return right_null!(t; kind, alg_svd, trunc) elseif kind == :lq alg_lq = (; positive=(alg == LQpos())) @@ -149,16 +146,16 @@ function tsvd!(t::AbstractTensorMap; trunc=notrunc(), p=nothing, alg=nothing, kw InnerProductStyle(t) === EuclideanInnerProduct() || throw_invalid_innerproduct(:tsvd!) isnothing(p) || Base.depwarn("p is no longer supported", :tsvd!) - if alg isa OFA + if alg isa Union{OFA, SVD} Base.depwarn(lazy"$alg is deprecated", :tsvd!) - alg = alg === SVD() ? LAPACK_QRIteration() : - alg === SDD() ? LAPACK_DivideAndConquer() : - throw(ArgumentError(lazy"Unknown algorithm $alg")) + alg_svd = select_svd_algorithm(t, alg) + else + alg_svd = alg end if trunc == notrunc() - return svd_compact!(t; alg, kwargs...) + return svd_compact!(t; alg_svd, kwargs...) else - return svd_trunc!(t; trunc, alg, kwargs...) + return svd_trunc!(t; trunc, alg_svd, kwargs...) 
end end diff --git a/src/tensors/factorizations/matrixalgebrakit.jl b/src/tensors/factorizations/matrixalgebrakit.jl index eb27654a5..2ddf3eb5e 100644 --- a/src/tensors/factorizations/matrixalgebrakit.jl +++ b/src/tensors/factorizations/matrixalgebrakit.jl @@ -29,7 +29,6 @@ for f! in (:qr_compact!, :qr_full!, :left_orth!, :right_orth!) @eval function $f!(t::AbstractTensorMap, F, alg::AbstractAlgorithm) check_input($f!, t, F, alg) - foreachblock(t, F...) do _, bs factors = Base.tail(bs) factors′ = $f!(first(bs), factors, alg) diff --git a/src/tensors/factorizations/truncation.jl b/src/tensors/factorizations/truncation.jl index d553c5c93..b37f6dd8a 100644 --- a/src/tensors/factorizations/truncation.jl +++ b/src/tensors/factorizations/truncation.jl @@ -24,26 +24,25 @@ truncspace(space::ElementarySpace) = TruncationSpace(space) function truncate!(::typeof(svd_trunc!), (U, S, Vᴴ)::_T_USVᴴ, strategy::TruncationStrategy) ind = findtruncated_sorted(diagview(S), strategy) V_truncated = spacetype(S)(c => length(I) for (c, I) in ind) - Ũ = similar(U, codomain(U) ← V_truncated) for (c, b) in blocks(Ũ) I = get(ind, c, nothing) @assert !isnothing(I) - copy!(b, @view(block(U, c)[:, I])) + copyto!(b, @view(block(U, c)[:, I])) end - S̃ = DiagonalTensorMap{scalartype(S)}(undef, V_truncated) + S̃ = DiagonalTensorMap{scalartype(S), spacetype(S), storagetype(S)}(undef, V_truncated) for (c, b) in blocks(S̃) I = get(ind, c, nothing) @assert !isnothing(I) - copy!(b.diag, @view(block(S, c).diag[I])) + copyto!(b.diag, @view(block(S, c).diag[I])) end Ṽᴴ = similar(Vᴴ, V_truncated ← domain(Vᴴ)) for (c, b) in blocks(Ṽᴴ) I = get(ind, c, nothing) @assert !isnothing(I) - copy!(b, @view(block(Vᴴ, c)[I, :])) + copyto!(b, @view(block(Vᴴ, c)[I, :])) end return Ũ, S̃, Ṽᴴ @@ -68,8 +67,8 @@ end function truncate!(::typeof(eigh_trunc!), (D, V)::_T_DV, strategy::TruncationStrategy) ind = findtruncated(diagview(D), strategy) V_truncated = spacetype(D)(c => length(I) for (c, I) in ind) - - D̃ = DiagonalTensorMap{scalartype(D)}(undef, V_truncated) + + D̃ = DiagonalTensorMap{scalartype(D), spacetype(D), storagetype(D)}(undef, V_truncated) for (c, b) in blocks(D̃) I = get(ind, c, nothing) @assert !isnothing(I) @@ -89,7 +88,7 @@ function truncate!(::typeof(eig_trunc!), (D, V)::_T_DV, strategy::TruncationStra ind = findtruncated(diagview(D), strategy) V_truncated = spacetype(D)(c => length(I) for (c, I) in ind) - D̃ = DiagonalTensorMap{scalartype(D)}(undef, V_truncated) + D̃ = DiagonalTensorMap{scalartype(D), spacetype(D), storagetype(D)}(undef, V_truncated) for (c, b) in blocks(D̃) I = get(ind, c, nothing) @assert !isnothing(I) diff --git a/src/tensors/linalg.jl b/src/tensors/linalg.jl index 51567dec7..4f00f1444 100644 --- a/src/tensors/linalg.jl +++ b/src/tensors/linalg.jl @@ -268,7 +268,7 @@ function _norm(blockiter, p::Real, init::Real) end elseif p == 2 n² = mapreduce(+, blockiter; init=init) do (c, b) - return isempty(b) ? init : oftype(init, dim(c) * LinearAlgebra.norm2(b)^2) + return isempty(b) ? init : oftype(init, dim(c) * norm(b, 2)^2) end return sqrt(n²) elseif p == 1 @@ -277,7 +277,7 @@ function _norm(blockiter, p::Real, init::Real) end elseif p > 0 nᵖ = mapreduce(+, blockiter; init=init) do (c, b) - return isempty(b) ? init : oftype(init, dim(c) * LinearAlgebra.normp(b, p)^p) + return isempty(b) ? 
init : oftype(init, dim(c) * norm(b, p)^p) end return (nᵖ)^inv(oftype(nᵖ, p)) else @@ -544,8 +544,8 @@ function ⊗(t1::AbstractTensorMap, t2::AbstractTensorMap) d4 = dim(dom2, f2r.uncoupled) m1 = sreshape(t1[f1l, f1r], (d1, 1, d3, 1)) m2 = sreshape(t2[f2l, f2r], (1, d2, 1, d4)) - m = sreshape(t[fl, fr], (d1, d2, d3, d4)) - m .+= coeff1 .* conj(coeff2) .* m1 .* m2 + m = sreshape(t[fl, fr], (d1, d2, d3, d4)) + m .+= coeff1 .* conj.(coeff2) .* m1 .* m2 end end end diff --git a/src/tensors/tensor.jl b/src/tensors/tensor.jl index a918478c3..914da3c3b 100644 --- a/src/tensors/tensor.jl +++ b/src/tensors/tensor.jl @@ -138,7 +138,7 @@ function TensorMap(data::AbstractDict{<:Sector,<:AbstractMatrix}, datac = data[c] size(datac) == size(b) || throw(DimensionMismatch("wrong size of block for sector $c")) - copy!(b, datac) + copyto!(b, datac) end for (c, b) in data c ∈ blocksectors(t) || isempty(b) || diff --git a/test/amdgpu.jl b/test/amdgpu.jl new file mode 100644 index 000000000..b7d1bfb41 --- /dev/null +++ b/test/amdgpu.jl @@ -0,0 +1,597 @@ +using Adapt +ad = adapt(Array) + +const AMDGPUExt = Base.get_extension(TensorKit, :TensorKitAMDGPUExt) +@assert !isnothing(AMDGPUExt) +# const ROCTensorMap{T,S,N1,N2,I,A} = AMDGPUExt.ROCTensorMap{T,S,N1,N2,I,A} +const ROCTensorMap = getglobal(AMDGPUExt, :ROCTensorMap) + +for V in (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂, VfSU₂)#, VSU₃) + V1, V2, V3, V4, V5 = V + @assert V3 * V4 * V2 ≿ V1' * V5' # necessary for leftorth tests + @assert V3 * V4 ≾ V1' * V2' * V5' # necessary for rightorth tests +end + +spacelist = try + if ENV["CI"] == "true" + println("Detected running on CI") + if Sys.iswindows() + (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂) + elseif Sys.isapple() + (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VfU₁, VfSU₂)#, VSU₃) + else + (Vtr, Vℤ₂, Vfℤ₂, VU₁, VCU₁, VSU₂, VfSU₂)#, VSU₃) + end + else + (Vtr, VU₁, VSU₂, Vfℤ₂) + end +catch + (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂, VfSU₂)#, VSU₃) +end + +for V in spacelist + I = sectortype(first(V)) + Istr = TensorKit.type_repr(I) + println("---------------------------------------") + println("AMDGPU Tensors with symmetry: $Istr") + println("---------------------------------------") + @timedtestset "Tensors with symmetry: $Istr" verbose = true begin + V1, V2, V3, V4, V5 = V + @timedtestset "Basic tensor properties" begin + W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 + for T in (Int, Float32, Float64, ComplexF32, ComplexF64) + t = @constinferred AMDGPU.zeros(T, W) + # @test @constinferred(hash(t)) == hash(deepcopy(t)) # hash is not defined for CuArray? + @test scalartype(t) == T + @test norm(t) == 0 + @test codomain(t) == W + @test space(t) == (W ← one(W)) + @test domain(t) == one(W) + @test typeof(t) == + @constinferred tensormaptype(spacetype(t), 5, 0, storagetype(t)) + end + end + @timedtestset "Tensor Dict conversion" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for T in (Int, Float32, ComplexF64) + t = @constinferred AMDGPU.rand(T, W) + d = convert(Dict, t) + @test t == convert(ROCTensorMap, d) + end + end + if hasfusiontensor(I) + @timedtestset "Tensor Array conversion" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + + # cuTENSOR does not support Int + Ts = sectortype(W) === Trivial ? 
(Int, Float32, ComplexF64) :
+                     (Float32, ComplexF64)
+                for T in Ts
+                    t = @constinferred AMDGPU.rand(T, W)
+                    a = @constinferred convert(ROCArray, t)
+                    @test t ≈ @constinferred TensorMap(a, W)
+                    # also test if input is matrix
+                    a2 = reshape(a, prod(dim, codomain(t)), prod(dim, domain(t)))
+                    @test t ≈ @constinferred TensorMap(a2, codomain(t), domain(t))
+                end
+            end
+        end
+        @timedtestset "Basic linear algebra" begin
+            W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5
+            for T in (Float32, ComplexF64)
+                t = @constinferred AMDGPU.rand(T, W)
+                @test scalartype(t) == T
+                @test space(t) == W
+                @test space(t') == W'
+                @test dim(t) == dim(space(t))
+                @test codomain(t) == codomain(W)
+                @test domain(t) == domain(W)
+                @test isa(@constinferred(norm(t)), real(T))
+                @test norm(t)^2 ≈ dot(t, t)
+                α = rand(T)
+                @test norm(α * t) ≈ abs(α) * norm(t)
+                @test norm(t + t, 2) ≈ 2 * norm(t, 2)
+                @test norm(t + t, 1) ≈ 2 * norm(t, 1)
+                @test norm(t + t, Inf) ≈ 2 * norm(t, Inf)
+                p = 3 * rand(Float64)
+                @test norm(t + t, p) ≈ 2 * norm(t, p)
+                @test norm(t) ≈ norm(t')
+
+                t2 = @constinferred rand!(similar(t))
+                β = rand(T)
+                @test @constinferred(dot(β * t2, α * t)) ≈ conj(β) * α * conj(dot(t, t2))
+                @test dot(t2, t) ≈ conj(dot(t, t2))
+                @test dot(t2, t) ≈ conj(dot(t2', t'))
+                @test dot(t2, t) ≈ dot(t', t2')
+
+                A = storagetype(t)
+
+                i1 = @constinferred(isomorphism(ROCMatrix{T}, V1 ⊗ V2, V2 ⊗ V1))
+                i2 = @constinferred(isomorphism(ROCMatrix{T}, V2 ⊗ V1, V1 ⊗ V2))
+                @test i1 * i2 == @constinferred(id(ROCMatrix{T}, V1 ⊗ V2))
+                @test i2 * i1 == @constinferred(id(ROCMatrix{T}, V2 ⊗ V1))
+
+                w = @constinferred(isometry(ROCMatrix{T}, V1 ⊗ (oneunit(V1) ⊕ oneunit(V1)),
+                                            V1))
+                @test dim(w) == 2 * dim(V1 ← V1)
+                @test w' * w == id(ROCMatrix{T}, V1)
+                @test w * w' == (w * w')^2
+            end
+        end
+        if hasfusiontensor(I)
+            @timedtestset "Basic linear algebra: test via conversion" begin
+                W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5
+                for T in (Float32, ComplexF64)
+                    t = AMDGPU.rand(T, W)
+                    t2 = @constinferred rand!(similar(t))
+                    @test norm(t, 2) ≈ norm(ad(t), 2)
+                    @test dot(t2, t) ≈ dot(ad(t2), ad(t))
+                    α = rand(T)
+                    @test ad(α * t) ≈ α * ad(t)
+                    @test ad(t + t) ≈ 2 * ad(t)
+                end
+            end
+            @timedtestset "Real and imaginary parts" begin
+                W = V1 ⊗ V2
+                for T in (Float64, ComplexF64, ComplexF32)
+                    t = @constinferred AMDGPU.randn(T, W, W)
+                    @test real(ad(t)) == ad(@constinferred real(t))
+                    @test imag(ad(t)) == ad(@constinferred imag(t))
+                end
+            end
+        end
+        @timedtestset "Tensor conversion" begin
+            W = V1 ⊗ V2
+            t = @constinferred AMDGPU.randn(W ← W)
+            @test typeof(convert(TensorMap, t')) == typeof(t)
+            tc = complex(t)
+            @test convert(typeof(tc), t) == tc
+            @test typeof(convert(typeof(tc), t)) == typeof(tc)
+            @test typeof(convert(typeof(tc), t')) == typeof(tc)
+            @test Base.promote_typeof(t, tc) == typeof(tc)
+            @test Base.promote_typeof(tc, t) == typeof(tc + t)
+        end
+        @timedtestset "diag/diagm" begin
+            W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5
+            t = AMDGPU.randn(ComplexF64, W)
+            d = LinearAlgebra.diag(t)
+            D = LinearAlgebra.diagm(codomain(t), domain(t), d)
+            @test LinearAlgebra.isdiag(D)
+            @test LinearAlgebra.diag(D) == d
+        end
+        @timedtestset "Permutations: test via inner product invariance" begin
+            W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5
+            t = AMDGPU.rand(ComplexF64, W)
+            t′ = randn!(similar(t))
+            for k in 0:5
+                for p in permutations(1:5)
+                    p1 = ntuple(n -> p[n], k)
+                    p2 = ntuple(n -> p[k + n], 5 - k)
+                    t2 = @constinferred permute(t, (p1, p2))
+                    @test norm(t2) ≈ norm(t)
+                    t2′ = permute(t′, (p1, p2))
+                    @test dot(t2′, t2) ≈ dot(t′, t) ≈ dot(transpose(t2′), transpose(t2))
+                end
+
+                t3 = VERSION < v"1.7" ? 
repartition(t, k) : + @constinferred repartition(t, $k) + @test norm(t3) ≈ norm(t) + t3′ = @constinferred repartition!(similar(t3), t′) + @test norm(t3′) ≈ norm(t′) + @test dot(t′, t) ≈ dot(t3′, t3) + end + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Permutations: test via conversion" begin + W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 + t = AMDGPU.rand(ComplexF64, W) + a = ad(t) + for k in 0:5 + for p in permutations(1:5) + p1 = ntuple(n -> p[n], k) + p2 = ntuple(n -> p[k + n], 5 - k) + t2 = permute(t, (p1, p2)) + a2 = ad(t2) + @test a2 ≈ permute(a, (p1, p2)) + @test ad(transpose(t2)) ≈ transpose(a2) + end + + t3 = repartition(t, k) + a3 = ad(t3) + @test a3 ≈ repartition(a, k) + end + end + end + @timedtestset "Full trace: test self-consistency" begin + t = AMDGPU.rand(ComplexF64, V1 ⊗ V2' ⊗ V2 ⊗ V1') + t2 = permute(t, ((1, 2), (4, 3))) + s = @constinferred tr(t2) + @test conj(s) ≈ tr(t2') + if !isdual(V1) + t2 = twist!(t2, 1) + end + if isdual(V2) + t2 = twist!(t2, 2) + end + ss = tr(t2) + @tensor s2 = t[a, b, b, a] + @tensor t3[a, b] := t[a, c, c, b] + @tensor s3 = t3[a, a] + @test ss ≈ s2 + @test ss ≈ s3 + end + @timedtestset "Partial trace: test self-consistency" begin + t = AMDGPU.rand(ComplexF64, V1 ⊗ V2' ⊗ V3 ⊗ V2 ⊗ V1' ⊗ V3') + @tensor t2[a, b] := t[c, d, b, d, c, a] + @tensor t4[a, b, c, d] := t[d, e, b, e, c, a] + @tensor t5[a, b] := t4[a, b, c, c] + @test t2 ≈ t5 + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Trace: test via conversion" begin + t = AMDGPU.rand(ComplexF64, V1 ⊗ V2' ⊗ V3 ⊗ V2 ⊗ V1' ⊗ V3') + @tensor t2[a, b] := t[c, d, b, d, c, a] + @tensor t3[a, b] := ad(t)[c, d, b, d, c, a] + @test t3 ≈ ad(t2) + end + end + @timedtestset "Trace and contraction" begin + t1 = AMDGPU.rand(ComplexF64, V1 ⊗ V2 ⊗ V3) + t2 = AMDGPU.rand(ComplexF64, V2' ⊗ V4 ⊗ V1') + t3 = t1 ⊗ t2 + @tensor ta[a, b] := t1[x, y, a] * t2[y, b, x] + @tensor tb[a, b] := t3[x, y, a, y, b, x] + @test ta ≈ tb + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Tensor contraction: test via conversion" begin + A1 = AMDGPU.randn(ComplexF64, V1' * V2', V3') + A2 = AMDGPU.randn(ComplexF64, V3 * V4, V5) + rhoL = AMDGPU.randn(ComplexF64, V1, V1) + rhoR = AMDGPU.randn(ComplexF64, V5, V5)' # test adjoint tensor + H = AMDGPU.randn(ComplexF64, V2 * V4, V2 * V4) + @tensor HrA12[a, s1, s2, c] := rhoL[a, a'] * conj(A1[a', t1, b]) * + A2[b, t2, c'] * rhoR[c', c] * + H[s1, s2, t1, t2] + + @tensor HrA12array[a, s1, s2, c] := ad(rhoL)[a, a'] * + conj(ad(A1)[a', t1, b]) * + ad(A2)[b, t2, c'] * + ad(rhoR)[c', c] * + ad(H)[s1, s2, t1, t2] + + @test HrA12array ≈ ad(HrA12) + end + end + @timedtestset "Multiplication and inverse: test compatibility" begin + W1 = V1 ⊗ V2 ⊗ V3 + W2 = V4 ⊗ V5 + for T in (Float64, ComplexF64) + t1 = AMDGPU.rand(T, W1, W1) + t2 = AMDGPU.rand(T, W2, W2) + t = AMDGPU.rand(T, W1, W2) + @test t1 * (t1 \ t) ≈ t + @test (t / t2) * t2 ≈ t + @test t1 \ one(t1) ≈ inv(t1) + @test one(t1) / t1 ≈ pinv(t1) + @test_throws SpaceMismatch inv(t) + @test_throws SpaceMismatch t2 \ t + @test_throws SpaceMismatch t / t1 + tp = pinv(t) * t + @test tp ≈ tp * tp + end + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Multiplication and inverse: test via conversion" begin + W1 = V1 ⊗ V2 ⊗ V3 + W2 = V4 ⊗ V5 + for T in (Float32, Float64, ComplexF32, ComplexF64) + t1 = AMDGPU.rand(T, W1, W1) + t2 = AMDGPU.rand(T, W2, W2) + t = AMDGPU.rand(T, W1, W2) + d1 = dim(W1) + d2 = dim(W2) + At1 = reshape(convert(Array, t1), d1, d1) + At2 = 
reshape(convert(Array, t2), d2, d2) + At = reshape(convert(Array, t), d1, d2) + @test ad(t1 * t) ≈ ad(t1) * ad(t) + @test ad(t1' * t) ≈ ad(t1)' * ad(t) + @test ad(t2 * t') ≈ ad(t2) * ad(t)' + @test ad(t2' * t') ≈ ad(t2)' * ad(t)' + @test ad(inv(t1)) ≈ inv(ad(t1)) + @test ad(pinv(t)) ≈ pinv(ad(t)) + + if T == Float32 || T == ComplexF32 + continue + end + + @test ad(t1 \ t) ≈ ad(t1) \ ad(t) + @test ad(t1' \ t) ≈ ad(t1)' \ ad(t) + @test ad(t2 \ t') ≈ ad(t2) \ ad(t)' + @test ad(t2' \ t') ≈ ad(t2)' \ ad(t)' + + @test ad(t2 / t) ≈ ad(t2) / ad(t) + @test ad(t2' / t) ≈ ad(t2)' / ad(t) + @test ad(t1 / t') ≈ ad(t1) / ad(t)' + @test ad(t1' / t') ≈ ad(t1)' / ad(t)' + end + end + end + @timedtestset "Factorization" begin + W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 + for T in (Float32, ComplexF64) + # Test both a normal tensor and an adjoint one. + ts = (AMDGPU.rand(T, W), AMDGPU.rand(T, W)') + for t in ts + @testset "leftorth with $alg" for alg in + (TensorKit.QR(), TensorKit.QRpos(), + TensorKit.QL(), TensorKit.QLpos(), + TensorKit.Polar(), # TensorKit.SVD(), + TensorKit.SDD()) + Q, R = @constinferred leftorth(t, ((3, 4, 2), (1, 5)); alg=alg) + QdQ = Q' * Q + @test QdQ ≈ one(QdQ) + @test Q * R ≈ permute(t, ((3, 4, 2), (1, 5))) + if alg isa Polar + # @test isposdef(R) # not defined for AMDGPU + @test domain(R) == codomain(R) == space(t, 1)' ⊗ space(t, 5)' + end + end + @testset "leftnull with $alg" for alg in + (TensorKit.QR(), # TensorKit.SVD(), + TensorKit.SDD()) + N = @constinferred leftnull(t, ((3, 4, 2), (1, 5)); alg=alg) + NdN = N' * N + @test NdN ≈ one(NdN) + @test norm(N' * permute(t, ((3, 4, 2), (1, 5)))) < + 100 * eps(norm(t)) + end + @testset "rightorth with $alg" for alg in + (TensorKit.RQ(), TensorKit.RQpos(), + TensorKit.LQ(), TensorKit.LQpos(), + TensorKit.Polar(), #TensorKit.SVD(), + TensorKit.SDD()) + # cusolver SVD requires m >= n for some reason + L, Q = @constinferred rightorth(t, ((3, 4), (2, 1, 5)); alg=alg) + QQd = Q * Q' + @test QQd ≈ one(QQd) + @test L * Q ≈ permute(t, ((3, 4), (2, 1, 5))) + if alg isa Polar + # @test isposdef(L) # not defined for AMDGPU + @test domain(L) == codomain(L) == space(t, 3) ⊗ space(t, 4) + end + end + @testset "rightnull with $alg" for alg in + (TensorKit.LQ(), # TensorKit.SVD(), + TensorKit.SDD()) + M = @constinferred rightnull(t, ((3, 4), (2, 1, 5)); alg=alg) + MMd = M * M' + @test MMd ≈ one(MMd) + @test norm(permute(t, ((3, 4), (2, 1, 5))) * M') < + 100 * eps(norm(t)) + end + @testset "tsvd with $alg" for alg in (TensorKit.SDD(),) + U, S, V = @constinferred tsvd(t, ((3, 4, 2), (1, 5)); alg=alg) + UdU = U' * U + @test UdU ≈ one(UdU) + VVd = V * V' + @test VVd ≈ one(VVd) + t2 = permute(t, ((3, 4, 2), (1, 5))) + @test U * S * V ≈ t2 + + s = LinearAlgebra.svdvals(t2) + s′ = LinearAlgebra.diag(S) + for (c, b) in s + @test b ≈ s′[c] + end + end + end + @testset "empty tensor" begin + t = AMDGPU.randn(T, V1 ⊗ V2, typeof(V1)()) + @testset "leftorth with $alg" for alg in + (TensorKit.QR(), TensorKit.QRpos(), + TensorKit.QL(), TensorKit.QLpos(), + TensorKit.Polar(), TensorKit.SVD(), + TensorKit.SDD()) + Q, R = @constinferred leftorth(t; alg=alg) + @test Q == t + @test dim(Q) == dim(R) == 0 + end + @testset "leftnull with $alg" for alg in + (TensorKit.QR(), TensorKit.SVD(), + TensorKit.SDD()) + N = @constinferred leftnull(t; alg=alg) + @test N' * N ≈ id(storagetype(t), domain(N)) + @test N * N' ≈ id(storagetype(t), codomain(N)) + end + @testset "rightorth with $alg" for alg in + (TensorKit.RQ(), TensorKit.RQpos(), + TensorKit.LQ(), TensorKit.LQpos(), + TensorKit.Polar(), 
TensorKit.SVD(), + TensorKit.SDD()) + L, Q = @constinferred rightorth(copy(t'); alg=alg) + @test Q == t' + @test dim(Q) == dim(L) == 0 + end + @testset "rightnull with $alg" for alg in + (TensorKit.LQ(), TensorKit.SVD(), + TensorKit.SDD()) + M = @constinferred rightnull(copy(t'); alg=alg) + @test M * M' ≈ id(storagetype(t), codomain(M)) + @test M' * M ≈ id(storagetype(t), domain(M)) + end + @testset "tsvd with $alg" for alg in (TensorKit.SVD(), TensorKit.SDD()) + U, S, V = @constinferred tsvd(t; alg=alg) + @test U == t + @test dim(U) == dim(S) == dim(V) + end + end + + # AMDGPU only supports symmetric/hermitian eigen + @testset "eig and isposdef" begin + t = AMDGPU.rand(T, V1 ⊗ V2 ← V1 ⊗ V2) + t += t' + D, V = eigen(t) + @test t * V ≈ V * D + + # d = LinearAlgebra.eigvals(t; sortby=nothing) + # d′ = LinearAlgebra.diag(D) + # for (c, b) in d + # @test b ≈ d′[c] + # end + + # Somehow moving these test before the previous one gives rise to errors + # with T=Float32 on x86 platforms. Is this an OpenBLAS issue? + # VdV = V' * V + # VdV = (VdV + VdV') / 2 + # @test isposdef(VdV) + # + # @test !isposdef(t2) # unlikely for non-hermitian map + # t2 = (t2 + t2') + # D, V = eigen(t2) + # VdV = V' * V + # @test VdV ≈ one(VdV) + # D̃, Ṽ = @constinferred eigh(t2) + # @test D ≈ D̃ + # @test V ≈ Ṽ + # λ = minimum(minimum(real(LinearAlgebra.diag(b))) + # for (c, b) in blocks(D)) + # @test isposdef(t2) == isposdef(λ) + # @test isposdef(t2 - λ * one(t2) + 0.1 * one(t2)) + # @test !isposdef(t2 - λ * one(t2) - 0.1 * one(t2)) + end + end + end + @timedtestset "Tensor truncation" begin + for T in (Float32, ComplexF64) + for p in (1, 2, 3, Inf) + # Test both a normal tensor and an adjoint one. + ts = (AMDGPU.randn(T, V1 ⊗ V2 ⊗ V3, V4 ⊗ V5), + AMDGPU.randn(T, V4 ⊗ V5, V1 ⊗ V2 ⊗ V3)') + for t in ts + U₀, S₀, V₀, = tsvd(t) + t = rmul!(t, 1 / norm(S₀, p)) + # Probably shouldn't allow truncerr and truncdim, as these require scalar indexing? 
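+                        # truncbelow(η) discards singular values below the threshold η,
+                        # while truncspace prescribes the truncated space directly;
+                        # constructed as below, both should select the same singular
+                        # values, hence the equality check between the two calls.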
+                        U, S, V = tsvd(t; trunc=truncbelow(1 / dim(domain(S₀))), p=p)
+                        U′, S′, V′ = tsvd(t; trunc=truncspace(space(S, 1)), p=p)
+                        @test (U, S, V) == (U′, S′, V′)
+                    end
+                end
+            end
+        end
+        if BraidingStyle(I) isa Bosonic && hasfusiontensor(I)
+            @timedtestset "Tensor functions" begin
+                W = V1 ⊗ V2
+                for T in (Float64, ComplexF64)
+                    t = AMDGPU.randn(T, W, W)
+                    s = dim(W)
+                    expt = @constinferred exp(t)
+                    @test ad(expt) ≈ exp(ad(t))
+
+                    @test (@constinferred sqrt(t))^2 ≈ t
+                    @test ad(sqrt(t)) ≈ sqrt(ad(t))
+
+                    @test exp(@constinferred log(expt)) ≈ expt
+                    @test ad(log(expt)) ≈ log(ad(expt))
+
+                    @test (@constinferred cos(t))^2 + (@constinferred sin(t))^2 ≈
+                          id(storagetype(t), W)
+                    @test (@constinferred tan(t)) ≈ sin(t) / cos(t)
+                    @test (@constinferred cot(t)) ≈ cos(t) / sin(t)
+                    @test (@constinferred cosh(t))^2 - (@constinferred sinh(t))^2 ≈
+                          id(storagetype(t), W)
+                    @test (@constinferred tanh(t)) ≈ sinh(t) / cosh(t)
+                    @test (@constinferred coth(t)) ≈ cosh(t) / sinh(t)
+
+                    t1 = sin(t)
+                    @test sin(@constinferred asin(t1)) ≈ t1
+                    t2 = cos(t)
+                    @test cos(@constinferred acos(t2)) ≈ t2
+                    t3 = sinh(t)
+                    @test sinh(@constinferred asinh(t3)) ≈ t3
+                    t4 = cosh(t)
+                    @test cosh(@constinferred acosh(t4)) ≈ t4
+                    t5 = tan(t)
+                    @test tan(@constinferred atan(t5)) ≈ t5
+                    t6 = cot(t)
+                    @test cot(@constinferred acot(t6)) ≈ t6
+                    t7 = tanh(t)
+                    @test tanh(@constinferred atanh(t7)) ≈ t7
+                    t8 = coth(t)
+                    @test coth(@constinferred acoth(t8)) ≈ t8
+                end
+            end
+        end
+        # Sylvester not defined for AMDGPU
+        # @timedtestset "Sylvester equation" begin
+        #     for T in (Float32, ComplexF64)
+        #         tA = AMDGPU.rand(T, V1 ⊗ V3, V1 ⊗ V3)
+        #         tB = AMDGPU.rand(T, V2 ⊗ V4, V2 ⊗ V4)
+        #         tA = 3 // 2 * leftorth(tA; alg=Polar())[1]
+        #         tB = 1 // 5 * leftorth(tB; alg=Polar())[1]
+        #         tC = AMDGPU.rand(T, V1 ⊗ V3, V2 ⊗ V4)
+        #         t = @constinferred sylvester(tA, tB, tC)
+        #         @test codomain(t) == V1 ⊗ V3
+        #         @test domain(t) == V2 ⊗ V4
+        #         @test norm(tA * t + t * tB + tC) <
+        #               (norm(tA) + norm(tB) + norm(tC)) * eps(real(T))^(2 / 3)
+        #         if BraidingStyle(I) isa Bosonic && hasfusiontensor(I)
+        #             matrix(x) = reshape(convert(Array, x), dim(codomain(x)), dim(domain(x)))
+        #             @test matrix(t) ≈ sylvester(matrix(tA), matrix(tB), matrix(tC))
+        #         end
+        #     end
+        # end
+        @timedtestset "Tensor product: test via norm preservation" begin
+            for T in (Float32, ComplexF64)
+                t1 = AMDGPU.rand(T, V2 ⊗ V3 ⊗ V1, V1 ⊗ V2)
+                t2 = AMDGPU.rand(T, V2 ⊗ V1 ⊗ V3, V1 ⊗ V1)
+                t = @constinferred (t1 ⊗ t2)
+                @test norm(t) ≈ norm(t1) * norm(t2)
+            end
+        end
+        if BraidingStyle(I) isa Bosonic && hasfusiontensor(I)
+            @timedtestset "Tensor product: test via conversion" begin
+                for T in (Float32, ComplexF64)
+                    t1 = AMDGPU.rand(T, V2 ⊗ V3 ⊗ V1, V1)
+                    t2 = AMDGPU.rand(T, V2 ⊗ V1 ⊗ V3, V2)
+                    t = @constinferred (t1 ⊗ t2)
+                    d1 = dim(codomain(t1))
+                    d2 = dim(codomain(t2))
+                    d3 = dim(domain(t1))
+                    d4 = dim(domain(t2))
+                    At = ad(t)
+                    @test ad(t) ≈ ad(t1) ⊗ ad(t2)
+                end
+            end
+        end
+        @timedtestset "Tensor product: test via tensor contraction" begin
+            for T in (Float32, ComplexF64)
+                t1 = AMDGPU.rand(T, V2 ⊗ V3 ⊗ V1)
+                t2 = AMDGPU.rand(T, V2 ⊗ V1 ⊗ V3)
+                t = @constinferred (t1 ⊗ t2)
+                @tensor t′[1, 2, 3, 4, 5, 6] := t1[1, 2, 3] * t2[4, 5, 6]
+                @test t ≈ t′
+            end
+        end
+    end
+end
+
+@timedtestset "Deligne tensor product: test via conversion" begin
+    Vlists1 = (Vtr,) # VSU₂)
+    Vlists2 = (Vtr,) # Vℤ₂)
+    @testset for Vlist1 in Vlists1, Vlist2 in Vlists2
+        V1, V2, V3, V4, V5 = Vlist1
+        W1, W2, W3, W4, W5 = Vlist2
+        for T in (Float32, ComplexF64)
+            t1 = AMDGPU.rand(T, V1 ⊗ V2, V3' ⊗ V4)
+            t2 = 
AMDGPU.rand(T, W2, W1 ⊗ W1') + t = @constinferred (t1 ⊠ t2) + d1 = dim(codomain(t1)) + d2 = dim(codomain(t2)) + d3 = dim(domain(t1)) + d4 = dim(domain(t2)) + @test ad(t1) ⊠ ad(t2) ≈ ad(t1 ⊠ t2) + end + end +end + diff --git a/test/cuda.jl b/test/cuda.jl new file mode 100644 index 000000000..f40402a1c --- /dev/null +++ b/test/cuda.jl @@ -0,0 +1,591 @@ +using Adapt +ad = adapt(Array) + +const CUDAExt = Base.get_extension(TensorKit, :TensorKitCUDAExt) +@assert !isnothing(CUDAExt) +# const CuTensorMap{T,S,N1,N2,I,A} = CUDAExt.CuTensorMap{T,S,N1,N2,I,A} +const CuTensorMap = getglobal(CUDAExt, :CuTensorMap) + +for V in (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂, VfSU₂)#, VSU₃) + V1, V2, V3, V4, V5 = V + @assert V3 * V4 * V2 ≿ V1' * V5' # necessary for leftorth tests + @assert V3 * V4 ≾ V1' * V2' * V5' # necessary for rightorth tests +end + +spacelist = try + if ENV["CI"] == "true" + println("Detected running on CI") + if Sys.iswindows() + (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂) + elseif Sys.isapple() + (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VfU₁, VfSU₂)#, VSU₃) + else + (Vtr, Vℤ₂, Vfℤ₂, VU₁, VCU₁, VSU₂, VfSU₂)#, VSU₃) + end + else + (Vtr, VU₁, VSU₂, Vfℤ₂) + end +catch + (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂, VfSU₂)#, VSU₃) +end + +for V in spacelist + I = sectortype(first(V)) + Istr = TensorKit.type_repr(I) + println("---------------------------------------") + println("CUDA Tensors with symmetry: $Istr") + println("---------------------------------------") + @timedtestset "Tensors with symmetry: $Istr" verbose = true begin + V1, V2, V3, V4, V5 = V + @timedtestset "Basic tensor properties" begin + W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 + for T in (Int, Float32, Float64, ComplexF32, ComplexF64) + t = @constinferred CUDA.zeros(T, W) + # @test @constinferred(hash(t)) == hash(deepcopy(t)) # hash is not defined for CuArray? + @test scalartype(t) == T + @test norm(t) == 0 + @test codomain(t) == W + @test space(t) == (W ← one(W)) + @test domain(t) == one(W) + @test typeof(t) == + @constinferred tensormaptype(spacetype(t), 5, 0, storagetype(t)) + end + end + @timedtestset "Tensor Dict conversion" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for T in (Int, Float32, ComplexF64) + t = @constinferred CUDA.rand(T, W) + d = convert(Dict, t) + @test t == convert(CuTensorMap, d) + end + end + if hasfusiontensor(I) + @timedtestset "Tensor Array conversion" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + + # cuTENSOR does not support Int + Ts = sectortype(W) === Trivial ? 
(Int, Float32, ComplexF64) : + (Float32, ComplexF64) + for T in Ts + t = @constinferred CUDA.rand(T, W) + a = @constinferred convert(CuArray, t) + @test t ≈ @constinferred TensorMap(a, W) + # also test if input is matrix + a2 = reshape(a, prod(dim, codomain(t)), prod(dim, domain(t))) + @test t ≈ @constinferred TensorMap(a2, codomain(t), domain(t)) + end + end + end + @timedtestset "Basic linear algebra" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for T in (Float32, ComplexF64) + t = @constinferred CUDA.rand(T, W) + @test scalartype(t) == T + @test space(t) == W + @test space(t') == W' + @test dim(t) == dim(space(t)) + @test codomain(t) == codomain(W) + @test domain(t) == domain(W) + @test isa(@constinferred(norm(t)), real(T)) + @test norm(t)^2 ≈ dot(t, t) + α = rand(T) + @test norm(α * t) ≈ abs(α) * norm(t) + @test norm(t + t, 2) ≈ 2 * norm(t, 2) + @test norm(t + t, 1) ≈ 2 * norm(t, 1) + @test norm(t + t, Inf) ≈ 2 * norm(t, Inf) + p = 3 * rand(Float64) + @test norm(t + t, p) ≈ 2 * norm(t, p) + @test norm(t) ≈ norm(t') + + t2 = @constinferred CUDA.rand!(similar(t)) + β = rand(T) + @test @constinferred(dot(β * t2, α * t)) ≈ conj(β) * α * conj(dot(t, t2)) + @test dot(t2, t) ≈ conj(dot(t, t2)) + @test dot(t2, t) ≈ conj(dot(t2', t')) + @test dot(t2, t) ≈ dot(t', t2') + + A = storagetype(t) + + i1 = @constinferred(isomorphism(CuMatrix{T}, V1 ⊗ V2, V2 ⊗ V1)) + i2 = @constinferred(isomorphism(CuMatrix{T}, V2 ⊗ V1, V1 ⊗ V2)) + @test i1 * i2 == @constinferred(id(CuMatrix{T}, V1 ⊗ V2)) + @test i2 * i1 == @constinferred(id(CuMatrix{T}, V2 ⊗ V1)) + + w = @constinferred(isometry(CuMatrix{T}, V1 ⊗ (oneunit(V1) ⊕ oneunit(V1)), + V1)) + @test dim(w) == 2 * dim(V1 ← V1) + @test w' * w == id(CuMatrix{T}, V1) + @test w * w' == (w * w')^2 + end + end + if hasfusiontensor(I) + @timedtestset "Basic linear algebra: test via conversion" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for T in (Float32, ComplexF64) + t = CUDA.rand(T, W) + t2 = @constinferred CUDA.rand!(similar(t)) + @test norm(t, 2) ≈ norm(ad(t), 2) + @test dot(t2, t) ≈ dot(ad(t2), ad(t)) + α = rand(T) + @test ad(α * t) ≈ α * ad(t) + @test ad(t + t) ≈ 2 * ad(t) + end + end + @timedtestset "Real and imaginary parts" begin + W = V1 ⊗ V2 + for T in (Float64, ComplexF64, ComplexF32) + t = @constinferred CUDA.randn(T, W, W) + @test real(ad(t)) == ad(@constinferred real(t)) + @test imag(ad(t)) == ad(@constinferred imag(t)) + end + end + end + @timedtestset "Tensor conversion" begin + W = V1 ⊗ V2 + t = @constinferred CUDA.randn(W ← W) + @test typeof(convert(CuTensorMap, t')) == typeof(t) + tc = complex(t) + @test convert(typeof(tc), t) == tc + @test typeof(convert(typeof(tc), t)) == typeof(tc) + @test typeof(convert(typeof(tc), t')) == typeof(tc) + @test Base.promote_typeof(t, tc) == typeof(tc) + @test Base.promote_typeof(tc, t) == typeof(tc + t) + end + #=@timedtestset "diag/diagm" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + t = CUDA.randn(ComplexF64, W) + d = LinearAlgebra.diag(t) + # TODO find a way to use CUDA here + D = LinearAlgebra.diagm(codomain(t), domain(t), d) + @test LinearAlgebra.isdiag(D) + @test LinearAlgebra.diag(D) == d + end=# + @timedtestset "Permutations: test via inner product invariance" begin + W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 + t = CUDA.rand(ComplexF64, W) + t′ = CUDA.randn!(similar(t)) + for k in 0:5 + for p in permutations(1:5) + p1 = ntuple(n -> p[n], k) + p2 = ntuple(n -> p[k + n], 5 - k) + t2 = @constinferred permute(t, (p1, p2)) + t2 = permute(t, (p1, p2)) + @test norm(t2) ≈ norm(t) + t2′ = permute(t′, (p1, p2)) + @test dot(t2′, t2) ≈ dot(t′, t) ≈ 
dot(transpose(t2′), transpose(t2)) + end + + t3 = @constinferred repartition(t, $k) + t3 = repartition(t, k) + @test norm(t3) ≈ norm(t) + t3′ = @constinferred repartition!(similar(t3), t′) + @test norm(t3′) ≈ norm(t′) + @test dot(t′, t) ≈ dot(t3′, t3) + end + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Permutations: test via conversion" begin + W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 + t = CUDA.rand(ComplexF64, W) + a = ad(t) + for k in 0:5 + for p in permutations(1:5) + p1 = ntuple(n -> p[n], k) + p2 = ntuple(n -> p[k + n], 5 - k) + t2 = permute(t, (p1, p2)) + a2 = ad(t2) + @test a2 ≈ permute(a, (p1, p2)) + @test ad(transpose(t2)) ≈ transpose(a2) + end + + t3 = repartition(t, k) + a3 = ad(t3) + @test a3 ≈ repartition(a, k) + end + end + end + @timedtestset "Full trace: test self-consistency" begin + t = CUDA.rand(ComplexF64, V1 ⊗ V2' ⊗ V2 ⊗ V1') + t2 = permute(t, ((1, 2), (4, 3))) + s = @constinferred tr(t2) + @test conj(s) ≈ tr(t2') + if !isdual(V1) + t2 = twist!(t2, 1) + end + if isdual(V2) + t2 = twist!(t2, 2) + end + ss = tr(t2) + @tensor s2 = t[a, b, b, a] + @tensor t3[a, b] := t[a, c, c, b] + @tensor s3 = t3[a, a] + @test ss ≈ s2 + @test ss ≈ s3 + end + @timedtestset "Partial trace: test self-consistency" begin + t = CUDA.rand(ComplexF64, V1 ⊗ V2' ⊗ V3 ⊗ V2 ⊗ V1' ⊗ V3') + @tensor t2[a, b] := t[c, d, b, d, c, a] + @tensor t4[a, b, c, d] := t[d, e, b, e, c, a] + @tensor t5[a, b] := t4[a, b, c, c] + @test t2 ≈ t5 + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Trace: test via conversion" begin + t = CUDA.rand(ComplexF64, V1 ⊗ V2' ⊗ V3 ⊗ V2 ⊗ V1' ⊗ V3') + @tensor t2[a, b] := t[c, d, b, d, c, a] + @tensor t3[a, b] := ad(t)[c, d, b, d, c, a] + @test t3 ≈ ad(t2) + end + end + @timedtestset "Trace and contraction" begin + t1 = CUDA.rand(ComplexF64, V1 ⊗ V2 ⊗ V3) + t2 = CUDA.rand(ComplexF64, V2' ⊗ V4 ⊗ V1') + t3 = t1 ⊗ t2 + @tensor ta[a, b] := t1[x, y, a] * t2[y, b, x] + @tensor tb[a, b] := t3[x, y, a, y, b, x] + @test ta ≈ tb + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Tensor contraction: test via conversion" begin + A1 = CUDA.randn(ComplexF64, V1' * V2', V3') + A2 = CUDA.randn(ComplexF64, V3 * V4, V5) + rhoL = CUDA.randn(ComplexF64, V1, V1) + rhoR = CUDA.randn(ComplexF64, V5, V5)' # test adjoint tensor + H = CUDA.randn(ComplexF64, V2 * V4, V2 * V4) + @tensor HrA12[a, s1, s2, c] := rhoL[a, a'] * conj(A1[a', t1, b]) * + A2[b, t2, c'] * rhoR[c', c] * + H[s1, s2, t1, t2] + + @tensor HrA12array[a, s1, s2, c] := ad(rhoL)[a, a'] * + conj(ad(A1)[a', t1, b]) * + ad(A2)[b, t2, c'] * + ad(rhoR)[c', c] * + ad(H)[s1, s2, t1, t2] + + @test HrA12array ≈ ad(HrA12) + end + end + @timedtestset "Multiplication and inverse: test compatibility" begin + W1 = V1 ⊗ V2 ⊗ V3 + W2 = V4 ⊗ V5 + for T in (Float64, ComplexF64) + t1 = CUDA.rand(T, W1, W1) + t2 = CUDA.rand(T, W2, W2) + t = CUDA.rand(T, W1, W2) + @test t1 * (t1 \ t) ≈ t + @test (t / t2) * t2 ≈ t + @test t1 \ one(t1) ≈ inv(t1) + # @test one(t1) / t1 ≈ pinv(t1) # pinv not available in CUDA + @test_throws SpaceMismatch inv(t) + @test_throws SpaceMismatch t2 \ t + @test_throws SpaceMismatch t / t1 + # pinv not available in CUDA + # tp = pinv(t) * t + # @test tp ≈ tp * tp + end + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Multiplication and inverse: test via conversion" begin + W1 = V1 ⊗ V2 ⊗ V3 + W2 = V4 ⊗ V5 + for T in (Float32, Float64, ComplexF32, ComplexF64) + t1 = CUDA.rand(T, W1, W1) + t2 = CUDA.rand(T, W2, W2) + t = CUDA.rand(T, W1, 
W2) + d1 = dim(W1) + d2 = dim(W2) + At1 = reshape(convert(Array, t1), d1, d1) + At2 = reshape(convert(Array, t2), d2, d2) + At = reshape(convert(Array, t), d1, d2) + @test ad(t1 * t) ≈ ad(t1) * ad(t) + @test ad(t1' * t) ≈ ad(t1)' * ad(t) + @test ad(t2 * t') ≈ ad(t2) * ad(t)' + @test ad(t2' * t') ≈ ad(t2)' * ad(t)' + @test ad(inv(t1)) ≈ inv(ad(t1)) + # pinv not available in CUDA + #@test ad(pinv(t)) ≈ pinv(ad(t)) + + if T == Float32 || T == ComplexF32 + continue + end + + @test ad(t1 \ t) ≈ ad(t1) \ ad(t) + @test ad(t1' \ t) ≈ ad(t1)' \ ad(t) + @test ad(t2 \ t') ≈ ad(t2) \ ad(t)' + @test ad(t2' \ t') ≈ ad(t2)' \ ad(t)' + + @test ad(t2 / t) ≈ ad(t2) / ad(t) + @test ad(t2' / t) ≈ ad(t2)' / ad(t) + @test ad(t1 / t') ≈ ad(t1) / ad(t)' + @test ad(t1' / t') ≈ ad(t1)' / ad(t)' + end + end + end + @timedtestset "Factorization" begin + W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 + for T in (Float32, ComplexF64) + # Test both a normal tensor and an adjoint one. + ts = (CUDA.rand(T, W), CUDA.rand(T, W)') + for t in ts + @testset "leftorth with $alg" for alg in + (TensorKit.QR(), TensorKit.QRpos(), + TensorKit.QL(), TensorKit.QLpos(), + TensorKit.Polar(), TensorKit.SVD(), + ) + Q, R = @constinferred leftorth(t, ((3, 4, 2), (1, 5)); alg=alg) + @test isisometry(Q) + @test Q * R ≈ permute(t, ((3, 4, 2), (1, 5))) + end + @testset "leftnull with $alg" for alg in + (TensorKit.QR(), TensorKit.SVD(), + ) + N = @constinferred leftnull(t, ((3, 4, 2), (1, 5)); alg=alg) + @test isisometry(N) + @test norm(N' * permute(t, ((3, 4, 2), (1, 5)))) < + 100 * eps(norm(t)) + end + @testset "rightorth with $alg" for alg in + (TensorKit.RQ(), TensorKit.RQpos(), + TensorKit.LQ(), TensorKit.LQpos(), + TensorKit.Polar(), TensorKit.SVD(), + ) + # cusolver SVD requires m >= n for some reason + L, Q = @constinferred rightorth(t, ((3, 4), (2, 1, 5)); alg=alg) + @test isisometry(Q; side=:right) + @test L * Q ≈ permute(t, ((3, 4), (2, 1, 5))) + end + @testset "rightnull with $alg" for alg in + (TensorKit.LQ(), TensorKit.SVD(), + ) + M = @constinferred rightnull(t, ((3, 4), (2, 1, 5)); alg=alg) + @test isisometry(M; side=:right) + @test norm(permute(t, ((3, 4), (2, 1, 5))) * M') < + 100 * eps(norm(t)) + end + @testset "tsvd with $alg" for alg in (TensorKit.SVD(),) + U, S, V = @constinferred tsvd(t, ((3, 4, 2), (1, 5)); alg=alg) + @test isisometry(U) + @test isisometry(V; side=:right) + t2 = permute(t, ((3, 4, 2), (1, 5))) + US = U * S + USV = US * V + @test U * S * V ≈ t2 + + s = LinearAlgebra.svdvals(t2) + s′ = LinearAlgebra.diag(S) + for (c, b) in s + @test b ≈ s′[c] + end + end + end + @testset "empty tensor" begin + t = CUDA.randn(T, V1 ⊗ V2, typeof(V1)()) + @testset "leftorth with $alg" for alg in + (TensorKit.QR(), TensorKit.QRpos(), + TensorKit.QL(), TensorKit.QLpos(), + TensorKit.Polar(), TensorKit.SVD(), + ) + Q, R = @constinferred leftorth(t; alg=alg) + @test Q == t + @test dim(Q) == dim(R) == 0 + end + @testset "leftnull with $alg" for alg in + (TensorKit.QR(), TensorKit.SVD(), + ) + N = @constinferred leftnull(t; alg=alg) + @test N' * N ≈ id(storagetype(t), domain(N)) + @test N * N' ≈ id(storagetype(t), codomain(N)) + end + @testset "rightorth with $alg" for alg in + (TensorKit.RQ(), TensorKit.RQpos(), + TensorKit.LQ(), TensorKit.LQpos(), + TensorKit.Polar(), TensorKit.SVD(), + ) + L, Q = @constinferred rightorth(copy(t'); alg=alg) + @test Q == t' + @test dim(Q) == dim(L) == 0 + end + @testset "rightnull with $alg" for alg in + (TensorKit.LQ(), TensorKit.SVD(), + ) + M = @constinferred rightnull(copy(t'); alg=alg) + @test M * M' ≈ 
+        @timedtestset "Factorization" begin
+            W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5
+            for T in (Float32, ComplexF64)
+                # Test both a normal tensor and an adjoint one.
+                ts = (CUDA.rand(T, W), CUDA.rand(T, W)')
+                for t in ts
+                    @testset "leftorth with $alg" for alg in
+                                                      (TensorKit.QR(), TensorKit.QRpos(),
+                                                       TensorKit.QL(), TensorKit.QLpos(),
+                                                       TensorKit.Polar(), TensorKit.SVD())
+                        Q, R = @constinferred leftorth(t, ((3, 4, 2), (1, 5)); alg=alg)
+                        @test isisometry(Q)
+                        @test Q * R ≈ permute(t, ((3, 4, 2), (1, 5)))
+                    end
+                    @testset "leftnull with $alg" for alg in
+                                                      (TensorKit.QR(), TensorKit.SVD())
+                        N = @constinferred leftnull(t, ((3, 4, 2), (1, 5)); alg=alg)
+                        @test isisometry(N)
+                        @test norm(N' * permute(t, ((3, 4, 2), (1, 5)))) <
+                              100 * eps(norm(t))
+                    end
+                    @testset "rightorth with $alg" for alg in
+                                                       (TensorKit.RQ(), TensorKit.RQpos(),
+                                                        TensorKit.LQ(), TensorKit.LQpos(),
+                                                        TensorKit.Polar(), TensorKit.SVD())
+                        # cusolver SVD requires m >= n for some reason
+                        L, Q = @constinferred rightorth(t, ((3, 4), (2, 1, 5)); alg=alg)
+                        @test isisometry(Q; side=:right)
+                        @test L * Q ≈ permute(t, ((3, 4), (2, 1, 5)))
+                    end
+                    @testset "rightnull with $alg" for alg in
+                                                       (TensorKit.LQ(), TensorKit.SVD())
+                        M = @constinferred rightnull(t, ((3, 4), (2, 1, 5)); alg=alg)
+                        @test isisometry(M; side=:right)
+                        @test norm(permute(t, ((3, 4), (2, 1, 5))) * M') <
+                              100 * eps(norm(t))
+                    end
+                    @testset "tsvd with $alg" for alg in (TensorKit.SVD(),)
+                        U, S, V = @constinferred tsvd(t, ((3, 4, 2), (1, 5)); alg=alg)
+                        @test isisometry(U)
+                        @test isisometry(V; side=:right)
+                        t2 = permute(t, ((3, 4, 2), (1, 5)))
+                        @test U * S * V ≈ t2
+
+                        s = LinearAlgebra.svdvals(t2)
+                        s′ = LinearAlgebra.diag(S)
+                        for (c, b) in s
+                            @test b ≈ s′[c]
+                        end
+                    end
+                end
+                @testset "empty tensor" begin
+                    t = CUDA.randn(T, V1 ⊗ V2, typeof(V1)())
+                    @testset "leftorth with $alg" for alg in
+                                                      (TensorKit.QR(), TensorKit.QRpos(),
+                                                       TensorKit.QL(), TensorKit.QLpos(),
+                                                       TensorKit.Polar(), TensorKit.SVD())
+                        Q, R = @constinferred leftorth(t; alg=alg)
+                        @test Q == t
+                        @test dim(Q) == dim(R) == 0
+                    end
+                    @testset "leftnull with $alg" for alg in
+                                                      (TensorKit.QR(), TensorKit.SVD())
+                        N = @constinferred leftnull(t; alg=alg)
+                        @test N' * N ≈ id(storagetype(t), domain(N))
+                        @test N * N' ≈ id(storagetype(t), codomain(N))
+                    end
+                    @testset "rightorth with $alg" for alg in
+                                                       (TensorKit.RQ(), TensorKit.RQpos(),
+                                                        TensorKit.LQ(), TensorKit.LQpos(),
+                                                        TensorKit.Polar(), TensorKit.SVD())
+                        L, Q = @constinferred rightorth(copy(t'); alg=alg)
+                        @test Q == t'
+                        @test dim(Q) == dim(L) == 0
+                    end
+                    @testset "rightnull with $alg" for alg in
+                                                       (TensorKit.LQ(), TensorKit.SVD())
+                        M = @constinferred rightnull(copy(t'); alg=alg)
+                        @test M * M' ≈ id(storagetype(t), codomain(M))
+                        @test M' * M ≈ id(storagetype(t), domain(M))
+                    end
+                    @testset "tsvd with $alg" for alg in (TensorKit.SVD(),)
+                        U, S, V = @constinferred tsvd(t; alg=alg)
+                        @test U == t
+                        @test dim(U) == dim(S) == dim(V)
+                    end
+                end
+
+                # CUDA only supports symmetric/hermitian eigen
+                @testset "eig and isposdef" begin
+                    t = CUDA.rand(T, V1 ⊗ V2 ← V1 ⊗ V2)
+                    t += t'
+                    D, V = eigen(t)
+                    @test t * V ≈ V * D
+
+                    d = LinearAlgebra.eigvals(t; sortby=nothing)
+                    d′ = LinearAlgebra.diag(D)
+                    for (c, b) in d
+                        @test sort(real.(b)) ≈ sort(real.(d′[c]))
+                    end
+
+                    # Somehow moving these tests before the previous one gives rise to
+                    # errors with T=Float32 on x86 platforms. Is this an OpenBLAS issue?
+                    # VdV = V' * V
+                    # VdV = (VdV + VdV') / 2
+                    # @test isposdef(VdV)
+                    #
+                    # @test !isposdef(t2) # unlikely for non-hermitian map
+                    # t2 = (t2 + t2')
+                    # D, V = eigen(t2)
+                    # VdV = V' * V
+                    # @test VdV ≈ one(VdV)
+                    # D̃, Ṽ = @constinferred eigh(t2)
+                    # @test D ≈ D̃
+                    # @test V ≈ Ṽ
+                    # λ = minimum(minimum(real(LinearAlgebra.diag(b)))
+                    #             for (c, b) in blocks(D))
+                    # @test isposdef(t2) == isposdef(λ)
+                    # @test isposdef(t2 - λ * one(t2) + 0.1 * one(t2))
+                    # @test !isposdef(t2 - λ * one(t2) - 0.1 * one(t2))
+                end
+            end
+        end
+        @timedtestset "Tensor truncation" begin
+            for T in (Float32, ComplexF64)
+                for p in (1, 2, 3, Inf)
+                    # Test both a normal tensor and an adjoint one.
+                    ts = (CUDA.randn(T, V1 ⊗ V2 ⊗ V3, V4 ⊗ V5),
+                          CUDA.randn(T, V4 ⊗ V5, V1 ⊗ V2 ⊗ V3)')
+                    for t in ts
+                        U₀, S₀, V₀, = tsvd(t)
+                        t = rmul!(t, 1 / norm(S₀, p))
+                        # Probably shouldn't allow truncerr and truncdim, as these
+                        # require scalar indexing?
+                        U, S, V = tsvd(t; trunc=truncbelow(1 / dim(domain(S₀))), p=p)
+                        U′, S′, V′ = tsvd(t; trunc=truncspace(space(S, 1)), p=p)
+                        @test (U, S, V) == (U′, S′, V′)
+                    end
+                end
+            end
+        end
+        if BraidingStyle(I) isa Bosonic && hasfusiontensor(I)
+            @timedtestset "Tensor functions" begin
+                W = V1 ⊗ V2
+                for T in (Float64, ComplexF64)
+                    t = CUDA.randn(T, W, W)
+                    expt = @constinferred exp(t)
+                    @test ad(expt) ≈ exp(ad(t))
+
+                    @test (@constinferred sqrt(t))^2 ≈ t
+                    @test ad(sqrt(t)) ≈ sqrt(ad(t))
+
+                    @test exp(@constinferred log(expt)) ≈ expt
+                    @test ad(log(expt)) ≈ log(ad(expt))
+
+                    @test (@constinferred cos(t))^2 + (@constinferred sin(t))^2 ≈
+                          id(storagetype(t), W)
+                    @test (@constinferred tan(t)) ≈ sin(t) / cos(t)
+                    @test (@constinferred cot(t)) ≈ cos(t) / sin(t)
+                    @test (@constinferred cosh(t))^2 - (@constinferred sinh(t))^2 ≈
+                          id(storagetype(t), W)
+                    @test (@constinferred tanh(t)) ≈ sinh(t) / cosh(t)
+                    @test (@constinferred coth(t)) ≈ cosh(t) / sinh(t)
+
+                    t1 = sin(t)
+                    @test sin(@constinferred asin(t1)) ≈ t1
+                    t2 = cos(t)
+                    @test cos(@constinferred acos(t2)) ≈ t2
+                    t3 = sinh(t)
+                    @test sinh(@constinferred asinh(t3)) ≈ t3
+                    t4 = cosh(t)
+                    @test cosh(@constinferred acosh(t4)) ≈ t4
+                    t5 = tan(t)
+                    @test tan(@constinferred atan(t5)) ≈ t5
+                    t6 = cot(t)
+                    @test cot(@constinferred acot(t6)) ≈ t6
+                    t7 = tanh(t)
+                    @test tanh(@constinferred atanh(t7)) ≈ t7
+                    t8 = coth(t)
+                    @test coth(@constinferred acoth(t8)) ≈ t8
+                end
+            end
+        end
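On the truncation strategies used above: `truncbelow(ϵ)` keeps only singular values at or above the cutoff, while `truncspace(V)` prescribes the retained space directly, and the testset checks that the two routes agree. A CPU sketch with hypothetical spaces:

```julia
using TensorKit

t = randn(ComplexF64, ℂ^4 ⊗ ℂ^4, ℂ^4)
U, S, V = tsvd(t; trunc=truncbelow(1e-1))            # drop singular values below 1e-1
U′, S′, V′ = tsvd(t; trunc=truncspace(space(S, 1)))  # request the same retained space
@assert (U, S, V) == (U′, S′, V′)
```

`truncerr` and `truncdim` are deliberately left untested here, matching the comment in the testset: those strategies currently rely on scalar indexing, which GPU arrays disallow by default.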
+        # Sylvester not defined for CUDA
+        # @timedtestset "Sylvester equation" begin
+        #     for T in (Float32, ComplexF64)
+        #         tA = CUDA.rand(T, V1 ⊗ V3, V1 ⊗ V3)
+        #         tB = CUDA.rand(T, V2 ⊗ V4, V2 ⊗ V4)
+        #         tA = 3 // 2 * leftorth(tA; alg=Polar())[1]
+        #         tB = 1 // 5 * leftorth(tB; alg=Polar())[1]
+        #         tC = CUDA.rand(T, V1 ⊗ V3, V2 ⊗ V4)
+        #         t = @constinferred sylvester(tA, tB, tC)
+        #         @test codomain(t) == V1 ⊗ V3
+        #         @test domain(t) == V2 ⊗ V4
+        #         @test norm(tA * t + t * tB + tC) <
+        #               (norm(tA) + norm(tB) + norm(tC)) * eps(real(T))^(2 / 3)
+        #         if BraidingStyle(I) isa Bosonic && hasfusiontensor(I)
+        #             matrix(x) = reshape(convert(Array, x), dim(codomain(x)), dim(domain(x)))
+        #             @test matrix(t) ≈ sylvester(matrix(tA), matrix(tB), matrix(tC))
+        #         end
+        #     end
+        # end
+        #
+        # TODO
+        @timedtestset "Tensor product: test via norm preservation" begin
+            for T in (Float32, ComplexF64)
+                t1 = CUDA.rand(T, V2 ⊗ V3 ⊗ V1, V1 ⊗ V2)
+                t2 = CUDA.rand(T, V2 ⊗ V1 ⊗ V3, V1 ⊗ V1)
+                t = @constinferred (t1 ⊗ t2)
+                @test norm(t) ≈ norm(t1) * norm(t2)
+            end
+        end
+        if BraidingStyle(I) isa Bosonic && hasfusiontensor(I)
+            @timedtestset "Tensor product: test via conversion" begin
+                for T in (Float32, ComplexF64)
+                    t1 = CUDA.rand(T, V2 ⊗ V3 ⊗ V1, V1)
+                    t2 = CUDA.rand(T, V2 ⊗ V1 ⊗ V3, V2)
+                    t = @constinferred (t1 ⊗ t2)
+                    @test ad(t) ≈ ad(t1) ⊗ ad(t2)
+                end
+            end
+        end
+        @timedtestset "Tensor product: test via tensor contraction" begin
+            for T in (Float32, ComplexF64)
+                t1 = CUDA.rand(T, V2 ⊗ V3 ⊗ V1)
+                t2 = CUDA.rand(T, V2 ⊗ V1 ⊗ V3)
+                t = @constinferred (t1 ⊗ t2)
+                @tensor t′[1, 2, 3, 4, 5, 6] := t1[1, 2, 3] * t2[4, 5, 6]
+                @test t ≈ t′
+            end
+        end
+    end
+end
+
+@timedtestset "Deligne tensor product: test via conversion" begin
+    Vlists1 = (Vtr,) # VSU₂)
+    Vlists2 = (Vtr,) # Vℤ₂)
+    @testset for Vlist1 in Vlists1, Vlist2 in Vlists2
+        V1, V2, V3, V4, V5 = Vlist1
+        W1, W2, W3, W4, W5 = Vlist2
+        for T in (Float32, ComplexF64)
+            t1 = CUDA.rand(T, V1 ⊗ V2, V3' ⊗ V4)
+            t2 = CUDA.rand(T, W2, W1 ⊗ W1')
+            t = @constinferred (t1 ⊠ t2)
+            @test ad(t1) ⊠ ad(t2) ≈ ad(t)
+        end
+    end
+end
+
diff --git a/test/runtests.jl b/test/runtests.jl
index 9e44b8a3a..70124f857 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -112,24 +112,39 @@ VSU₂U₁ = (Vect[SU2Irrep ⊠ U1Irrep]((0, 0) => 1, (1 // 2, -1) => 1),
 #           ℂ[SU3Irrep]((1, 0, 0) => 1, (2, 0, 0) => 1),
 #           ℂ[SU3Irrep]((0, 0, 0) => 1, (1, 0, 0) => 1, (1, 1, 0) => 1)')
 
+is_buildkite = get(ENV, "BUILDKITE", "false") == "true"
+
 Ti = time()
-include("fusiontrees.jl")
-include("spaces.jl")
-include("tensors.jl")
-include("diagonal.jl")
-include("planar.jl")
-# TODO: remove once we know AD is slow on macOS CI
-if !(Sys.isapple() && get(ENV, "CI", "false") == "true") && isempty(VERSION.prerelease)
-    include("ad.jl")
+if !is_buildkite
+    include("fusiontrees.jl")
+    include("spaces.jl")
+    include("tensors.jl")
+    include("diagonal.jl")
+    include("planar.jl")
+    # TODO: remove once we know AD is slow on macOS CI
+    if !(Sys.isapple() && get(ENV, "CI", "false") == "true") && isempty(VERSION.prerelease)
+        include("ad.jl")
+    end
+    include("bugfixes.jl")
+    Tf = time()
+    @testset "Aqua" verbose = true begin
+        using Aqua
+        Aqua.test_all(TensorKit)
+    end
+else
+    using CUDA, cuTENSOR
+    if CUDA.functional()
+        include("cuda.jl")
+    end
+
+    using AMDGPU
+    if AMDGPU.functional()
+        include("amdgpu.jl")
+    end
+    Tf = time()
 end
-include("bugfixes.jl")
-Tf = time()
+
 printstyled("Finished all tests in ",
             string(round((Tf - Ti) / 60; sigdigits=3)),
             " minutes."; bold=true, color=Base.info_color())
 println()
-
-@testset "Aqua" verbose = true begin
-    using Aqua
-    Aqua.test_all(TensorKit)
-end
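With this split, the Buildkite agents run only the GPU suites while regular CI keeps the CPU path. A sketch of how one might reproduce the GPU-only path locally, assuming a functional CUDA or ROCm setup (`BUILDKITE` is the same variable the script reads; the exact invocation is illustrative):

```julia
# Run from the TensorKit.jl repository root; the environment variable is
# inherited by the subprocess in which runtests.jl executes.
ENV["BUILDKITE"] = "true"

using Pkg
Pkg.activate(".")
Pkg.test()   # runtests.jl now skips the CPU suites and includes cuda.jl / amdgpu.jl
```

Note that the `CUDA.functional()` / `AMDGPU.functional()` guards mean the Buildkite path is a no-op on a machine without the corresponding GPU.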