From 60551f88535b354c5b600dbb730ef7e73ba89a02 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Tue, 7 Oct 2025 10:45:42 -0400 Subject: [PATCH] Start on GPU extensions --- Project.toml | 25 +- ext/TensorKitAMDGPUExt/TensorKitAMDGPUExt.jl | 109 ++++ ext/TensorKitAMDGPUExt/roctensormap.jl | 278 +++++++++ ext/TensorKitCUDAExt/TensorKitCUDAExt.jl | 107 ++++ ext/TensorKitCUDAExt/cutensormap.jl | 340 ++++++++++ src/TensorKit.jl | 2 +- src/auxiliary/random.jl | 13 +- src/factorizations/adjoint.jl | 8 +- src/factorizations/factorizations.jl | 14 +- src/factorizations/matrixalgebrakit.jl | 14 +- src/factorizations/truncation.jl | 18 +- src/tensors/diagonal.jl | 2 +- src/tensors/linalg.jl | 12 +- src/tensors/tensor.jl | 4 +- test/amd/factorizations.jl | 406 ++++++++++++ test/amd/tensors.jl | 609 ++++++++++++++++++ test/cuda/factorizations.jl | 385 ++++++++++++ test/cuda/tensors.jl | 625 +++++++++++++++++++ test/factorizations.jl | 62 +- test/runtests.jl | 17 +- test/tensors.jl | 2 +- 21 files changed, 2970 insertions(+), 82 deletions(-) create mode 100644 ext/TensorKitAMDGPUExt/TensorKitAMDGPUExt.jl create mode 100644 ext/TensorKitAMDGPUExt/roctensormap.jl create mode 100644 ext/TensorKitCUDAExt/TensorKitCUDAExt.jl create mode 100644 ext/TensorKitCUDAExt/cutensormap.jl create mode 100644 test/amd/factorizations.jl create mode 100644 test/amd/tensors.jl create mode 100644 test/cuda/factorizations.jl create mode 100644 test/cuda/tensors.jl diff --git a/Project.toml b/Project.toml index aacd69f6..66c31fd7 100644 --- a/Project.toml +++ b/Project.toml @@ -18,19 +18,34 @@ TupleTools = "9d95972d-f1c8-5527-a6e0-b4b365fa01f6" VectorInterface = "409d34a3-91d5-4945-b6ec-7529ddf182d8" [weakdeps] +AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" +cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1" + +[sources] +GPUArrays = {rev 
= "master", url = "https://github.com/JuliaGPU/GPUArrays.jl"} +MatrixAlgebraKit = {rev = "ksh/tk", url = "https://github.com/QuantumKitHub/MatrixAlgebraKit.jl"} +AMDGPU = {rev = "master", url = "https://github.com/JuliaGPU/AMDGPU.jl"} +cuTENSOR = {subdir = "lib/cutensor", url = "https://github.com/JuliaGPU/CUDA.jl", rev="master"} [extensions] +TensorKitAMDGPUExt = "AMDGPU" +TensorKitCUDAExt = ["CUDA", "cuTENSOR"] TensorKitChainRulesCoreExt = "ChainRulesCore" TensorKitFiniteDifferencesExt = "FiniteDifferences" [compat] +AMDGPU = "2" +Adapt = "4" Aqua = "0.6, 0.7, 0.8" +CUDA = "5.8.4" ChainRulesCore = "1" ChainRulesTestUtils = "1" Combinatorics = "1" FiniteDifferences = "0.12" +GPUArrays = "11.2.6" LRUCache = "1.0.2" LinearAlgebra = "1" MatrixAlgebraKit = "0.5.0" @@ -39,26 +54,32 @@ PackageExtensionCompat = "1" Random = "1" ScopedValues = "1.3.0" Strided = "2" -TensorKitSectors = "0.3" +TensorKitSectors = "0.2" TensorOperations = "5.1" Test = "1" TestExtras = "0.2,0.3" TupleTools = "1.1" VectorInterface = "0.4.8, 0.5" Zygote = "0.7" +cuTENSOR = "2" julia = "1.10" [extras] +Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" +AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a" Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" +GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" TensorOperations = "6aa20fa7-93e2-5fca-9bc0-fbd0db3c71a2" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" TestExtras = "5ed8adda-3752-4e41-b88a-e8b09835ee3a" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" +cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1" [targets] -test = ["Aqua", "Combinatorics", "LinearAlgebra", "TensorOperations", "Test", "TestExtras", "ChainRulesCore", 
"ChainRulesTestUtils", "FiniteDifferences", "Zygote"] +test = ["Adapt", "AMDGPU", "Aqua", "Combinatorics", "CUDA", "cuTENSOR", "GPUArrays", "LinearAlgebra", "TensorOperations", "Test", "TestExtras", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote"] diff --git a/ext/TensorKitAMDGPUExt/TensorKitAMDGPUExt.jl b/ext/TensorKitAMDGPUExt/TensorKitAMDGPUExt.jl new file mode 100644 index 00000000..f3a67ca4 --- /dev/null +++ b/ext/TensorKitAMDGPUExt/TensorKitAMDGPUExt.jl @@ -0,0 +1,109 @@ +module TensorKitAMDGPUExt + +using AMDGPU, AMDGPU.rocBLAS, LinearAlgebra +using AMDGPU: @allowscalar +import AMDGPU: rand as rocrand, rand! as rocrand!, randn as rocrandn, randn! as rocrandn! + +using TensorKit +import TensorKit.VectorInterface: scalartype as vi_scalartype +using TensorKit.Factorizations +using TensorKit.Strided +using TensorKit.Factorizations: AbstractAlgorithm +using TensorKit: SectorDict, tensormaptype, scalar, similarstoragetype, AdjointTensorMap + +using TensorKit.MatrixAlgebraKit + +using Random + +include("roctensormap.jl") + +const ROCDiagonalTensorMap{T, S} = DiagonalTensorMap{T, S, ROCVector{T, AMDGPU.Mem.HIPBuffer}} + +""" + ROCDiagonalTensorMap{T}(undef, domain::S) where {T,S<:IndexSpace} + # expert mode: select storage type `A` + DiagonalTensorMap{T,S,A}(undef, domain::S) where {T,S<:IndexSpace,A<:DenseVector{T}} + +Construct a `DiagonalTensorMap` with uninitialized data. 
+""" +function ROCDiagonalTensorMap{T}(::UndefInitializer, V::TensorMapSpace) where {T} + (numin(V) == numout(V) == 1 && domain(V) == codomain(V)) || + throw(ArgumentError("DiagonalTensorMap requires a space with equal domain and codomain and 2 indices")) + return ROCDiagonalTensorMap{T}(undef, domain(V)) +end +function ROCDiagonalTensorMap{T}(::UndefInitializer, V::ProductSpace) where {T} + length(V) == 1 || + throw(ArgumentError("DiagonalTensorMap requires `numin(d) == numout(d) == 1`")) + return ROCDiagonalTensorMap{T}(undef, only(V)) +end +function ROCDiagonalTensorMap{T}(::UndefInitializer, V::S) where {T, S <: IndexSpace} + return ROCDiagonalTensorMap{T, S}(undef, V) +end +ROCDiagonalTensorMap(::UndefInitializer, V::IndexSpace) = ROCDiagonalTensorMap{Float64}(undef, V) + +function ROCDiagonalTensorMap(data::ROCVector{T}, V::S) where {T, S} + return ROCDiagonalTensorMap{T, S}(data, V) +end + +function ROCDiagonalTensorMap(data::Vector{T}, V::S) where {T, S} + return ROCDiagonalTensorMap{T, S}(ROCVector{T}(data), V) +end + +function TensorKit.Factorizations.MAK.initialize_output(::typeof(svd_full!), t::ROCDiagonalTensorMap, alg::DiagonalAlgorithm) + V_cod = fuse(codomain(t)) + V_dom = fuse(domain(t)) + U = similar(t, codomain(t) ← V_cod) + S = ROCDiagonalTensorMap{real(scalartype(t))}(undef, V_cod ← V_dom) + Vᴴ = similar(t, V_dom ← domain(t)) + return U, S, Vᴴ +end + +function TensorKit.Factorizations.MAK.initialize_output(::typeof(svd_vals!), t::ROCTensorMap, alg::AbstractAlgorithm) + V_cod = infimum(fuse(codomain(t)), fuse(domain(t))) + return ROCDiagonalTensorMap{real(scalartype(t))}(undef, V_cod) +end + +function TensorKit.Factorizations.MAK.initialize_output(::typeof(svd_compact!), t::ROCTensorMap, ::AbstractAlgorithm) + V_cod = V_dom = infimum(fuse(codomain(t)), fuse(domain(t))) + U = similar(t, codomain(t) ← V_cod) + S = ROCDiagonalTensorMap{real(scalartype(t))}(undef, V_cod) + Vᴴ = similar(t, V_dom ← domain(t)) + return U, S, Vᴴ +end + +function 
# MatrixAlgebraKit output initializers for eigen-decompositions of ROCTensorMaps.
# Eigenvalues are collected in a ROCDiagonalTensorMap over the fused domain space;
# eigenvectors live in `codomain(t) ← fuse(domain(t))`.

function TensorKit.Factorizations.MAK.initialize_output(::typeof(eigh_full!), t::ROCTensorMap, ::AbstractAlgorithm)
    V_D = fuse(domain(t))
    T = real(scalartype(t))  # hermitian spectrum is real
    D = ROCDiagonalTensorMap{T}(undef, V_D)
    V = similar(t, codomain(t) ← V_D)
    return D, V
end

function TensorKit.Factorizations.MAK.initialize_output(::typeof(eig_full!), t::ROCTensorMap, ::AbstractAlgorithm)
    V_D = fuse(domain(t))
    Tc = complex(scalartype(t))  # general (non-hermitian) spectrum may be complex
    D = ROCDiagonalTensorMap{Tc}(undef, V_D)
    V = similar(t, Tc, codomain(t) ← V_D)
    return D, V
end

function TensorKit.Factorizations.MAK.initialize_output(::typeof(eigh_vals!), t::ROCTensorMap, alg::AbstractAlgorithm)
    V_D = fuse(domain(t))
    T = real(scalartype(t))
    # BUG FIX: previous code returned `ROCDiagonalTensorMap{Tc}(undef, V_D)` with
    # `Tc` undefined in this method, raising UndefVarError at runtime.
    return ROCDiagonalTensorMap{T}(undef, V_D)
end

function TensorKit.Factorizations.MAK.initialize_output(::typeof(eig_vals!), t::ROCTensorMap, alg::AbstractAlgorithm)
    V_D = fuse(domain(t))
    Tc = complex(scalartype(t))
    return ROCDiagonalTensorMap{Tc}(undef, V_D)
end

# TODO
# add VectorInterface extensions for proper AMDGPU promotion
function TensorKit.VectorInterface.promote_add(TA::Type{<:AMDGPU.StridedROCMatrix{Tx}}, TB::Type{<:AMDGPU.StridedROCMatrix{Ty}}, α::Tα = TensorKit.VectorInterface.One(), β::Tβ = TensorKit.VectorInterface.One()) where {Tx, Ty, Tα, Tβ}
    # BUG FIX: `add` was referenced unqualified but is not imported in this module;
    # qualify it so promote_op resolves the intended VectorInterface function.
    return Base.promote_op(TensorKit.VectorInterface.add, Tx, Ty, Tα, Tβ)
end

end
ROCVector{eltype(TorA), AMDGPU.Mem.HIPBuffer}} + else + throw(ArgumentError("argument $TorA should specify a scalar type (`<:Number`) or a storage type `<:ROCVector{<:Number}`")) + end +end + +function ROCTensorMap{T}(::UndefInitializer, V::TensorMapSpace{S, N₁, N₂}) where {T, S, N₁, N₂} + return ROCTensorMap{T, S, N₁, N₂}(undef, V) +end + +function ROCTensorMap{T}( + ::UndefInitializer, codomain::TensorSpace{S}, + domain::TensorSpace{S} + ) where {T, S} + return ROCTensorMap{T}(undef, codomain ← domain) +end +function ROCTensor{T}(::UndefInitializer, V::TensorSpace{S}) where {T, S} + return ROCTensorMap{T}(undef, V ← one(V)) +end +# constructor starting from block data +""" + ROCTensorMap(data::AbstractDict{<:Sector,<:ROCMatrix}, codomain::ProductSpace{S,N₁}, + domain::ProductSpace{S,N₂}) where {S<:ElementarySpace,N₁,N₂} + ROCTensorMap(data, codomain ← domain) + ROCTensorMap(data, domain → codomain) + +Construct a `ROCTensorMap` by explicitly specifying its block data. + +## Arguments +- `data::AbstractDict{<:Sector,<:ROCMatrix}`: dictionary containing the block data for + each coupled sector `c` as a matrix of size `(blockdim(codomain, c), blockdim(domain, c))`. +- `codomain::ProductSpace{S,N₁}`: the codomain as a `ProductSpace` of `N₁` spaces of type + `S<:ElementarySpace`. +- `domain::ProductSpace{S,N₂}`: the domain as a `ProductSpace` of `N₂` spaces of type + `S<:ElementarySpace`. + +Alternatively, the domain and codomain can be specified by passing a [`HomSpace`](@ref) +using the syntax `codomain ← domain` or `domain → codomain`. 
+""" +function ROCTensorMap( + data::AbstractDict{<:Sector, <:ROCArray}, + V::TensorMapSpace{S, N₁, N₂} + ) where {S, N₁, N₂} + T = eltype(valtype(data)) + t = ROCTensorMap{T}(undef, V) + for (c, b) in blocks(t) + haskey(data, c) || throw(SectorMismatch("no data for block sector $c")) + datac = data[c] + size(datac) == size(b) || + throw(DimensionMismatch("wrong size of block for sector $c")) + copy!(b, datac) + end + for (c, b) in data + c ∈ blocksectors(t) || isempty(b) || + throw(SectorMismatch("data for block sector $c not expected")) + end + return t +end +function ROCTensorMap{T}( + data::DenseVector{T}, codomain::TensorSpace{S}, + domain::TensorSpace{S} + ) where {T, S} + return ROCTensorMap(data, codomain ← domain) +end +function ROCTensorMap( + data::AbstractDict{<:Sector, <:ROCMatrix}, codom::TensorSpace{S}, + dom::TensorSpace{S} + ) where {S} + return ROCTensorMap(data, codom ← dom) +end + +function ROCTensorMap(ts::TensorMap{T, S, N₁, N₂, A}) where {T, S, N₁, N₂, A} + return ROCTensorMap{T, S, N₁, N₂}(ROCArray(ts.data), ts.space) +end + +for (fname, felt) in ((:zeros, :zero), (:ones, :one)) + @eval begin + function AMDGPU.$fname( + codomain::TensorSpace{S}, + domain::TensorSpace{S} = one(codomain) + ) where {S <: IndexSpace} + return AMDGPU.$fname(codomain ← domain) + end + function AMDGPU.$fname( + ::Type{T}, codomain::TensorSpace{S}, + domain::TensorSpace{S} = one(codomain) + ) where {T, S <: IndexSpace} + return AMDGPU.$fname(T, codomain ← domain) + end + AMDGPU.$fname(V::TensorMapSpace) = AMDGPU.$fname(Float64, V) + function AMDGPU.$fname(::Type{T}, V::TensorMapSpace) where {T} + t = ROCTensorMap{T}(undef, V) + fill!(t, $felt(T)) + return t + end + end +end + +for randfun in (:rocrand, :rocrandn) + randfun! = Symbol(randfun, :!) 
+ @eval begin + # converting `codomain` and `domain` into `HomSpace` + function $randfun( + codomain::TensorSpace{S}, + domain::TensorSpace{S} + ) where {S <: IndexSpace} + return $randfun(codomain ← domain) + end + function $randfun( + ::Type{T}, codomain::TensorSpace{S}, + domain::TensorSpace{S} + ) where {T, S <: IndexSpace} + return $randfun(T, codomain ← domain) + end + function $randfun( + rng::Random.AbstractRNG, ::Type{T}, + codomain::TensorSpace{S}, + domain::TensorSpace{S} + ) where {T, S <: IndexSpace} + return $randfun(rng, T, codomain ← domain) + end + + # accepting single `TensorSpace` + $randfun(codomain::TensorSpace) = $randfun(codomain ← one(codomain)) + function $randfun(::Type{T}, codomain::TensorSpace) where {T} + return $randfun(T, codomain ← one(codomain)) + end + function $randfun( + rng::Random.AbstractRNG, ::Type{T}, + codomain::TensorSpace + ) where {T} + return $randfun(rng, T, codomain ← one(domain)) + end + + # filling in default eltype + $randfun(V::TensorMapSpace) = $randfun(Float64, V) + function $randfun(rng::Random.AbstractRNG, V::TensorMapSpace) + return $randfun(rng, Float64, V) + end + + # filling in default rng + function $randfun(::Type{T}, V::TensorMapSpace) where {T} + return $randfun(Random.default_rng(), T, V) + end + + # implementation + function $randfun( + rng::Random.AbstractRNG, ::Type{T}, + V::TensorMapSpace + ) where {T} + t = ROCTensorMap{T}(undef, V) + $randfun!(rng, t) + return t + end + end +end + +# converters +# ---------- +function Base.convert(::Type{ROCTensorMap}, d::Dict{Symbol, Any}) + try + codomain = eval(Meta.parse(d[:codomain])) + domain = eval(Meta.parse(d[:domain])) + data = SectorDict(eval(Meta.parse(c)) => ROCArray(b) for (c, b) in d[:data]) + return ROCTensorMap(data, codomain, domain) + catch e # sector unknown in TensorKit.jl; user-defined, hopefully accessible in Main + codomain = Base.eval(Main, Meta.parse(d[:codomain])) + domain = Base.eval(Main, Meta.parse(d[:domain])) + data = SectorDict( 
+ Base.eval(Main, Meta.parse(c)) => ROCArray(b) + for (c, b) in d[:data] + ) + return ROCTensorMap(data, codomain, domain) + end +end + +function Base.convert(::Type{ROCTensorMap}, t::AbstractTensorMap) + return copy!(ROCTensorMap{scalartype(t)}(undef, space(t)), t) +end + +# Scalar implementation +#----------------------- +function TensorKit.scalar(t::ROCTensorMap) + + # TODO: should scalar only work if N₁ == N₂ == 0? + return @allowscalar dim(codomain(t)) == dim(domain(t)) == 1 ? + first(blocks(t))[2][1, 1] : throw(DimensionMismatch()) +end + +TensorKit.scalartype(A::StridedROCArray{T}) where {T} = T +vi_scalartype(::Type{<:ROCTensorMap{T}}) where {T} = T +vi_scalartype(::Type{<:ROCArray{T}}) where {T} = T + +function TensorKit.similarstoragetype(TT::Type{<:ROCTensorMap{TTT, S, N₁, N₂}}, ::Type{T}) where {TTT, T, S, N₁, N₂} + return ROCVector{T, AMDGPU.Mem.HIPBuffer} +end + +function Base.convert( + TT::Type{ROCTensorMap{T, S, N₁, N₂}}, + t::AbstractTensorMap{<:Any, S, N₁, N₂} + ) where {T, S, N₁, N₂} + if typeof(t) === TT + return t + else + tnew = TT(undef, space(t)) + return copy!(tnew, t) + end +end + +function Base.copy!(tdst::ROCTensorMap{T, S, N₁, N₂}, tsrc::ROCTensorMap{T, S, N₁, N₂}) where {T, S, N₁, N₂} + space(tdst) == space(tsrc) || throw(SpaceMismatch("$(space(tdst)) ≠ $(space(tsrc))")) + for ((c, bdst), (_, bsrc)) in zip(blocks(tdst), blocks(tsrc)) + copy!(bdst, bsrc) + end + return tdst +end + +function Base.copy!(tdst::ROCTensorMap, tsrc::TensorKit.AdjointTensorMap) + space(tdst) == space(tsrc) || throw(SpaceMismatch("$(space(tdst)) ≠ $(space(tsrc))")) + for ((c, bdst), (_, bsrc)) in zip(blocks(tdst), blocks(tsrc)) + copy!(bdst, bsrc) + end + return tdst +end + +function Base.promote_rule( + ::Type{<:TT₁}, + ::Type{<:TT₂} + ) where { + S, N₁, N₂, TTT₁, TTT₂, + TT₁ <: ROCTensorMap{TTT₁, S, N₁, N₂}, + TT₂ <: ROCTensorMap{TTT₂, S, N₁, N₂}, + } + T = TensorKit.VectorInterface.promote_add(TTT₁, TTT₂) + return ROCTensorMap{T, S, N₁, N₂} +end + +function 
LinearAlgebra.isposdef(t::ROCTensorMap) + domain(t) == codomain(t) || + throw(SpaceMismatch("`isposdef` requires domain and codomain to be the same")) + InnerProductStyle(spacetype(t)) === EuclideanInnerProduct() || return false + for (c, b) in blocks(t) + # do our own hermitian check + isherm = TensorKit.MatrixAlgebraKit.ishermitian(b; atol = eps(real(eltype(b))), rtol = eps(real(eltype(b)))) + isherm || return false + isposdef(Hermitian(b)) || return false + end + return true +end + +# Conversion to ROCArray: +#---------------------- +# probably not optimized for speed, only for checking purposes +function Base.convert(::Type{ROCArray}, t::AbstractTensorMap) + I = sectortype(t) + if I === Trivial + convert(ROCArray, t[]) + else + cod = codomain(t) + dom = domain(t) + T = sectorscalartype(I) <: Complex ? complex(scalartype(t)) : + sectorscalartype(I) <: Integer ? scalartype(t) : float(scalartype(t)) + A = AMDGPU.zeros(T, dims(cod)..., dims(dom)...) + for (f₁, f₂) in fusiontrees(t) + F = convert(ROCArray, (f₁, f₂)) + Aslice = StridedView(A)[axes(cod, f₁.uncoupled)..., axes(dom, f₂.uncoupled)...] + add!(Aslice, StridedView(TensorKit._kron(convert(ROCArray, t[f₁, f₂]), F))) + end + return A + end +end diff --git a/ext/TensorKitCUDAExt/TensorKitCUDAExt.jl b/ext/TensorKitCUDAExt/TensorKitCUDAExt.jl new file mode 100644 index 00000000..dae307d3 --- /dev/null +++ b/ext/TensorKitCUDAExt/TensorKitCUDAExt.jl @@ -0,0 +1,107 @@ +module TensorKitCUDAExt + +using CUDA, CUDA.CUBLAS, LinearAlgebra +using CUDA: @allowscalar +using cuTENSOR: cuTENSOR +import CUDA: rand as curand, rand! as curand!, randn as curandn, randn! as curandn! 
+ +using TensorKit +import TensorKit.VectorInterface: scalartype as vi_scalartype +using TensorKit.Factorizations +using TensorKit.Strided +using TensorKit.Factorizations: AbstractAlgorithm +using TensorKit: SectorDict, tensormaptype, scalar, similarstoragetype, AdjointTensorMap +import TensorKit: randisometry + +using TensorKit.MatrixAlgebraKit + +using Random + +include("cutensormap.jl") + +const CuDiagonalTensorMap{T, S} = DiagonalTensorMap{T, S, CuVector{T, CUDA.DeviceMemory}} + +""" + CuDiagonalTensorMap{T}(undef, domain::S) where {T,S<:IndexSpace} + # expert mode: select storage type `A` + DiagonalTensorMap{T,S,A}(undef, domain::S) where {T,S<:IndexSpace,A<:DenseVector{T}} + +Construct a `DiagonalTensorMap` with uninitialized data. +""" +function CuDiagonalTensorMap{T}(::UndefInitializer, V::TensorMapSpace) where {T} + (numin(V) == numout(V) == 1 && domain(V) == codomain(V)) || + throw(ArgumentError("DiagonalTensorMap requires a space with equal domain and codomain and 2 indices")) + return CuDiagonalTensorMap{T}(undef, domain(V)) +end +function CuDiagonalTensorMap{T}(::UndefInitializer, V::ProductSpace) where {T} + length(V) == 1 || + throw(ArgumentError("DiagonalTensorMap requires `numin(d) == numout(d) == 1`")) + return CuDiagonalTensorMap{T}(undef, only(V)) +end +function CuDiagonalTensorMap{T}(::UndefInitializer, V::S) where {T, S <: IndexSpace} + return CuDiagonalTensorMap{T, S}(undef, V) +end +CuDiagonalTensorMap(::UndefInitializer, V::IndexSpace) = CuDiagonalTensorMap{Float64}(undef, V) + +function CuDiagonalTensorMap(data::CuVector{T}, V::S) where {T, S} + return CuDiagonalTensorMap{T, S}(data, V) +end + +function TensorKit.Factorizations.MAK.initialize_output(::typeof(svd_full!), t::CuDiagonalTensorMap, alg::DiagonalAlgorithm) + V_cod = fuse(codomain(t)) + V_dom = fuse(domain(t)) + U = similar(t, codomain(t) ← V_cod) + S = CuDiagonalTensorMap{real(scalartype(t))}(undef, V_cod ← V_dom) + Vᴴ = similar(t, V_dom ← domain(t)) + return U, S, Vᴴ +end + 
# MatrixAlgebraKit output initializers for factorizations of CuTensorMaps.
# Singular/eigen-values are collected in a CuDiagonalTensorMap over the fused space.

function TensorKit.Factorizations.MAK.initialize_output(::typeof(svd_vals!), t::CuTensorMap, alg::AbstractAlgorithm)
    # number of singular values = dimension of the smaller fused space
    V_cod = infimum(fuse(codomain(t)), fuse(domain(t)))
    return CuDiagonalTensorMap{real(scalartype(t))}(undef, V_cod)
end

function TensorKit.Factorizations.MAK.initialize_output(::typeof(svd_compact!), t::CuTensorMap, ::AbstractAlgorithm)
    V_cod = V_dom = infimum(fuse(codomain(t)), fuse(domain(t)))
    U = similar(t, codomain(t) ← V_cod)
    S = CuDiagonalTensorMap{real(scalartype(t))}(undef, V_cod)
    Vᴴ = similar(t, V_dom ← domain(t))
    return U, S, Vᴴ
end

function TensorKit.Factorizations.MAK.initialize_output(::typeof(eigh_full!), t::CuTensorMap, ::AbstractAlgorithm)
    V_D = fuse(domain(t))
    T = real(scalartype(t))  # hermitian spectrum is real
    D = CuDiagonalTensorMap{T}(undef, V_D)
    V = similar(t, codomain(t) ← V_D)
    return D, V
end

function TensorKit.Factorizations.MAK.initialize_output(::typeof(eig_full!), t::CuTensorMap, ::AbstractAlgorithm)
    V_D = fuse(domain(t))
    Tc = complex(scalartype(t))  # general (non-hermitian) spectrum may be complex
    D = CuDiagonalTensorMap{Tc}(undef, V_D)
    V = similar(t, Tc, codomain(t) ← V_D)
    return D, V
end

function TensorKit.Factorizations.MAK.initialize_output(::typeof(eigh_vals!), t::CuTensorMap, alg::AbstractAlgorithm)
    V_D = fuse(domain(t))
    T = real(scalartype(t))
    # BUG FIX: previous code returned `CuDiagonalTensorMap{Tc}(undef, V_D)` with
    # `Tc` undefined in this method, raising UndefVarError at runtime.
    return CuDiagonalTensorMap{T}(undef, V_D)
end

function TensorKit.Factorizations.MAK.initialize_output(::typeof(eig_vals!), t::CuTensorMap, alg::AbstractAlgorithm)
    V_D = fuse(domain(t))
    Tc = complex(scalartype(t))
    return CuDiagonalTensorMap{Tc}(undef, V_D)
end


# TODO
# add VectorInterface extensions for proper CUDA promotion
function TensorKit.VectorInterface.promote_add(TA::Type{<:CUDA.StridedCuMatrix{Tx}}, TB::Type{<:CUDA.StridedCuMatrix{Ty}}, α::Tα = TensorKit.VectorInterface.One(), β::Tβ = TensorKit.VectorInterface.One()) where {Tx, Ty, Tα, Tβ}
    # BUG FIX: `add` was referenced unqualified but is not imported in this module;
    # qualify it so promote_op resolves the intended VectorInterface function.
    return Base.promote_op(TensorKit.VectorInterface.add, Tx, Ty, Tα, Tβ)
end

end
b/ext/TensorKitCUDAExt/cutensormap.jl new file mode 100644 index 00000000..1f751c56 --- /dev/null +++ b/ext/TensorKitCUDAExt/cutensormap.jl @@ -0,0 +1,340 @@ +const CuTensorMap{T, S, N₁, N₂} = TensorMap{T, S, N₁, N₂, CuVector{T, CUDA.DeviceMemory}} +const CuTensor{T, S, N} = CuTensorMap{T, S, N, 0} + +const AdjointCuTensorMap{T, S, N₁, N₂} = AdjointTensorMap{T, S, N₁, N₂, CuTensorMap{T, S, N₁, N₂}} + +function TensorKit.tensormaptype(S::Type{<:IndexSpace}, N₁, N₂, TorA::Type{<:StridedCuArray}) + if TorA <: CuArray + return TensorMap{eltype(TorA), S, N₁, N₂, CuVector{eltype(TorA), CUDA.DeviceMemory}} + else + throw(ArgumentError("argument $TorA should specify a scalar type (`<:Number`) or a storage type `<:CuVector{<:Number}`")) + end +end + +function CuTensorMap{T}(::UndefInitializer, V::TensorMapSpace{S, N₁, N₂}) where {T, S, N₁, N₂} + return CuTensorMap{T, S, N₁, N₂}(undef, V) +end + +function CuTensorMap{T}( + ::UndefInitializer, codomain::TensorSpace{S}, + domain::TensorSpace{S} + ) where {T, S} + return CuTensorMap{T}(undef, codomain ← domain) +end +function CuTensor{T}(::UndefInitializer, V::TensorSpace{S}) where {T, S} + return CuTensorMap{T}(undef, V ← one(V)) +end +# constructor starting from block data +""" + CuTensorMap(data::AbstractDict{<:Sector,<:CuMatrix}, codomain::ProductSpace{S,N₁}, + domain::ProductSpace{S,N₂}) where {S<:ElementarySpace,N₁,N₂} + CuTensorMap(data, codomain ← domain) + CuTensorMap(data, domain → codomain) + +Construct a `CuTensorMap` by explicitly specifying its block data. + +## Arguments +- `data::AbstractDict{<:Sector,<:CuMatrix}`: dictionary containing the block data for + each coupled sector `c` as a matrix of size `(blockdim(codomain, c), blockdim(domain, c))`. +- `codomain::ProductSpace{S,N₁}`: the codomain as a `ProductSpace` of `N₁` spaces of type + `S<:ElementarySpace`. +- `domain::ProductSpace{S,N₂}`: the domain as a `ProductSpace` of `N₂` spaces of type + `S<:ElementarySpace`. 
+ +Alternatively, the domain and codomain can be specified by passing a [`HomSpace`](@ref) +using the syntax `codomain ← domain` or `domain → codomain`. +""" +function CuTensorMap( + data::AbstractDict{<:Sector, <:CuArray}, + V::TensorMapSpace{S, N₁, N₂} + ) where {S, N₁, N₂} + T = eltype(valtype(data)) + t = CuTensorMap{T}(undef, V) + for (c, b) in blocks(t) + haskey(data, c) || throw(SectorMismatch("no data for block sector $c")) + datac = data[c] + size(datac) == size(b) || + throw(DimensionMismatch("wrong size of block for sector $c")) + copy!(b, datac) + end + for (c, b) in data + c ∈ blocksectors(t) || isempty(b) || + throw(SectorMismatch("data for block sector $c not expected")) + end + return t +end +function CuTensorMap{T}( + data::DenseVector{T}, codomain::TensorSpace{S}, + domain::TensorSpace{S} + ) where {T, S} + return CuTensorMap(data, codomain ← domain) +end +function CuTensorMap( + data::AbstractDict{<:Sector, <:CuMatrix}, codom::TensorSpace{S}, + dom::TensorSpace{S} + ) where {S} + return CuTensorMap(data, codom ← dom) +end +function CuTensorMap(data::DenseVector{T}, V::TensorMapSpace{S, N₁, N₂}) where {T, S, N₁, N₂} + return CuTensorMap{T, S, N₁, N₂}(data, V) +end +function CuTensorMap(data::CuArray{T}, V::TensorMapSpace{S, N₁, N₂}) where {T, S, N₁, N₂} + return CuTensorMap{T, S, N₁, N₂}(vec(data), V) +end + +for (fname, felt) in ((:zeros, :zero), (:ones, :one)) + @eval begin + function CUDA.$fname( + codomain::TensorSpace{S}, + domain::TensorSpace{S} = one(codomain) + ) where {S <: IndexSpace} + return CUDA.$fname(codomain ← domain) + end + function CUDA.$fname( + ::Type{T}, codomain::TensorSpace{S}, + domain::TensorSpace{S} = one(codomain) + ) where {T, S <: IndexSpace} + return CUDA.$fname(T, codomain ← domain) + end + CUDA.$fname(V::TensorMapSpace) = CUDA.$fname(Float64, V) + function CUDA.$fname(::Type{T}, V::TensorMapSpace) where {T} + t = CuTensorMap{T}(undef, V) + fill!(t, $felt(T)) + return t + end + end +end + +for randfun in (:curand, 
for randfun in (:curand, :curandn)
    randfun! = Symbol(randfun, :!)
    @eval begin
        # converting `codomain` and `domain` into `HomSpace`
        function $randfun(
                codomain::TensorSpace{S},
                domain::TensorSpace{S}
            ) where {S <: IndexSpace}
            return $randfun(codomain ← domain)
        end
        function $randfun(
                ::Type{T}, codomain::TensorSpace{S},
                domain::TensorSpace{S}
            ) where {T, S <: IndexSpace}
            return $randfun(T, codomain ← domain)
        end
        function $randfun(
                rng::Random.AbstractRNG, ::Type{T},
                codomain::TensorSpace{S},
                domain::TensorSpace{S}
            ) where {T, S <: IndexSpace}
            return $randfun(rng, T, codomain ← domain)
        end

        # accepting single `TensorSpace`
        $randfun(codomain::TensorSpace) = $randfun(codomain ← one(codomain))
        function $randfun(::Type{T}, codomain::TensorSpace) where {T}
            return $randfun(T, codomain ← one(codomain))
        end
        function $randfun(
                rng::Random.AbstractRNG, ::Type{T},
                codomain::TensorSpace
            ) where {T}
            # BUG FIX: was `one(domain)`, but `domain` is not a parameter of this
            # method and would raise UndefVarError; use `one(codomain)` as in the
            # rng-free methods above.
            return $randfun(rng, T, codomain ← one(codomain))
        end

        # filling in default eltype
        $randfun(V::TensorMapSpace) = $randfun(Float64, V)
        function $randfun(rng::Random.AbstractRNG, V::TensorMapSpace)
            return $randfun(rng, Float64, V)
        end

        # filling in default rng
        function $randfun(::Type{T}, V::TensorMapSpace) where {T}
            return $randfun(Random.default_rng(), T, V)
        end

        # implementation: allocate uninitialized, then fill in-place on device
        function $randfun(
                rng::Random.AbstractRNG, ::Type{T},
                V::TensorMapSpace
            ) where {T}
            t = CuTensorMap{T}(undef, V)
            $randfun!(rng, t)
            return t
        end
    end
end

for randfun in (:rand, :randn, :randisometry)
    randfun! = Symbol(randfun, :!)
+ @eval begin + # converting `codomain` and `domain` into `HomSpace` + function $randfun( + ::Type{A}, codomain::TensorSpace{S}, + domain::TensorSpace{S} + ) where {A <: CuArray, S <: IndexSpace} + return $randfun(A, codomain ← domain) + end + function $randfun( + ::Type{T}, ::Type{A}, codomain::TensorSpace{S}, + domain::TensorSpace{S} + ) where {T, S <: IndexSpace, A <: CuArray{T}} + return $randfun(T, A, codomain ← domain) + end + function $randfun( + rng::Random.AbstractRNG, ::Type{T}, ::Type{A}, + codomain::TensorSpace{S}, + domain::TensorSpace{S} + ) where {T, S <: IndexSpace, A <: CuArray{T}} + return $randfun(rng, T, A, codomain ← domain) + end + + # accepting single `TensorSpace` + $randfun(::Type{A}, codomain::TensorSpace) where {A <: CuArray} = $randfun(A, codomain ← one(codomain)) + function $randfun(::Type{T}, ::Type{A}, codomain::TensorSpace) where {T, A <: CuArray{T}} + return $randfun(T, A, codomain ← one(codomain)) + end + function $randfun( + rng::Random.AbstractRNG, ::Type{T}, + ::Type{A}, codomain::TensorSpace + ) where {T, A <: CuArray{T}} + return $randfun(rng, T, A, codomain ← one(domain)) + end + + # filling in default eltype + $randfun(::Type{A}, V::TensorMapSpace) where {A <: CuArray} = $randfun(eltype(A), A, V) + function $randfun(rng::Random.AbstractRNG, ::Type{A}, V::TensorMapSpace) where {A <: CuArray} + return $randfun(rng, eltype(A), A, V) + end + + # filling in default rng + function $randfun(::Type{T}, ::Type{A}, V::TensorMapSpace) where {T, A <: CuArray{T}} + return $randfun(Random.default_rng(), T, A, V) + end + + # implementation + function $randfun( + rng::Random.AbstractRNG, ::Type{T}, + ::Type{A}, V::TensorMapSpace + ) where {T, A <: CuArray{T}} + t = CuTensorMap{T}(undef, V) + $randfun!(rng, t) + return t + end + end +end + +# converters +# ---------- +function Base.convert(::Type{CuTensorMap}, d::Dict{Symbol, Any}) + try + codomain = eval(Meta.parse(d[:codomain])) + domain = eval(Meta.parse(d[:domain])) + data = 
SectorDict(eval(Meta.parse(c)) => CuArray(b) for (c, b) in d[:data]) + return CuTensorMap(data, codomain, domain) + catch e # sector unknown in TensorKit.jl; user-defined, hopefully accessible in Main + codomain = Base.eval(Main, Meta.parse(d[:codomain])) + domain = Base.eval(Main, Meta.parse(d[:domain])) + data = SectorDict( + Base.eval(Main, Meta.parse(c)) => CuArray(b) + for (c, b) in d[:data] + ) + return CuTensorMap(data, codomain, domain) + end +end + +function Base.convert(::Type{CuTensorMap}, t::AbstractTensorMap) + return copy!(CuTensorMap{scalartype(t)}(undef, space(t)), t) +end + +# Scalar implementation +#----------------------- +function TensorKit.scalar(t::CuTensorMap) + + # TODO: should scalar only work if N₁ == N₂ == 0? + return @allowscalar dim(codomain(t)) == dim(domain(t)) == 1 ? + first(blocks(t))[2][1, 1] : throw(DimensionMismatch()) +end + +TensorKit.scalartype(A::StridedCuArray{T}) where {T} = T +vi_scalartype(::Type{<:CuTensorMap{T}}) where {T} = T +vi_scalartype(::Type{<:CuArray{T}}) where {T} = T + +function TensorKit.similarstoragetype(TT::Type{<:CuTensorMap{TTT, S, N₁, N₂}}, ::Type{T}) where {TTT, T, S, N₁, N₂} + return CuVector{T, CUDA.DeviceMemory} +end + +function Base.convert( + TT::Type{CuTensorMap{T, S, N₁, N₂}}, + t::AbstractTensorMap{<:Any, S, N₁, N₂} + ) where {T, S, N₁, N₂} + if typeof(t) === TT + return t + else + tnew = TT(undef, space(t)) + return copy!(tnew, t) + end +end + +function Base.copy!(tdst::CuTensorMap{T, S, N₁, N₂}, tsrc::CuTensorMap{T, S, N₁, N₂}) where {T, S, N₁, N₂} + space(tdst) == space(tsrc) || throw(SpaceMismatch("$(space(tdst)) ≠ $(space(tsrc))")) + for ((c, bdst), (_, bsrc)) in zip(blocks(tdst), blocks(tsrc)) + copy!(bdst, bsrc) + end + return tdst +end + +function Base.copy!(tdst::CuTensorMap, tsrc::TensorKit.AdjointTensorMap) + space(tdst) == space(tsrc) || throw(SpaceMismatch("$(space(tdst)) ≠ $(space(tsrc))")) + for ((c, bdst), (_, bsrc)) in zip(blocks(tdst), blocks(tsrc)) + copy!(bdst, bsrc) + end + 
return tdst +end + +function Base.promote_rule( + ::Type{<:TT₁}, + ::Type{<:TT₂} + ) where { + S, N₁, N₂, TTT₁, TTT₂, + TT₁ <: CuTensorMap{TTT₁, S, N₁, N₂}, + TT₂ <: CuTensorMap{TTT₂, S, N₁, N₂}, + } + T = TensorKit.VectorInterface.promote_add(TTT₁, TTT₂) + return CuTensorMap{T, S, N₁, N₂} +end + +function LinearAlgebra.isposdef(t::CuTensorMap) + domain(t) == codomain(t) || + throw(SpaceMismatch("`isposdef` requires domain and codomain to be the same")) + InnerProductStyle(spacetype(t)) === EuclideanInnerProduct() || return false + for (c, b) in blocks(t) + # do our own hermitian check + isherm = TensorKit.MatrixAlgebraKit.ishermitian(b; atol = eps(real(eltype(b))), rtol = eps(real(eltype(b)))) + isherm || return false + isposdef(Hermitian(b)) || return false + end + return true +end + + +# Conversion to CuArray: +#---------------------- +# probably not optimized for speed, only for checking purposes +function Base.convert(::Type{CuArray}, t::AbstractTensorMap) + I = sectortype(t) + if I === Trivial + convert(CuArray, t[]) + else + cod = codomain(t) + dom = domain(t) + T = sectorscalartype(I) <: Complex ? complex(scalartype(t)) : + sectorscalartype(I) <: Integer ? scalartype(t) : float(scalartype(t)) + A = CUDA.zeros(T, dims(cod)..., dims(dom)...) + for (f₁, f₂) in fusiontrees(t) + F = convert(CuArray, (f₁, f₂)) + Aslice = StridedView(A)[axes(cod, f₁.uncoupled)..., axes(dom, f₂.uncoupled)...] 
+ add!(Aslice, StridedView(TensorKit._kron(convert(CuArray, t[f₁, f₂]), F))) + end + return A + end +end diff --git a/src/TensorKit.jl b/src/TensorKit.jl index 6b59dac9..0061d8f8 100644 --- a/src/TensorKit.jl +++ b/src/TensorKit.jl @@ -82,7 +82,7 @@ export left_orth, right_orth, left_null, right_null, eigh_full!, eigh_full, eigh_trunc!, eigh_trunc, eig_full!, eig_full, eig_trunc!, eig_trunc, eigh_vals!, eigh_vals, eig_vals!, eig_vals, - isposdef, isposdef!, ishermitian, isisometry, isunitary, sylvester, rank, cond + isposdef, isposdef!, ishermitian, isisometric, isunitary, sylvester, rank, cond export braid, braid!, permute, permute!, transpose, transpose!, twist, twist!, repartition, repartition! diff --git a/src/auxiliary/random.jl b/src/auxiliary/random.jl index 02280951..9aed1650 100644 --- a/src/auxiliary/random.jl +++ b/src/auxiliary/random.jl @@ -1,17 +1,20 @@ """ - randisometry([::Type{T}=Float64], dims::Dims{2}) -> Array{T,2} - randhaar([::Type{T}=Float64], dims::Dims{2}) -> Array{T,2} + randisometry([::Type{T}=Float64], [::Type{A}=Matrix{T}], dims::Dims{2}) -> A + randhaar([::Type{T}=Float64], [::Type{A}=Matrix{T}], dims::Dims{2}) -> A Create a random isometry of size `dims`, uniformly distributed according to the Haar measure. See also [`randuniform`](@ref) and [`randnormal`](@ref). 
""" -randisometry(dims::Base.Dims{2}) = randisometry(Float64, dims) +randisometry(dims::Base.Dims{2}) = randisometry(Float64, Matrix{Float64}, dims) function randisometry(::Type{T}, dims::Base.Dims{2}) where {T <: Number} return randisometry(Random.default_rng(), T, dims) end -function randisometry(rng::Random.AbstractRNG, ::Type{T}, dims::Base.Dims{2}) where {T <: Number} - return randisometry!(rng, Matrix{T}(undef, dims)) +function randisometry(::Type{T}, ::Type{A}, dims::Base.Dims{2}) where {T <: Number, A <: AbstractArray{T}} + return randisometry(Random.default_rng(), T, A, dims) +end +function randisometry(rng::Random.AbstractRNG, ::Type{T}, ::Type{A}, dims::Base.Dims{2}) where {T <: Number, A <: AbstractArray{T}} + return randisometry!(rng, A(undef, dims)) end randisometry!(A::AbstractMatrix) = randisometry!(Random.default_rng(), A) diff --git a/src/factorizations/adjoint.jl b/src/factorizations/adjoint.jl index 30a68c13..ef7e5e26 100644 --- a/src/factorizations/adjoint.jl +++ b/src/factorizations/adjoint.jl @@ -29,11 +29,11 @@ function MAK.right_null!(t::AdjointTensorMap, N, alg::AbstractAlgorithm) return N end -function MAK.is_left_isometry(t::AdjointTensorMap; kwargs...) - return is_right_isometry(adjoint(t); kwargs...) +function MAK.is_left_isometric(t::AdjointTensorMap; kwargs...) + return is_right_isometric(adjoint(t); kwargs...) end -function MAK.is_right_isometry(t::AdjointTensorMap; kwargs...) - return is_left_isometry(adjoint(t); kwargs...) +function MAK.is_right_isometric(t::AdjointTensorMap; kwargs...) + return is_left_isometric(adjoint(t); kwargs...) 
end # 2-arg functions diff --git a/src/factorizations/factorizations.jl b/src/factorizations/factorizations.jl index 29d2c16a..c53b9ecf 100644 --- a/src/factorizations/factorizations.jl +++ b/src/factorizations/factorizations.jl @@ -6,7 +6,7 @@ module Factorizations export copy_oftype, factorisation_scalartype, one!, truncspace using ..TensorKit -using ..TensorKit: AdjointTensorMap, SectorDict, blocktype, foreachblock, one! +using ..TensorKit: AdjointTensorMap, SectorDict, blocktype, foreachblock, one!, similarstoragetype using LinearAlgebra: LinearAlgebra, BlasFloat, Diagonal, svdvals, svdvals!, eigen, eigen!, isposdef, isposdef!, ishermitian @@ -30,9 +30,9 @@ include("pullbacks.jl") TensorKit.one!(A::AbstractMatrix) = MatrixAlgebraKit.one!(A) -function MatrixAlgebraKit.isisometry(t::AbstractTensorMap, (p₁, p₂)::Index2Tuple) +function MatrixAlgebraKit.isisometric(t::AbstractTensorMap, (p₁, p₂)::Index2Tuple) t = permute(t, (p₁, p₂); copy = false) - return isisometry(t) + return isisometric(t) end #------------------------------# @@ -84,14 +84,14 @@ function LinearAlgebra.isposdef!(t::AbstractTensorMap) end # TODO: tolerances are per-block, not global or weighted - does that matter? -function MatrixAlgebraKit.is_left_isometry(t::AbstractTensorMap; kwargs...) +function MatrixAlgebraKit.is_left_isometric(t::AbstractTensorMap; kwargs...) domain(t) ≾ codomain(t) || return false - f((c, b)) = MatrixAlgebraKit.is_left_isometry(b; kwargs...) + f((c, b)) = MatrixAlgebraKit.is_left_isometric(b; kwargs...) return all(f, blocks(t)) end -function MatrixAlgebraKit.is_right_isometry(t::AbstractTensorMap; kwargs...) +function MatrixAlgebraKit.is_right_isometric(t::AbstractTensorMap; kwargs...) domain(t) ≿ codomain(t) || return false - f((c, b)) = MatrixAlgebraKit.is_right_isometry(b; kwargs...) + f((c, b)) = MatrixAlgebraKit.is_right_isometric(b; kwargs...) 
return all(f, blocks(t)) end diff --git a/src/factorizations/matrixalgebrakit.jl b/src/factorizations/matrixalgebrakit.jl index fc5f8450..b648b38d 100644 --- a/src/factorizations/matrixalgebrakit.jl +++ b/src/factorizations/matrixalgebrakit.jl @@ -144,7 +144,8 @@ end function MAK.initialize_output(::typeof(svd_compact!), t::AbstractTensorMap, ::AbstractAlgorithm) V_cod = V_dom = infimum(fuse(codomain(t)), fuse(domain(t))) U = similar(t, codomain(t) ← V_cod) - S = DiagonalTensorMap{real(scalartype(t))}(undef, V_cod) + output_type = similarstoragetype(t, real(scalartype(t))) + S = DiagonalTensorMap{real(scalartype(t)), typeof(V_cod), output_type}(undef, V_cod) Vᴴ = similar(t, V_dom ← domain(t)) return U, S, Vᴴ end @@ -156,7 +157,8 @@ end function MAK.initialize_output(::typeof(svd_vals!), t::AbstractTensorMap, alg::AbstractAlgorithm) V_cod = infimum(fuse(codomain(t)), fuse(domain(t))) - return DiagonalTensorMap{real(scalartype(t))}(undef, V_cod) + output_storage_type = similarstoragetype(t, real(scalartype(t))) + return DiagonalTensorMap{real(scalartype(t)), typeof(V_cod), output_storage_type}(undef, V_cod) end # Eigenvalue decomposition @@ -224,7 +226,7 @@ end function MAK.initialize_output(::typeof(eigh_full!), t::AbstractTensorMap, ::AbstractAlgorithm) V_D = fuse(domain(t)) T = real(scalartype(t)) - D = DiagonalTensorMap{T}(undef, V_D) + D = DiagonalTensorMap{T, typeof(V_D), similarstoragetype(t, T)}(undef, V_D) V = similar(t, codomain(t) ← V_D) return D, V end @@ -232,7 +234,7 @@ end function MAK.initialize_output(::typeof(eig_full!), t::AbstractTensorMap, ::AbstractAlgorithm) V_D = fuse(domain(t)) Tc = complex(scalartype(t)) - D = DiagonalTensorMap{Tc}(undef, V_D) + D = DiagonalTensorMap{Tc, typeof(V_D), similarstoragetype(t, Tc)}(undef, V_D) V = similar(t, Tc, codomain(t) ← V_D) return D, V end @@ -240,13 +242,13 @@ end function MAK.initialize_output(::typeof(eigh_vals!), t::AbstractTensorMap, alg::AbstractAlgorithm) V_D = fuse(domain(t)) T = 
real(scalartype(t))
-    return D = DiagonalTensorMap{Tc}(undef, V_D)
+    return D = DiagonalTensorMap{T, typeof(V_D), similarstoragetype(t, T)}(undef, V_D)
 end
 
 function MAK.initialize_output(::typeof(eig_vals!), t::AbstractTensorMap, alg::AbstractAlgorithm)
     V_D = fuse(domain(t))
     Tc = complex(scalartype(t))
-    return D = DiagonalTensorMap{Tc}(undef, V_D)
+    return D = DiagonalTensorMap{Tc, typeof(V_D), similarstoragetype(t, Tc)}(undef, V_D)
 end
 
 # QR decomposition
diff --git a/src/factorizations/truncation.jl b/src/factorizations/truncation.jl
index 19f39759..dc2ea38a 100644
--- a/src/factorizations/truncation.jl
+++ b/src/factorizations/truncation.jl
@@ -67,7 +67,7 @@ function MAK.truncate(
     Ũ = similar(U, codomain(U) ← V_truncated)
     truncate_domain!(Ũ, U, ind)
 
-    S̃ = DiagonalTensorMap{scalartype(S)}(undef, V_truncated)
+    S̃ = DiagonalTensorMap{scalartype(S), typeof(V_truncated), storagetype(S)}(undef, V_truncated)
     truncate_diagonal!(S̃, S, ind)
     Ṽᴴ = similar(Vᴴ, V_truncated ← domain(Vᴴ))
     truncate_codomain!(Ṽᴴ, Vᴴ, ind)
@@ -100,7 +100,7 @@ for f! in (:eig_trunc!, :eigh_trunc!)
         ind = MAK.findtruncated(diagview(D), strategy)
         V_truncated = truncate_space(space(D, 1), ind)
 
-        D̃ = DiagonalTensorMap{scalartype(D)}(undef, V_truncated)
+        D̃ = DiagonalTensorMap{scalartype(D), typeof(V_truncated), storagetype(D)}(undef, V_truncated)
         truncate_diagonal!(D̃, D, ind)
 
         Ṽ = similar(V, codomain(V) ← V_truncated)
@@ -129,17 +129,15 @@ function _findnexttruncvalue(
     ) where {I <: Sector}
     # early return
     (isempty(S) || all(iszero, values(truncdim))) && return nothing
+    mapped_keys = map(keys(truncdim)) do c
+        d = truncdim[c]
+        return by(S[c][d])
+    end
     if rev
-        σmin, imin = findmin(keys(truncdim)) do c
-            d = truncdim[c]
-            return by(S[c][d])
-        end
+        σmin, imin = findmin(mapped_keys)
         return σmin, keys(truncdim)[imin]
     else
-        σmax, imax = findmax(keys(truncdim)) do c
-            d = truncdim[c]
-            return by(S[c][d])
-        end
+        σmax, imax = findmax(mapped_keys)
         return σmax, keys(truncdim)[imax]
     end
 end
diff --git a/src/tensors/diagonal.jl b/src/tensors/diagonal.jl
index df04cb9b..b04ef5c8 100644
--- a/src/tensors/diagonal.jl
+++ b/src/tensors/diagonal.jl
@@ -278,7 +278,12 @@ function LinearAlgebra.mul!(
         dC::DiagonalTensorMap, dA::DiagonalTensorMap, dB::DiagonalTensorMap, α::Number, β::Number
     )
     dC.domain == dA.domain == dB.domain || throw(SpaceMismatch())
-    mul!(Diagonal(dC.data), Diagonal(dA.data), Diagonal(dB.data), α, β)
+    # β == 0 must ignore (possibly uninitialized) dC per the 5-arg mul! contract
+    if iszero(β)
+        @. dC.data = α * dA.data * dB.data
+    else
+        @. dC.data = (α * dA.data * dB.data) + β * dC.data
+    end
     return dC
 end
 
diff --git a/src/tensors/linalg.jl b/src/tensors/linalg.jl
index d09609ed..34fd6ec1 100644
--- a/src/tensors/linalg.jl
+++ b/src/tensors/linalg.jl
@@ -272,7 +272,7 @@ function _norm(blockiter, p::Real, init::Real)
         end
     elseif p == 2
         n² = mapreduce(+, blockiter; init = init) do (c, b)
-            return isempty(b) ?
init : oftype(init, dim(c) * LinearAlgebra.norm(b, 2)^2) end return sqrt(n²) elseif p == 1 @@ -281,7 +281,7 @@ function _norm(blockiter, p::Real, init::Real) end elseif p > 0 nᵖ = mapreduce(+, blockiter; init = init) do (c, b) - return isempty(b) ? init : oftype(init, dim(c) * LinearAlgebra.normp(b, p)^p) + return isempty(b) ? init : oftype(init, dim(c) * LinearAlgebra.norm(b, p)^p) end return (nᵖ)^inv(oftype(nᵖ, p)) else @@ -299,7 +299,7 @@ function LinearAlgebra.rank( r = dim(one(sectortype(t))) * 0 dim(t) == 0 && return r S = LinearAlgebra.svdvals(t) - tol = max(atol, rtol * maximum(first, values(S))) + tol = max(atol, rtol * mapreduce(maximum, max, values(S))) for (c, b) in S if !isempty(b) r += dim(c) * count(>(tol), b) @@ -317,8 +317,8 @@ function LinearAlgebra.cond(t::AbstractTensorMap, p::Real = 2) return zero(real(float(scalartype(t)))) end S = LinearAlgebra.svdvals(t) - maxS = maximum(first, values(S)) - minS = minimum(last, values(S)) + maxS = mapreduce(maximum, max, values(S)) + minS = mapreduce(minimum, min, values(S)) return iszero(maxS) ? oftype(maxS, Inf) : (maxS / minS) else throw(ArgumentError("cond currently only defined for p=2")) @@ -431,7 +431,7 @@ function exp!(t::TensorMap) domain(t) == codomain(t) || error("Exponential of a tensor only exist when domain == codomain.") for (c, b) in blocks(t) - copy!(b, LinearAlgebra.exp!(b)) + copy!(b, exp!(b)) end return t end diff --git a/src/tensors/tensor.jl b/src/tensors/tensor.jl index 3f28f168..17910f29 100644 --- a/src/tensors/tensor.jl +++ b/src/tensors/tensor.jl @@ -139,7 +139,7 @@ function TensorMap( datac = data[c] size(datac) == size(b) || throw(DimensionMismatch("wrong size of block for sector $c")) - copy!(b, datac) + copyto!(b, datac) end for (c, b) in data c ∈ blocksectors(t) || isempty(b) || @@ -357,7 +357,7 @@ function TensorMap( ) where {S} return TensorMap(data, codom ← dom; kwargs...) end -function Tensor(data::AbstractArray, codom::TensorSpace, ; kwargs...) 
+function Tensor(data::AbstractArray, codom::TensorSpace; kwargs...) return TensorMap(data, codom ← one(codom); kwargs...) end diff --git a/test/amd/factorizations.jl b/test/amd/factorizations.jl new file mode 100644 index 00000000..cbdfd9e2 --- /dev/null +++ b/test/amd/factorizations.jl @@ -0,0 +1,406 @@ +const AMDGPUExt = Base.get_extension(TensorKit, :TensorKitAMDGPUExt) +@assert !isnothing(AMDGPUExt) +const ROCTensorMap = getglobal(AMDGPUExt, :ROCTensorMap) +const ROCDiagonalTensorMap = getglobal(AMDGPUExt, :ROCDiagonalTensorMap) + +spacelist = try + if ENV["CI"] == "true" + println("Detected running on CI") + if Sys.iswindows() + (Vtr, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂) + elseif Sys.isapple() + (Vtr, Vℤ₃, VfU₁, VfSU₂) + else + (Vtr, VU₁, VCU₁, VSU₂, VfSU₂) + end + else + (Vtr, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂, VfSU₂) + end +catch + (Vtr, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂, VfSU₂) +end + +import AMDGPU: rand as rocrand, randn as rocrandn + + +eltypes = (Float32, ComplexF64) + +for V in spacelist + I = sectortype(first(V)) + Istr = TensorKit.type_repr(I) + println("---------------------------------------") + println("Factorizations with symmetry: $Istr") + println("---------------------------------------") + @timedtestset "Factorizations with symmetry: $Istr" verbose = true begin + V1, V2, V3, V4, V5 = V + W = V1 ⊗ V2 + @testset "QR decomposition" begin + @testset "$(typeof(t))($T)" for T in eltypes, + t in ( + rocrand(T, W, W), rocrand(T, W, W)', rocrand(T, W, V1), rocrand(T, V1, W)', + ROCDiagonalTensorMap(rocrand(T, reduceddim(V1)), V1), + ) + + Q, R = @constinferred qr_full(t) + AMDGPU.@allowscalar begin + @test Q * R ≈ t + end + @test isunitary(Q) + + Q, R = @constinferred qr_compact(t) + AMDGPU.@allowscalar begin + @test Q * R ≈ t + end + @test isisometric(Q) + + Q, R = @constinferred left_orth(t; kind = :qr) + AMDGPU.@allowscalar begin + @test Q * R ≈ t + end + @test isisometric(Q) + + N = @constinferred qr_null(t) + @test isisometric(N) + @test norm(N' * t) ≈ 0 atol = 100 * 
eps(norm(t)) + + N = @constinferred left_null(t; kind = :qr) + @test isisometric(N) + @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t)) + end + + # empty tensor + for T in eltypes + t = rocrand(T, V1 ⊗ V2, zero(V1)) + + Q, R = @constinferred qr_full(t) + AMDGPU.@allowscalar begin + @test Q * R ≈ t + end + @test isunitary(Q) + @test dim(R) == dim(t) == 0 + + Q, R = @constinferred qr_compact(t) + AMDGPU.@allowscalar begin + @test Q * R ≈ t + end + @test isisometric(Q) + @test dim(Q) == dim(R) == dim(t) + + Q, R = @constinferred left_orth(t; kind = :qr) + AMDGPU.@allowscalar begin + @test Q * R ≈ t + end + @test isisometric(Q) + @test dim(Q) == dim(R) == dim(t) + + N = @constinferred qr_null(t) + @test isunitary(N) + @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t)) + end + end + @testset "LQ decomposition" begin + for T in eltypes, + t in ( + rocrand(T, W, W), rocrand(T, W, W)', rocrand(T, W, V1), rocrand(T, V1, W)', + ROCDiagonalTensorMap(rocrand(T, reduceddim(V1)), V1), + ) + + L, Q = @constinferred lq_full(t) + AMDGPU.@allowscalar begin + @test L * Q ≈ t + end + @test isunitary(Q) + + L, Q = @constinferred lq_compact(t) + AMDGPU.@allowscalar begin + @test L * Q ≈ t + end + @test isisometric(Q; side = :right) + + L, Q = @constinferred right_orth(t; kind = :lq) + AMDGPU.@allowscalar begin + @test L * Q ≈ t + end + @test isisometric(Q; side = :right) + + Nᴴ = @constinferred lq_null(t) + @test isisometric(Nᴴ; side = :right) + @test norm(t * Nᴴ') ≈ 0 atol = 100 * eps(norm(t)) + end + + for T in eltypes + # empty tensor + t = rocrand(T, zero(V1), V1 ⊗ V2) + + L, Q = @constinferred lq_full(t) + AMDGPU.@allowscalar begin + @test L * Q ≈ t + end + @test isunitary(Q) + @test dim(L) == dim(t) == 0 + + L, Q = @constinferred lq_compact(t) + AMDGPU.@allowscalar begin + @test L * Q ≈ t + end + @test isisometric(Q; side = :right) + @test dim(Q) == dim(L) == dim(t) + + L, Q = @constinferred right_orth(t; kind = :lq) + AMDGPU.@allowscalar begin + @test L * Q ≈ t + end + @test 
isisometric(Q; side = :right) + @test dim(Q) == dim(L) == dim(t) + + Nᴴ = @constinferred lq_null(t) + @test isunitary(Nᴴ) + @test norm(t * Nᴴ') ≈ 0 atol = 100 * eps(norm(t)) + end + end + @testset "Polar decomposition" begin + @testset "$(typeof(t))($T)" for T in eltypes, + t in ( + rocrand(T, W, W), + rocrand(T, W, W)', + rocrand(T, W, V1), + rocrand(T, V1, W)', + ROCDiagonalTensorMap(rocrand(T, reduceddim(V1)), V1), + ) + + @assert domain(t) ≾ codomain(t) + w, p = @constinferred left_polar(t) + @test w * p ≈ t + @test isisometric(w) + @test isposdef(p) + + w, p = @constinferred left_orth(t; kind = :polar) + @test w * p ≈ t + @test isisometric(w) + end + + @testset "$(typeof(t))($T)" for T in eltypes, + t in ( + rocrand(T, W, W), rocrand(T, W, W)', rocrand(T, V1, W), rocrand(T, W, V1)', + ROCDiagonalTensorMap(rocrand(T, reduceddim(V1)), V1), + ) + + @assert codomain(t) ≾ domain(t) + p, wᴴ = @constinferred right_polar(t) + @test p * wᴴ ≈ t + @test isisometric(wᴴ; side = :right) + @test isposdef(p) + + p, wᴴ = @constinferred right_orth(t; kind = :polar) + @test p * wᴴ ≈ t + @test isisometric(wᴴ; side = :right) + end + end + @testset "SVD" begin + for T in eltypes, + t in ( + rocrand(T, W, W), rocrand(T, W, W)', + rocrand(T, W, V1), rocrand(T, V1, W), + rocrand(T, W, V1)', rocrand(T, V1, W)', + ROCDiagonalTensorMap(rocrand(T, reduceddim(V1)), V1), + ) + + u, s, vᴴ = @constinferred svd_full(t) + @test u * s * vᴴ ≈ t + @test isunitary(u) + @test isunitary(vᴴ) + + u, s, vᴴ = @constinferred svd_compact(t) + @test u * s * vᴴ ≈ t + @test isisometric(u) + @test isposdef(s) + @test isisometric(vᴴ; side = :right) + + s′ = LinearAlgebra.diag(s) + for (c, b) in LinearAlgebra.svdvals(t) + @test b ≈ s′[c] + end + + v, c = @constinferred left_orth(t; kind = :svd) + @test v * c ≈ t + @test isisometric(v) + + N = @constinferred left_null(t; kind = :svd) + @test isisometric(N) + @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t)) + + Nᴴ = @constinferred right_null(t; kind = :svd) + 
@test isisometric(Nᴴ; side = :right) + @test norm(t * Nᴴ') ≈ 0 atol = 100 * eps(norm(t)) + end + + # empty tensor + for T in eltypes, t in (rocrand(T, W, zero(V1)), rocrand(T, zero(V1), W)) + U, S, Vᴴ = @constinferred svd_full(t) + @test U * S * Vᴴ ≈ t + @test isunitary(U) + @test isunitary(Vᴴ) + + U, S, Vᴴ = @constinferred svd_compact(t) + @test U * S * Vᴴ ≈ t + @test dim(U) == dim(S) == dim(Vᴴ) == dim(t) == 0 + end + end + + @testset "truncated SVD" begin + for T in eltypes, + # AMDGPU doesn't support randn(ComplexF64) + t in ( + ROCTensorMap(randn(T, W, W)), + ROCTensorMap(randn(T, W, W))', + ROCTensorMap(randn(T, W, V1)), + ROCTensorMap(randn(T, V1, W)), + ROCTensorMap(randn(T, W, V1))', + ROCTensorMap(randn(T, V1, W))', + ROCDiagonalTensorMap(randn(T, reduceddim(V1)), V1), + ) + + @constinferred normalize!(t) + + U, S, Vᴴ = @constinferred svd_trunc(t; trunc = notrunc()) + @test U * S * Vᴴ ≈ t + @test isisometric(U) + @test isisometric(Vᴴ; side = :right) + + trunc = truncrank(dim(domain(S)) ÷ 2) + AMDGPU.@allowscalar begin + U1, S1, Vᴴ1 = @constinferred svd_trunc(t; trunc) + end + @test t * Vᴴ1' ≈ U1 * S1 + @test isisometric(U1) + @test isisometric(Vᴴ1; side = :right) + @test dim(domain(S1)) <= trunc.howmany + + λ = minimum(minimum, values(LinearAlgebra.diag(S1))) + trunc = trunctol(; atol = λ - 10eps(λ)) + AMDGPU.@allowscalar begin + U2, S2, Vᴴ2 = @constinferred svd_trunc(t; trunc) + end + @test t * Vᴴ2' ≈ U2 * S2 + @test isisometric(U2) + @test isisometric(Vᴴ2; side = :right) + @test minimum(minimum, values(LinearAlgebra.diag(S1))) >= λ + @test U2 ≈ U1 + @test S2 ≈ S1 + @test Vᴴ2 ≈ Vᴴ1 + + trunc = truncspace(space(S2, 1)) + AMDGPU.@allowscalar begin + U3, S3, Vᴴ3 = @constinferred svd_trunc(t; trunc) + end + @test t * Vᴴ3' ≈ U3 * S3 + @test isisometric(U3) + @test isisometric(Vᴴ3; side = :right) + @test space(S3, 1) ≾ space(S2, 1) + + trunc = truncerror(; atol = 0.5) + AMDGPU.@allowscalar begin + U4, S4, Vᴴ4 = @constinferred svd_trunc(t; trunc) + end + @test t 
* Vᴴ4' ≈ U4 * S4 + @test isisometric(U4) + @test isisometric(Vᴴ4; side = :right) + @test norm(t - U4 * S4 * Vᴴ4) <= 0.5 + end + end + + #=@testset "Eigenvalue decomposition" begin + for T in eltypes, + t in (rocrand(T, V1, V1), + rocrand(T, W, W), + rocrand(T, W, W)', + ROCDiagonalTensorMap(rocrand(T, reduceddim(V1)), V1) + ) + + d, v = @constinferred eig_full(t) + @test t * v ≈ v * d + + d′ = LinearAlgebra.diag(d) + for (c, b) in LinearAlgebra.eigvals(t) + @test sort(b; by=abs) ≈ sort(d′[c]; by=abs) + end + + vdv = v' * v + vdv = (vdv + vdv') / 2 + @test @constinferred isposdef(vdv) + t isa ROCDiagonalTensorMap || @test !isposdef(t) # unlikely for non-hermitian map + + AMDGPU.@allowscalar begin # TODO + d, v = @constinferred eig_trunc(t; trunc=truncrank(dim(domain(t)) ÷ 2)) + end + @test t * v ≈ v * d + @test dim(domain(d)) ≤ dim(domain(t)) ÷ 2 + + t2 = (t + t') + D, V = eigen(t2) + @test isisometric(V) + D̃, Ṽ = @constinferred eigh_full(t2) + @test D ≈ D̃ + @test V ≈ Ṽ + λ = minimum(minimum(real(LinearAlgebra.diag(b))) + for (c, b) in blocks(D)) + @test cond(Ṽ) ≈ one(real(T)) + @test isposdef(t2) == isposdef(λ) + @test isposdef(t2 - λ * one(t2) + 0.1 * one(t2)) + @test !isposdef(t2 - λ * one(t2) - 0.1 * one(t2)) + + add!(t, t') + + d, v = @constinferred eigh_full(t) + @test t * v ≈ v * d + @test isunitary(v) + + λ = minimum(minimum(real(LinearAlgebra.diag(b))) for (c, b) in blocks(d)) + @test cond(v) ≈ one(real(T)) + @test isposdef(t) == isposdef(λ) + @test isposdef(t - λ * one(t) + 0.1 * one(t)) + @test !isposdef(t - λ * one(t) - 0.1 * one(t)) + + AMDGPU.@allowscalar begin + d, v = @constinferred eigh_trunc(t; trunc=truncrank(dim(domain(t)) ÷ 2)) + end + @test t * v ≈ v * d + @test dim(domain(d)) ≤ dim(domain(t)) ÷ 2 + end + end=# + + #=@testset "Condition number and rank" begin + for T in eltypes, + t in (rocrand(T, W, W), rocrand(T, W, W)', + rocrand(T, W, V1), rocrand(T, V1, W), + rocrand(T, W, V1)', rocrand(T, V1, W)', + ROCDiagonalTensorMap(rocrand(T, 
reduceddim(V1)), V1)) + + d1, d2 = dim(codomain(t)), dim(domain(t)) + @test rank(t) == min(d1, d2) + M = left_null(t) + @test @constinferred(rank(M)) + rank(t) == d1 + Mᴴ = right_null(t) + @test rank(Mᴴ) + rank(t) == d2 + end + for T in eltypes + u = unitary(T, V1 ⊗ V2, V1 ⊗ V2) + @test @constinferred(cond(u)) ≈ one(real(T)) + @test @constinferred(rank(u)) == dim(V1 ⊗ V2) + + t = rocrand(T, zero(V1), W) + @test rank(t) == 0 + t2 = rocrand(T, zero(V1) * zero(V2), zero(V1) * zero(V2)) + @test rank(t2) == 0 + @test cond(t2) == 0.0 + end + for T in eltypes, t in (rocrand(T, W, W), rocrand(T, W, W)') + add!(t, t') + vals = @constinferred LinearAlgebra.eigvals(t) + λmax = maximum(s -> maximum(abs, s), values(vals)) + λmin = minimum(s -> minimum(abs, s), values(vals)) + @test cond(t) ≈ λmax / λmin + end + end=# + end +end diff --git a/test/amd/tensors.jl b/test/amd/tensors.jl new file mode 100644 index 00000000..0df035f3 --- /dev/null +++ b/test/amd/tensors.jl @@ -0,0 +1,609 @@ +using Adapt, AMDGPU +ad = adapt(Array) + +const AMDGPUExt = Base.get_extension(TensorKit, :TensorKitAMDGPUExt) +@assert !isnothing(AMDGPUExt) +const ROCTensorMap = getglobal(AMDGPUExt, :ROCTensorMap) +const ROCDiagonalTensorMap = getglobal(AMDGPUExt, :ROCDiagonalTensorMap) + +for V in (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂, VfSU₂) #, VSU₃) + V1, V2, V3, V4, V5 = V + @assert V3 * V4 * V2 ≿ V1' * V5' # necessary for leftorth tests + @assert V3 * V4 ≾ V1' * V2' * V5' # necessary for rightorth tests +end + +spacelist = try + if ENV["CI"] == "true" + println("Detected running on CI") + if Sys.iswindows() + (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂) + elseif Sys.isapple() + (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VfU₁, VfSU₂) #, VSU₃) + else + (Vtr, Vℤ₂, Vfℤ₂, VU₁, VCU₁, VSU₂, VfSU₂) #, VSU₃) + end + else + (Vtr, VU₁, VSU₂, Vfℤ₂) + end +catch + (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂, VfSU₂) #, VSU₃) +end + +for V in spacelist + I = sectortype(first(V)) + Istr = TensorKit.type_repr(I) + 
println("---------------------------------------") + println("AMDGPU Tensors with symmetry: $Istr") + println("---------------------------------------") + @timedtestset "Tensors with symmetry: $Istr" verbose = true begin + V1, V2, V3, V4, V5 = V + @timedtestset "Basic tensor properties" begin + W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 + for T in (Int, Float32, Float64, ComplexF32, ComplexF64) + t = @constinferred AMDGPU.zeros(T, W) + AMDGPU.@allowscalar begin + @test @constinferred(hash(t)) == hash(deepcopy(t)) + end + @test scalartype(t) == T + @test norm(t) == 0 + @test codomain(t) == W + @test space(t) == (W ← one(W)) + @test domain(t) == one(W) + @test typeof(t) == TensorMap{T, spacetype(t), 5, 0, ROCVector{T, AMDGPU.Mem.HIPBuffer}} + # blocks + bs = @constinferred blocks(t) + (c, b1), state = @constinferred Nothing iterate(bs) + @test c == first(blocksectors(W)) + next = @constinferred Nothing iterate(bs, state) + b2 = @constinferred block(t, first(blocksectors(t))) + @test b1 == b2 + @test_broken eltype(bs) === Pair{typeof(c), typeof(b1)} + @test_broken typeof(b1) === TensorKit.blocktype(t) + @test typeof(c) === sectortype(t) + end + end + @timedtestset "Tensor Dict conversion" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for T in (Int, Float32, ComplexF64) + t = @constinferred AMDGPU.rand(T, W) + d = convert(Dict, t) + @test t == convert(ROCTensorMap, d) + end + end + if hasfusiontensor(I) + @timedtestset "Tensor Array conversion" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + W1 = V1 ← one(V1) + W2 = one(V2) ← V2 + W3 = V1 ⊗ V2 ← one(V1) + W4 = V1 ← V2 + W5 = one(V1) ← V1 ⊗ V2 + W6 = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for W in (W1, W2, W3, W4, W5, W6) + for T in (Int, Float32, ComplexF64) + if T == Int + t = ROCTensorMap{T}(undef, W) + for (_, b) in blocks(t) + AMDGPU.@allowscalar AMDGPU.rand!(b, -20:20) + end + else + t = @constinferred AMDGPU.randn(T, W) + end + AMDGPU.@allowscalar begin + a = @constinferred convert(ROCArray, t) + end + b = reshape(a, dim(codomain(W)), dim(domain(W))) + @test t ≈ 
@constinferred TensorMap(a, W) + @test t ≈ @constinferred TensorMap(b, W) + @test t === @constinferred TensorMap(t.data, W) + end + end + for T in (Int, Float32, ComplexF64) + t = AMDGPU.randn(T, V1 ⊗ V2 ← zero(V1)) + AMDGPU.@allowscalar begin + a = convert(ROCArray, t) + end + @test norm(a) == 0 + end + end + end + @timedtestset "Basic linear algebra" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for T in (Float32, ComplexF64) + t = @constinferred AMDGPU.rand(T, W) + @test scalartype(t) == T + @test space(t) == W + @test space(t') == W' + @test dim(t) == dim(space(t)) + @test codomain(t) == codomain(W) + @test domain(t) == domain(W) + # blocks for adjoint + bs = @constinferred blocks(t') + (c, b1), state = @constinferred Nothing iterate(bs) + @test c == first(blocksectors(W')) + next = @constinferred Nothing iterate(bs, state) + b2 = @constinferred block(t', first(blocksectors(t'))) + @test b1 == b2 + @test_broken eltype(bs) === Pair{typeof(c), typeof(b1)} + @test_broken typeof(b1) === TensorKit.blocktype(t') + @test typeof(c) === sectortype(t) + # linear algebra + @test isa(@constinferred(norm(t)), real(T)) + @test norm(t)^2 ≈ dot(t, t) + α = rand(T) + @test norm(α * t) ≈ abs(α) * norm(t) + @test norm(t + t, 2) ≈ 2 * norm(t, 2) + @test norm(t + t, 1) ≈ 2 * norm(t, 1) + @test norm(t + t, Inf) ≈ 2 * norm(t, Inf) + p = 3 * rand(Float64) + AMDGPU.@allowscalar begin + @test norm(t + t, p) ≈ 2 * norm(t, p) + end + @test norm(t) ≈ norm(t') + + t2 = @constinferred AMDGPU.rand!(similar(t)) + β = rand(T) + @test @constinferred(dot(β * t2, α * t)) ≈ conj(β) * α * conj(dot(t, t2)) + @test dot(t2, t) ≈ conj(dot(t, t2)) + @test dot(t2, t) ≈ conj(dot(t2', t')) + @test dot(t2, t) ≈ dot(t', t2') + + i1 = @constinferred(isomorphism(T, V1 ⊗ V2, V2 ⊗ V1)) + i2 = @constinferred(isomorphism(Vector{T}, V2 ⊗ V1, V1 ⊗ V2)) + @test i1 * i2 == @constinferred(id(T, V1 ⊗ V2)) + @test i2 * i1 == @constinferred(id(Vector{T}, V2 ⊗ V1)) + + w = @constinferred(isometry(T, V1 ⊗ (oneunit(V1) ⊕ oneunit(V1)), 
V1)) + @test dim(w) == 2 * dim(V1 ← V1) + @test w' * w == id(Vector{T}, V1) + @test w * w' == (w * w')^2 + end + end + @timedtestset "Trivial space insertion and removal" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for T in (Float32, ComplexF64) + t = @constinferred AMDGPU.rand(T, W) + t2 = @constinferred insertleftunit(t) + @test t2 == @constinferred insertrightunit(t) + @test numind(t2) == numind(t) + 1 + @test space(t2) == insertleftunit(space(t)) + @test scalartype(t2) === T + @test t.data === t2.data + @test @constinferred(removeunit(t2, $(numind(t2)))) == t + t3 = @constinferred insertleftunit(t; copy = true) + @test t3 == @constinferred insertrightunit(t; copy = true) + @test t.data !== t3.data + for (c, b) in blocks(t) + @test b == block(t3, c) + end + @test @constinferred(removeunit(t3, $(numind(t3)))) == t + t4 = @constinferred insertrightunit(t, 3; dual = true) + @test numin(t4) == numin(t) && numout(t4) == numout(t) + 1 + for (c, b) in blocks(t) + @test b == block(t4, c) + end + @test @constinferred(removeunit(t4, 4)) == t + t5 = @constinferred insertleftunit(t, 4; dual = true) + @test numin(t5) == numin(t) + 1 && numout(t5) == numout(t) + for (c, b) in blocks(t) + @test b == block(t5, c) + end + @test @constinferred(removeunit(t5, 4)) == t + end + end + if hasfusiontensor(I) + @timedtestset "Basic linear algebra: test via conversion" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for T in (Float32, ComplexF64) + t = AMDGPU.rand(T, W) + t2 = @constinferred AMDGPU.rand!(similar(t)) + @test norm(t, 2) ≈ norm(ad(t), 2) + @test dot(t2, t) ≈ dot(ad(t2), ad(t)) + α = rand(T) + @test ad(α * t) ≈ α * ad(t) + @test ad(t + t) ≈ 2 * ad(t) + end + end + @timedtestset "Real and imaginary parts" begin + W = V1 ⊗ V2 + for T in (Float64, ComplexF64, ComplexF32) + t = @constinferred AMDGPU.randn(T, W, W) + @test real(ad(t)) == ad(@constinferred real(t)) + @test imag(ad(t)) == ad(@constinferred imag(t)) + end + end + end + @timedtestset "Tensor conversion" begin + W = V1 ⊗ V2 + t = 
@constinferred AMDGPU.randn(W ← W) + @test typeof(convert(ROCTensorMap, t')) == typeof(t) + tc = complex(t) + @test convert(typeof(tc), t) == tc + @test typeof(convert(typeof(tc), t)) == typeof(tc) + @test typeof(convert(typeof(tc), t')) == typeof(tc) + @test Base.promote_typeof(t, tc) == typeof(tc) + @test Base.promote_typeof(tc, t) == typeof(tc + t) + end + @timedtestset "Permutations: test via inner product invariance" begin + W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 + t = AMDGPU.rand(ComplexF64, W) + t′ = AMDGPU.randn!(similar(t)) + AMDGPU.@allowscalar begin + for k in 0:5 + for p in permutations(1:5) + p1 = ntuple(n -> p[n], k) + p2 = ntuple(n -> p[k + n], 5 - k) + t2 = @constinferred permute(t, (p1, p2)) + t2 = permute(t, (p1, p2)) + @test norm(t2) ≈ norm(t) + t2′ = permute(t′, (p1, p2)) + @test dot(t2′, t2) ≈ dot(t′, t) ≈ dot(transpose(t2′), transpose(t2)) + end + t3 = @constinferred repartition(t, $k) + t3 = repartition(t, k) + @test norm(t3) ≈ norm(t) + t3′ = @constinferred repartition!(similar(t3), t′) + @test norm(t3′) ≈ norm(t′) + @test dot(t′, t) ≈ dot(t3′, t3) + end + end + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Permutations: test via conversion" begin + W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 + t = AMDGPU.rand(ComplexF64, W) + a = ad(t) + AMDGPU.@allowscalar begin + for k in 0:5 + for p in permutations(1:5) + p1 = ntuple(n -> p[n], k) + p2 = ntuple(n -> p[k + n], 5 - k) + t2 = permute(t, (p1, p2)) + a2 = ad(t2) + @test a2 ≈ permute(a, (p1, p2)) + @test ad(transpose(t2)) ≈ transpose(a2) + end + + t3 = repartition(t, k) + a3 = ad(t3) + @test a3 ≈ repartition(a, k) + end + end + end + end + @timedtestset "Full trace: test self-consistency" begin + t = AMDGPU.rand(ComplexF64, V1 ⊗ V2' ⊗ V2 ⊗ V1') + AMDGPU.@allowscalar begin + t2 = permute(t, ((1, 2), (4, 3))) + s = @constinferred tr(t2) + end + @test conj(s) ≈ tr(t2') + if !isdual(V1) + t2 = twist!(t2, 1) + end + if isdual(V2) + t2 = twist!(t2, 2) + end + AMDGPU.@allowscalar begin + ss = 
tr(t2) + @tensor s2 = t[a, b, b, a] + @tensor t3[a, b] := t[a, c, c, b] + @tensor s3 = t3[a, a] + end + @test ss ≈ s2 + @test ss ≈ s3 + end + @timedtestset "Partial trace: test self-consistency" begin + t = AMDGPU.rand(ComplexF64, V1 ⊗ V2' ⊗ V3 ⊗ V2 ⊗ V1' ⊗ V3') + AMDGPU.@allowscalar begin + @tensor t2[a, b] := t[c, d, b, d, c, a] + @tensor t4[a, b, c, d] := t[d, e, b, e, c, a] + @tensor t5[a, b] := t4[a, b, c, c] + end + @test t2 ≈ t5 + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Trace: test via conversion" begin + t = AMDGPU.rand(ComplexF64, V1 ⊗ V2' ⊗ V3 ⊗ V2 ⊗ V1' ⊗ V3') + AMDGPU.@allowscalar begin + @tensor t2[a, b] := t[c, d, b, d, c, a] + @tensor t3[a, b] := ad(t)[c, d, b, d, c, a] + end + @test t3 ≈ ad(t2) + end + end + @timedtestset "Trace and contraction" begin + t1 = AMDGPU.rand(ComplexF64, V1 ⊗ V2 ⊗ V3) + t2 = AMDGPU.rand(ComplexF64, V2' ⊗ V4 ⊗ V1') + AMDGPU.@allowscalar begin + t3 = t1 ⊗ t2 + @tensor ta[a, b] := t1[x, y, a] * t2[y, b, x] + @tensor tb[a, b] := t3[x, y, a, y, b, x] + @test ta ≈ tb + end + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Tensor contraction: test via conversion" begin + A1 = AMDGPU.randn(ComplexF64, V1' * V2', V3') + A2 = AMDGPU.randn(ComplexF64, V3 * V4, V5) + rhoL = AMDGPU.randn(ComplexF64, V1, V1) + rhoR = AMDGPU.randn(ComplexF64, V5, V5)' # test adjoint tensor + H = AMDGPU.randn(ComplexF64, V2 * V4, V2 * V4) + AMDGPU.@allowscalar begin + @tensor HrA12[a, s1, s2, c] := rhoL[a, a'] * conj(A1[a', t1, b]) * + A2[b, t2, c'] * rhoR[c', c] * + H[s1, s2, t1, t2] + + @tensor HrA12array[a, s1, s2, c] := ad(rhoL)[a, a'] * + conj(ad(A1)[a', t1, b]) * + ad(A2)[b, t2, c'] * + ad(rhoR)[c', c] * + ad(H)[s1, s2, t1, t2] + end + @test HrA12array ≈ ad(HrA12) + end + end + @timedtestset "Index flipping: test flipping inverse" begin + t = AMDGPU.rand(ComplexF64, V1 ⊗ V1' ← V1' ⊗ V1) + AMDGPU.@allowscalar begin + for i in 1:4 + @test t ≈ flip(flip(t, i), i; inv = true) + @test t ≈ 
flip(flip(t, i; inv = true), i) + end + end + end + #= + @timedtestset "Index flipping: test via explicit flip" begin + t = AMDGPU.rand(ComplexF64, V1 ⊗ V1' ← V1' ⊗ V1) + F1 = unitary(flip(V1), V1) # TODO wrong storage type + AMDGPU.@allowscalar begin + @tensor tf[a, b; c, d] := F1[a, a'] * t[a', b; c, d] + @test flip(t, 1) ≈ tf + @tensor tf[a, b; c, d] := conj(F1[b, b']) * t[a, b'; c, d] + @test twist!(flip(t, 2), 2) ≈ tf + @tensor tf[a, b; c, d] := F1[c, c'] * t[a, b; c', d] + @test flip(t, 3) ≈ tf + @tensor tf[a, b; c, d] := conj(F1[d, d']) * t[a, b; c, d'] + @test twist!(flip(t, 4), 4) ≈ tf + end + end =# + @timedtestset "Index flipping: test via contraction" begin + t1 = AMDGPU.rand(ComplexF64, V1 ⊗ V2 ⊗ V3 ← V4) + t2 = AMDGPU.rand(ComplexF64, V2' ⊗ V5 ← V4' ⊗ V1) + AMDGPU.@allowscalar begin + @tensor ta[a, b] := t1[x, y, a, z] * t2[y, b, z, x] + @tensor tb[a, b] := flip(t1, 1)[x, y, a, z] * flip(t2, 4)[y, b, z, x] + @test ta ≈ tb + @tensor tb[a, b] := flip(t1, (2, 4))[x, y, a, z] * flip(t2, (1, 3))[y, b, z, x] + @test ta ≈ tb + @tensor tb[a, b] := flip(t1, (1, 2, 4))[x, y, a, z] * flip(t2, (1, 3, 4))[y, b, z, x] + @tensor tb[a, b] := flip(t1, (1, 3))[x, y, a, z] * flip(t2, (2, 4))[y, b, z, x] + @test flip(ta, (1, 2)) ≈ tb + end + end + @timedtestset "Multiplication and inverse: test compatibility" begin + W1 = V1 ⊗ V2 ⊗ V3 + W2 = V4 ⊗ V5 + for T in (Float64, ComplexF64) + t1 = AMDGPU.rand(T, W1, W1) + t2 = AMDGPU.rand(T, W2, W2) + t = AMDGPU.rand(T, W1, W2) + @test t1 * (t1 \ t) ≈ t + @test (t / t2) * t2 ≈ t + AMDGPU.@allowscalar begin + @test t1 \ one(t1) ≈ inv(t1) + end + # @test one(t1) / t1 ≈ pinv(t1) # pinv not available in AMDGPU + @test_throws SpaceMismatch inv(t) + @test_throws SpaceMismatch t2 \ t + @test_throws SpaceMismatch t / t1 + # pinv not available in AMDGPU + # tp = pinv(t) * t + # @test tp ≈ tp * tp + end + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Multiplication and inverse: test via conversion" begin + W1 
= V1 ⊗ V2 ⊗ V3 + W2 = V4 ⊗ V5 + for T in (Float32, Float64, ComplexF32, ComplexF64) + t1 = AMDGPU.rand(T, W1, W1) + t2 = AMDGPU.rand(T, W2, W2) + t = AMDGPU.rand(T, W1, W2) + d1 = dim(W1) + d2 = dim(W2) + AMDGPU.@allowscalar begin + At1 = reshape(convert(ROCArray, t1), d1, d1) + At2 = reshape(convert(ROCArray, t2), d2, d2) + At = reshape(convert(ROCArray, t), d1, d2) + end + @test ad(t1 * t) ≈ ad(t1) * ad(t) + @test ad(t1' * t) ≈ ad(t1)' * ad(t) + @test ad(t2 * t') ≈ ad(t2) * ad(t)' + @test ad(t2' * t') ≈ ad(t2)' * ad(t)' + @test ad(inv(t1)) ≈ inv(ad(t1)) + # pinv not available in AMDGPU + #@test ad(pinv(t)) ≈ pinv(ad(t)) + + if T == Float32 || T == ComplexF32 + continue + end + + @test ad(t1 \ t) ≈ ad(t1) \ ad(t) + @test ad(t1' \ t) ≈ ad(t1)' \ ad(t) + @test ad(t2 \ t') ≈ ad(t2) \ ad(t)' + @test ad(t2' \ t') ≈ ad(t2)' \ ad(t)' + + @test ad(t2 / t) ≈ ad(t2) / ad(t) + @test ad(t2' / t) ≈ ad(t2)' / ad(t) + @test ad(t1 / t') ≈ ad(t1) / ad(t)' + @test ad(t1' / t') ≈ ad(t1)' / ad(t)' + end + end + end + #= + @timedtestset "diag/diagm" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + t = AMDGPU.randn(ComplexF64, W) + d = LinearAlgebra.diag(t) + D = LinearAlgebra.diagm(codomain(t), domain(t), d) + @test LinearAlgebra.isdiag(D) + @test LinearAlgebra.diag(D) == d + end=# # TODO + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Tensor functions" begin + W = V1 ⊗ V2 + for T in (Float64, ComplexF64) + t = AMDGPU.randn(T, W, W) + s = dim(W) + AMDGPU.@allowscalar begin + expt = @constinferred exp(t) + @test ad(expt) ≈ exp(ad(t)) + + @test (@constinferred sqrt(t))^2 ≈ t + @test ad(sqrt(t)) ≈ sqrt(ad(t)) + + @test exp(@constinferred log(expt)) ≈ expt + @test ad(log(expt)) ≈ log(ad(expt)) + + @test (@constinferred cos(t))^2 + (@constinferred sin(t))^2 ≈ + id(storagetype(t), W) + @test (@constinferred tan(t)) ≈ sin(t) / cos(t) + @test (@constinferred cot(t)) ≈ cos(t) / sin(t) + @test (@constinferred cosh(t))^2 - (@constinferred sinh(t))^2 ≈ + id(storagetype(t), W) + @test 
(@constinferred tanh(t)) ≈ sinh(t) / cosh(t) + @test (@constinferred coth(t)) ≈ cosh(t) / sinh(t) + + t1 = sin(t) + @test sin(@constinferred asin(t1)) ≈ t1 + t2 = cos(t) + @test cos(@constinferred acos(t2)) ≈ t2 + t3 = sinh(t) + @test sinh(@constinferred asinh(t3)) ≈ t3 + t4 = cosh(t) + @test cosh(@constinferred acosh(t4)) ≈ t4 + t5 = tan(t) + @test tan(@constinferred atan(t5)) ≈ t5 + t6 = cot(t) + @test cot(@constinferred acot(t6)) ≈ t6 + t7 = tanh(t) + @test tanh(@constinferred atanh(t7)) ≈ t7 + t8 = coth(t) + @test coth(@constinferred acoth(t8)) ≈ t8 + end + end + end + end + # Sylvester not defined for AMDGPU + # @timedtestset "Sylvester equation" begin + # for T in (Float32, ComplexF64) + # tA = AMDGPU.rand(T, V1 ⊗ V3, V1 ⊗ V3) + # tB = AMDGPU.rand(T, V2 ⊗ V4, V2 ⊗ V4) + # tA = 3 // 2 * leftorth(tA; alg=Polar())[1] + # tB = 1 // 5 * leftorth(tB; alg=Polar())[1] + # tC = AMDGPU.rand(T, V1 ⊗ V3, V2 ⊗ V4) + # t = @constinferred sylvester(tA, tB, tC) + # @test codomain(t) == V1 ⊗ V3 + # @test domain(t) == V2 ⊗ V4 + # @test norm(tA * t + t * tB + tC) < + # (norm(tA) + norm(tB) + norm(tC)) * eps(real(T))^(2 / 3) + # if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + # matrix(x) = reshape(convert(Array, x), dim(codomain(x)), dim(domain(x))) + # @test matrix(t) ≈ sylvester(matrix(tA), matrix(tB), matrix(tC)) + # end + # end + # end + # + # TODO + @timedtestset "Tensor product: test via norm preservation" begin + for T in (Float32, ComplexF64) + t1 = AMDGPU.rand(T, V2 ⊗ V3 ⊗ V1, V1 ⊗ V2) + t2 = AMDGPU.rand(T, V2 ⊗ V1 ⊗ V3, V1 ⊗ V1) + AMDGPU.@allowscalar begin + t = @constinferred (t1 ⊗ t2) + end + @test norm(t) ≈ norm(t1) * norm(t2) + end + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Tensor product: test via conversion" begin + for T in (Float32, ComplexF64) + t1 = AMDGPU.rand(T, V2 ⊗ V3 ⊗ V1, V1) + t2 = AMDGPU.rand(T, V2 ⊗ V1 ⊗ V3, V2) + AMDGPU.@allowscalar begin + t = @constinferred (t1 ⊗ t2) + end + d1 = dim(codomain(t1)) + d2 = 
dim(codomain(t2)) + d3 = dim(domain(t1)) + d4 = dim(domain(t2)) + At = ad(t) + AMDGPU.@allowscalar begin + @test ad(t) ≈ ad(t1) ⊗ ad(t2) + end + end + end + end + @timedtestset "Tensor product: test via tensor contraction" begin + for T in (Float32, ComplexF64) + t1 = AMDGPU.rand(T, V2 ⊗ V3 ⊗ V1) + t2 = AMDGPU.rand(T, V2 ⊗ V1 ⊗ V3) + AMDGPU.@allowscalar begin + t = @constinferred (t1 ⊗ t2) + @tensor t′[1, 2, 3, 4, 5, 6] := t1[1, 2, 3] * t2[4, 5, 6] + @test t ≈ t′ + end + end + end + @timedtestset "Tensor absorpsion" begin + # absorbing small into large + t1 = AMDGPU.zeros(V1 ⊕ V1, V2 ⊗ V3) + t2 = AMDGPU.rand(V1, V2 ⊗ V3) + AMDGPU.@allowscalar begin + t3 = @constinferred absorb(t1, t2) + end + @test norm(t3) ≈ norm(t2) + @test norm(t1) == 0 + AMDGPU.@allowscalar begin + t4 = @constinferred absorb!(t1, t2) + end + @test t1 === t4 + @test t3 ≈ t4 + + # absorbing large into small + t1 = AMDGPU.rand(V1 ⊕ V1, V2 ⊗ V3) + t2 = AMDGPU.zeros(V1, V2 ⊗ V3) + AMDGPU.@allowscalar begin + t3 = @constinferred absorb(t2, t1) + end + @test norm(t3) < norm(t1) + @test norm(t2) == 0 + AMDGPU.@allowscalar begin + t4 = @constinferred absorb!(t2, t1) + end + @test t2 === t4 + @test t3 ≈ t4 + end + end + TensorKit.empty_globalcaches!() +end + +@timedtestset "Deligne tensor product: test via conversion" begin + Vlists1 = (Vtr,) # VSU₂) + Vlists2 = (Vtr,) # Vℤ₂) + @testset for Vlist1 in Vlists1, Vlist2 in Vlists2 + V1, V2, V3, V4, V5 = Vlist1 + W1, W2, W3, W4, W5 = Vlist2 + for T in (Float32, ComplexF64) + t1 = AMDGPU.rand(T, V1 ⊗ V2, V3' ⊗ V4) + t2 = AMDGPU.rand(T, W2, W1 ⊗ W1') + t = @constinferred (t1 ⊠ t2) + d1 = dim(codomain(t1)) + d2 = dim(codomain(t2)) + d3 = dim(domain(t1)) + d4 = dim(domain(t2)) + @test ad(t1) ⊠ ad(t2) ≈ ad(t1 ⊠ t2) + end + end +end diff --git a/test/cuda/factorizations.jl b/test/cuda/factorizations.jl new file mode 100644 index 00000000..c72df450 --- /dev/null +++ b/test/cuda/factorizations.jl @@ -0,0 +1,385 @@ +const CUDAExt = Base.get_extension(TensorKit, 
:TensorKitCUDAExt) +@assert !isnothing(CUDAExt) +# const CuTensorMap{T,S,N1,N2,I,A} = CUDAExt.CuTensorMap{T,S,N1,N2,I,A} +const CuTensorMap = getglobal(CUDAExt, :CuTensorMap) +const CuDiagonalTensorMap = getglobal(CUDAExt, :CuDiagonalTensorMap) + +spacelist = try + if ENV["CI"] == "true" + println("Detected running on CI") + if Sys.iswindows() + (Vtr, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂) + elseif Sys.isapple() + (Vtr, Vℤ₃, VfU₁, VfSU₂) + else + (Vtr, VU₁, VCU₁, VSU₂, VfSU₂) + end + else + (Vtr, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂, VfSU₂) + end +catch + (Vtr, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂, VfSU₂) +end + +import CUDA: rand as curand, randn as curandn + + +eltypes = (Float32, ComplexF64) + +for V in spacelist + I = sectortype(first(V)) + Istr = TensorKit.type_repr(I) + println("---------------------------------------") + println("Factorizations with symmetry: $Istr") + println("---------------------------------------") + @timedtestset "Factorizations with symmetry: $Istr" verbose = true begin + V1, V2, V3, V4, V5 = V + W = V1 ⊗ V2 + @testset "QR decomposition" begin + for T in eltypes, + t in ( + curand(T, W, W), curand(T, W, W)', curand(T, W, V1), curand(T, V1, W)', + CuDiagonalTensorMap(curand(T, reduceddim(V1)), V1), + ) + + Q, R = @constinferred qr_full(t) + @test Q * R ≈ t + @test isunitary(Q) + + Q, R = @constinferred qr_compact(t) + @test Q * R ≈ t + @test isisometric(Q) + + Q, R = @constinferred left_orth(t; kind = :qr) + @test Q * R ≈ t + @test isisometric(Q) + + N = @constinferred qr_null(t) + @test isisometric(N) + @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t)) + + N = @constinferred left_null(t; kind = :qr) + @test isisometric(N) + @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t)) + end + + # empty tensor + for T in eltypes + t = curand(T, V1 ⊗ V2, zero(V1)) + + Q, R = @constinferred qr_full(t) + @test Q * R ≈ t + @test isunitary(Q) + @test dim(R) == dim(t) == 0 + + Q, R = @constinferred qr_compact(t) + @test Q * R ≈ t + @test isisometric(Q) + @test dim(Q) == dim(R) == dim(t) 
+ + Q, R = @constinferred left_orth(t; kind = :qr) + @test Q * R ≈ t + @test isisometric(Q) + @test dim(Q) == dim(R) == dim(t) + + N = @constinferred qr_null(t) + @test isunitary(N) + @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t)) + end + end + @testset "LQ decomposition" begin + for T in eltypes, + t in ( + curand(T, W, W), curand(T, W, W)', curand(T, W, V1), curand(T, V1, W)', + CuDiagonalTensorMap(curand(T, reduceddim(V1)), V1), + ) + + L, Q = @constinferred lq_full(t) + @test L * Q ≈ t + @test isunitary(Q) + + L, Q = @constinferred lq_compact(t) + @test L * Q ≈ t + @test isisometric(Q; side = :right) + + L, Q = @constinferred right_orth(t; kind = :lq) + @test L * Q ≈ t + @test isisometric(Q; side = :right) + + Nᴴ = @constinferred lq_null(t) + @test isisometric(Nᴴ; side = :right) + @test norm(t * Nᴴ') ≈ 0 atol = 100 * eps(norm(t)) + end + + for T in eltypes + # empty tensor + t = curand(T, zero(V1), V1 ⊗ V2) + + L, Q = @constinferred lq_full(t) + @test L * Q ≈ t + @test isunitary(Q) + @test dim(L) == dim(t) == 0 + + L, Q = @constinferred lq_compact(t) + @test L * Q ≈ t + @test isisometric(Q; side = :right) + @test dim(Q) == dim(L) == dim(t) + + L, Q = @constinferred right_orth(t; kind = :lq) + @test L * Q ≈ t + @test isisometric(Q; side = :right) + @test dim(Q) == dim(L) == dim(t) + + Nᴴ = @constinferred lq_null(t) + @test isunitary(Nᴴ) + @test norm(t * Nᴴ') ≈ 0 atol = 100 * eps(norm(t)) + end + end + @testset "Polar decomposition" begin + @testset for T in eltypes, + t in ( + curand(T, W, W), + curand(T, W, W)', + curand(T, W, V1), + curand(T, V1, W)', + CuDiagonalTensorMap(curand(T, reduceddim(V1)), V1), + ) + + @assert domain(t) ≾ codomain(t) + w, p = @constinferred left_polar(t) + @test w * p ≈ t + @test isisometric(w) + # broken for T <: Complex + @test isposdef(p) + + w, p = @constinferred left_orth(t; kind = :polar) + @test w * p ≈ t + @test isisometric(w) + end + + @testset for T in eltypes, + t in ( + curand(T, W, W), curand(T, W, W)', curand(T, V1, 
W), curand(T, W, V1)', + CuDiagonalTensorMap(curand(T, reduceddim(V1)), V1), + ) + + @assert codomain(t) ≾ domain(t) + p, wᴴ = @constinferred right_polar(t) + @test p * wᴴ ≈ t + @test isisometric(wᴴ; side = :right) + @test isposdef(p) + + p, wᴴ = @constinferred right_orth(t; kind = :polar) + @test p * wᴴ ≈ t + @test isisometric(wᴴ; side = :right) + end + end + @testset "SVD" begin + for T in eltypes, + t in ( + curand(T, W, W), curand(T, W, W)', + curand(T, W, V1), curand(T, V1, W), + curand(T, W, V1)', curand(T, V1, W)', + CuDiagonalTensorMap(curand(T, reduceddim(V1)), V1), + ) + + u, s, vᴴ = @constinferred svd_full(t) + @test u * s * vᴴ ≈ t + @test isunitary(u) + @test isunitary(vᴴ) + + u, s, vᴴ = @constinferred svd_compact(t) + @test u * s * vᴴ ≈ t + @test isisometric(u) + @test isposdef(s) + @test isisometric(vᴴ; side = :right) + + s′ = LinearAlgebra.diag(s) + for (c, b) in LinearAlgebra.svdvals(t) + @test b ≈ s′[c] + end + + v, c = @constinferred left_orth(t; kind = :svd) + @test v * c ≈ t + @test isisometric(v) + + N = @constinferred left_null(t; kind = :svd) + @test isisometric(N) + @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t)) + + Nᴴ = @constinferred right_null(t; kind = :svd) + @test isisometric(Nᴴ; side = :right) + @test norm(t * Nᴴ') ≈ 0 atol = 100 * eps(norm(t)) + end + + # empty tensor + for T in eltypes, t in (curand(T, W, zero(V1)), curand(T, zero(V1), W)) + U, S, Vᴴ = @constinferred svd_full(t) + @test U * S * Vᴴ ≈ t + @test isunitary(U) + @test isunitary(Vᴴ) + + U, S, Vᴴ = @constinferred svd_compact(t) + @test U * S * Vᴴ ≈ t + @test dim(U) == dim(S) == dim(Vᴴ) == dim(t) == 0 + end + end + + @testset "truncated SVD" begin + for T in eltypes, + t in ( + curandn(T, W, W), curandn(T, W, W)', + curandn(T, W, V1), curandn(T, V1, W), + curandn(T, W, V1)', curandn(T, V1, W)', + CuDiagonalTensorMap(curandn(T, reduceddim(V1)), V1), + ) + + @constinferred normalize!(t) + + U, S, Vᴴ = @constinferred svd_trunc(t; trunc = notrunc()) + @test U * S * Vᴴ ≈ t + 
@test isisometric(U) + @test isisometric(Vᴴ; side = :right) + + trunc = truncrank(dim(domain(S)) ÷ 2) + CUDA.@allowscalar begin + U1, S1, Vᴴ1 = @constinferred svd_trunc(t; trunc) + end + @test t * Vᴴ1' ≈ U1 * S1 + @test isisometric(U1) + @test isisometric(Vᴴ1; side = :right) + @test dim(domain(S1)) <= trunc.howmany + + λ = minimum(minimum, values(LinearAlgebra.diag(S1))) + trunc = trunctol(; atol = λ - 10eps(λ)) + CUDA.@allowscalar begin + U2, S2, Vᴴ2 = @constinferred svd_trunc(t; trunc) + end + @test t * Vᴴ2' ≈ U2 * S2 + @test isisometric(U2) + @test isisometric(Vᴴ2; side = :right) + @test minimum(minimum, values(LinearAlgebra.diag(S1))) >= λ + @test U2 ≈ U1 + @test S2 ≈ S1 + @test Vᴴ2 ≈ Vᴴ1 + + trunc = truncspace(space(S2, 1)) + CUDA.@allowscalar begin + U3, S3, Vᴴ3 = @constinferred svd_trunc(t; trunc) + end + @test t * Vᴴ3' ≈ U3 * S3 + @test isisometric(U3) + @test isisometric(Vᴴ3; side = :right) + @test space(S3, 1) ≾ space(S2, 1) + + trunc = truncerror(; atol = 0.5) + CUDA.@allowscalar begin + U4, S4, Vᴴ4 = @constinferred svd_trunc(t; trunc) + end + @test t * Vᴴ4' ≈ U4 * S4 + @test isisometric(U4) + @test isisometric(Vᴴ4; side = :right) + @test norm(t - U4 * S4 * Vᴴ4) <= 0.5 + end + end + + @testset "Eigenvalue decomposition" begin + for T in eltypes, + t in ( + curand(T, V1, V1), + curand(T, W, W), + curand(T, W, W)', + CuDiagonalTensorMap(curand(T, reduceddim(V1)), V1), + ) + + d, v = @constinferred eig_full(t) + @test t * v ≈ v * d + + d′ = LinearAlgebra.diag(d) + for (c, b) in LinearAlgebra.eigvals(t) + @test sort(b; by = abs) ≈ sort(d′[c]; by = abs) + end + + vdv = v' * v + vdv = (vdv + vdv') / 2 + @test @constinferred isposdef(vdv) + t isa CuDiagonalTensorMap || @test !isposdef(t) # unlikely for non-hermitian map + + CUDA.@allowscalar begin # TODO + d, v = @constinferred eig_trunc(t; trunc = truncrank(dim(domain(t)) ÷ 2)) + end + @test t * v ≈ v * d + @test dim(domain(d)) ≤ dim(domain(t)) ÷ 2 + + t2 = (t + t') + D, V = eigen(t2) + @test isisometric(V) + 
D̃, Ṽ = @constinferred eigh_full(t2) + @test D ≈ D̃ + @test V ≈ Ṽ + λ = minimum( + minimum(real(LinearAlgebra.diag(b))) + for (c, b) in blocks(D) + ) + @test cond(Ṽ) ≈ one(real(T)) + @test isposdef(t2) == isposdef(λ) + @test isposdef(t2 - λ * one(t2) + 0.1 * one(t2)) + @test !isposdef(t2 - λ * one(t2) - 0.1 * one(t2)) + + add!(t, t') + + d, v = @constinferred eigh_full(t) + @test t * v ≈ v * d + @test isunitary(v) + + λ = minimum(minimum(real(LinearAlgebra.diag(b))) for (c, b) in blocks(d)) + @test cond(v) ≈ one(real(T)) + @test isposdef(t) == isposdef(λ) + @test isposdef(t - λ * one(t) + 0.1 * one(t)) + @test !isposdef(t - λ * one(t) - 0.1 * one(t)) + + CUDA.@allowscalar begin + d, v = @constinferred eigh_trunc(t; trunc = truncrank(dim(domain(t)) ÷ 2)) + end + @test t * v ≈ v * d + @test dim(domain(d)) ≤ dim(domain(t)) ÷ 2 + end + end + + @testset "Condition number and rank" begin + for T in eltypes, + t in ( + curand(T, W, W), curand(T, W, W)', + curand(T, W, V1), curand(T, V1, W), + curand(T, W, V1)', curand(T, V1, W)', + CuDiagonalTensorMap(curand(T, reduceddim(V1)), V1), + ) + + d1, d2 = dim(codomain(t)), dim(domain(t)) + @test rank(t) == min(d1, d2) + M = left_null(t) + @test @constinferred(rank(M)) + rank(t) == d1 + Mᴴ = right_null(t) + @test rank(Mᴴ) + rank(t) == d2 + end + for T in eltypes + u = unitary(T, V1 ⊗ V2, V1 ⊗ V2) + @test @constinferred(cond(u)) ≈ one(real(T)) + @test @constinferred(rank(u)) == dim(V1 ⊗ V2) + + t = curand(T, zero(V1), W) + @test rank(t) == 0 + t2 = curand(T, zero(V1) * zero(V2), zero(V1) * zero(V2)) + @test rank(t2) == 0 + @test cond(t2) == 0.0 + end + for T in eltypes, t in (curand(T, W, W), curand(T, W, W)') + add!(t, t') + vals = @constinferred LinearAlgebra.eigvals(t) + λmax = maximum(s -> maximum(abs, s), values(vals)) + λmin = minimum(s -> minimum(abs, s), values(vals)) + @test cond(t) ≈ λmax / λmin + end + end + end +end diff --git a/test/cuda/tensors.jl b/test/cuda/tensors.jl new file mode 100644 index 00000000..704b63cd 
--- /dev/null +++ b/test/cuda/tensors.jl @@ -0,0 +1,625 @@ +using Adapt, CUDA, cuTENSOR +ad = adapt(Array) +const CUDAExt = Base.get_extension(TensorKit, :TensorKitCUDAExt) +@assert !isnothing(CUDAExt) +const CuTensorMap = getglobal(CUDAExt, :CuTensorMap) +const CuDiagonalTensorMap = getglobal(CUDAExt, :CuDiagonalTensorMap) + +for V in (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂, VfSU₂) #, VSU₃) + V1, V2, V3, V4, V5 = V + @assert V3 * V4 * V2 ≿ V1' * V5' # necessary for leftorth tests + @assert V3 * V4 ≾ V1' * V2' * V5' # necessary for rightorth tests +end + +spacelist = try + if ENV["CI"] == "true" + println("Detected running on CI") + if Sys.iswindows() + (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂) + elseif Sys.isapple() + (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VfU₁, VfSU₂) #, VSU₃) + else + (Vtr, Vℤ₂, Vfℤ₂, VU₁, VCU₁, VSU₂, VfSU₂) #, VSU₃) + end + else + (Vtr, VU₁, VSU₂, Vfℤ₂) + end +catch + (Vtr, Vℤ₂, Vfℤ₂, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂, VfSU₂) #, VSU₃) +end + +for V in spacelist + I = sectortype(first(V)) + Istr = TensorKit.type_repr(I) + println("---------------------------------------") + println("CUDA Tensors with symmetry: $Istr") + println("---------------------------------------") + @timedtestset "Tensors with symmetry: $Istr" verbose = true begin + V1, V2, V3, V4, V5 = V + @timedtestset "Basic tensor properties" begin + W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 + for T in (Int, Float32, Float64, ComplexF32, ComplexF64) + t = @constinferred CUDA.zeros(T, W) + CUDA.@allowscalar begin + @test @constinferred(hash(t)) == hash(deepcopy(t)) + end + @test scalartype(t) == T + @test norm(t) == 0 + @test codomain(t) == W + @test space(t) == (W ← one(W)) + @test domain(t) == one(W) + @test typeof(t) == TensorMap{T, spacetype(t), 5, 0, CuVector{T, CUDA.DeviceMemory}} + # blocks + bs = @constinferred blocks(t) + (c, b1), state = @constinferred Nothing iterate(bs) + @test c == first(blocksectors(W)) + next = @constinferred Nothing iterate(bs, state) + b2 = @constinferred block(t, 
first(blocksectors(t))) + @test b1 == b2 + @test_broken eltype(bs) === Pair{typeof(c), typeof(b1)} + @test_broken typeof(b1) === TensorKit.blocktype(t) + @test typeof(c) === sectortype(t) + end + end + @timedtestset "Tensor Dict conversion" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for T in (Int, Float32, ComplexF64) + t = @constinferred CUDA.rand(T, W) + d = convert(Dict, t) + @test t == convert(CuTensorMap, d) + end + end + if hasfusiontensor(I) || I == Trivial + @timedtestset "Tensor Array conversion" begin + W1 = V1 ← one(V1) + W2 = one(V2) ← V2 + W3 = V1 ⊗ V2 ← one(V1) + W4 = V1 ← V2 + W5 = one(V1) ← V1 ⊗ V2 + W6 = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for W in (W1, W2, W3, W4, W5, W6) + for T in (Int, Float32, ComplexF64) + if T == Int + t = CuTensorMap{T}(undef, W) + for (_, b) in blocks(t) + CUDA.@allowscalar CUDA.rand!(b, -20:20) + end + else + t = @constinferred CUDA.randn(T, W) + end + #=CUDA.@allowscalar begin + a = @constinferred convert(CuArray, t) + end + b = reshape(a, dim(codomain(W)), dim(domain(W))) + @test t ≈ @constinferred CuTensorMap(a, W) + @test t ≈ @constinferred CuTensorMap(b, W) + @test t === @constinferred CuTensorMap(t.data, W)=# # TODO convert(CuArray broken + end + end + for T in (Int, Float32, ComplexF64) + t = CUDA.randn(T, V1 ⊗ V2 ← zero(V1)) + CUDA.@allowscalar begin + a = convert(CuArray, t) + end + @test norm(a) == 0 + end + end + end + @timedtestset "Basic linear algebra" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for T in (Float32, ComplexF64) + t = @constinferred CUDA.rand(T, W) + @test scalartype(t) == T + @test space(t) == W + @test space(t') == W' + @test dim(t) == dim(space(t)) + @test codomain(t) == codomain(W) + @test domain(t) == domain(W) + # blocks for adjoint + bs = @constinferred blocks(t') + (c, b1), state = @constinferred Nothing iterate(bs) + @test c == first(blocksectors(W')) + next = @constinferred Nothing iterate(bs, state) + b2 = @constinferred block(t', first(blocksectors(t'))) + @test b1 == b2 + @test_broken eltype(bs) === 
Pair{typeof(c), typeof(b1)} + @test_broken typeof(b1) === TensorKit.blocktype(t') + @test typeof(c) === sectortype(t) + # linear algebra + @test isa(@constinferred(norm(t)), real(T)) + @test norm(t)^2 ≈ dot(t, t) + α = rand(T) + @test norm(α * t) ≈ abs(α) * norm(t) + @test norm(t + t, 2) ≈ 2 * norm(t, 2) + @test norm(t + t, 1) ≈ 2 * norm(t, 1) + @test norm(t + t, Inf) ≈ 2 * norm(t, Inf) + p = 3 * rand(Float64) + CUDA.@allowscalar begin + @test norm(t + t, p) ≈ 2 * norm(t, p) + end + @test norm(t) ≈ norm(t') + + t2 = @constinferred rand!(similar(t)) + β = rand(T) + #@test @constinferred(dot(β * t2, α * t)) ≈ conj(β) * α * conj(dot(t, t2)) # broken for Irrep[CU₁] + @test dot(β * t2, α * t) ≈ conj(β) * α * conj(dot(t, t2)) + @test dot(t2, t) ≈ conj(dot(t, t2)) + @test dot(t2, t) ≈ conj(dot(t2', t')) + @test dot(t2, t) ≈ dot(t', t2') + + i1 = @constinferred(isomorphism(T, V1 ⊗ V2, V2 ⊗ V1)) + i2 = @constinferred(isomorphism(CuVector{T}, V2 ⊗ V1, V1 ⊗ V2)) + CUDA.@allowscalar begin + #@test i1 * i2 == @constinferred(id(T, V1 ⊗ V2)) # TODO + #@test i2 * i1 == @constinferred(id(CuVector{T}, V2 ⊗ V1)) # TODO + w = @constinferred(isometry(T, V1 ⊗ (oneunit(V1) ⊕ oneunit(V1)), V1)) + @test dim(w) == 2 * dim(V1 ← V1) + @test w' * w == id(CuVector{T}, V1) + @test w * w' == (w * w')^2 + end + end + end + @timedtestset "Trivial space insertion and removal" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for T in (Float32, ComplexF64) + t = @constinferred CUDA.rand(T, W) + t2 = @constinferred insertleftunit(t) + @test t2 == @constinferred insertrightunit(t) + @test numind(t2) == numind(t) + 1 + @test space(t2) == insertleftunit(space(t)) + @test scalartype(t2) === T + @test t.data === t2.data + @test @constinferred(removeunit(t2, $(numind(t2)))) == t + t3 = @constinferred insertleftunit(t; copy = true) + @test t3 == @constinferred insertrightunit(t; copy = true) + @test t.data !== t3.data + for (c, b) in blocks(t) + @test b == block(t3, c) + end + @test @constinferred(removeunit(t3, 
$(numind(t3)))) == t + t4 = @constinferred insertrightunit(t, 3; dual = true) + @test numin(t4) == numin(t) && numout(t4) == numout(t) + 1 + for (c, b) in blocks(t) + @test b == block(t4, c) + end + @test @constinferred(removeunit(t4, 4)) == t + t5 = @constinferred insertleftunit(t, 4; dual = true) + @test numin(t5) == numin(t) + 1 && numout(t5) == numout(t) + for (c, b) in blocks(t) + @test b == block(t5, c) + end + @test @constinferred(removeunit(t5, 4)) == t + end + end + if hasfusiontensor(I) + #=@timedtestset "Basic linear algebra: test via conversion" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + for T in (Float32, ComplexF64) + t = CUDA.rand(T, W) + t2 = @constinferred CUDA.rand!(similar(t)) + @test norm(t, 2) ≈ norm(convert(CuArray, t), 2) + @test dot(t2, t) ≈ dot(convert(CuArray, t2), convert(CuArray, t)) + α = rand(T) + @test convert(CuArray, α * t) ≈ α * convert(CuArray, t) + @test convert(CuArray, t + t) ≈ 2 * convert(CuArray, t) + end + end + @timedtestset "Real and imaginary parts" begin + W = V1 ⊗ V2 + for T in (Float64, ComplexF64, ComplexF32) + t = @constinferred CUDA.randn(T, W, W) + + tr = @constinferred real(t) + @test scalartype(tr) <: Real + @test real(convert(CuArray, t)) == convert(CuArray, tr) + + ti = @constinferred imag(t) + @test scalartype(ti) <: Real + @test imag(convert(CuArray, t)) == convert(CuArray, ti) + + tc = @inferred complex(t) + @test scalartype(tc) <: Complex + @test complex(convert(CuArray, t)) == convert(CuArray, tc) + + tc2 = @inferred complex(tr, ti) + @test tc2 ≈ tc + end + end=# # TODO convert(CuArray is broken + end + @timedtestset "Tensor conversion" begin + W = V1 ⊗ V2 + t = @constinferred CUDA.randn(W ← W) + @test typeof(convert(CuTensorMap, t')) == typeof(t) + tc = complex(t) + @test convert(typeof(tc), t) == tc + @test typeof(convert(typeof(tc), t)) == typeof(tc) + @test typeof(convert(typeof(tc), t')) == typeof(tc) + @test Base.promote_typeof(t, tc) == typeof(tc) + @test Base.promote_typeof(tc, t) == typeof(tc + t) + end 
+ #=@timedtestset "diag/diagm" begin + W = V1 ⊗ V2 ⊗ V3 ← V4 ⊗ V5 + t = CUDA.randn(ComplexF64, W) + d = LinearAlgebra.diag(t) + # TODO find a way to use CUDA here + D = LinearAlgebra.diagm(codomain(t), domain(t), d) + @test LinearAlgebra.isdiag(D) + @test LinearAlgebra.diag(D) == d + end=# + @timedtestset "Permutations: test via inner product invariance" begin + W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 + t = CUDA.rand(ComplexF64, W) + t′ = CUDA.randn!(similar(t)) + for k in 0:5 + for p in permutations(1:5) + p1 = ntuple(n -> p[n], k) + p2 = ntuple(n -> p[k + n], 5 - k) + CUDA.@allowscalar begin + t2 = @constinferred permute(t, (p1, p2)) + t2 = permute(t, (p1, p2)) + @test norm(t2) ≈ norm(t) + t2′ = permute(t′, (p1, p2)) + @test dot(t2′, t2) ≈ dot(t′, t) ≈ dot(transpose(t2′), transpose(t2)) + end + end + + CUDA.@allowscalar begin + t3 = @constinferred repartition(t, $k) + t3 = repartition(t, k) + @test norm(t3) ≈ norm(t) + t3′ = @constinferred repartition!(similar(t3), t′) + @test norm(t3′) ≈ norm(t′) + @test dot(t′, t) ≈ dot(t3′, t3) + end + end + end + #=if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Permutations: test via conversion" begin + W = V1 ⊗ V2 ⊗ V3 ⊗ V4 ⊗ V5 + t = CUDA.rand(ComplexF64, W) + a = convert(CuArray, t) + for k in 0:5 + for p in permutations(1:5) + p1 = ntuple(n -> p[n], k) + p2 = ntuple(n -> p[k + n], 5 - k) + t2 = permute(t, (p1, p2)) + a2 = convert(CuArray, t2) + @test a2 ≈ permutedims(a, (p1..., p2...)) + @test convert(CuArray, transpose(t2)) ≈ + permutedims(a2, (5, 4, 3, 2, 1)) + end + + t3 = repartition(t, k) + a3 = convert(CuArray, t3) + @test a3 ≈ permutedims( + a, (ntuple(identity, k)..., reverse(ntuple(i -> i + k, 5 - k))...) 
+ ) + end + end + end=# # convert(CuArray broken + @timedtestset "Full trace: test self-consistency" begin + t = CUDA.rand(ComplexF64, V1 ⊗ V2' ⊗ V2 ⊗ V1') + CUDA.@allowscalar begin + t2 = permute(t, ((1, 2), (4, 3))) + s = @constinferred tr(t2) + @test conj(s) ≈ tr(t2') + if !isdual(V1) + t2 = twist!(t2, 1) + end + if isdual(V2) + t2 = twist!(t2, 2) + end + ss = tr(t2) + @tensor s2 = t[a, b, b, a] + @tensor t3[a, b] := t[a, c, c, b] + @tensor s3 = t3[a, a] + end + @test ss ≈ s2 + @test ss ≈ s3 + end + @timedtestset "Partial trace: test self-consistency" begin + t = CUDA.rand(ComplexF64, V1 ⊗ V2' ⊗ V3 ⊗ V2 ⊗ V1' ⊗ V3') + @tensor t2[a, b] := t[c, d, b, d, c, a] + @tensor t4[a, b, c, d] := t[d, e, b, e, c, a] + @tensor t5[a, b] := t4[a, b, c, c] + @test t2 ≈ t5 + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Trace: test via conversion" begin + t = CUDA.rand(ComplexF64, V1 ⊗ V2' ⊗ V3 ⊗ V2 ⊗ V1' ⊗ V3') + CUDA.@allowscalar begin + @tensor t2[a, b] := t[c, d, b, d, c, a] + @tensor t3[a, b] := ad(t)[c, d, b, d, c, a] + end + @test t3 ≈ ad(t2) + end + end + @timedtestset "Trace and contraction" begin + t1 = CUDA.rand(ComplexF64, V1 ⊗ V2 ⊗ V3) + t2 = CUDA.rand(ComplexF64, V2' ⊗ V4 ⊗ V1') + CUDA.@allowscalar begin + t3 = t1 ⊗ t2 + @tensor ta[a, b] := t1[x, y, a] * t2[y, b, x] + @tensor tb[a, b] := t3[x, y, a, y, b, x] + end + @test ta ≈ tb + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Tensor contraction: test via conversion" begin + A1 = CUDA.randn(ComplexF64, V1' * V2', V3') + A2 = CUDA.randn(ComplexF64, V3 * V4, V5) + rhoL = CUDA.randn(ComplexF64, V1, V1) + rhoR = CUDA.randn(ComplexF64, V5, V5)' # test adjoint tensor + H = CUDA.randn(ComplexF64, V2 * V4, V2 * V4) + CUDA.@allowscalar begin + @tensor HrA12[a, s1, s2, c] := rhoL[a, a'] * conj(A1[a', t1, b]) * + A2[b, t2, c'] * rhoR[c', c] * + H[s1, s2, t1, t2] + + @tensor HrA12array[a, s1, s2, c] := ad(rhoL)[a, a'] * + conj(ad(A1)[a', t1, b]) * + ad(A2)[b, t2, 
c'] * + ad(rhoR)[c', c] * + ad(H)[s1, s2, t1, t2] + end + @test HrA12array ≈ ad(HrA12) + end + end + @timedtestset "Index flipping: test flipping inverse" begin + t = CUDA.rand(ComplexF64, V1 ⊗ V1' ← V1' ⊗ V1) + for i in 1:4 + CUDA.@allowscalar begin + @test t ≈ flip(flip(t, i), i; inv = true) + @test t ≈ flip(flip(t, i; inv = true), i) + end + end + end + #=@timedtestset "Index flipping: test via explicit flip" begin + t = CUDA.rand(ComplexF64, V1 ⊗ V1' ← V1' ⊗ V1) + F1 = unitary(flip(V1), V1) + + CUDA.@allowscalar begin + @tensor tf[a, b; c, d] := F1[a, a'] * t[a', b; c, d] + @test flip(t, 1) ≈ tf + @tensor tf[a, b; c, d] := conj(F1[b, b']) * t[a, b'; c, d] + @test twist!(flip(t, 2), 2) ≈ tf + @tensor tf[a, b; c, d] := F1[c, c'] * t[a, b; c', d] + @test flip(t, 3) ≈ tf + @tensor tf[a, b; c, d] := conj(F1[d, d']) * t[a, b; c, d'] + @test twist!(flip(t, 4), 4) ≈ tf + end + end=# # TODO + @timedtestset "Index flipping: test via contraction" begin + t1 = CUDA.rand(ComplexF64, V1 ⊗ V2 ⊗ V3 ← V4) + t2 = CUDA.rand(ComplexF64, V2' ⊗ V5 ← V4' ⊗ V1) + CUDA.@allowscalar begin + @tensor ta[a, b] := t1[x, y, a, z] * t2[y, b, z, x] + @tensor tb[a, b] := flip(t1, 1)[x, y, a, z] * flip(t2, 4)[y, b, z, x] + @test ta ≈ tb + @tensor tb[a, b] := flip(t1, (2, 4))[x, y, a, z] * flip(t2, (1, 3))[y, b, z, x] + @test ta ≈ tb + @tensor tb[a, b] := flip(t1, (1, 2, 4))[x, y, a, z] * flip(t2, (1, 3, 4))[y, b, z, x] + @tensor tb[a, b] := flip(t1, (1, 3))[x, y, a, z] * flip(t2, (2, 4))[y, b, z, x] + @test flip(ta, (1, 2)) ≈ tb + end + end + @timedtestset "Multiplication of isometries: test properties" begin + W2 = V4 ⊗ V5 + W1 = W2 ⊗ (oneunit(V1) ⊕ oneunit(V1)) + for T in (Float64, ComplexF64) + t1 = randisometry(T, CuMatrix{T}, W1, W2) + t2 = randisometry(T, CuMatrix{T}, W2 ← W2) + @test isisometric(t1) + @test isunitary(t2) + P = t1 * t1' + @test P * P ≈ P + end + end + @timedtestset "Multiplication and inverse: test compatibility" begin + W1 = V1 ⊗ V2 ⊗ V3 + W2 = V4 ⊗ V5 + for T in 
(Float64, ComplexF64) + t1 = CUDA.rand(T, W1, W1) + t2 = CUDA.rand(T, W2, W2) + t = CUDA.rand(T, W1, W2) + @test t1 * (t1 \ t) ≈ t + @test (t / t2) * t2 ≈ t + @test t1 \ one(t1) ≈ inv(t1) + # @test one(t1) / t1 ≈ pinv(t1) # pinv not available in CUDA + @test_throws SpaceMismatch inv(t) + @test_throws SpaceMismatch t2 \ t + @test_throws SpaceMismatch t / t1 + # pinv not available in CUDA + # tp = pinv(t) * t + # @test tp ≈ tp * tp + end + end + #=if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Multiplication and inverse: test via conversion" begin + W1 = V1 ⊗ V2 ⊗ V3 + W2 = V4 ⊗ V5 + for T in (Float32, Float64, ComplexF32, ComplexF64) + t1 = CUDA.rand(T, W1, W1) + t2 = CUDA.rand(T, W2, W2) + t = CUDA.rand(T, W1, W2) + d1 = dim(W1) + d2 = dim(W2) + At1 = reshape(convert(CuArray, t1), d1, d1) + At2 = reshape(convert(CuArray, t2), d2, d2) + At = reshape(convert(CuArray, t), d1, d2) + @test reshape(convert(CuArray, t1 * t), d1, d2) ≈ At1 * At + @test reshape(convert(CuArray, t1' * t), d1, d2) ≈ At1' * At + @test reshape(convert(CuArray, t2 * t'), d2, d1) ≈ At2 * At' + @test reshape(convert(CuArray, t2' * t'), d2, d1) ≈ At2' * At' + + @test reshape(convert(CuArray, inv(t1)), d1, d1) ≈ inv(At1) + @test reshape(convert(CuArray, pinv(t)), d2, d1) ≈ pinv(At) + + if T == Float32 || T == ComplexF32 + continue + end + + @test reshape(convert(CuArray, t1 \ t), d1, d2) ≈ At1 \ At + @test reshape(convert(CuArray, t1' \ t), d1, d2) ≈ At1' \ At + @test reshape(convert(CuArray, t2 \ t'), d2, d1) ≈ At2 \ At' + @test reshape(convert(CuArray, t2' \ t'), d2, d1) ≈ At2' \ At' + + @test reshape(convert(CuArray, t2 / t), d2, d1) ≈ At2 / At + @test reshape(convert(CuArray, t2' / t), d2, d1) ≈ At2' / At + @test reshape(convert(CuArray, t1 / t'), d1, d2) ≈ At1 / At' + @test reshape(convert(CuArray, t1' / t'), d1, d2) ≈ At1' / At' + end + end + end=# # convert(CuArray broken + @timedtestset "Tensor truncation" begin + for T in (Float32, ComplexF64) + for p in (1, 2, 3, 
Inf) + # Test both a normal tensor and an adjoint one. + ts = ( + CUDA.randn(T, V1 ⊗ V2 ⊗ V3, V4 ⊗ V5), + CUDA.randn(T, V4 ⊗ V5, V1 ⊗ V2 ⊗ V3)', + ) + for t in ts + U₀, S₀, V₀, = tsvd(t) + t = rmul!(t, 1 / norm(S₀, p)) + # Probably shouldn't allow truncerr and truncdim, as these require scalar indexing? + CUDA.@allowscalar begin + U, S, V = tsvd(t; trunc = truncbelow(1 / dim(domain(S₀))), p = p) + U′, S′, V′ = tsvd(t; trunc = truncspace(space(S, 1)), p = p) + end + @test (U, S, V) == (U′, S′, V′) + end + end + end + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Tensor functions" begin + W = V1 ⊗ V2 + for T in (Float64, ComplexF64) + t = CUDA.randn(T, W, W) + s = dim(W) + CUDA.@allowscalar begin + #=@test (@constinferred sqrt(t))^2 ≈ t + @test ad(sqrt(t)) ≈ sqrt(ad(t)) + + expt = @constinferred exp(t) + @test ad(expt) ≈ exp(ad(t)) + + @test exp(@constinferred log(expt)) ≈ expt + @test ad(log(expt)) ≈ log(ad(expt)) + + @test (@constinferred cos(t))^2 + (@constinferred sin(t))^2 ≈ + id(storagetype(t), W) + @test (@constinferred tan(t)) ≈ sin(t) / cos(t) + @test (@constinferred cot(t)) ≈ cos(t) / sin(t) + @test (@constinferred cosh(t))^2 - (@constinferred sinh(t))^2 ≈ + id(storagetype(t), W) + @test (@constinferred tanh(t)) ≈ sinh(t) / cosh(t) + @test (@constinferred coth(t)) ≈ cosh(t) / sinh(t) + + t1 = sin(t) + @test sin(@constinferred asin(t1)) ≈ t1 + t2 = cos(t) + @test cos(@constinferred acos(t2)) ≈ t2 + t3 = sinh(t) + @test sinh(@constinferred asinh(t3)) ≈ t3 + t4 = cosh(t) + @test cosh(@constinferred acosh(t4)) ≈ t4 + t5 = tan(t) + @test tan(@constinferred atan(t5)) ≈ t5 + t6 = cot(t) + @test cot(@constinferred acot(t6)) ≈ t6 + t7 = tanh(t) + @test tanh(@constinferred atanh(t7)) ≈ t7 + t8 = coth(t) + @test coth(@constinferred acoth(t8)) ≈ t8 + =# #exp not supported for CuArray + end + end + end + end + # Sylvester not defined for CUDA + # @timedtestset "Sylvester equation" begin + # for T in (Float32, ComplexF64) + # tA = 
CUDA.rand(T, V1 ⊗ V3, V1 ⊗ V3) + # tB = CUDA.rand(T, V2 ⊗ V4, V2 ⊗ V4) + # tA = 3 // 2 * leftorth(tA; alg=Polar())[1] + # tB = 1 // 5 * leftorth(tB; alg=Polar())[1] + # tC = CUDA.rand(T, V1 ⊗ V3, V2 ⊗ V4) + # t = @constinferred sylvester(tA, tB, tC) + # @test codomain(t) == V1 ⊗ V3 + # @test domain(t) == V2 ⊗ V4 + # @test norm(tA * t + t * tB + tC) < + # (norm(tA) + norm(tB) + norm(tC)) * eps(real(T))^(2 / 3) + # if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + # matrix(x) = reshape(convert(Array, x), dim(codomain(x)), dim(domain(x))) + # @test matrix(t) ≈ sylvester(matrix(tA), matrix(tB), matrix(tC)) + # end + # end + # end + # + # TODO + @timedtestset "Tensor product: test via norm preservation" begin + for T in (Float32, ComplexF64) + t1 = CUDA.rand(T, V2 ⊗ V3 ⊗ V1, V1 ⊗ V2) + t2 = CUDA.rand(T, V2 ⊗ V1 ⊗ V3, V1 ⊗ V1) + CUDA.@allowscalar begin + t = @constinferred (t1 ⊗ t2) + end + @test norm(t) ≈ norm(t1) * norm(t2) + end + end + if BraidingStyle(I) isa Bosonic && hasfusiontensor(I) + @timedtestset "Tensor product: test via conversion" begin + for T in (Float32, ComplexF64) + t1 = CUDA.rand(T, V2 ⊗ V3 ⊗ V1, V1) + t2 = CUDA.rand(T, V2 ⊗ V1 ⊗ V3, V2) + d1 = dim(codomain(t1)) + d2 = dim(codomain(t2)) + d3 = dim(domain(t1)) + d4 = dim(domain(t2)) + CUDA.@allowscalar begin + t = @constinferred (t1 ⊗ t2) + At = ad(t) + @test ad(t) ≈ ad(t1) ⊗ ad(t2) + end + end + end + end + @timedtestset "Tensor product: test via tensor contraction" begin + for T in (Float32, ComplexF64) + t1 = CUDA.rand(T, V2 ⊗ V3 ⊗ V1) + t2 = CUDA.rand(T, V2 ⊗ V1 ⊗ V3) + CUDA.@allowscalar begin + t = @constinferred (t1 ⊗ t2) + @tensor t′[1, 2, 3, 4, 5, 6] := t1[1, 2, 3] * t2[4, 5, 6] + # @test t ≈ t′ # TODO broken for symmetry: Irrep[ℤ₃] + end + end + end + end + TensorKit.empty_globalcaches!() +end + +@timedtestset "Deligne tensor product: test via conversion" begin + Vlists1 = (Vtr,) # VSU₂) + Vlists2 = (Vtr,) # Vℤ₂) + @testset for Vlist1 in Vlists1, Vlist2 in Vlists2 + V1, V2, V3, V4, V5 = 
Vlist1 + W1, W2, W3, W4, W5 = Vlist2 + for T in (Float32, ComplexF64) + t1 = CUDA.rand(T, V1 ⊗ V2, V3' ⊗ V4) + t2 = CUDA.rand(T, W2, W1 ⊗ W1') + CUDA.@allowscalar begin + t = @constinferred (t1 ⊠ t2) + end + d1 = dim(codomain(t1)) + d2 = dim(codomain(t2)) + d3 = dim(domain(t1)) + d4 = dim(domain(t2)) + CUDA.@allowscalar begin + @test ad(t1) ⊠ ad(t2) ≈ ad(t1 ⊠ t2) + end + end + end +end diff --git a/test/factorizations.jl b/test/factorizations.jl index 294cbdf6..aa03ffa4 100644 --- a/test/factorizations.jl +++ b/test/factorizations.jl @@ -40,18 +40,18 @@ for V in spacelist Q, R = @constinferred qr_compact(t) @test Q * R ≈ t - @test isisometry(Q) + @test isisometric(Q) Q, R = @constinferred left_orth(t; kind = :qr) @test Q * R ≈ t - @test isisometry(Q) + @test isisometric(Q) N = @constinferred qr_null(t) - @test isisometry(N) + @test isisometric(N) @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t)) N = @constinferred left_null(t; kind = :qr) - @test isisometry(N) + @test isisometric(N) @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t)) end @@ -66,12 +66,12 @@ for V in spacelist Q, R = @constinferred qr_compact(t) @test Q * R ≈ t - @test isisometry(Q) + @test isisometric(Q) @test dim(Q) == dim(R) == dim(t) Q, R = @constinferred left_orth(t; kind = :qr) @test Q * R ≈ t - @test isisometry(Q) + @test isisometric(Q) @test dim(Q) == dim(R) == dim(t) N = @constinferred qr_null(t) @@ -93,14 +93,14 @@ for V in spacelist L, Q = @constinferred lq_compact(t) @test L * Q ≈ t - @test isisometry(Q; side = :right) + @test isisometric(Q; side = :right) L, Q = @constinferred right_orth(t; kind = :lq) @test L * Q ≈ t - @test isisometry(Q; side = :right) + @test isisometric(Q; side = :right) Nᴴ = @constinferred lq_null(t) - @test isisometry(Nᴴ; side = :right) + @test isisometric(Nᴴ; side = :right) @test norm(t * Nᴴ') ≈ 0 atol = 100 * eps(norm(t)) end @@ -115,12 +115,12 @@ for V in spacelist L, Q = @constinferred lq_compact(t) @test L * Q ≈ t - @test isisometry(Q; side = :right) + @test 
isisometric(Q; side = :right) @test dim(Q) == dim(L) == dim(t) L, Q = @constinferred right_orth(t; kind = :lq) @test L * Q ≈ t - @test isisometry(Q; side = :right) + @test isisometric(Q; side = :right) @test dim(Q) == dim(L) == dim(t) Nᴴ = @constinferred lq_null(t) @@ -139,12 +139,12 @@ for V in spacelist @assert domain(t) ≾ codomain(t) w, p = @constinferred left_polar(t) @test w * p ≈ t - @test isisometry(w) + @test isisometric(w) @test isposdef(p) w, p = @constinferred left_orth(t; kind = :polar) @test w * p ≈ t - @test isisometry(w) + @test isisometric(w) end for T in eltypes, @@ -153,12 +153,12 @@ for V in spacelist @assert codomain(t) ≾ domain(t) p, wᴴ = @constinferred right_polar(t) @test p * wᴴ ≈ t - @test isisometry(wᴴ; side = :right) + @test isisometric(wᴴ; side = :right) @test isposdef(p) p, wᴴ = @constinferred right_orth(t; kind = :polar) @test p * wᴴ ≈ t - @test isisometry(wᴴ; side = :right) + @test isisometric(wᴴ; side = :right) end end @@ -178,9 +178,9 @@ for V in spacelist u, s, vᴴ = @constinferred svd_compact(t) @test u * s * vᴴ ≈ t - @test isisometry(u) + @test isisometric(u) @test isposdef(s) - @test isisometry(vᴴ; side = :right) + @test isisometric(vᴴ; side = :right) s′ = LinearAlgebra.diag(s) for (c, b) in LinearAlgebra.svdvals(t) @@ -189,14 +189,14 @@ for V in spacelist v, c = @constinferred left_orth(t; kind = :svd) @test v * c ≈ t - @test isisometry(v) + @test isisometric(v) N = @constinferred left_null(t; kind = :svd) - @test isisometry(N) + @test isisometric(N) @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t)) Nᴴ = @constinferred right_null(t; kind = :svd) - @test isisometry(Nᴴ; side = :right) + @test isisometric(Nᴴ; side = :right) @test norm(t * Nᴴ') ≈ 0 atol = 100 * eps(norm(t)) end @@ -226,22 +226,22 @@ for V in spacelist U, S, Vᴴ = @constinferred svd_trunc(t; trunc = notrunc()) @test U * S * Vᴴ ≈ t - @test isisometry(U) - @test isisometry(Vᴴ; side = :right) + @test isisometric(U) + @test isisometric(Vᴴ; side = :right) trunc = 
truncrank(dim(domain(S)) ÷ 2) U1, S1, Vᴴ1 = @constinferred svd_trunc(t; trunc) @test t * Vᴴ1' ≈ U1 * S1 - @test isisometry(U1) - @test isisometry(Vᴴ1; side = :right) + @test isisometric(U1) + @test isisometric(Vᴴ1; side = :right) @test dim(domain(S1)) <= trunc.howmany λ = minimum(minimum, values(LinearAlgebra.diag(S1))) trunc = trunctol(; atol = λ - 10eps(λ)) U2, S2, Vᴴ2 = @constinferred svd_trunc(t; trunc) @test t * Vᴴ2' ≈ U2 * S2 - @test isisometry(U2) - @test isisometry(Vᴴ2; side = :right) + @test isisometric(U2) + @test isisometric(Vᴴ2; side = :right) @test minimum(minimum, values(LinearAlgebra.diag(S1))) >= λ @test U2 ≈ U1 @test S2 ≈ S1 @@ -250,15 +250,15 @@ for V in spacelist trunc = truncspace(space(S2, 1)) U3, S3, Vᴴ3 = @constinferred svd_trunc(t; trunc) @test t * Vᴴ3' ≈ U3 * S3 - @test isisometry(U3) - @test isisometry(Vᴴ3; side = :right) + @test isisometric(U3) + @test isisometric(Vᴴ3; side = :right) @test space(S3, 1) ≾ space(S2, 1) trunc = truncerror(; atol = 0.5) U4, S4, Vᴴ4 = @constinferred svd_trunc(t; trunc) @test t * Vᴴ4' ≈ U4 * S4 - @test isisometry(U4) - @test isisometry(Vᴴ4; side = :right) + @test isisometric(U4) + @test isisometric(Vᴴ4; side = :right) @test norm(t - U4 * S4 * Vᴴ4) <= 0.5 trunc = truncrank(dim(domain(S)) ÷ 2) & trunctol(; atol = λ - 10eps(λ)) @@ -297,7 +297,7 @@ for V in spacelist t2 = (t + t') D, V = eigen(t2) - @test isisometry(V) + @test isisometric(V) D̃, Ṽ = @constinferred eigh_full(t2) @test D ≈ D̃ @test V ≈ Ṽ diff --git a/test/runtests.jl b/test/runtests.jl index 3ae62e9b..3adeafbe 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -16,10 +16,6 @@ const TK = TensorKit Random.seed!(1234) -# don't run all tests on GPU, only the GPU -# specific ones -is_buildkite = get(ENV, "BUILDKITE", "false") == "true" - smallset(::Type{I}) where {I <: Sector} = take(values(I), 5) function smallset(::Type{ProductSector{Tuple{I1, I2}}}) where {I1, I2} iter = product(smallset(I1), smallset(I2)) @@ -137,6 +133,9 @@ Vfib = ( 
Vect[FibonacciAnyon](:I => 2, :τ => 2), ) +# don't run all tests on GPU, only the GPU +# specific ones +is_buildkite = get(ENV, "BUILDKITE", "false") == "true" if !is_buildkite Ti = time() @time include("fusiontrees.jl") @@ -162,12 +161,18 @@ if !is_buildkite end else Ti = time() - #=using CUDA + println("---- BEGINNING GPU TESTS ----") + flush(stdout) + using CUDA, cuTENSOR if CUDA.functional() + @time include("cuda/tensors.jl") + @time include("cuda/factorizations.jl") end using AMDGPU if AMDGPU.functional() - end=# + @time include("amd/tensors.jl") + @time include("amd/factorizations.jl") + end Tf = time() printstyled( "Finished all GPU tests in ", diff --git a/test/tensors.jl b/test/tensors.jl index c0c1d90b..d4197323 100644 --- a/test/tensors.jl +++ b/test/tensors.jl @@ -355,7 +355,7 @@ for V in spacelist for T in (Float64, ComplexF64) t1 = randisometry(T, W1, W2) t2 = randisometry(T, W2 ← W2) - @test isisometry(t1) + @test isisometric(t1) @test isunitary(t2) P = t1 * t1' @test P * P ≈ P