Commit 25a9337

Start on GPU extensions
1 parent 4ab4707 commit 25a9337

File tree

15 files changed: +1916 −33 lines

Project.toml

Lines changed: 21 additions & 2 deletions

@@ -18,19 +18,33 @@ TupleTools = "9d95972d-f1c8-5527-a6e0-b4b365fa01f6"
 VectorInterface = "409d34a3-91d5-4945-b6ec-7529ddf182d8"
 
 [weakdeps]
+AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
 FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
+cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1"
+
+[sources]
+GPUArrays = {rev = "ksh/more_diag", url = "https://github.com/JuliaGPU/GPUArrays.jl"}
+MatrixAlgebraKit = {rev = "ksh/tk", url = "https://github.com/QuantumKitHub/MatrixAlgebraKit.jl"}
+cuTENSOR = {subdir = "lib/cutensor", url = "https://github.com/JuliaGPU/CUDA.jl"}
 
 [extensions]
+TensorKitAMDGPUExt = "AMDGPU"
+TensorKitCUDAExt = ["CUDA", "cuTENSOR"]
 TensorKitChainRulesCoreExt = "ChainRulesCore"
 TensorKitFiniteDifferencesExt = "FiniteDifferences"
 
 [compat]
+AMDGPU = "2"
+Adapt = "4"
 Aqua = "0.6, 0.7, 0.8"
+CUDA = "5.8.4"
 ChainRulesCore = "1"
 ChainRulesTestUtils = "1"
 Combinatorics = "1"
 FiniteDifferences = "0.12"
+GPUArrays = "11.2.6"
 LRUCache = "1.0.2"
 LinearAlgebra = "1"
 MatrixAlgebraKit = "0.5.0"
@@ -39,26 +53,31 @@ PackageExtensionCompat = "1"
 Random = "1"
 ScopedValues = "1.3.0"
 Strided = "2"
-TensorKitSectors = "0.1.4, 0.2"
+TensorKitSectors = "0.3"
 TensorOperations = "5.1"
 Test = "1"
 TestExtras = "0.2,0.3"
 TupleTools = "1.1"
 VectorInterface = "0.4.8, 0.5"
 Zygote = "0.7"
+cuTENSOR = "2"
 julia = "1.10"
 
 [extras]
+Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
 Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
 ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a"
 Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa"
 FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
+GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 TensorOperations = "6aa20fa7-93e2-5fca-9bc0-fbd0db3c71a2"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 TestExtras = "5ed8adda-3752-4e41-b88a-e8b09835ee3a"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
+cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1"
 
 [targets]
-test = ["Aqua", "Combinatorics", "LinearAlgebra", "TensorOperations", "Test", "TestExtras", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote"]
+test = ["Adapt", "Aqua", "Combinatorics", "CUDA", "cuTENSOR", "GPUArrays", "LinearAlgebra", "TensorOperations", "Test", "TestExtras", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote"]
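
For orientation (not part of the diff): AMDGPU, CUDA and cuTENSOR are declared as weak dependencies, so Julia only compiles and loads the corresponding extension modules once the trigger packages are loaded next to TensorKit, while the [sources] table pins GPUArrays, MatrixAlgebraKit and cuTENSOR to development branches/subdirectories for now. A minimal sketch of how that plays out in a session, assuming a working GPU environment:

using TensorKit            # core package, no GPU code loaded yet
using CUDA, cuTENSOR       # loading both triggers TensorKitCUDAExt
using AMDGPU               # loading this triggers TensorKitAMDGPUExt

# The extension modules are not part of TensorKit's public namespace; for tests
# or direct access they can be looked up explicitly:
cuda_ext = Base.get_extension(TensorKit, :TensorKitCUDAExt)
roc_ext = Base.get_extension(TensorKit, :TensorKitAMDGPUExt)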
Lines changed: 10 additions & 0 deletions

module TensorKitAMDGPUExt

using TensorKit
using TensorKit: SectorDict, tensormaptype
using AMDGPU
using Random

include("roctensormap.jl")

end
Lines changed: 103 additions & 0 deletions

# `TensorMap`s whose block data is stored in `ROCMatrix`es (AMD GPU memory).
const _ROCMatOrDict{I,T} = Union{ROCMatrix{T},SectorDict{I,ROCMatrix{T}}}
const ROCTensorMap{T,S,N₁,N₂,I,A<:_ROCMatOrDict{I,T}} = TensorMap{T,S,N₁,N₂,A}
const ROCTensor{T,S,N,I,A<:_ROCMatOrDict{I,T}} = ROCTensorMap{T,S,N,0,I,A}

function ROCTensorMap{T}(::UndefInitializer, V::TensorMapSpace{S,N₁,N₂}) where {T,S,N₁,N₂}
    A = ROCMatrix{T,AMDGPU.default_memory}
    TT = tensormaptype(S, N₁, N₂, A)
    return TT(undef, codomain(V), domain(V))
end

function ROCTensorMap{T}(::UndefInitializer, codomain::TensorSpace{S},
                         domain::TensorSpace{S}) where {T,S}
    return ROCTensorMap{T}(undef, codomain ← domain)
end
function ROCTensor{T}(::UndefInitializer, V::TensorSpace{S}) where {T,S}
    return ROCTensorMap{T}(undef, V ← one(V))
end

# zeros/ones constructors mirroring the TensorKit counterparts
for (fname, felt) in ((:zeros, :zero), (:ones, :one))
    @eval begin
        function AMDGPU.$fname(codomain::TensorSpace{S},
                               domain::TensorSpace{S}=one(codomain)) where {S<:IndexSpace}
            return AMDGPU.$fname(codomain ← domain)
        end
        function AMDGPU.$fname(::Type{T}, codomain::TensorSpace{S},
                               domain::TensorSpace{S}=one(codomain)) where {T,S<:IndexSpace}
            return AMDGPU.$fname(T, codomain ← domain)
        end
        AMDGPU.$fname(V::TensorMapSpace) = AMDGPU.$fname(Float64, V)
        function AMDGPU.$fname(::Type{T}, V::TensorMapSpace) where {T}
            t = ROCTensorMap{T}(undef, V)
            fill!(t, $felt(T))
            return t
        end
    end
end

for randfun in (:rand, :randn)
    randfun! = Symbol(randfun, :!)
    @eval begin
        # converting `codomain` and `domain` into `HomSpace`
        function AMDGPU.$randfun(codomain::TensorSpace{S},
                                 domain::TensorSpace{S}) where {S<:IndexSpace}
            return AMDGPU.$randfun(codomain ← domain)
        end
        function AMDGPU.$randfun(::Type{T}, codomain::TensorSpace{S},
                                 domain::TensorSpace{S}) where {T,S<:IndexSpace}
            return AMDGPU.$randfun(T, codomain ← domain)
        end
        function AMDGPU.$randfun(rng::Random.AbstractRNG, ::Type{T},
                                 codomain::TensorSpace{S},
                                 domain::TensorSpace{S}) where {T,S<:IndexSpace}
            return AMDGPU.$randfun(rng, T, codomain ← domain)
        end

        # accepting single `TensorSpace`
        AMDGPU.$randfun(codomain::TensorSpace) = AMDGPU.$randfun(codomain ← one(codomain))
        function AMDGPU.$randfun(::Type{T}, codomain::TensorSpace) where {T}
            return AMDGPU.$randfun(T, codomain ← one(codomain))
        end
        function AMDGPU.$randfun(rng::Random.AbstractRNG, ::Type{T},
                                 codomain::TensorSpace) where {T}
            return AMDGPU.$randfun(rng, T, codomain ← one(codomain))
        end

        # filling in default eltype
        AMDGPU.$randfun(V::TensorMapSpace) = AMDGPU.$randfun(Float64, V)
        function AMDGPU.$randfun(rng::Random.AbstractRNG, V::TensorMapSpace)
            return AMDGPU.$randfun(rng, Float64, V)
        end

        # filling in default rng
        function AMDGPU.$randfun(::Type{T}, V::TensorMapSpace) where {T}
            return AMDGPU.$randfun(Random.default_rng(), T, V)
        end

        # implementation
        function AMDGPU.$randfun(rng::Random.AbstractRNG, ::Type{T},
                                 V::TensorMapSpace) where {T}
            t = ROCTensorMap{T}(undef, V)
            AMDGPU.$randfun!(rng, t)
            return t
        end
    end
end

# converters
# ----------
function Base.convert(::Type{ROCTensorMap}, d::Dict{Symbol,Any})
    try
        codomain = eval(Meta.parse(d[:codomain]))
        domain = eval(Meta.parse(d[:domain]))
        data = SectorDict(eval(Meta.parse(c)) => ROCArray(b) for (c, b) in d[:data])
        return TensorMap(data, codomain, domain)
    catch e # sector unknown in TensorKit.jl; user-defined, hopefully accessible in Main
        codomain = Base.eval(Main, Meta.parse(d[:codomain]))
        domain = Base.eval(Main, Meta.parse(d[:domain]))
        data = SectorDict(Base.eval(Main, Meta.parse(c)) => ROCArray(b)
                          for (c, b) in d[:data])
        return TensorMap(data, codomain, domain)
    end
end
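
A hedged usage sketch of the constructors above (assuming a working ROCm device; the `AMDGPU.rand!`/`randn!` methods that actually fill a `ROCTensorMap` are not part of this file):

using TensorKit, AMDGPU

V = ℂ^2 ⊗ ℂ^3                            # a ProductSpace codomain
t = AMDGPU.zeros(Float64, V, ℂ^4)         # ROCTensorMap with ROCMatrix blocks, filled with zeros
u = AMDGPU.ones(ComplexF64, ℂ^2 ← ℂ^2)    # HomSpace form
r = AMDGPU.rand(ℂ^2 ← ℂ^2)                # Float64 + default rng; relies on AMDGPU.rand!(rng, t) defined elsewhere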
Lines changed: 105 additions & 0 deletions

module TensorKitCUDAExt

using CUDA, CUDA.CUBLAS, LinearAlgebra
using CUDA: @allowscalar
using cuTENSOR: cuTENSOR
import CUDA: rand as curand, rand! as curand!, randn as curandn, randn! as curandn!

using TensorKit
import TensorKit.VectorInterface: scalartype as vi_scalartype
using TensorKit.Factorizations
using TensorKit.Factorizations: AbstractAlgorithm
using TensorKit: SectorDict, tensormaptype, scalar, similarstoragetype, AdjointTensorMap

using TensorKit.MatrixAlgebraKit

using Random

include("cutensormap.jl")

# `DiagonalTensorMap` whose diagonal data lives in CUDA device memory.
const CuDiagonalTensorMap{T,S} = DiagonalTensorMap{T,S,CuVector{T,CUDA.DeviceMemory}}

"""
    CuDiagonalTensorMap{T}(undef, domain::S) where {T,S<:IndexSpace}
    # expert mode: select storage type `A`
    DiagonalTensorMap{T,S,A}(undef, domain::S) where {T,S<:IndexSpace,A<:DenseVector{T}}

Construct a `DiagonalTensorMap` with uninitialized data.
"""
function CuDiagonalTensorMap{T}(::UndefInitializer, V::TensorMapSpace) where {T}
    (numin(V) == numout(V) == 1 && domain(V) == codomain(V)) ||
        throw(ArgumentError("DiagonalTensorMap requires a space with equal domain and codomain and 2 indices"))
    return CuDiagonalTensorMap{T}(undef, domain(V))
end
function CuDiagonalTensorMap{T}(::UndefInitializer, V::ProductSpace) where {T}
    length(V) == 1 ||
        throw(ArgumentError("DiagonalTensorMap requires `numin(d) == numout(d) == 1`"))
    return CuDiagonalTensorMap{T}(undef, only(V))
end
function CuDiagonalTensorMap{T}(::UndefInitializer, V::S) where {T,S<:IndexSpace}
    return CuDiagonalTensorMap{T,S}(undef, V)
end
CuDiagonalTensorMap(::UndefInitializer, V::IndexSpace) = CuDiagonalTensorMap{Float64}(undef, V)

function CuDiagonalTensorMap(data::CuVector{T}, V::S) where {T,S}
    return CuDiagonalTensorMap{T,S}(data, V)
end

# Pre-allocate factorization outputs on the GPU, so that diagonal factors
# (singular values, eigenvalues) come back as `CuDiagonalTensorMap`s.
function TensorKit.Factorizations.MAK.initialize_output(::typeof(svd_full!),
                                                        t::CuDiagonalTensorMap,
                                                        alg::DiagonalAlgorithm)
    V_cod = fuse(codomain(t))
    V_dom = fuse(domain(t))
    U = similar(t, codomain(t) ← V_cod)
    S = CuDiagonalTensorMap{real(scalartype(t))}(undef, V_cod ← V_dom)
    Vᴴ = similar(t, V_dom ← domain(t))
    return U, S, Vᴴ
end

function TensorKit.Factorizations.MAK.initialize_output(::typeof(svd_vals!),
                                                        t::CuTensorMap, alg::AbstractAlgorithm)
    V_cod = infimum(fuse(codomain(t)), fuse(domain(t)))
    return CuDiagonalTensorMap{real(scalartype(t))}(undef, V_cod)
end

function TensorKit.Factorizations.MAK.initialize_output(::typeof(svd_compact!),
                                                        t::CuTensorMap, ::AbstractAlgorithm)
    V_cod = V_dom = infimum(fuse(codomain(t)), fuse(domain(t)))
    U = similar(t, codomain(t) ← V_cod)
    S = CuDiagonalTensorMap{real(scalartype(t))}(undef, V_cod)
    Vᴴ = similar(t, V_dom ← domain(t))
    return U, S, Vᴴ
end

function TensorKit.Factorizations.MAK.initialize_output(::typeof(eigh_full!),
                                                        t::CuTensorMap, ::AbstractAlgorithm)
    V_D = fuse(domain(t))
    T = real(scalartype(t))
    D = CuDiagonalTensorMap{T}(undef, V_D)
    V = similar(t, codomain(t) ← V_D)
    return D, V
end

function TensorKit.Factorizations.MAK.initialize_output(::typeof(eig_full!),
                                                        t::CuTensorMap, ::AbstractAlgorithm)
    V_D = fuse(domain(t))
    Tc = complex(scalartype(t))
    D = CuDiagonalTensorMap{Tc}(undef, V_D)
    V = similar(t, Tc, codomain(t) ← V_D)
    return D, V
end

function TensorKit.Factorizations.MAK.initialize_output(::typeof(eigh_vals!),
                                                        t::CuTensorMap, alg::AbstractAlgorithm)
    V_D = fuse(domain(t))
    T = real(scalartype(t))
    return CuDiagonalTensorMap{T}(undef, V_D)
end

function TensorKit.Factorizations.MAK.initialize_output(::typeof(eig_vals!),
                                                        t::CuTensorMap, alg::AbstractAlgorithm)
    V_D = fuse(domain(t))
    Tc = complex(scalartype(t))
    return CuDiagonalTensorMap{Tc}(undef, V_D)
end

# TODO
# add VectorInterface extensions for proper CUDA promotion
function TensorKit.VectorInterface.promote_add(TA::Type{<:CUDA.StridedCuMatrix{Tx}},
                                               TB::Type{<:CUDA.StridedCuMatrix{Ty}},
                                               α::Tα=TensorKit.VectorInterface.One(),
                                               β::Tβ=TensorKit.VectorInterface.One()) where {Tx,Ty,Tα,Tβ}
    return Base.promote_op(TensorKit.VectorInterface.add, Tx, Ty, Tα, Tβ)
end

end
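
A hedged sketch of what these definitions are building towards (the actual CUDA-backed factorization kernels come from MatrixAlgebraKit and are not part of this file): the `initialize_output` overloads make the factorization drivers allocate their diagonal factors as `CuDiagonalTensorMap`s, so singular values and eigenvalues stay on the device. Everything below that does not appear in the commit is illustrative only.

using TensorKit, CUDA, cuTENSOR

ext = Base.get_extension(TensorKit, :TensorKitCUDAExt)

# Diagonal tensor map with uninitialized CuVector data, via the constructor above.
D = ext.CuDiagonalTensorMap{Float64}(undef, ℂ^4)
fill!(D.data, 1.0)    # the underlying storage is a CuVector

# Intended end state (not guaranteed to work in this commit): factorizing a
# CUDA-backed TensorMap returns its singular values as a CuDiagonalTensorMap.
t = TensorMap(CUDA.rand(ComplexF64, 4, 4), ℂ^4 ← ℂ^4)
# U, S, Vᴴ = svd_compact(t)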
