Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 19 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,24 @@ TupleTools = "9d95972d-f1c8-5527-a6e0-b4b365fa01f6"
VectorInterface = "409d34a3-91d5-4945-b6ec-7529ddf182d8"

[weakdeps]
AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"

[extensions]
TensorKitAMDGPUExt = "AMDGPU"
TensorKitCUDAExt = ["CUDA", "cuTENSOR"]
TensorKitChainRulesCoreExt = "ChainRulesCore"
TensorKitFiniteDifferencesExt = "FiniteDifferences"

[compat]
AMDGPU = "2"
Adapt = "4"
Aqua = "0.6, 0.7, 0.8"
CUDA = "5"
cuTENSOR = "2"
ChainRulesCore = "1"
ChainRulesTestUtils = "1"
Combinatorics = "1"
Expand All @@ -49,7 +58,10 @@ Zygote = "0.7"
julia = "1.10"

[extras]
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1"
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a"
Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa"
Expand All @@ -61,4 +73,10 @@ TestExtras = "5ed8adda-3752-4e41-b88a-e8b09835ee3a"
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"

[targets]
test = ["Aqua", "Combinatorics", "LinearAlgebra", "TensorOperations", "Test", "TestExtras", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote"]
test = ["Adapt", "Aqua", "Combinatorics", "CUDA", "cuTENSOR", "LinearAlgebra", "TensorOperations", "Test", "TestExtras", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote"]

[sources]
CUDA = {url = "https://github.com/JuliaGPU/CUDA.jl", rev = "master"}
cuTENSOR = {url = "https://github.com/JuliaGPU/CUDA.jl", subdir="lib/cutensor", rev = "ksh/cutensor_bump"}
MatrixAlgebraKit = {url = "https://github.com/QuantumKitHub/MatrixAlgebraKit.jl", rev = "ksh/tk"}
TensorOperations = {url = "https://github.com/QuantumKitHub/TensorOperations.jl", rev = "ksh/cutensor_bump"}
10 changes: 10 additions & 0 deletions ext/TensorKitAMDGPUExt/TensorKitAMDGPUExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
module TensorKitAMDGPUExt

# Package extension providing AMDGPU (ROCm) array storage for TensorKit tensors.
# Loaded automatically when both TensorKit and AMDGPU are in the environment.
using TensorKit
using TensorKit: SectorDict
using AMDGPU
using Random

# Constructors, random initializers and converters for ROC-backed TensorMaps.
include("roctensormap.jl")

end
103 changes: 103 additions & 0 deletions ext/TensorKitAMDGPUExt/roctensormap.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
# Block storage of a ROC-backed tensor: either a single ROCMatrix (trivial
# sector case) or a SectorDict mapping sectors of type `I` to ROCMatrix blocks.
const _ROCMatOrDict{I,T} = Union{ROCMatrix{T},SectorDict{I,ROCMatrix{T}}}
# Aliases for TensorMap / Tensor whose data lives on an AMD GPU.
const ROCTensorMap{T,S,N₁,N₂,I,A<:_ROCMatOrDict{I,T}} = TensorMap{T,S,N₁,N₂,A}
const ROCTensor{T, S, N, I, A <: _ROCMatOrDict{I, T}} = ROCTensorMap{T, S, N, 0, I, A}

"""
    ROCTensorMap{T}(undef, V::TensorMapSpace)

Construct an uninitialized ROC-backed `TensorMap` with element type `T` over
the homomorphism space `V`, using `ROCMatrix` blocks in AMDGPU's default
memory space.
"""
function ROCTensorMap{T}(::UndefInitializer, V::TensorMapSpace{S, N₁, N₂}) where {T, S, N₁, N₂}
    A = ROCMatrix{T, AMDGPU.default_memory}
    # BUG FIX: `tensormaptype` is a function, not a parametric type — it must be
    # called with parentheses. `tensormaptype{S, N₁, N₂, A}` throws at runtime.
    TT = tensormaptype(S, N₁, N₂, A)
    return TT(undef, codomain(V), domain(V))
end

# Convenience constructor: combine a separate codomain and domain into a
# HomSpace via `←` and forward to the primary constructor.
function ROCTensorMap{T}(::UndefInitializer, cod::TensorSpace{S},
                         dom::TensorSpace{S}) where {T,S}
    return ROCTensorMap{T}(undef, cod ← dom)
end
# A `Tensor` is a `TensorMap` whose domain is the trivial (unit) space.
ROCTensor{T}(::UndefInitializer, V::TensorSpace{S}) where {T,S} =
    ROCTensorMap{T}(undef, V ← one(V))

# Extend `AMDGPU.zeros` / `AMDGPU.ones` to TensorKit spaces, producing
# ROC-backed tensors filled with `zero(T)` / `one(T)`. The element type
# defaults to Float64 when not given. Methods are generated for both
# functions via metaprogramming to avoid duplication.
for (fname, felt) in ((:zeros, :zero), (:ones, :one))
    @eval begin
        # Separate codomain/domain arguments: combine them into a HomSpace.
        function AMDGPU.$fname(codomain::TensorSpace{S},
                               domain::TensorSpace{S}=one(codomain)) where {S<:IndexSpace}
            return AMDGPU.$fname(codomain ← domain)
        end
        function AMDGPU.$fname(::Type{T}, codomain::TensorSpace{S},
                               domain::TensorSpace{S}=one(codomain)) where {T,S<:IndexSpace}
            return AMDGPU.$fname(T, codomain ← domain)
        end
        # Default element type.
        AMDGPU.$fname(V::TensorMapSpace) = AMDGPU.$fname(Float64, V)
        # Implementation: allocate uninitialized, then fill with zero/one.
        function AMDGPU.$fname(::Type{T}, V::TensorMapSpace) where {T}
            t = ROCTensorMap{T}(undef, V)
            fill!(t, $felt(T))
            return t
        end
    end
end

# Extend `AMDGPU.rand` / `AMDGPU.randn` to TensorKit spaces, producing
# ROC-backed tensors with random entries. Methods are generated for both
# functions via metaprogramming; all convenience signatures funnel into the
# final (rng, T, V::TensorMapSpace) implementation.
for randfun in (:rand, :randn)
    randfun! = Symbol(randfun, :!)
    @eval begin
        # converting `codomain` and `domain` into `HomSpace`
        function AMDGPU.$randfun(codomain::TensorSpace{S},
                                 domain::TensorSpace{S}) where {S<:IndexSpace}
            return AMDGPU.$randfun(codomain ← domain)
        end
        function AMDGPU.$randfun(::Type{T}, codomain::TensorSpace{S},
                                 domain::TensorSpace{S}) where {T,S<:IndexSpace}
            return AMDGPU.$randfun(T, codomain ← domain)
        end
        function AMDGPU.$randfun(rng::Random.AbstractRNG, ::Type{T},
                                 codomain::TensorSpace{S},
                                 domain::TensorSpace{S}) where {T,S<:IndexSpace}
            return AMDGPU.$randfun(rng, T, codomain ← domain)
        end

        # accepting single `TensorSpace`
        AMDGPU.$randfun(codomain::TensorSpace) = AMDGPU.$randfun(codomain ← one(codomain))
        function AMDGPU.$randfun(::Type{T}, codomain::TensorSpace) where {T}
            return AMDGPU.$randfun(T, codomain ← one(codomain))
        end
        function AMDGPU.$randfun(rng::Random.AbstractRNG, ::Type{T},
                                 codomain::TensorSpace) where {T}
            # BUG FIX: was `one(domain)`, but `domain` is not an argument of
            # this method (it is the TensorKit function `domain`), which made
            # this method error; the trivial space of `codomain` is intended.
            return AMDGPU.$randfun(rng, T, codomain ← one(codomain))
        end

        # filling in default eltype
        AMDGPU.$randfun(V::TensorMapSpace) = AMDGPU.$randfun(Float64, V)
        function AMDGPU.$randfun(rng::Random.AbstractRNG, V::TensorMapSpace)
            return AMDGPU.$randfun(rng, Float64, V)
        end

        # filling in default rng
        function AMDGPU.$randfun(::Type{T}, V::TensorMapSpace) where {T}
            return AMDGPU.$randfun(Random.default_rng(), T, V)
        end

        # implementation: allocate uninitialized, then fill in place
        function AMDGPU.$randfun(rng::Random.AbstractRNG, ::Type{T},
                                 V::TensorMapSpace) where {T}
            t = ROCTensorMap{T}(undef, V)
            AMDGPU.$randfun!(rng, t)
            return t
        end
    end
end

# converters
# ----------
# Reconstruct a ROC-backed TensorMap from the Dict representation produced by
# TensorKit's serialization (string-encoded spaces/sectors, raw block arrays).
# NOTE(review): this evaluates strings from the input dict via
# `eval(Meta.parse(...))` — only use on trusted data, never on external input.
function Base.convert(::Type{ROCTensorMap}, d::Dict{Symbol,Any})
    try
        # Spaces and sector keys are stored as strings; parse and evaluate them
        # in this module, and upload each block to the GPU via `ROCArray`.
        codomain = eval(Meta.parse(d[:codomain]))
        domain = eval(Meta.parse(d[:domain]))
        data = SectorDict(eval(Meta.parse(c)) => ROCArray(b) for (c, b) in d[:data])
        return TensorMap(data, codomain, domain)
    catch e # sector unknown in TensorKit.jl; user-defined, hopefully accessible in Main
        # Retry evaluation in Main so user-defined sector types can be resolved.
        codomain = Base.eval(Main, Meta.parse(d[:codomain]))
        domain = Base.eval(Main, Meta.parse(d[:domain]))
        data = SectorDict(Base.eval(Main, Meta.parse(c)) => ROCArray(b)
                          for (c, b) in d[:data])
        return TensorMap(data, codomain, domain)
    end
end

90 changes: 90 additions & 0 deletions ext/TensorKitCUDAExt/TensorKitCUDAExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
module TensorKitCUDAExt

using CUDA, CUDA.CUBLAS, LinearAlgebra
using CUDA: @allowscalar
using cuTENSOR: cuTENSOR

using TensorKit
import TensorKit.VectorInterface: scalartype as vi_scalartype
using TensorKit.Factorizations
using TensorKit.Factorizations: select_svd_algorithm, OFA, initialize_output, AbstractAlgorithm
using TensorKit: SectorDict, tensormaptype, scalar, similarstoragetype, AdjointTensorMap

using TensorKit.MatrixAlgebraKit

using Random

include("cutensormap.jl")

# Map TensorKit's generic SVD algorithm selectors onto CUDA-capable backends:
# QR iteration is available through cuSOLVER; divide-and-conquer is not.
function TensorKit.Factorizations.select_svd_algorithm(::CuTensorMap, ::TensorKit.Factorizations.SVD)
    return CUSOLVER_QRIteration()
end
function TensorKit.Factorizations.select_svd_algorithm(::CuTensorMap, ::TensorKit.Factorizations.SDD)
    throw(ArgumentError("DivideAndConquer unavailable on CUDA"))
end
# Catch-all for any other algorithm token.
function TensorKit.Factorizations.select_svd_algorithm(::CuTensorMap, alg::OFA)
    throw(ArgumentError(lazy"Unknown algorithm $alg"))
end

# Alias for a DiagonalTensorMap whose diagonal is stored in a CuVector
# residing in CUDA device memory.
const CuDiagonalTensorMap{T, S} = DiagonalTensorMap{T, S, CuVector{T, CUDA.DeviceMemory}}

"""
    CuDiagonalTensorMap{T}(undef, domain::S) where {T,S<:IndexSpace}

Construct a `CuDiagonalTensorMap` (a `DiagonalTensorMap` with CUDA device
storage) with uninitialized data over `domain`. Convenience methods also
accept a `TensorMapSpace` or a length-one `ProductSpace`, provided they
describe a square, single-index map. The element type defaults to `Float64`.
"""
function CuDiagonalTensorMap{T}(::UndefInitializer, V::TensorMapSpace) where {T}
    # A diagonal map must be square with exactly one input and one output index.
    (numin(V) == numout(V) == 1 && domain(V) == codomain(V)) ||
        throw(ArgumentError("DiagonalTensorMap requires a space with equal domain and codomain and 2 indices"))
    return CuDiagonalTensorMap{T}(undef, domain(V))
end
function CuDiagonalTensorMap{T}(::UndefInitializer, V::ProductSpace) where {T}
    # Unwrap a length-one ProductSpace to its single IndexSpace factor.
    length(V) == 1 ||
        throw(ArgumentError("DiagonalTensorMap requires `numin(d) == numout(d) == 1`"))
    return CuDiagonalTensorMap{T}(undef, only(V))
end
function CuDiagonalTensorMap{T}(::UndefInitializer, V::S) where {T,S<:IndexSpace}
    # Base case: delegate to the fully-parameterized DiagonalTensorMap constructor.
    return CuDiagonalTensorMap{T,S}(undef, V)
end
# Default element type is Float64.
CuDiagonalTensorMap(::UndefInitializer, V::IndexSpace) = CuDiagonalTensorMap{Float64}(undef, V)

# Allocate (U, S, Vᴴ) for a compact SVD of a CUDA-backed tensor map: the
# inner dimension is the smaller of the fused codomain and fused domain, and
# the singular values live in a real CUDA diagonal tensor.
function TensorKit.Factorizations.initialize_output(::typeof(svd_compact!), t::CuTensorMap, ::AbstractAlgorithm)
    V_inner = infimum(fuse(codomain(t)), fuse(domain(t)))
    U = similar(t, codomain(t) ← V_inner)
    S = CuDiagonalTensorMap{real(scalartype(t))}(undef, V_inner)
    Vᴴ = similar(t, V_inner ← domain(t))
    return (U, S, Vᴴ)
end

# Allocate (D, V) for a hermitian eigendecomposition: eigenvalues are real
# and stored diagonally on the device; eigenvectors share t's element type.
function TensorKit.Factorizations.initialize_output(::typeof(eigh_full!), t::CuTensorMap, ::AbstractAlgorithm)
    fused = fuse(domain(t))
    Treal = real(scalartype(t))
    D = CuDiagonalTensorMap{Treal}(undef, fused)
    V = similar(t, codomain(t) ← fused)
    return D, V
end

# Allocate (D, V) for a general eigendecomposition: eigenvalues and
# eigenvectors may be complex even for a real input.
function TensorKit.Factorizations.initialize_output(::typeof(eig_full!), t::CuTensorMap, ::AbstractAlgorithm)
    fused = fuse(domain(t))
    Tcomplex = complex(scalartype(t))
    D = CuDiagonalTensorMap{Tcomplex}(undef, fused)
    V = similar(t, Tcomplex, codomain(t) ← fused)
    return D, V
end

# Allocate the eigenvalue output for a hermitian eigenvalue-only computation:
# hermitian eigenvalues are real, so the diagonal output uses real(scalartype).
function TensorKit.Factorizations.initialize_output(::typeof(eigh_vals!), t::CuTensorMap, alg::AbstractAlgorithm)
    V_D = fuse(domain(t))
    T = real(scalartype(t))
    # BUG FIX: previously returned `CuDiagonalTensorMap{Tc}` but `Tc` was never
    # defined in this method (copy-paste from the eig_vals! method); the real
    # element type `T` computed above is the intended parameter.
    return CuDiagonalTensorMap{T}(undef, V_D)
end

# Allocate the eigenvalue output for a general eigenvalue-only computation:
# eigenvalues may be complex, so the diagonal output uses complex(scalartype).
function TensorKit.Factorizations.initialize_output(::typeof(eig_vals!), t::CuTensorMap, alg::AbstractAlgorithm)
    fused = fuse(domain(t))
    return CuDiagonalTensorMap{complex(scalartype(t))}(undef, fused)
end


# TODO
# add VectorInterface extensions for proper CUDA promotion
# VectorInterface promotion rule for adding two strided CUDA matrices with
# scalar coefficients α, β: the result element type is what `add` would
# produce on the element types. TODO: extend to cover the remaining
# VectorInterface promotion hooks for proper CUDA promotion.
function TensorKit.VectorInterface.promote_add(TA::Type{<:CUDA.StridedCuMatrix{Tx}},
                                               TB::Type{<:CUDA.StridedCuMatrix{Ty}},
                                               α::Tα = TensorKit.VectorInterface.One(),
                                               β::Tβ = TensorKit.VectorInterface.One()) where {Tx, Ty, Tα, Tβ}
    # BUG FIX: `add` was referenced unqualified, but this module only imports
    # `scalartype` from VectorInterface, so the bare name is not in scope;
    # qualify it explicitly.
    return Base.promote_op(TensorKit.VectorInterface.add, Tx, Ty, Tα, Tβ)
end

end
Loading
Loading