Commit 25a9337

Start on GPU extensions
1 parent 4ab4707 commit 25a9337

File tree

15 files changed: +1916 −33 lines

Project.toml

Lines changed: 21 additions & 2 deletions

@@ -18,19 +18,33 @@ TupleTools = "9d95972d-f1c8-5527-a6e0-b4b365fa01f6"
 VectorInterface = "409d34a3-91d5-4945-b6ec-7529ddf182d8"
 
 [weakdeps]
+AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
 FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
+cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1"
+
+[sources]
+GPUArrays = {rev = "ksh/more_diag", url = "https://github.com/JuliaGPU/GPUArrays.jl"}
+MatrixAlgebraKit = {rev = "ksh/tk", url = "https://github.com/QuantumKitHub/MatrixAlgebraKit.jl"}
+cuTENSOR = {subdir = "lib/cutensor", url = "https://github.com/JuliaGPU/CUDA.jl"}
 
 [extensions]
+TensorKitAMDGPUExt = "AMDGPU"
+TensorKitCUDAExt = ["CUDA", "cuTENSOR"]
 TensorKitChainRulesCoreExt = "ChainRulesCore"
 TensorKitFiniteDifferencesExt = "FiniteDifferences"
 
 [compat]
+AMDGPU = "2"
+Adapt = "4"
 Aqua = "0.6, 0.7, 0.8"
+CUDA = "5.8.4"
 ChainRulesCore = "1"
 ChainRulesTestUtils = "1"
 Combinatorics = "1"
 FiniteDifferences = "0.12"
+GPUArrays = "11.2.6"
 LRUCache = "1.0.2"
 LinearAlgebra = "1"
 MatrixAlgebraKit = "0.5.0"
@@ -39,26 +53,31 @@ PackageExtensionCompat = "1"
 Random = "1"
 ScopedValues = "1.3.0"
 Strided = "2"
-TensorKitSectors = "0.1.4, 0.2"
+TensorKitSectors = "0.3"
 TensorOperations = "5.1"
 Test = "1"
 TestExtras = "0.2,0.3"
 TupleTools = "1.1"
 VectorInterface = "0.4.8, 0.5"
 Zygote = "0.7"
+cuTENSOR = "2"
 julia = "1.10"
 
 [extras]
+Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
 Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
 ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a"
 Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa"
 FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
+GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 TensorOperations = "6aa20fa7-93e2-5fca-9bc0-fbd0db3c71a2"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 TestExtras = "5ed8adda-3752-4e41-b88a-e8b09835ee3a"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
+cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1"
 
 [targets]
-test = ["Aqua", "Combinatorics", "LinearAlgebra", "TensorOperations", "Test", "TestExtras", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote"]
+test = ["Adapt", "Aqua", "Combinatorics", "CUDA", "cuTENSOR", "GPUArrays", "LinearAlgebra", "TensorOperations", "Test", "TestExtras", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote"]
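
For orientation (not part of the diff): AMDGPU, CUDA and cuTENSOR are declared as weak dependencies, so Julia only compiles and loads the corresponding extension modules once the trigger packages are loaded next to TensorKit, while the [sources] table pins GPUArrays, MatrixAlgebraKit and cuTENSOR to development branches/subdirectories for now. A minimal sketch of how that plays out in a session, assuming a working GPU environment:

using TensorKit            # core package, no GPU code loaded yet
using CUDA, cuTENSOR       # loading both triggers TensorKitCUDAExt
using AMDGPU               # loading this triggers TensorKitAMDGPUExt

# The extension modules are not part of TensorKit's public namespace; for tests
# or direct access they can be looked up explicitly:
cuda_ext = Base.get_extension(TensorKit, :TensorKitCUDAExt)
roc_ext = Base.get_extension(TensorKit, :TensorKitAMDGPUExt)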
Lines changed: 10 additions & 0 deletions

module TensorKitAMDGPUExt

using TensorKit
using TensorKit: SectorDict, tensormaptype
using AMDGPU
using Random

include("roctensormap.jl")

end
Lines changed: 103 additions & 0 deletions

# `TensorMap`s whose block data is stored in `ROCMatrix`es (AMD GPU memory).
const _ROCMatOrDict{I,T} = Union{ROCMatrix{T},SectorDict{I,ROCMatrix{T}}}
const ROCTensorMap{T,S,N₁,N₂,I,A<:_ROCMatOrDict{I,T}} = TensorMap{T,S,N₁,N₂,A}
const ROCTensor{T,S,N,I,A<:_ROCMatOrDict{I,T}} = ROCTensorMap{T,S,N,0,I,A}

function ROCTensorMap{T}(::UndefInitializer, V::TensorMapSpace{S,N₁,N₂}) where {T,S,N₁,N₂}
    A = ROCMatrix{T,AMDGPU.default_memory}
    TT = tensormaptype(S, N₁, N₂, A)
    return TT(undef, codomain(V), domain(V))
end

function ROCTensorMap{T}(::UndefInitializer, codomain::TensorSpace{S},
                         domain::TensorSpace{S}) where {T,S}
    return ROCTensorMap{T}(undef, codomain ← domain)
end
function ROCTensor{T}(::UndefInitializer, V::TensorSpace{S}) where {T,S}
    return ROCTensorMap{T}(undef, V ← one(V))
end

# zeros/ones constructors mirroring the TensorKit counterparts
for (fname, felt) in ((:zeros, :zero), (:ones, :one))
    @eval begin
        function AMDGPU.$fname(codomain::TensorSpace{S},
                               domain::TensorSpace{S}=one(codomain)) where {S<:IndexSpace}
            return AMDGPU.$fname(codomain ← domain)
        end
        function AMDGPU.$fname(::Type{T}, codomain::TensorSpace{S},
                               domain::TensorSpace{S}=one(codomain)) where {T,S<:IndexSpace}
            return AMDGPU.$fname(T, codomain ← domain)
        end
        AMDGPU.$fname(V::TensorMapSpace) = AMDGPU.$fname(Float64, V)
        function AMDGPU.$fname(::Type{T}, V::TensorMapSpace) where {T}
            t = ROCTensorMap{T}(undef, V)
            fill!(t, $felt(T))
            return t
        end
    end
end

for randfun in (:rand, :randn)
    randfun! = Symbol(randfun, :!)
    @eval begin
        # converting `codomain` and `domain` into `HomSpace`
        function AMDGPU.$randfun(codomain::TensorSpace{S},
                                 domain::TensorSpace{S}) where {S<:IndexSpace}
            return AMDGPU.$randfun(codomain ← domain)
        end
        function AMDGPU.$randfun(::Type{T}, codomain::TensorSpace{S},
                                 domain::TensorSpace{S}) where {T,S<:IndexSpace}
            return AMDGPU.$randfun(T, codomain ← domain)
        end
        function AMDGPU.$randfun(rng::Random.AbstractRNG, ::Type{T},
                                 codomain::TensorSpace{S},
                                 domain::TensorSpace{S}) where {T,S<:IndexSpace}
            return AMDGPU.$randfun(rng, T, codomain ← domain)
        end

        # accepting single `TensorSpace`
        AMDGPU.$randfun(codomain::TensorSpace) = AMDGPU.$randfun(codomain ← one(codomain))
        function AMDGPU.$randfun(::Type{T}, codomain::TensorSpace) where {T}
            return AMDGPU.$randfun(T, codomain ← one(codomain))
        end
        function AMDGPU.$randfun(rng::Random.AbstractRNG, ::Type{T},
                                 codomain::TensorSpace) where {T}
            return AMDGPU.$randfun(rng, T, codomain ← one(codomain))
        end

        # filling in default eltype
        AMDGPU.$randfun(V::TensorMapSpace) = AMDGPU.$randfun(Float64, V)
        function AMDGPU.$randfun(rng::Random.AbstractRNG, V::TensorMapSpace)
            return AMDGPU.$randfun(rng, Float64, V)
        end

        # filling in default rng
        function AMDGPU.$randfun(::Type{T}, V::TensorMapSpace) where {T}
            return AMDGPU.$randfun(Random.default_rng(), T, V)
        end

        # implementation
        function AMDGPU.$randfun(rng::Random.AbstractRNG, ::Type{T},
                                 V::TensorMapSpace) where {T}
            t = ROCTensorMap{T}(undef, V)
            AMDGPU.$randfun!(rng, t)
            return t
        end
    end
end

# converters
# ----------
function Base.convert(::Type{ROCTensorMap}, d::Dict{Symbol,Any})
    try
        codomain = eval(Meta.parse(d[:codomain]))
        domain = eval(Meta.parse(d[:domain]))
        data = SectorDict(eval(Meta.parse(c)) => ROCArray(b) for (c, b) in d[:data])
        return TensorMap(data, codomain, domain)
    catch e # sector unknown in TensorKit.jl; user-defined, hopefully accessible in Main
        codomain = Base.eval(Main, Meta.parse(d[:codomain]))
        domain = Base.eval(Main, Meta.parse(d[:domain]))
        data = SectorDict(Base.eval(Main, Meta.parse(c)) => ROCArray(b)
                          for (c, b) in d[:data])
        return TensorMap(data, codomain, domain)
    end
end
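
A hedged usage sketch of the constructors above (assuming a working ROCm device; the `AMDGPU.rand!`/`randn!` methods that actually fill a `ROCTensorMap` are not part of this file):

using TensorKit, AMDGPU

V = ℂ^2 ⊗ ℂ^3                            # a ProductSpace codomain
t = AMDGPU.zeros(Float64, V, ℂ^4)         # ROCTensorMap with ROCMatrix blocks, filled with zeros
u = AMDGPU.ones(ComplexF64, ℂ^2 ← ℂ^2)    # HomSpace form
r = AMDGPU.rand(ℂ^2 ← ℂ^2)                # Float64 + default rng; relies on AMDGPU.rand!(rng, t) defined elsewhere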
Lines changed: 105 additions & 0 deletions

module TensorKitCUDAExt

using CUDA, CUDA.CUBLAS, LinearAlgebra
using CUDA: @allowscalar
using cuTENSOR: cuTENSOR
import CUDA: rand as curand, rand! as curand!, randn as curandn, randn! as curandn!

using TensorKit
import TensorKit.VectorInterface: scalartype as vi_scalartype
using TensorKit.Factorizations
using TensorKit.Factorizations: AbstractAlgorithm
using TensorKit: SectorDict, tensormaptype, scalar, similarstoragetype, AdjointTensorMap

using TensorKit.MatrixAlgebraKit

using Random

include("cutensormap.jl")

# `DiagonalTensorMap` whose diagonal data lives in CUDA device memory.
const CuDiagonalTensorMap{T,S} = DiagonalTensorMap{T,S,CuVector{T,CUDA.DeviceMemory}}

"""
    CuDiagonalTensorMap{T}(undef, domain::S) where {T,S<:IndexSpace}
    # expert mode: select storage type `A`
    DiagonalTensorMap{T,S,A}(undef, domain::S) where {T,S<:IndexSpace,A<:DenseVector{T}}

Construct a `DiagonalTensorMap` with uninitialized data.
"""
function CuDiagonalTensorMap{T}(::UndefInitializer, V::TensorMapSpace) where {T}
    (numin(V) == numout(V) == 1 && domain(V) == codomain(V)) ||
        throw(ArgumentError("DiagonalTensorMap requires a space with equal domain and codomain and 2 indices"))
    return CuDiagonalTensorMap{T}(undef, domain(V))
end
function CuDiagonalTensorMap{T}(::UndefInitializer, V::ProductSpace) where {T}
    length(V) == 1 ||
        throw(ArgumentError("DiagonalTensorMap requires `numin(d) == numout(d) == 1`"))
    return CuDiagonalTensorMap{T}(undef, only(V))
end
function CuDiagonalTensorMap{T}(::UndefInitializer, V::S) where {T,S<:IndexSpace}
    return CuDiagonalTensorMap{T,S}(undef, V)
end
CuDiagonalTensorMap(::UndefInitializer, V::IndexSpace) = CuDiagonalTensorMap{Float64}(undef, V)

function CuDiagonalTensorMap(data::CuVector{T}, V::S) where {T,S}
    return CuDiagonalTensorMap{T,S}(data, V)
end

# Pre-allocate factorization outputs on the GPU, so that diagonal factors
# (singular values, eigenvalues) come back as `CuDiagonalTensorMap`s.
function TensorKit.Factorizations.MAK.initialize_output(::typeof(svd_full!),
                                                        t::CuDiagonalTensorMap,
                                                        alg::DiagonalAlgorithm)
    V_cod = fuse(codomain(t))
    V_dom = fuse(domain(t))
    U = similar(t, codomain(t) ← V_cod)
    S = CuDiagonalTensorMap{real(scalartype(t))}(undef, V_cod ← V_dom)
    Vᴴ = similar(t, V_dom ← domain(t))
    return U, S, Vᴴ
end

function TensorKit.Factorizations.MAK.initialize_output(::typeof(svd_vals!),
                                                        t::CuTensorMap, alg::AbstractAlgorithm)
    V_cod = infimum(fuse(codomain(t)), fuse(domain(t)))
    return CuDiagonalTensorMap{real(scalartype(t))}(undef, V_cod)
end

function TensorKit.Factorizations.MAK.initialize_output(::typeof(svd_compact!),
                                                        t::CuTensorMap, ::AbstractAlgorithm)
    V_cod = V_dom = infimum(fuse(codomain(t)), fuse(domain(t)))
    U = similar(t, codomain(t) ← V_cod)
    S = CuDiagonalTensorMap{real(scalartype(t))}(undef, V_cod)
    Vᴴ = similar(t, V_dom ← domain(t))
    return U, S, Vᴴ
end

function TensorKit.Factorizations.MAK.initialize_output(::typeof(eigh_full!),
                                                        t::CuTensorMap, ::AbstractAlgorithm)
    V_D = fuse(domain(t))
    T = real(scalartype(t))
    D = CuDiagonalTensorMap{T}(undef, V_D)
    V = similar(t, codomain(t) ← V_D)
    return D, V
end

function TensorKit.Factorizations.MAK.initialize_output(::typeof(eig_full!),
                                                        t::CuTensorMap, ::AbstractAlgorithm)
    V_D = fuse(domain(t))
    Tc = complex(scalartype(t))
    D = CuDiagonalTensorMap{Tc}(undef, V_D)
    V = similar(t, Tc, codomain(t) ← V_D)
    return D, V
end

function TensorKit.Factorizations.MAK.initialize_output(::typeof(eigh_vals!),
                                                        t::CuTensorMap, alg::AbstractAlgorithm)
    V_D = fuse(domain(t))
    T = real(scalartype(t))
    return CuDiagonalTensorMap{T}(undef, V_D)
end

function TensorKit.Factorizations.MAK.initialize_output(::typeof(eig_vals!),
                                                        t::CuTensorMap, alg::AbstractAlgorithm)
    V_D = fuse(domain(t))
    Tc = complex(scalartype(t))
    return CuDiagonalTensorMap{Tc}(undef, V_D)
end

# TODO
# add VectorInterface extensions for proper CUDA promotion
function TensorKit.VectorInterface.promote_add(TA::Type{<:CUDA.StridedCuMatrix{Tx}},
                                               TB::Type{<:CUDA.StridedCuMatrix{Ty}},
                                               α::Tα=TensorKit.VectorInterface.One(),
                                               β::Tβ=TensorKit.VectorInterface.One()) where {Tx,Ty,Tα,Tβ}
    return Base.promote_op(TensorKit.VectorInterface.add, Tx, Ty, Tα, Tβ)
end

end
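
A hedged sketch of what these definitions are building towards (the actual CUDA-backed factorization kernels come from MatrixAlgebraKit and are not part of this file): the `initialize_output` overloads make the factorization drivers allocate their diagonal factors as `CuDiagonalTensorMap`s, so singular values and eigenvalues stay on the device. Everything below that does not appear in the commit is illustrative only.

using TensorKit, CUDA, cuTENSOR

ext = Base.get_extension(TensorKit, :TensorKitCUDAExt)

# Diagonal tensor map with uninitialized CuVector data, via the constructor above.
D = ext.CuDiagonalTensorMap{Float64}(undef, ℂ^4)
fill!(D.data, 1.0)    # the underlying storage is a CuVector

# Intended end state (not guaranteed to work in this commit): factorizing a
# CUDA-backed TensorMap returns its singular values as a CuDiagonalTensorMap.
t = TensorMap(CUDA.rand(ComplexF64, 4, 4), ℂ^4 ← ℂ^4)
# U, S, Vᴴ = svd_compact(t)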
