Skip to content

Commit c07e18c

Browse files
Katharine Hyattkshyatt
authored andcommitted
Start on GPU extensions
1 parent 113f9a1 commit c07e18c

File tree

15 files changed

+1610
-53
lines changed

15 files changed

+1610
-53
lines changed

Project.toml

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,18 +18,27 @@ TupleTools = "9d95972d-f1c8-5527-a6e0-b4b365fa01f6"
1818
VectorInterface = "409d34a3-91d5-4945-b6ec-7529ddf182d8"
1919

2020
[weakdeps]
21+
AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
2122
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
23+
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
24+
cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1"
2225
FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
2326

2427
[extensions]
2528
TensorKitChainRulesCoreExt = "ChainRulesCore"
2629
TensorKitFiniteDifferencesExt = "FiniteDifferences"
30+
TensorKitAMDGPUExt = "AMDGPU"
31+
TensorKitCUDAExt = ["CUDA", "cuTENSOR"]
2732

2833
[compat]
34+
Adapt = "4"
35+
AMDGPU = "2"
2936
Aqua = "0.6, 0.7, 0.8"
3037
ChainRulesCore = "1"
3138
ChainRulesTestUtils = "1"
3239
Combinatorics = "1"
40+
CUDA = "5"
41+
cuTENSOR = "2"
3342
FiniteDifferences = "0.12"
3443
LRUCache = "1.0.2"
3544
LinearAlgebra = "1"
@@ -49,6 +58,7 @@ Zygote = "0.7"
4958
julia = "1.10"
5059

5160
[extras]
61+
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
5262
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
5363
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
5464
ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a"
@@ -61,4 +71,4 @@ TestExtras = "5ed8adda-3752-4e41-b88a-e8b09835ee3a"
6171
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
6272

6373
[targets]
64-
test = ["Aqua", "Combinatorics", "LinearAlgebra", "TensorOperations", "Test", "TestExtras", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote"]
74+
test = ["Adapt", "AMDGPU", "Aqua", "Combinatorics", "CUDA", "cuTENSOR", "LinearAlgebra", "TensorOperations", "Test", "TestExtras", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote"]
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
module TensorKitAMDGPUExt
2+
3+
using TensorKit
4+
using TensorKit: SectorDict
5+
using AMDGPU
6+
using Random
7+
8+
include("roctensormap.jl")
9+
10+
end
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
const _ROCMatOrDict{I,T} = Union{ROCMatrix{T},SectorDict{I,ROCMatrix{T}}}
2+
const ROCTensorMap{T,S,N₁,N₂,I,A<:_ROCMatOrDict{I,T}} = TensorMap{T,S,N₁,N₂,A}
3+
const ROCTensor{T, S, N, I, A <: _ROCMatOrDict{I, T}} = ROCTensorMap{T, S, N, 0, I, A}
4+
5+
function ROCTensorMap{T}(::UndefInitializer, V::TensorMapSpace{S, N₁, N₂}) where {T, S, N₁, N₂}
6+
A = ROCMatrix{T, AMDGPU.default_memory}
7+
TT = tensormaptype{S, N₁, N₂, A}
8+
return TT(undef, codomain(V), domain(V))
9+
end
10+
11+
function ROCTensorMap{T}(::UndefInitializer, codomain::TensorSpace{S},
12+
domain::TensorSpace{S}) where {T,S}
13+
return ROCTensorMap{T}(undef, codomain domain)
14+
end
15+
function ROCTensor{T}(::UndefInitializer, V::TensorSpace{S}) where {T,S}
16+
return ROCTensorMap{T}(undef, V one(V))
17+
end
18+
19+
for (fname, felt) in ((:zeros, :zero), (:ones, :one))
20+
@eval begin
21+
function AMDGPU.$fname(codomain::TensorSpace{S},
22+
domain::TensorSpace{S}=one(codomain)) where {S<:IndexSpace}
23+
return AMDGPU.$fname(codomain domain)
24+
end
25+
function AMDGPU.$fname(::Type{T}, codomain::TensorSpace{S},
26+
domain::TensorSpace{S}=one(codomain)) where {T,S<:IndexSpace}
27+
return AMDGPU.$fname(T, codomain domain)
28+
end
29+
AMDGPU.$fname(V::TensorMapSpace) = AMDGPU.$fname(Float64, V)
30+
function AMDGPU.$fname(::Type{T}, V::TensorMapSpace) where {T}
31+
t = ROCTensorMap{T}(undef, V)
32+
fill!(t, $felt(T))
33+
return t
34+
end
35+
end
36+
end
37+
38+
for randfun in (:rand, :randn)
39+
randfun! = Symbol(randfun, :!)
40+
@eval begin
41+
# converting `codomain` and `domain` into `HomSpace`
42+
function AMDGPU.$randfun(codomain::TensorSpace{S},
43+
domain::TensorSpace{S}) where {S<:IndexSpace}
44+
return AMDGPU.$randfun(codomain domain)
45+
end
46+
function AMDGPU.$randfun(::Type{T}, codomain::TensorSpace{S},
47+
domain::TensorSpace{S}) where {T,S<:IndexSpace}
48+
return AMDGPU.$randfun(T, codomain domain)
49+
end
50+
function AMDGPU.$randfun(rng::Random.AbstractRNG, ::Type{T},
51+
codomain::TensorSpace{S},
52+
domain::TensorSpace{S}) where {T,S<:IndexSpace}
53+
return AMDGPU.$randfun(rng, T, codomain domain)
54+
end
55+
56+
# accepting single `TensorSpace`
57+
AMDGPU.$randfun(codomain::TensorSpace) = AMDGPU.$randfun(codomain one(codomain))
58+
function AMDGPU.$randfun(::Type{T}, codomain::TensorSpace) where {T}
59+
return AMDGPU.$randfun(T, codomain one(codomain))
60+
end
61+
function AMDGPU.$randfun(rng::Random.AbstractRNG, ::Type{T},
62+
codomain::TensorSpace) where {T}
63+
return AMDGPU.$randfun(rng, T, codomain one(domain))
64+
end
65+
66+
# filling in default eltype
67+
AMDGPU.$randfun(V::TensorMapSpace) = AMDGPU.$randfun(Float64, V)
68+
function AMDGPU.$randfun(rng::Random.AbstractRNG, V::TensorMapSpace)
69+
return AMDGPU.$randfun(rng, Float64, V)
70+
end
71+
72+
# filling in default rng
73+
function AMDGPU.$randfun(::Type{T}, V::TensorMapSpace) where {T}
74+
return AMDGPU.$randfun(Random.default_rng(), T, V)
75+
end
76+
77+
# implementation
78+
function AMDGPU.$randfun(rng::Random.AbstractRNG, ::Type{T},
79+
V::TensorMapSpace) where {T}
80+
t = ROCTensorMap{T}(undef, V)
81+
AMDGPU.$randfun!(rng, t)
82+
return t
83+
end
84+
end
85+
end
86+
87+
# converters
88+
# ----------
89+
function Base.convert(::Type{ROCTensorMap}, d::Dict{Symbol,Any})
90+
try
91+
codomain = eval(Meta.parse(d[:codomain]))
92+
domain = eval(Meta.parse(d[:domain]))
93+
data = SectorDict(eval(Meta.parse(c)) => ROCArray(b) for (c, b) in d[:data])
94+
return TensorMap(data, codomain, domain)
95+
catch e # sector unknown in TensorKit.jl; user-defined, hopefully accessible in Main
96+
codomain = Base.eval(Main, Meta.parse(d[:codomain]))
97+
domain = Base.eval(Main, Meta.parse(d[:domain]))
98+
data = SectorDict(Base.eval(Main, Meta.parse(c)) => ROCArray(b)
99+
for (c, b) in d[:data])
100+
return TensorMap(data, codomain, domain)
101+
end
102+
end
103+
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
module TensorKitCUDAExt
2+
3+
using CUDA
4+
using CUDA.CUBLAS # for LinearAlgebra tie-ins
5+
using cuTENSOR: cuTENSOR
6+
7+
using TensorKit
8+
using TensorKit.Factorizations
9+
using TensorKit.Factorizations: select_svd_algorithm, OFA, initialize_output, AbstractAlgorithm
10+
using TensorKit: SectorDict, tensormaptype
11+
12+
using TensorKit.MatrixAlgebraKit
13+
14+
using Random
15+
using LinearAlgebra
16+
17+
include("cutensormap.jl")
18+
19+
TensorKit.Factorizations.select_svd_algorithm(::CuTensorMap, ::TensorKit.Factorizations.SVD) = CUSOLVER_QRIteration()
20+
TensorKit.Factorizations.select_svd_algorithm(::CuTensorMap, ::TensorKit.Factorizations.SDD) = throw(ArgumentError("DivideAndConquer unavailable on CUDA"))
21+
TensorKit.Factorizations.select_svd_algorithm(::CuTensorMap, alg::OFA) = throw(ArgumentError(lazy"Unknown algorithm $alg"))
22+
23+
const CuDiagonalTensorMap{T, S} = DiagonalTensorMap{T, S, CuVector{T}}
24+
25+
"""
26+
CuDiagonalTensorMap{T}(undef, domain::S) where {T,S<:IndexSpace}
27+
# expert mode: select storage type `A`
28+
DiagonalTensorMap{T,S,A}(undef, domain::S) where {T,S<:IndexSpace,A<:DenseVector{T}}
29+
30+
Construct a `DiagonalTensorMap` with uninitialized data.
31+
"""
32+
function CuDiagonalTensorMap{T}(::UndefInitializer, V::TensorMapSpace) where {T}
33+
(numin(V) == numout(V) == 1 && domain(V) == codomain(V)) ||
34+
throw(ArgumentError("DiagonalTensorMap requires a space with equal domain and codomain and 2 indices"))
35+
return CuDiagonalTensorMap{T}(undef, domain(V))
36+
end
37+
function CuDiagonalTensorMap{T}(::UndefInitializer, V::ProductSpace) where {T}
38+
length(V) == 1 ||
39+
throw(ArgumentError("DiagonalTensorMap requires `numin(d) == numout(d) == 1`"))
40+
return CuDiagonalTensorMap{T}(undef, only(V))
41+
end
42+
function CuDiagonalTensorMap{T}(::UndefInitializer, V::S) where {T,S<:IndexSpace}
43+
return CuDiagonalTensorMap{T,S}(undef, V)
44+
end
45+
CuDiagonalTensorMap(::UndefInitializer, V::IndexSpace) = CuDiagonalTensorMap{Float64}(undef, V)
46+
47+
function TensorKit.Factorizations.initialize_output(::typeof(svd_compact!), t::CuTensorMap, ::AbstractAlgorithm)
48+
V_cod = V_dom = infimum(fuse(codomain(t)), fuse(domain(t)))
49+
U = similar(t, codomain(t) V_cod)
50+
S = CuDiagonalTensorMap{real(scalartype(t))}(undef, V_cod)
51+
Vᴴ = similar(t, V_dom domain(t))
52+
return U, S, Vᴴ
53+
end
54+
55+
function TensorKit.Factorizations.initialize_output(::typeof(eigh_full!), t::CuTensorMap, ::AbstractAlgorithm)
56+
V_D = fuse(domain(t))
57+
T = real(scalartype(t))
58+
D = CuDiagonalTensorMap{T}(undef, V_D)
59+
V = similar(t, codomain(t) V_D)
60+
return D, V
61+
end
62+
63+
function TensorKit.Factorizations.initialize_output(::typeof(eig_full!), t::CuTensorMap, ::AbstractAlgorithm)
64+
V_D = fuse(domain(t))
65+
Tc = complex(scalartype(t))
66+
D = CuDiagonalTensorMap{Tc}(undef, V_D)
67+
V = similar(t, Tc, codomain(t) V_D)
68+
return D, V
69+
end
70+
71+
function TensorKit.Factorizations.initialize_output(::typeof(eigh_vals!), t::CuTensorMap, alg::AbstractAlgorithm)
72+
V_D = fuse(domain(t))
73+
T = real(scalartype(t))
74+
return D = CuDiagonalTensorMap{Tc}(undef, V_D)
75+
end
76+
77+
function TensorKit.Factorizations.initialize_output(::typeof(eig_vals!), t::CuTensorMap, alg::AbstractAlgorithm)
78+
V_D = fuse(domain(t))
79+
Tc = complex(scalartype(t))
80+
return D = CuDiagonalTensorMap{Tc}(undef, V_D)
81+
end
82+
end
Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
const CuTensorMap{T,S,N₁,N₂,A<:CuVector{T}} = TensorMap{T,S,N₁,N₂,A}
2+
const CuTensor{T, S, N, A<:CuVector{T}} = CuTensorMap{T, S, N, 0, A}
3+
4+
function TensorKit.tensormaptype(S::Type{<:IndexSpace}, N₁, N₂, TorA::Type{<:StridedCuArray})
5+
if TorA <: CuVector
6+
return TensorMap{scalartype(TorA),S,N₁,N₂,TorA}
7+
else
8+
throw(ArgumentError("argument $TorA should specify a scalar type (`<:Number`) or a storage type `<:CuVector{<:Number}`"))
9+
end
10+
end
11+
12+
function CuTensorMap{T}(::UndefInitializer, V::TensorMapSpace{S, N₁, N₂}) where {T, S, N₁, N₂}
13+
return CuTensorMap{T,S,N₁,N₂,CuVector{T}}(undef, V)
14+
end
15+
16+
function CuTensorMap{T}(::UndefInitializer, codomain::TensorSpace{S},
17+
domain::TensorSpace{S}) where {T,S}
18+
return CuTensorMap{T}(undef, codomain domain)
19+
end
20+
function CuTensor{T}(::UndefInitializer, V::TensorSpace{S}) where {T,S}
21+
return CuTensorMap{T}(undef, V one(V))
22+
end
23+
# constructor starting from block data
24+
"""
25+
CuTensorMap(data::AbstractDict{<:Sector,<:CuMatrix}, codomain::ProductSpace{S,N₁},
26+
domain::ProductSpace{S,N₂}) where {S<:ElementarySpace,N₁,N₂}
27+
CuTensorMap(data, codomain ← domain)
28+
CuTensorMap(data, domain → codomain)
29+
30+
Construct a `CuTensorMap` by explicitly specifying its block data.
31+
32+
## Arguments
33+
- `data::AbstractDict{<:Sector,<:CuMatrix}`: dictionary containing the block data for
34+
each coupled sector `c` as a matrix of size `(blockdim(codomain, c), blockdim(domain, c))`.
35+
- `codomain::ProductSpace{S,N₁}`: the codomain as a `ProductSpace` of `N₁` spaces of type
36+
`S<:ElementarySpace`.
37+
- `domain::ProductSpace{S,N₂}`: the domain as a `ProductSpace` of `N₂` spaces of type
38+
`S<:ElementarySpace`.
39+
40+
Alternatively, the domain and codomain can be specified by passing a [`HomSpace`](@ref)
41+
using the syntax `codomain ← domain` or `domain → codomain`.
42+
"""
43+
function CuTensorMap(data::AbstractDict{<:Sector,<:CuArray},
44+
V::TensorMapSpace{S,N₁,N₂}) where {S,N₁,N₂}
45+
T = eltype(valtype(data))
46+
t = CuTensorMap{T}(undef, V)
47+
for (c, b) in blocks(t)
48+
haskey(data, c) || throw(SectorMismatch("no data for block sector $c"))
49+
datac = data[c]
50+
size(datac) == size(b) ||
51+
throw(DimensionMismatch("wrong size of block for sector $c"))
52+
copy!(b, datac)
53+
end
54+
for (c, b) in data
55+
c blocksectors(t) || isempty(b) ||
56+
throw(SectorMismatch("data for block sector $c not expected"))
57+
end
58+
return t
59+
end
60+
61+
for (fname, felt) in ((:zeros, :zero), (:ones, :one))
62+
@eval begin
63+
function CUDA.$fname(codomain::TensorSpace{S},
64+
domain::TensorSpace{S}=one(codomain)) where {S<:IndexSpace}
65+
return CUDA.$fname(codomain domain)
66+
end
67+
function CUDA.$fname(::Type{T}, codomain::TensorSpace{S},
68+
domain::TensorSpace{S}=one(codomain)) where {T,S<:IndexSpace}
69+
return CUDA.$fname(T, codomain domain)
70+
end
71+
CUDA.$fname(V::TensorMapSpace) = CUDA.$fname(Float64, V)
72+
function CUDA.$fname(::Type{T}, V::TensorMapSpace) where {T}
73+
t = CuTensorMap{T}(undef, V)
74+
fill!(t, $felt(T))
75+
return t
76+
end
77+
end
78+
end
79+
80+
for randfun in (:rand, :randn)
81+
randfun! = Symbol(randfun, :!)
82+
@eval begin
83+
# converting `codomain` and `domain` into `HomSpace`
84+
function CUDA.$randfun(codomain::TensorSpace{S},
85+
domain::TensorSpace{S}) where {S<:IndexSpace}
86+
return CUDA.$randfun(codomain domain)
87+
end
88+
function CUDA.$randfun(::Type{T}, codomain::TensorSpace{S},
89+
domain::TensorSpace{S}) where {T,S<:IndexSpace}
90+
return CUDA.$randfun(T, codomain domain)
91+
end
92+
function CUDA.$randfun(rng::Random.AbstractRNG, ::Type{T},
93+
codomain::TensorSpace{S},
94+
domain::TensorSpace{S}) where {T,S<:IndexSpace}
95+
return CUDA.$randfun(rng, T, codomain domain)
96+
end
97+
98+
# accepting single `TensorSpace`
99+
CUDA.$randfun(codomain::TensorSpace) = CUDA.$randfun(codomain one(codomain))
100+
function CUDA.$randfun(::Type{T}, codomain::TensorSpace) where {T}
101+
return CUDA.$randfun(T, codomain one(codomain))
102+
end
103+
function CUDA.$randfun(rng::Random.AbstractRNG, ::Type{T},
104+
codomain::TensorSpace) where {T}
105+
return CUDA.$randfun(rng, T, codomain one(domain))
106+
end
107+
108+
# filling in default eltype
109+
CUDA.$randfun(V::TensorMapSpace) = CUDA.$randfun(Float64, V)
110+
function CUDA.$randfun(rng::Random.AbstractRNG, V::TensorMapSpace)
111+
return CUDA.$randfun(rng, Float64, V)
112+
end
113+
114+
# filling in default rng
115+
function CUDA.$randfun(::Type{T}, V::TensorMapSpace) where {T}
116+
return CUDA.$randfun(Random.default_rng(), T, V)
117+
end
118+
119+
# implementation
120+
function CUDA.$randfun(rng::Random.AbstractRNG, ::Type{T},
121+
V::TensorMapSpace) where {T}
122+
t = CuTensorMap{T}(undef, V)
123+
CUDA.$randfun!(rng, t)
124+
return t
125+
end
126+
end
127+
end
128+
129+
# converters
130+
# ----------
131+
function Base.convert(::Type{CuTensorMap}, d::Dict{Symbol,Any})
132+
try
133+
codomain = eval(Meta.parse(d[:codomain]))
134+
domain = eval(Meta.parse(d[:domain]))
135+
data = SectorDict(eval(Meta.parse(c)) => CuArray(b) for (c, b) in d[:data])
136+
return TensorMap(data, codomain, domain)
137+
catch e # sector unknown in TensorKit.jl; user-defined, hopefully accessible in Main
138+
codomain = Base.eval(Main, Meta.parse(d[:codomain]))
139+
domain = Base.eval(Main, Meta.parse(d[:domain]))
140+
data = SectorDict(Base.eval(Main, Meta.parse(c)) => CuArray(b)
141+
for (c, b) in d[:data])
142+
return TensorMap(data, codomain, domain)
143+
end
144+
end
145+

0 commit comments

Comments
 (0)