Skip to content

Commit 60551f8

Browse files
committed
Start on GPU extensions
1 parent ea6bff6 commit 60551f8

File tree

21 files changed

+2970
-82
lines changed

21 files changed

+2970
-82
lines changed

Project.toml

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,19 +18,34 @@ TupleTools = "9d95972d-f1c8-5527-a6e0-b4b365fa01f6"
1818
VectorInterface = "409d34a3-91d5-4945-b6ec-7529ddf182d8"
1919

2020
[weakdeps]
21+
AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
22+
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
2123
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
2224
FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
25+
cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1"
26+
27+
[sources]
28+
GPUArrays = {rev = "master", url = "https://github.com/JuliaGPU/GPUArrays.jl"}
29+
MatrixAlgebraKit = {rev = "ksh/tk", url = "https://github.com/QuantumKitHub/MatrixAlgebraKit.jl"}
30+
AMDGPU = {rev = "master", url = "https://github.com/JuliaGPU/AMDGPU.jl"}
31+
cuTENSOR = {subdir = "lib/cutensor", url = "https://github.com/JuliaGPU/CUDA.jl", rev="master"}
2332

2433
[extensions]
34+
TensorKitAMDGPUExt = "AMDGPU"
35+
TensorKitCUDAExt = ["CUDA", "cuTENSOR"]
2536
TensorKitChainRulesCoreExt = "ChainRulesCore"
2637
TensorKitFiniteDifferencesExt = "FiniteDifferences"
2738

2839
[compat]
40+
AMDGPU = "2"
41+
Adapt = "4"
2942
Aqua = "0.6, 0.7, 0.8"
43+
CUDA = "5.8.4"
3044
ChainRulesCore = "1"
3145
ChainRulesTestUtils = "1"
3246
Combinatorics = "1"
3347
FiniteDifferences = "0.12"
48+
GPUArrays = "11.2.6"
3449
LRUCache = "1.0.2"
3550
LinearAlgebra = "1"
3651
MatrixAlgebraKit = "0.5.0"
@@ -39,26 +54,32 @@ PackageExtensionCompat = "1"
3954
Random = "1"
4055
ScopedValues = "1.3.0"
4156
Strided = "2"
42-
TensorKitSectors = "0.3"
57+
TensorKitSectors = "0.2"
4358
TensorOperations = "5.1"
4459
Test = "1"
4560
TestExtras = "0.2,0.3"
4661
TupleTools = "1.1"
4762
VectorInterface = "0.4.8, 0.5"
4863
Zygote = "0.7"
64+
cuTENSOR = "2"
4965
julia = "1.10"
5066

5167
[extras]
68+
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
69+
AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
5270
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
71+
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
5372
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
5473
ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a"
5574
Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa"
5675
FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
76+
GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
5777
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
5878
TensorOperations = "6aa20fa7-93e2-5fca-9bc0-fbd0db3c71a2"
5979
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
6080
TestExtras = "5ed8adda-3752-4e41-b88a-e8b09835ee3a"
6181
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
82+
cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1"
6283

6384
[targets]
64-
test = ["Aqua", "Combinatorics", "LinearAlgebra", "TensorOperations", "Test", "TestExtras", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote"]
85+
test = ["Adapt", "AMDGPU", "Aqua", "Combinatorics", "CUDA", "cuTENSOR", "GPUArrays", "LinearAlgebra", "TensorOperations", "Test", "TestExtras", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote"]
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
module TensorKitAMDGPUExt
2+
3+
using AMDGPU, AMDGPU.rocBLAS, LinearAlgebra
4+
using AMDGPU: @allowscalar
5+
import AMDGPU: rand as rocrand, rand! as rocrand!, randn as rocrandn, randn! as rocrandn!
6+
7+
using TensorKit
8+
import TensorKit.VectorInterface: scalartype as vi_scalartype
9+
using TensorKit.Factorizations
10+
using TensorKit.Strided
11+
using TensorKit.Factorizations: AbstractAlgorithm
12+
using TensorKit: SectorDict, tensormaptype, scalar, similarstoragetype, AdjointTensorMap
13+
14+
using TensorKit.MatrixAlgebraKit
15+
16+
using Random
17+
18+
include("roctensormap.jl")
19+
20+
# Alias for a `DiagonalTensorMap` whose storage type is a `ROCVector{T}` using an
# `AMDGPU.Mem.HIPBuffer`; `T` is the scalar type and `S` the space type parameter.
const ROCDiagonalTensorMap{T, S} = DiagonalTensorMap{T, S, ROCVector{T, AMDGPU.Mem.HIPBuffer}}
21+
22+
"""
23+
ROCDiagonalTensorMap{T}(undef, domain::S) where {T,S<:IndexSpace}
24+
# expert mode: select storage type `A`
25+
DiagonalTensorMap{T,S,A}(undef, domain::S) where {T,S<:IndexSpace,A<:DenseVector{T}}
26+
27+
Construct a `DiagonalTensorMap` with uninitialized data.
28+
"""
29+
function ROCDiagonalTensorMap{T}(::UndefInitializer, V::TensorMapSpace) where {T}
    # A diagonal map needs exactly one incoming and one outgoing index, and the
    # domain and codomain must coincide.
    isdiagonalspace = numin(V) == numout(V) == 1 && domain(V) == codomain(V)
    if !isdiagonalspace
        throw(ArgumentError("DiagonalTensorMap requires a space with equal domain and codomain and 2 indices"))
    end
    # Forward to the method that accepts the domain of `V`.
    return ROCDiagonalTensorMap{T}(undef, domain(V))
end
34+
# Uninitialized constructor for a `ProductSpace` argument: the product must
# consist of exactly one factor, which is unwrapped with `only` and forwarded.
function ROCDiagonalTensorMap{T}(::UndefInitializer, V::ProductSpace) where {T}
    if length(V) != 1
        throw(ArgumentError("DiagonalTensorMap requires `numin(d) == numout(d) == 1`"))
    end
    return ROCDiagonalTensorMap{T}(undef, only(V))
end
39+
# Uninitialized constructor for a single index space: fills in the space type
# parameter `S` from the argument and forwards to `ROCDiagonalTensorMap{T, S}`.
ROCDiagonalTensorMap{T}(::UndefInitializer, V::S) where {T, S <: IndexSpace} =
    ROCDiagonalTensorMap{T, S}(undef, V)
42+
# Uninitialized constructor without an explicit scalar type: defaults to `Float64`.
function ROCDiagonalTensorMap(::UndefInitializer, V::IndexSpace)
    return ROCDiagonalTensorMap{Float64}(undef, V)
end
43+
44+
# Wrap an existing `ROCVector` as the data of a diagonal tensor map over `V`;
# the scalar type and space type are taken from the arguments.
ROCDiagonalTensorMap(data::ROCVector{T}, V::S) where {T, S} =
    ROCDiagonalTensorMap{T, S}(data, V)
47+
48+
# Build a diagonal tensor map over `V` from a host `Vector`: the data is first
# converted to a `ROCVector{T}` and then wrapped.
function ROCDiagonalTensorMap(data::Vector{T}, V::S) where {T, S}
    devicedata = ROCVector{T}(data)
    return ROCDiagonalTensorMap{T, S}(devicedata, V)
end
51+
52+
# Allocate the outputs `(U, S, Vᴴ)` for `svd_full!` applied to a
# `ROCDiagonalTensorMap`.
#
# `U` and `Vᴴ` are created with `similar(t, ...)`; `S` is an uninitialized
# `ROCDiagonalTensorMap` whose scalar type is the real type of `scalartype(t)`.
function TensorKit.Factorizations.MAK.initialize_output(::typeof(svd_full!), t::ROCDiagonalTensorMap, alg::DiagonalAlgorithm)
    V_cod = fuse(codomain(t))
    V_dom = fuse(domain(t))
    # `codomain ← domain` constructs the map space; without the arrow the
    # expression is not valid Julia.
    U = similar(t, codomain(t) ← V_cod)
    S = ROCDiagonalTensorMap{real(scalartype(t))}(undef, V_cod ← V_dom)
    Vᴴ = similar(t, V_dom ← domain(t))
    return U, S, Vᴴ
end
60+
61+
# Allocate the output for `svd_vals!` on a `ROCTensorMap`: an uninitialized
# `ROCDiagonalTensorMap` with real scalar type over the infimum of the fused
# codomain and fused domain of `t`.
function TensorKit.Factorizations.MAK.initialize_output(::typeof(svd_vals!), t::ROCTensorMap, alg::AbstractAlgorithm)
    Treal = real(scalartype(t))
    fused_cod = fuse(codomain(t))
    fused_dom = fuse(domain(t))
    return ROCDiagonalTensorMap{Treal}(undef, infimum(fused_cod, fused_dom))
end
65+
66+
# Allocate the outputs `(U, S, Vᴴ)` for `svd_compact!` applied to a `ROCTensorMap`.
#
# The inner space is the infimum of the fused codomain and fused domain of `t`.
# `S` is an uninitialized `ROCDiagonalTensorMap` with real scalar type over that
# space; `U` and `Vᴴ` are created with `similar(t, ...)`.
function TensorKit.Factorizations.MAK.initialize_output(::typeof(svd_compact!), t::ROCTensorMap, ::AbstractAlgorithm)
    V_cod = V_dom = infimum(fuse(codomain(t)), fuse(domain(t)))
    # `codomain ← domain` constructs the map space; without the arrow the
    # expression is not valid Julia.
    U = similar(t, codomain(t) ← V_cod)
    S = ROCDiagonalTensorMap{real(scalartype(t))}(undef, V_cod)
    Vᴴ = similar(t, V_dom ← domain(t))
    return U, S, Vᴴ
end
73+
74+
# Allocate the outputs `(D, V)` for `eigh_full!` applied to a `ROCTensorMap`.
#
# `D` is an uninitialized `ROCDiagonalTensorMap` with real scalar type over the
# fused domain of `t`; `V` is created with `similar(t, ...)`.
function TensorKit.Factorizations.MAK.initialize_output(::typeof(eigh_full!), t::ROCTensorMap, ::AbstractAlgorithm)
    V_D = fuse(domain(t))
    T = real(scalartype(t))
    D = ROCDiagonalTensorMap{T}(undef, V_D)
    # `codomain ← domain` constructs the map space; without the arrow the
    # expression is not valid Julia.
    V = similar(t, codomain(t) ← V_D)
    return D, V
end
81+
82+
# Allocate the outputs `(D, V)` for `eig_full!` applied to a `ROCTensorMap`.
#
# Both outputs use the complex version of `scalartype(t)`: `D` is an
# uninitialized `ROCDiagonalTensorMap` over the fused domain of `t`, and `V` is
# created with `similar(t, Tc, ...)`.
function TensorKit.Factorizations.MAK.initialize_output(::typeof(eig_full!), t::ROCTensorMap, ::AbstractAlgorithm)
    V_D = fuse(domain(t))
    Tc = complex(scalartype(t))
    D = ROCDiagonalTensorMap{Tc}(undef, V_D)
    # `codomain ← domain` constructs the map space; without the arrow the
    # expression is not valid Julia.
    V = similar(t, Tc, codomain(t) ← V_D)
    return D, V
end
89+
90+
# Allocate the output for `eigh_vals!` applied to a `ROCTensorMap`: an
# uninitialized `ROCDiagonalTensorMap` with real scalar type over the fused
# domain of `t`.
function TensorKit.Factorizations.MAK.initialize_output(::typeof(eigh_vals!), t::ROCTensorMap, alg::AbstractAlgorithm)
    V_D = fuse(domain(t))
    # The real scalar type matches what the `eigh_full!` method uses for `D`.
    # The previous code referenced an undefined `Tc` here, which raised an
    # `UndefVarError` when called.
    T = real(scalartype(t))
    return ROCDiagonalTensorMap{T}(undef, V_D)
end
95+
96+
# Allocate the output for `eig_vals!` applied to a `ROCTensorMap`: an
# uninitialized `ROCDiagonalTensorMap` with the complex version of
# `scalartype(t)` over the fused domain of `t`.
function TensorKit.Factorizations.MAK.initialize_output(::typeof(eig_vals!), t::ROCTensorMap, alg::AbstractAlgorithm)
    Tcomplex = complex(scalartype(t))
    fused_dom = fuse(domain(t))
    return ROCDiagonalTensorMap{Tcomplex}(undef, fused_dom)
end
101+
102+
103+
# TODO
104+
# add VectorInterface extensions for proper AMDGPU promotion
105+
# `promote_add` for two strided ROC matrix types with element types `Tx` and `Ty`.
# Returns `Base.promote_op(add, Tx, Ty, Tα, Tβ)`, where `Tα` and `Tβ` are the
# types of the scaling arguments (both default to `VectorInterface.One()`).
# NOTE(review): the result is computed from the element types only, not the
# array types — confirm this is the intended contract for this method.
function TensorKit.VectorInterface.promote_add(
        TA::Type{<:AMDGPU.StridedROCMatrix{Tx}},
        TB::Type{<:AMDGPU.StridedROCMatrix{Ty}},
        α::Tα = TensorKit.VectorInterface.One(),
        β::Tβ = TensorKit.VectorInterface.One(),
    ) where {Tx, Ty, Tα, Tβ}
    return Base.promote_op(add, Tx, Ty, Tα, Tβ)
end
108+
109+
end

0 commit comments

Comments
 (0)