Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,22 @@ VectorInterface = "409d34a3-91d5-4945-b6ec-7529ddf182d8"

[weakdeps]
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1"

[extensions]
TensorKitAdaptExt = "Adapt"
TensorKitAMDGPUExt = "AMDGPU"
TensorKitCUDAExt = ["CUDA", "cuTENSOR"]
TensorKitChainRulesCoreExt = "ChainRulesCore"
TensorKitFiniteDifferencesExt = "FiniteDifferences"

[compat]
Adapt = "4"
AMDGPU = "2"
Aqua = "0.6, 0.7, 0.8"
ArgParse = "1.2.0"
CUDA = "5.9"
Expand Down Expand Up @@ -61,6 +64,7 @@ julia = "1.10"

[extras]
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
Expand All @@ -78,4 +82,4 @@ Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1"

[targets]
test = ["ArgParse", "Adapt", "Aqua", "Combinatorics", "CUDA", "cuTENSOR", "GPUArrays", "LinearAlgebra", "SafeTestsets", "TensorOperations", "Test", "TestExtras", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote"]
test = ["ArgParse", "Adapt", "Aqua", "Combinatorics", "AMDGPU", "CUDA", "cuTENSOR", "GPUArrays", "LinearAlgebra", "SafeTestsets", "TensorOperations", "Test", "TestExtras", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote"]
20 changes: 20 additions & 0 deletions ext/TensorKitAMDGPUExt/TensorKitAMDGPUExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Package extension adding AMDGPU (ROCm) GPU support to TensorKit.
module TensorKitAMDGPUExt

# AMDGPU array types plus the vendor BLAS/LAPACK wrappers used by the GPU paths.
using AMDGPU, AMDGPU.rocBLAS, AMDGPU.rocSOLVER, LinearAlgebra
using AMDGPU: @allowscalar
# Aliased imports so GPU rand/randn methods can be extended without clashing
# with `Base.rand`/`Base.randn` or TensorKit's own `rand`/`randn`.
import AMDGPU: rand as rocrand, rand! as rocrand!, randn as rocrandn, randn! as rocrandn!

using TensorKit
using TensorKit.Factorizations
using TensorKit.Strided
using TensorKit.Factorizations: AbstractAlgorithm
using TensorKit: SectorDict, tensormaptype, scalar, similarstoragetype, AdjointTensorMap, scalartype, project_symmetric_and_check
import TensorKit: randisometry, rand, randn

using TensorKit: MatrixAlgebraKit

using Random

# TensorMap specializations backed by ROCArray storage.
include("roctensormap.jl")

end
166 changes: 166 additions & 0 deletions ext/TensorKitAMDGPUExt/roctensormap.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
# `TensorMap` whose data lives in a `ROCVector` backed by a HIP device buffer.
const ROCTensorMap{T, S, N₁, N₂} = TensorMap{T, S, N₁, N₂, ROCVector{T, AMDGPU.Mem.HIPBuffer}}
# GPU tensor with a trivial (zero-index) domain.
const ROCTensor{T, S, N} = ROCTensorMap{T, S, N, 0}

# Lazy adjoint wrapper around a `ROCTensorMap`.
const AdjointROCTensorMap{T, S, N₁, N₂} = AdjointTensorMap{T, S, N₁, N₂, ROCTensorMap{T, S, N₁, N₂}}

# Upload a host `TensorMap` to the GPU by copying its flat data into a `ROCArray`
# of element type `T`, over the same space.
function ROCTensorMap(t::TensorMap{T, S, N₁, N₂, A}) where {T, S, N₁, N₂, A}
    devicedata = ROCArray{T}(t.data)
    return ROCTensorMap{T, S, N₁, N₂}(devicedata, space(t))
end

# project_symmetric! doesn't yet work for GPU types, so do this on the host, then copy
function TensorKit.project_symmetric_and_check(::Type{T}, ::Type{A}, data::AbstractArray, V::TensorMapSpace; tol = sqrt(eps(real(float(eltype(data)))))) where {T, A <: ROCVector{T}}
    host = TensorKit.TensorMapWithStorage{T, Vector{T}}(undef, V)
    host = TensorKit.project_symmetric!(host, Array(data))
    # the projection must reproduce the input up to `tol`; otherwise the input
    # had entries at positions incompatible with the symmetry structure
    if !isapprox(Array(reshape(data, dims(host))), convert(Array, host); atol = tol)
        throw(ArgumentError("Data has non-zero elements at incompatible positions"))
    end
    # upload the projected host data back into GPU storage
    return TensorKit.TensorMapWithStorage{T, A}(A(host.data), V)
end

# `AMDGPU.zeros`/`AMDGPU.ones` constructors for `ROCTensorMap`, mirroring the
# TensorKit host-side constructors of the same names.
for (fun, unit) in ((:zeros, :zero), (:ones, :one))
    @eval begin
        # core implementation: allocate over `V` and fill with zero(T)/one(T)
        function AMDGPU.$fun(::Type{T}, V::TensorMapSpace) where {T}
            t = ROCTensorMap{T}(undef, V)
            fill!(t, $unit(T))
            return t
        end
        # default element type
        AMDGPU.$fun(V::TensorMapSpace) = AMDGPU.$fun(Float64, V)
        # convenience forms: accept separate codomain/domain and build the `HomSpace`
        function AMDGPU.$fun(
                codomain::TensorSpace{S},
                domain::TensorSpace{S} = one(codomain)
            ) where {S <: IndexSpace}
            return AMDGPU.$fun(codomain ← domain)
        end
        function AMDGPU.$fun(
                ::Type{T}, codomain::TensorSpace{S},
                domain::TensorSpace{S} = one(codomain)
            ) where {T, S <: IndexSpace}
            return AMDGPU.$fun(T, codomain ← domain)
        end
    end
end

# GPU analogues of `rand`/`randn` for `ROCTensorMap`, exposed through the
# `rocrand`/`rocrandn` aliases of `AMDGPU.rand`/`AMDGPU.randn`.
for rfun in (:rocrand, :rocrandn)
    rfun! = Symbol(rfun, :!)
    @eval begin
        # in-place variant: fill each symmetry block with random entries from `rng`
        function $rfun!(rng::Random.AbstractRNG, t::ROCTensorMap)
            for (_, b) in blocks(t)
                $rfun!(rng, b)
            end
            return t
        end

        # core out-of-place implementation: allocate, then fill in place
        function $rfun(
                rng::Random.AbstractRNG, ::Type{T},
                V::TensorMapSpace
            ) where {T}
            t = ROCTensorMap{T}(undef, V)
            $rfun!(rng, t)
            return t
        end

        # defaults: element type and/or rng
        $rfun(V::TensorMapSpace) = $rfun(Float64, V)
        function $rfun(rng::Random.AbstractRNG, V::TensorMapSpace)
            return $rfun(rng, Float64, V)
        end
        function $rfun(::Type{T}, V::TensorMapSpace) where {T}
            return $rfun(Random.default_rng(), T, V)
        end

        # convenience forms: accept separate codomain/domain and build the `HomSpace`
        function $rfun(
                codomain::TensorSpace{S},
                domain::TensorSpace{S} = one(codomain),
            ) where {S <: IndexSpace}
            return $rfun(codomain ← domain)
        end
        function $rfun(
                ::Type{T}, codomain::TensorSpace{S},
                domain::TensorSpace{S} = one(codomain),
            ) where {T, S <: IndexSpace}
            return $rfun(T, codomain ← domain)
        end
        function $rfun(
                rng::Random.AbstractRNG, ::Type{T},
                codomain::TensorSpace{S},
                domain::TensorSpace{S} = one(codomain),
            ) where {T, S <: IndexSpace}
            return $rfun(rng, T, codomain ← domain)
        end
    end
end

# Scalar implementation
#-----------------------
# Extract the scalar value of a rank-(0, 0) tensor: locate nonzero entries on
# the device and read the unique one (`only` throws if there are several);
# an all-zero buffer yields zero.
function TensorKit.scalar(t::ROCTensorMap{T, S, 0, 0}) where {T, S}
    nonzero = findall(!iszero, t.data)
    isempty(nonzero) && return zero(scalartype(t))
    return @allowscalar @inbounds t.data[only(nonzero)]
end

# Convert an `AbstractTensorMap` over the same spaces into a `ROCTensorMap`
# with element type `T`; a no-op when `t` already has exactly the target type.
function Base.convert(
        TT::Type{ROCTensorMap{T, S, N₁, N₂}},
        t::AbstractTensorMap{<:Any, S, N₁, N₂}
    ) where {T, S, N₁, N₂}
    typeof(t) === TT && return t
    dest = TT(undef, space(t))
    return copy!(dest, t)
end

# Positive-definiteness check for GPU tensors.
#
# A tensor map is positive definite when it is an endomorphism (domain ==
# codomain, enforced by throwing `SpaceMismatch`), the space has a Euclidean
# inner product, and every symmetry block is hermitian positive definite.
function LinearAlgebra.isposdef(t::ROCTensorMap)
    domain(t) == codomain(t) ||
        throw(SpaceMismatch("`isposdef` requires domain and codomain to be the same"))
    InnerProductStyle(spacetype(t)) === EuclideanInnerProduct() || return false
    for (_, b) in blocks(t)
        # do our own hermitian check, with a tolerance, rather than requiring
        # exact hermiticity of the device matrix
        ε = eps(real(eltype(b)))
        MatrixAlgebraKit.ishermitian(b; atol = ε, rtol = ε) || return false
        # `Hermitian` wrapping dispatches to the hermitian-definite path
        isposdef(Hermitian(b)) || return false
    end
    return true
end

# Promotion between two `ROCTensorMap`s over the same spaces: keep GPU storage
# and promote the element type the way addition would.
function Base.promote_rule(
        ::Type{<:TT₁},
        ::Type{<:TT₂}
    ) where {
        S, N₁, N₂, T₁, T₂,
        TT₁ <: ROCTensorMap{T₁, S, N₁, N₂},
        TT₂ <: ROCTensorMap{T₂, S, N₁, N₂},
    }
    Tp = TensorKit.VectorInterface.promote_add(T₁, T₂)
    return ROCTensorMap{Tp, S, N₁, N₂}
end

# ROCTensorMap exponentiation:
#
# Compute `exp(t)` in place, block by block. Only hermitian tensors are
# currently supported on AMDGPU; each block goes through `exp(Hermitian(b))`.
# Throws `ErrorException` when domain != codomain and `ArgumentError` when the
# tensor is not hermitian.
function TensorKit.exp!(t::ROCTensorMap)
    domain(t) == codomain(t) ||
        error("Exponential of a tensor only exists when domain == codomain.")
    MatrixAlgebraKit.ishermitian(t) ||
        throw(ArgumentError("`exp!` is currently only supported on hermitian AMDGPU tensors"))
    for (_, b) in blocks(t)
        # `exp(Hermitian(b))` returns a wrapped matrix; copy its parent back
        # into the block's device storage
        copy!(b, parent(Base.exp(Hermitian(b))))
    end
    return t
end

# functions that don't map ℝ to (a subset of) ℝ: the result can be complex even
# for real input, so these always produce a complex-eltype tensor
for f in (:sqrt, :log, :asin, :acos, :acosh, :atanh, :acoth)
    fname = string(f)
    @eval function Base.$f(t::ROCTensorMap)
        domain(t) == codomain(t) ||
            throw(SpaceMismatch("`$($fname)` of a tensor only exists when domain == codomain"))
        MatrixAlgebraKit.ishermitian(t) ||
            throw(ArgumentError("`$($fname)` is currently only supported on hermitian AMDGPU tensors"))
        # result element type: complex, floating point
        T = complex(float(scalartype(t)))
        out = similar(t, T)
        # apply the function per symmetry block via the hermitian path
        for (c, b) in blocks(t)
            copy!(block(out, c), parent($f(Hermitian(b))))
        end
        return out
    end
end
Loading
Loading