Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@ StaticArraysCore = "1e83bf80-4336-4d27-bf5d-d5a4f845583c"
UnPack = "3a884ed6-31ef-47d7-9d2a-63182c4928ed"

[weakdeps]
AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
BandedMatrices = "aae01518-5342-5314-be14-df237901396f"
BlockDiagonals = "0a1fb500-61f7-11e9-3c65-f5ef3456f9f0"
blis_jll = "6136c539-28a5-5bf0-87cc-b183200dce32"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
CUDSS = "45b445bb-4962-46a0-9369-b4df9d0f772e"
CUSOLVERRF = "a8cc9031-bad2-4722-94f5-40deabb4245c"
Expand All @@ -48,8 +48,10 @@ Pardiso = "46dd5b70-b6fb-5a00-ae2d-e8fea33afaf2"
RecursiveFactorization = "f2c3362d-daeb-58d1-803e-2bc74f2840b4"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
Sparspak = "e56a9233-b9d6-4f03-8d0f-1825330902ac"
blis_jll = "6136c539-28a5-5bf0-87cc-b183200dce32"

[extensions]
LinearSolveAMDGPUExt = "AMDGPU"
LinearSolveBLISExt = ["blis_jll", "LAPACK_jll"]
LinearSolveBandedMatricesExt = "BandedMatrices"
LinearSolveBlockDiagonalsExt = "BlockDiagonals"
Expand All @@ -71,12 +73,12 @@ LinearSolveSparseArraysExt = "SparseArrays"
LinearSolveSparspakExt = ["SparseArrays", "Sparspak"]

[compat]
AMDGPU = "1"
AllocCheck = "0.2"
Aqua = "0.8"
ArrayInterface = "7.7"
BandedMatrices = "1.5"
BlockDiagonals = "0.2"
blis_jll = "0.9.0"
CUDA = "5"
CUDSS = "0.4"
CUSOLVERRF = "0.2.6"
Expand Down Expand Up @@ -126,6 +128,7 @@ StaticArraysCore = "1.4.2"
Test = "1"
UnPack = "1"
Zygote = "0.7"
blis_jll = "0.9.0"
julia = "1.10"

[extras]
Expand Down
68 changes: 68 additions & 0 deletions ext/LinearSolveAMDGPUExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
module LinearSolveAMDGPUExt

using AMDGPU
using LinearSolve: LinearSolve, LinearCache, AMDGPUOffloadLUFactorization,
AMDGPUOffloadQRFactorization, init_cacheval, OperatorAssumptions
using LinearSolve.LinearAlgebra, LinearSolve.SciMLBase

# LU Factorization
function SciMLBase.solve!(
        cache::LinearSolve.LinearCache, alg::AMDGPUOffloadLUFactorization;
        kwargs...)
    # Re-factorize only when the cached matrix has changed since the last solve.
    if cache.isfresh
        # Upload A to the device and LU-factorize it there via rocSOLVER.
        fact = AMDGPU.rocSOLVER.getrf!(AMDGPU.ROCArray(cache.A))
        cache.cacheval = fact
        cache.isfresh = false
    end

    A_gpu, ipiv = cache.cacheval
    b_gpu = AMDGPU.ROCArray(cache.b)

    # Solve A x = b on the device using the cached LU factors ('N' = no transpose).
    AMDGPU.rocSOLVER.getrs!('N', A_gpu, ipiv, b_gpu)

    # Copy the device solution back to host memory and into the cache.
    y = Array(b_gpu)
    cache.u .= y
    return SciMLBase.build_linear_solution(alg, y, nothing, cache)
end

function LinearSolve.init_cacheval(alg::AMDGPUOffloadLUFactorization, A, b, u, Pl, Pr,
        maxiters::Int, abstol, reltol, verbose::Bool,
        assumptions::OperatorAssumptions)
    # Seed the cache with a device-side LU factorization of `A`.
    # NOTE(review): solve! re-factorizes while `cache.isfresh` is set, so this
    # eager factorization may be redundant work — confirm against the solve path.
    A_gpu = AMDGPU.ROCArray(A)
    return AMDGPU.rocSOLVER.getrf!(A_gpu)
end

# QR Factorization
function SciMLBase.solve!(
        cache::LinearSolve.LinearCache, alg::AMDGPUOffloadQRFactorization;
        kwargs...)
    # Re-factorize only when the cached matrix has changed since the last solve.
    if cache.isfresh
        A_gpu = AMDGPU.ROCArray(cache.A)
        # geqrf! stores Householder reflectors in A_gpu in-place; `tau` holds
        # the scalar factors, sized by the smaller matrix dimension.
        tau = AMDGPU.ROCVector{eltype(A_gpu)}(undef, min(size(A_gpu)...))
        AMDGPU.rocSOLVER.geqrf!(A_gpu, tau)
        cache.cacheval = (A_gpu, tau)
        cache.isfresh = false
    end

    A_gpu, tau = cache.cacheval
    b_gpu = AMDGPU.ROCArray(cache.b)

    # Apply Q^T to b.
    # NOTE(review): 'T' is the real transpose; for complex element types the
    # adjoint ('C') may be required — confirm intended element-type support.
    AMDGPU.rocSOLVER.ormqr!('L', 'T', A_gpu, tau, b_gpu)

    # Solve the upper triangular system R x = Q^T b (first n rows of R).
    n = size(A_gpu, 2)
    AMDGPU.rocBLAS.trsv!('U', 'N', 'N', n, A_gpu, b_gpu)

    # Copy the first n entries (the solution) back to the host.
    y = Array(b_gpu[1:n])
    cache.u .= y
    return SciMLBase.build_linear_solution(alg, y, nothing, cache)
end

function LinearSolve.init_cacheval(alg::AMDGPUOffloadQRFactorization, A, b, u, Pl, Pr,
        maxiters::Int, abstol, reltol, verbose::Bool,
        assumptions::OperatorAssumptions)
    # Seed the cache with a device-side QR factorization of `A`: geqrf!
    # overwrites the device copy with Householder reflectors, and `tau`
    # (sized by the smaller matrix dimension) receives the scalar factors.
    gpu_A = AMDGPU.ROCArray(A)
    tau = AMDGPU.ROCVector{eltype(gpu_A)}(undef, minimum(size(gpu_A)))
    AMDGPU.rocSOLVER.geqrf!(gpu_A, tau)
    return (gpu_A, tau)
end

end
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[JuliaFormatter] reported by reviewdog 🐶

Suggested change
end
end

2 changes: 1 addition & 1 deletion src/LinearSolve.jl
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ export KrylovJL, KrylovJL_CG, KrylovJL_MINRES, KrylovJL_GMRES,
export SimpleGMRES

export HYPREAlgorithm
export CudaOffloadFactorization
export CudaOffloadFactorization, AMDGPUOffloadLUFactorization, AMDGPUOffloadQRFactorization
export MKLPardisoFactorize, MKLPardisoIterate
export PanuaPardisoFactorize, PanuaPardisoIterate
export PardisoJL
Expand Down
42 changes: 42 additions & 0 deletions src/extension_algs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,48 @@ struct CudaOffloadFactorization <: LinearSolve.AbstractFactorization
end
end

"""
`AMDGPUOffloadLUFactorization()`

An offloading technique using LU factorization to GPU-accelerate CPU-based computations
on AMD GPUs. Requires a sufficiently large `A` to overcome the data transfer costs.

!!! note

    Using this solver requires adding the package AMDGPU.jl, i.e. `using AMDGPU`
"""
struct AMDGPUOffloadLUFactorization <: LinearSolve.AbstractFactorization
    function AMDGPUOffloadLUFactorization()
        # The implementation lives in the LinearSolveAMDGPUExt package extension,
        # which only loads once AMDGPU.jl has been imported by the user.
        ext = Base.get_extension(@__MODULE__, :LinearSolveAMDGPUExt)
        ext === nothing &&
            error("AMDGPUOffloadLUFactorization requires that AMDGPU is loaded, i.e. `using AMDGPU`")
        return new()
    end
end

"""
`AMDGPUOffloadQRFactorization()`

An offloading technique using QR factorization to GPU-accelerate CPU-based computations
on AMD GPUs. Requires a sufficiently large `A` to overcome the data transfer costs.

!!! note

    Using this solver requires adding the package AMDGPU.jl, i.e. `using AMDGPU`
"""
struct AMDGPUOffloadQRFactorization <: LinearSolve.AbstractFactorization
    function AMDGPUOffloadQRFactorization()
        # The implementation lives in the LinearSolveAMDGPUExt package extension,
        # which only loads once AMDGPU.jl has been imported by the user.
        ext = Base.get_extension(@__MODULE__, :LinearSolveAMDGPUExt)
        ext === nothing &&
            error("AMDGPUOffloadQRFactorization requires that AMDGPU is loaded, i.e. `using AMDGPU`")
        return new()
    end
end

## RFLUFactorization

"""
Expand Down
Loading