Skip to content

Commit ea7dd9b

Browse files
authored
Use optimize_gemm! from oneAPI.jl (#77)
1 parent f6de8b3 commit ea7dd9b

File tree

3 files changed

+15
-10
lines changed

3 files changed

+15
-10
lines changed

Project.toml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ KrylovPreconditionersCUDAExt = "CUDA"
2222
KrylovPreconditionersOneAPIExt = "oneAPI"
2323

2424
[compat]
25-
AMDGPU = "2"
25+
AMDGPU = "2.0.1"
2626
Adapt = "4"
2727
CUDA = "5.7.3"
2828
Graphs = "1.13.0"
@@ -34,7 +34,7 @@ Random = "1.10"
3434
SparseArrays = "1.10"
3535
Test = "1.10"
3636
julia = "1.10"
37-
oneAPI = "2.0.1"
37+
oneAPI = "2.1.0"
3838

3939
[extras]
4040
Krylov = "ba0b0d4f-ebba-5204-a429-3ac8c609bfb7"

ext/oneAPI/operators.jl

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,8 +16,9 @@ for (SparseMatrixType, BlasType) in ((:(oneSparseMatrixCSR{T}), :BlasFloat),)
1616
m,n = size(A)
1717
if nrhs == 1
1818
oneMKL.sparse_optimize_gemv!(transa, A)
19+
else
20+
oneMKL.sparse_optimize_gemm!(transa, 'N', nrhs, A)
1921
end
20-
# sparse_optimize_gemm! is only available with oneAPI > v2024.1.0
2122
return INTEL_KrylovOperator{T}(T, m, n, nrhs, transa, A)
2223
end
2324

test/gpu/gpu.jl

Lines changed: 11 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@ Random.seed!(666)
66
function test_ic0(FC, V, M)
77
n = 100
88
R = real(FC)
9+
tol = eps(R) |> sqrt
910
A_cpu = rand(FC, n, n)
1011
A_cpu = A_cpu * A_cpu'
1112
A_cpu = sparse(A_cpu)
@@ -19,9 +20,9 @@ function test_ic0(FC, V, M)
1920
r_gpu = b_gpu - A_gpu * x_gpu
2021
@test stats.niter ≤ 5
2122
if (FC <: ComplexF64) && V.body.name.name == :ROCArray
22-
@test_broken norm(r_gpu) ≤ 1e-6
23+
@test_broken norm(r_gpu) ≤ tol
2324
else
24-
@test norm(r_gpu) ≤ 1e-8
25+
@test norm(r_gpu) ≤ tol
2526
end
2627
2728
A_gpu = M(A_cpu + 200*I)
@@ -30,15 +31,16 @@ function test_ic0(FC, V, M)
3031
r_gpu = b_gpu - A_gpu * x_gpu
3132
@test stats.niter ≤ 5
3233
if (FC <: ComplexF64) && V.body.name.name == :ROCArray
33-
@test_broken norm(r_gpu) ≤ 1e-6
34+
@test_broken norm(r_gpu) ≤ tol
3435
else
35-
@test norm(r_gpu) ≤ 1e-8
36+
@test norm(r_gpu) ≤ tol
3637
end
3738
end
3839
3940
function test_ilu0(FC, V, M)
4041
n = 100
4142
R = real(FC)
43+
tol = eps(R) |> sqrt
4244
A_cpu = rand(FC, n, n)
4345
A_cpu = sparse(A_cpu)
4446
b_cpu = rand(FC, n)
@@ -50,17 +52,19 @@ function test_ilu0(FC, V, M)
5052
x_gpu, stats = gmres(A_gpu, b_gpu, N=P, ldiv=true)
5153
r_gpu = b_gpu - A_gpu * x_gpu
5254
@test stats.niter ≤ 5
53-
@test norm(r_gpu) ≤ 1e-8
55+
@test norm(r_gpu) ≤ tol
5456
5557
A_gpu = M(A_cpu + 200*I)
5658
update!(P, A_gpu)
5759
x_gpu, stats = gmres(A_gpu, b_gpu, N=P, ldiv=true)
5860
r_gpu = b_gpu - A_gpu * x_gpu
5961
@test stats.niter ≤ 5
60-
@test norm(r_gpu) ≤ 1e-8
62+
@test norm(r_gpu) ≤ tol
6163
end
6264
6365
function test_operator(FC, V, DM, SM)
66+
R = real(FC)
67+
tol = eps(R) |> sqrt
6468
m = 200
6569
n = 100
6670
A_cpu = rand(FC, n, n)
@@ -74,7 +78,7 @@ function test_operator(FC, V, DM, SM)
7478
x_gpu, stats = gmres(opA_gpu, b_gpu)
7579
r_gpu = b_gpu - A_gpu * x_gpu
7680
@test stats.solved
77-
@test norm(r_gpu) ≤ 1e-8
81+
@test norm(r_gpu) ≤ tol
7882
7983
A_cpu = rand(FC, m, n)
8084
A_cpu = sparse(A_cpu)

0 commit comments

Comments
 (0)