# Review preamble: these lines precede the first `diff --git` header and belong to no hunk.
#
# Changes visible in this patch:
#   * .gitignore: ignores deps/onemkl_{blas,lapack,sparse}.{cpp,h}.
#   * deps/Project.toml: oneAPI_Support_Headers_jll compat "=2025.1.0" -> "=2025.2.0".
#   * deps/build_local.jl: Conda packages dpcpp_linux-64 and mkl-devel-dpcpp pinned to 2025.2.0
#     instead of 2025.1.0.
#   * deps/generate_helpers.jl: adds "optimize_gemm" to non_parametric_routines.
#   * deps/generate_interfaces.jl: generate_headers assigns the "_advanced" suffix to
#     "optimize_gemm" when the header text contains "columns" (same rule as "optimize_trsm").
#   * deps/src/onemkl.cpp, deps/src/onemkl.h: adds onemklXsparse_optimize_gemm and
#     onemklXsparse_optimize_gemm_advanced, both calling oneapi::mkl::sparse::optimize_gemm.
#   * lib/mkl/wrappers_sparse.jl: adds two sparse_optimize_gemm! methods taking a
#     oneSparseMatrixCSR; each calls the matching onemklXsparse_* binding and returns A.
#   * lib/support/liboneapi_support.jl: adds the enums onemklOmatconvertAlg and onemklOmataddAlg,
#     the empty mutable structs omatconvert_descr / omatadd_descr with their Ptr aliases, and
#     @ccall bindings for descriptor init/release and the two optimize_gemm entry points.
#   * test/Project.toml: adds oneAPI. test/onemkl.jl: calls
#     oneMKL.sparse_optimize_gemm!(transa, dA) right before oneMKL.sparse_gemm!.
#
# NOTE(review): in deps/generate_interfaces.jl the context comment still reads
#   `The routine "optimize_trsm" has two versions.` although the added line applies the same
#   suffix rule to "optimize_gemm" -- consider updating that comment in the target file.
# NOTE(review): both new C wrappers store the call result in `status` and then return 0
#   unconditionally, as the optimize_trsm wrapper shown in the hunk context does.
# NOTE(review): the 4-argument sparse_optimize_gemm! passes the layout as the literal 'C'; the
#   visible test hunk only calls the 2-argument method -- TODO confirm the other one is tested.
# NOTE(review): line breaks inside the hunks appear collapsed in this copy; blank lines and
#   context-line boundaries cannot be recovered from here, so the hunk text below is untouched.
diff --git a/.gitignore b/.gitignore index c22bac86..736148be 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,8 @@ LocalPreferences.toml Manifest.toml +deps/onemkl_blas.cpp +deps/onemkl_blas.h +deps/onemkl_lapack.cpp +deps/onemkl_lapack.h +deps/onemkl_sparse.cpp +deps/onemkl_sparse.h diff --git a/deps/Project.toml b/deps/Project.toml index b329510b..b8446dbe 100644 --- a/deps/Project.toml +++ b/deps/Project.toml @@ -12,4 +12,4 @@ oneAPI_Level_Zero_Headers_jll = "f4bc562b-d309-54f8-9efb-476e56f0410d" oneAPI_Support_Headers_jll = "24f86df5-245d-5634-a4cc-32433d9800b3" [compat] -oneAPI_Support_Headers_jll = "=2025.1.0" +oneAPI_Support_Headers_jll = "=2025.2.0" diff --git a/deps/build_local.jl b/deps/build_local.jl index 52560f47..c8ae4e33 100644 --- a/deps/build_local.jl +++ b/deps/build_local.jl @@ -45,7 +45,7 @@ if !isfile(joinpath(conda_dir, "condarc-julia.yml")) touch(joinpath(conda_dir, "conda-meta", "history")) end Conda.add_channel("https://software.repos.intel.com/python/conda/", conda_dir) -Conda.add(["dpcpp_linux-64=2025.1.0", "mkl-devel-dpcpp=2025.1.0"], conda_dir) +Conda.add(["dpcpp_linux-64=2025.2.0", "mkl-devel-dpcpp=2025.2.0"], conda_dir) Conda.list(conda_dir) diff --git a/deps/generate_helpers.jl b/deps/generate_helpers.jl index f17d6786..01f162d9 100644 --- a/deps/generate_helpers.jl +++ b/deps/generate_helpers.jl @@ -1,6 +1,6 @@ non_parametric_routines = ["init_matrix_handle", "release_matrix_handle", "set_matrix_property", "init_matmat_descr", "release_matmat_descr", "set_matmat_data", "get_matmat_data", "matmat", -"omatcopy", "sort_matrix", "optimize_gemv", "optimize_trmv", "optimize_trsv", "optimize_trsm", +"omatcopy", "sort_matrix", "optimize_gemv", "optimize_gemm", "optimize_trmv", "optimize_trsv", "optimize_trsm", "init_omatconvert_descr", "release_omatconvert_descr", "init_omatadd_descr", "release_omatadd_descr", "omatconvert_buffer_size", "omatconvert_analyze", "omatconvert_get_nnz", "omatconvert", "omatadd_buffer_size", 
"omatadd_analyze", "omatadd_get_nnz"] diff --git a/deps/generate_interfaces.jl b/deps/generate_interfaces.jl index 50378398..a36fd0a8 100644 --- a/deps/generate_interfaces.jl +++ b/deps/generate_interfaces.jl @@ -241,6 +241,7 @@ function generate_headers(library::String, filename::Vector{String}, output::Str # The routine "optimize_trsm" has two versions. suffix = "" (name_routine == "optimize_trsm") && occursin("columns", header) && (suffix = "_advanced") + (name_routine == "optimize_gemm") && occursin("columns", header) && (suffix = "_advanced") name_routine ∈ ("set_csr_data", "set_coo_data") && occursin("int64_t", header) && (suffix = "_64") occursin("batch", name_routine) && !occursin("**", header) && (suffix = "_strided") diff --git a/deps/src/onemkl.cpp b/deps/src/onemkl.cpp index f10a3f57..de667728 100644 --- a/deps/src/onemkl.cpp +++ b/deps/src/onemkl.cpp @@ -4038,6 +4038,16 @@ extern "C" int onemklXsparse_optimize_trsv(syclQueue_t device_queue, onemklUplo return 0; } +extern "C" int onemklXsparse_optimize_gemm(syclQueue_t device_queue, onemklTranspose opA, matrix_handle_t A) { + auto status = oneapi::mkl::sparse::optimize_gemm(device_queue->val, convert(opA), (oneapi::mkl::sparse::matrix_handle_t) A, {}); + return 0; +} + +extern "C" int onemklXsparse_optimize_gemm_advanced(syclQueue_t device_queue, onemklLayout layout_val, onemklTranspose opA, onemklTranspose opB, matrix_handle_t A, int64_t columns) { + auto status = oneapi::mkl::sparse::optimize_gemm(device_queue->val, convert(layout_val), convert(opA), convert(opB), (oneapi::mkl::sparse::matrix_handle_t) A, columns, {}); + return 0; +} + extern "C" int onemklXsparse_optimize_trsm(syclQueue_t device_queue, onemklUplo uplo_val, onemklTranspose opA, onemklDiag diag_val, matrix_handle_t A) { auto status = oneapi::mkl::sparse::optimize_trsm(device_queue->val, convert(uplo_val), convert(opA), convert(diag_val), (oneapi::mkl::sparse::matrix_handle_t) A, {}); return 0; diff --git a/deps/src/onemkl.h 
b/deps/src/onemkl.h index cf0d3891..d6a448b9 100644 --- a/deps/src/onemkl.h +++ b/deps/src/onemkl.h @@ -2842,6 +2842,13 @@ int onemklXsparse_optimize_trmv(syclQueue_t device_queue, onemklUplo uplo_val, o int onemklXsparse_optimize_trsv(syclQueue_t device_queue, onemklUplo uplo_val, onemklTranspose opA, onemklDiag diag_val, matrix_handle_t A); +int onemklXsparse_optimize_gemm(syclQueue_t device_queue, onemklTranspose opA, matrix_handle_t + A); + +int onemklXsparse_optimize_gemm_advanced(syclQueue_t device_queue, onemklLayout layout_val, + onemklTranspose opA, onemklTranspose opB, + matrix_handle_t A, int64_t columns); + int onemklXsparse_optimize_trsm(syclQueue_t device_queue, onemklUplo uplo_val, onemklTranspose opA, onemklDiag diag_val, matrix_handle_t A); diff --git a/lib/mkl/wrappers_sparse.jl b/lib/mkl/wrappers_sparse.jl index e554330c..f39cbd03 100644 --- a/lib/mkl/wrappers_sparse.jl +++ b/lib/mkl/wrappers_sparse.jl @@ -127,6 +127,18 @@ for (fname, elty) in ((:onemklSsparse_gemm, :Float32), end end +function sparse_optimize_gemm!(trans::Char, A::oneSparseMatrixCSR) + queue = global_queue(context(A.nzVal), device(A.nzVal)) + onemklXsparse_optimize_gemm(sycl_queue(queue), trans, A.handle) + return A +end + +function sparse_optimize_gemm!(trans::Char, transB::Char, nrhs::Int, A::oneSparseMatrixCSR) + queue = global_queue(context(A.nzVal), device(A.nzVal)) + onemklXsparse_optimize_gemm_advanced(sycl_queue(queue), 'C', trans, transB, A.handle, nrhs) + return A +end + for (fname, elty) in ((:onemklSsparse_symv, :Float32), (:onemklDsparse_symv, :Float64), (:onemklCsparse_symv, :ComplexF32), diff --git a/lib/support/liboneapi_support.jl b/lib/support/liboneapi_support.jl index 90d2ed1c..9b5858f3 100644 --- a/lib/support/liboneapi_support.jl +++ b/lib/support/liboneapi_support.jl @@ -180,6 +180,14 @@ end ONEMKL_MATMAT_REQUEST_FINALIZE = 8 end +@cenum onemklOmatconvertAlg::UInt32 begin + ONEMKL_OMATCONVERT_DEFAULT_ALG = 0 +end + +@cenum onemklOmataddAlg::UInt32 begin + 
ONEMKL_OMATADD_DEFAULT_ALG = 0 +end + mutable struct matrix_handle end const matrix_handle_t = Ptr{matrix_handle} @@ -188,6 +196,14 @@ mutable struct matmat_descr end const matmat_descr_t = Ptr{matmat_descr} +mutable struct omatconvert_descr end + +const omatconvert_descr_t = Ptr{omatconvert_descr} + +mutable struct omatadd_descr end + +const omatadd_descr_t = Ptr{omatadd_descr} + function onemklHgemm_batch(device_queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size) @ccall liboneapi_support.onemklHgemm_batch(device_queue::syclQueue_t, @@ -6596,6 +6612,26 @@ function onemklXsparse_release_matmat_descr(p_desc) @ccall liboneapi_support.onemklXsparse_release_matmat_descr(p_desc::Ptr{matmat_descr_t})::Cint end +function onemklXsparse_init_omatconvert_descr(device_queue, p_descr) + @ccall liboneapi_support.onemklXsparse_init_omatconvert_descr(device_queue::syclQueue_t, + p_descr::Ptr{omatconvert_descr_t})::Cint +end + +function onemklXsparse_release_omatconvert_descr(device_queue, descr) + @ccall liboneapi_support.onemklXsparse_release_omatconvert_descr(device_queue::syclQueue_t, + descr::omatconvert_descr_t)::Cint +end + +function onemklXsparse_init_omatadd_descr(device_queue, p_omatadd_desc) + @ccall liboneapi_support.onemklXsparse_init_omatadd_descr(device_queue::syclQueue_t, + p_omatadd_desc::Ptr{omatadd_descr_t})::Cint +end + +function onemklXsparse_release_omatadd_descr(device_queue, omatadd_desc) + @ccall liboneapi_support.onemklXsparse_release_omatadd_descr(device_queue::syclQueue_t, + omatadd_desc::omatadd_descr_t)::Cint +end + function onemklXsparse_omatcopy(device_queue, transpose_val, spMat_in, spMat_out) @ccall liboneapi_support.onemklXsparse_omatcopy(device_queue::syclQueue_t, transpose_val::onemklTranspose, @@ -6658,6 +6694,22 @@ function onemklXsparse_optimize_trsv(device_queue, uplo_val, opA, diag_val, A) A::matrix_handle_t)::Cint end +function onemklXsparse_optimize_gemm(device_queue, opA, A) + @ccall 
liboneapi_support.onemklXsparse_optimize_gemm(device_queue::syclQueue_t, + opA::onemklTranspose, + A::matrix_handle_t)::Cint +end + +function onemklXsparse_optimize_gemm_advanced(device_queue, layout_val, opA, opB, A, + columns) + @ccall liboneapi_support.onemklXsparse_optimize_gemm_advanced(device_queue::syclQueue_t, + layout_val::onemklLayout, + opA::onemklTranspose, + opB::onemklTranspose, + A::matrix_handle_t, + columns::Int64)::Cint +end + function onemklXsparse_optimize_trsm(device_queue, uplo_val, opA, diag_val, A) @ccall liboneapi_support.onemklXsparse_optimize_trsm(device_queue::syclQueue_t, uplo_val::onemklUplo, diff --git a/test/Project.toml b/test/Project.toml index 62cdf0f8..a49bf765 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -18,4 +18,5 @@ StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" libigc_jll = "94295238-5935-5bd7-bb0f-b00942e9bdd5" +oneAPI = "8f75cd03-7ff8-4ecb-9b8f-daf728133b1b" oneAPI_Support_jll = "b049733a-a71d-5ed3-8eba-7d323ac00b36" diff --git a/test/onemkl.jl b/test/onemkl.jl index 50651584..e5b6541c 100644 --- a/test/onemkl.jl +++ b/test/onemkl.jl @@ -1132,6 +1132,7 @@ end alpha = rand(T) beta = rand(T) + oneMKL.sparse_optimize_gemm!(transa, dA) oneMKL.sparse_gemm!(transa, transb, alpha, dA, dB, beta, dC) @test alpha * opa(A) * opb(B) + beta * C ≈ collect(dC) end