Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 102 additions & 0 deletions .github/workflows/pr-arm.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
name: "PR Tests (aarch64)"
# Read-only token for all jobs; no step needs write access.
permissions: read-all

# Trigger for PR and merge to develop branch
on:
  push:
    branches: develop
  pull_request:
  workflow_dispatch:

env:
  # Print failing test output directly in the CI log.
  CTEST_OUTPUT_ON_FAILURE: 1
  # Pinned dependency versions; these also key the caches below, so
  # bumping a version automatically invalidates the matching cache.
  LAPACK_VERSION: 3.12.0
  ARMPL_VERSION: 25.04.1
  DPCPP_VERSION: nightly-2025-04-01

jobs:
  unit-tests:
    runs-on: ubuntu-24.04-arm
    # One runner for each domain
    strategy:
      fail-fast: false
      matrix:
        include:
          - config: oneMath BLAS
            domain: blas
            build_options: -DREF_BLAS_ROOT=${PWD}/lapack/install
          - config: oneMath LAPACK
            domain: lapack
            build_options: -DREF_LAPACK_ROOT=${PWD}/lapack/install
          - config: oneMath RNG
            domain: rng
            # Exclude device tests (name-matched) — CPU-only runner.
            test_options: -E 'Device|DEVICE'
    name: unit tests ${{ matrix.config }} CPU
    steps:
      # Third-party actions are pinned to commit SHAs (comment shows the tag).
      - uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b # v4.1.5
      - name: Check if the changes affect this domain
        id: domain_check
        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
        with:
          script: |
            const domainCheck = require('.github/scripts/domain-check.js')
            return domainCheck({github, context, domain: "${{ matrix.domain }}"})
      # Reference BLAS/LAPACK build is expensive; cache the install tree,
      # keyed on LAPACK_VERSION so version bumps rebuild it.
      - name: Restore netlib from cache
        id: cache-lapack
        uses: actions/cache@d4323d4df104b026a6aa633fdb11d772146be0bf # v4.2.2
        with:
          path: lapack/install
          key: lapack-${{ env.LAPACK_VERSION }}-arm
      # Skipped entirely when the PR does not touch this domain, or when
      # the cache already holds the install tree.
      - name: Install netlib
        if: steps.domain_check.outputs.result == 'true' && steps.cache-lapack.outputs.cache-hit != 'true'
        run: |
          curl -sL https://github.com/Reference-LAPACK/lapack/archive/refs/tags/v${LAPACK_VERSION}.tar.gz | tar zx
          SHARED_OPT="lapack-${LAPACK_VERSION} -DBUILD_SHARED_LIBS=on -DCBLAS=on -DLAPACKE=on -DCMAKE_INSTALL_PREFIX=${PWD}/lapack/install -G Ninja"
          # 32 bit int
          cmake ${SHARED_OPT} -B lapack/build32
          cmake --build lapack/build32 --target install
          # 64 bit int
          cmake ${SHARED_OPT} -DBUILD_INDEX64=on -B lapack/build64
          cmake --build lapack/build64 --target install
      # The DPC++ toolchain build is the longest step; cache its install
      # tree keyed on DPCPP_VERSION.
      - name: Restore DPC++ from cache
        id: cache-dpcpp
        uses: actions/cache@d4323d4df104b026a6aa633fdb11d772146be0bf # v4.2.2
        with:
          path: dpcpp/install
          key: dpcpp-${{ env.DPCPP_VERSION }}-arm
      - name: Build DPC++
        if: steps.domain_check.outputs.result == 'true' && steps.cache-dpcpp.outputs.cache-hit != 'true'
        run: |
          wget -P dpcpp https://github.com/intel/llvm/archive/refs/tags/${{ env.DPCPP_VERSION }}.tar.gz
          cd dpcpp
          tar -xzf ${{ env.DPCPP_VERSION }}.tar.gz
          python llvm-${{ env.DPCPP_VERSION }}/buildbot/configure.py -o build -t Release --cmake-gen Ninja --native_cpu --cmake-opt="-DSYCL_ENABLE_BACKENDS=native_cpu" --cmake-opt="-DCMAKE_INSTALL_PREFIX=$PWD/install" --llvm-external-projects=openmp
          cd build
          ninja deploy-sycl-toolchain omp install
      # ArmPL install is quick, so it is not cached — only domain-gated.
      - name: Install ArmPL
        if: steps.domain_check.outputs.result == 'true'
        run: |
          mkdir armpl
          cd armpl
          wget https://developer.arm.com/-/cdn-downloads/permalink/Arm-Performance-Libraries/Version_${{ env.ARMPL_VERSION }}/arm-performance-libraries_${{ env.ARMPL_VERSION }}_deb_gcc.tar
          tar -xf arm-performance-libraries_${{ env.ARMPL_VERSION }}_deb_gcc.tar
          # NOTE(review): -a presumably accepts the licence and -i sets the
          # install prefix — confirm against the ArmPL installer docs.
          ./arm-performance-libraries_${{ env.ARMPL_VERSION }}_deb/arm-performance-libraries_${{ env.ARMPL_VERSION }}_deb.sh -a -i $PWD/install
      - name: Configure/Build for a domain
        if: steps.domain_check.outputs.result == 'true'
        run: |
          # These four exports put ArmPL and the freshly built DPC++ on the
          # tool/header/library search paths; the same exports are repeated
          # in the test step because each `run` is a separate shell.
          export PATH=${PWD}/armpl/install/armpl_${{ env.ARMPL_VERSION }}_gcc/bin:${PWD}/dpcpp/install/bin:${PATH}
          export CPATH=${PWD}/armpl/install/armpl_${{ env.ARMPL_VERSION }}_gcc/include:${PWD}/dpcpp/install/include:${CPATH}
          export LIBRARY_PATH=${PWD}/armpl/install/armpl_${{ env.ARMPL_VERSION }}_gcc/lib:${PWD}/dpcpp/install/lib:${PWD}/dpcpp/install/lib/aarch64-unknown-linux-gnu:${LIBRARY_PATH}
          export LD_LIBRARY_PATH=${PWD}/armpl/install/armpl_${{ env.ARMPL_VERSION }}_gcc/lib:${PWD}/dpcpp/install/lib:${PWD}/dpcpp/install/lib/aarch64-unknown-linux-gnu:${LD_LIBRARY_PATH}
          # Log the visible SYCL devices for debugging.
          sycl-ls
          cmake -DTARGET_DOMAINS=${{ matrix.domain }} -DENABLE_MKLCPU_BACKEND=off -DENABLE_MKLGPU_BACKEND=off -DENABLE_ARMPL_BACKEND=on -DARMPL_ROOT=${PWD}/armpl/install/armpl_${{ env.ARMPL_VERSION }}_gcc -DCMAKE_CXX_FLAGS='-fopenmp' -DCMAKE_VERBOSE_MAKEFILE=on ${{ matrix.build_options }} -B build -G Ninja
          cmake --build build
      - name: Run tests
        if: steps.domain_check.outputs.result == 'true'
        run: |
          export PATH=${PWD}/armpl/install/armpl_${{ env.ARMPL_VERSION }}_gcc/bin:${PWD}/dpcpp/install/bin:${PATH}
          export CPATH=${PWD}/armpl/install/armpl_${{ env.ARMPL_VERSION }}_gcc/include:${PWD}/dpcpp/install/include:${CPATH}
          export LIBRARY_PATH=${PWD}/armpl/install/armpl_${{ env.ARMPL_VERSION }}_gcc/lib:${PWD}/dpcpp/install/lib:${PWD}/dpcpp/install/lib/aarch64-unknown-linux-gnu:${LIBRARY_PATH}
          export LD_LIBRARY_PATH=${PWD}/armpl/install/armpl_${{ env.ARMPL_VERSION }}_gcc/lib:${PWD}/dpcpp/install/lib:${PWD}/dpcpp/install/lib/aarch64-unknown-linux-gnu:${LD_LIBRARY_PATH}
          sycl-ls
          # matrix.test_options filters the ctest run (e.g. excludes device
          # tests for rng); empty for the other domains.
          ctest --test-dir build ${{ matrix.test_options }}
2 changes: 1 addition & 1 deletion .github/workflows/pr.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: "PR Tests"
name: "PR Tests (x86_64)"
permissions: read-all

# Trigger for PR and merge to develop branch
Expand Down
7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

# oneAPI Math Library (oneMath)

[![OpenSSF
Scorecard](https://api.securityscorecards.dev/projects/github.com/uxlfoundation/oneMath/badge)](https://securityscorecards.dev/viewer/?uri=github.com%2Fuxlfoundation%2FoneMath)

oneMath is an open-source implementation of the [oneMath specification](https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onemath/source/). It can work with multiple devices using multiple libraries (backends) underneath. The oneMath project was previously referred to as oneMKL Interfaces.

oneMath is part of the [UXL Foundation](http://www.uxlfoundation.org).
Expand Down Expand Up @@ -338,7 +341,7 @@ Supported compilers include:
<tr>
<td rowspan=2 align="center">NVIDIA GPU</td>
<td align="center">NVIDIA cuFFT</td>
<td align="center">Open DPC++</td>
<td align="center">Open DPC++<br/>AdaptiveCpp</td>
<td align="center">Dynamic, Static</td>
</tr>
<tr>
Expand All @@ -349,7 +352,7 @@ Supported compilers include:
<tr>
<td rowspan=2 align="center">AMD GPU</td>
<td align="center">AMD rocFFT</td>
<td align="center">Open DPC++</td>
<td align="center">Open DPC++<br/>AdaptiveCpp</td>
<td align="center">Dynamic, Static</td>
</tr>
<tr>
Expand Down
2 changes: 1 addition & 1 deletion cmake/FindARMPL.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#===============================================================================

include_guard()
set(ARMPL_SEQ armpl_intp64)
set(ARMPL_SEQ armpl_int64)
set(ARMPL_OMP armpl_int64_mp)

include(FindPackageHandleStandardArgs)
Expand Down
18 changes: 12 additions & 6 deletions docs/building_the_project_with_adaptivecpp.rst
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ additional guidance. The target architectures must be specified with
``HIP_TARGETS``. See the `AdaptiveCpp documentation
<https://github.com/AdaptiveCpp/AdaptiveCpp/blob/develop/doc/using-hipsycl.md#adaptivecpp-targets-specification>`_.

If a backend library supports multiple domains (i.e. BLAS, RNG), it may be
If a backend library supports multiple domains (e.g. BLAS, DFT, RNG), it may be
desirable to only enable selected domains. For this, the ``TARGET_DOMAINS``
variable should be set. For further details, see :ref:`build_target_domains`.

Expand All @@ -81,6 +81,9 @@ The most important supported build options are:
* - ENABLE_CUBLAS_BACKEND
- True, False
- False
* - ENABLE_CUFFT_BACKEND
- True, False
- False
* - ENABLE_CURAND_BACKEND
- True, False
- False
Expand All @@ -93,6 +96,9 @@ The most important supported build options are:
* - ENABLE_ROCBLAS_BACKEND
- True, False
- False
* - ENABLE_ROCFFT_BACKEND
- True, False
- False
* - ENABLE_ROCRAND_BACKEND
- True, False
- False
Expand All @@ -106,7 +112,7 @@ The most important supported build options are:
- True, False
- True
* - TARGET_DOMAINS (list)
- blas, rng
- blas, dft, rng
- All supported domains

Some additional build options are given in
Expand All @@ -120,8 +126,8 @@ Backends
Building for CUDA
~~~~~~~~~~~~~~~~~

The CUDA backends can be enabled with ``ENABLE_CUBLAS_BACKEND`` and
``ENABLE_CURAND_BACKEND``.
The CUDA backends can be enabled with ``ENABLE_CUBLAS_BACKEND``,
``ENABLE_CUFFT_BACKEND`` and ``ENABLE_CURAND_BACKEND``.

The target architecture must be set using the ``HIPSYCL_TARGETS`` parameter. For
example, to target a Nvidia A100 (Ampere architecture), set
Expand All @@ -140,8 +146,8 @@ the CUDA libraries should be found automatically by CMake.
Building for ROCm
~~~~~~~~~~~~~~~~~

The ROCm backends can be enabled with ``ENABLE_ROCBLAS_BACKEND`` and
``ENABLE_ROCRAND_BACKEND``.
The ROCm backends can be enabled with ``ENABLE_ROCBLAS_BACKEND``,
``ENABLE_ROCFFT_BACKEND`` and ``ENABLE_ROCRAND_BACKEND``.

The target architecture must be set using the ``HIPSYCL_TARGETS`` parameter. See
the `AdaptiveCpp documentation
Expand Down
18 changes: 9 additions & 9 deletions src/dft/backends/cufft/backward.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ ONEMATH_EXPORT void compute_backward(descriptor_type& desc,
auto stream = detail::setup_stream(func_name, ih, plan);

auto inout_native = reinterpret_cast<fwd<descriptor_type>*>(
ih.get_native_mem<sycl::backend::ext_oneapi_cuda>(inout_acc));
ih.get_native_mem<detail::sycl_cuda_backend>(inout_acc));
detail::cufft_execute<detail::Direction::Backward, fwd<descriptor_type>>(
func_name, stream, plan, reinterpret_cast<void*>(inout_native + offsets[0]),
reinterpret_cast<void*>(inout_native + offsets[1]));
Expand Down Expand Up @@ -121,14 +121,14 @@ ONEMATH_EXPORT void compute_backward(descriptor_type& desc,
dft::detail::fft_enqueue_task(cgh, [=](sycl::interop_handle ih) {
auto stream = detail::setup_stream(func_name, ih, plan);

auto in_native = reinterpret_cast<void*>(
reinterpret_cast<bwd<descriptor_type>*>(
ih.get_native_mem<sycl::backend::ext_oneapi_cuda>(in_acc)) +
offsets[0]);
auto out_native = reinterpret_cast<void*>(
reinterpret_cast<fwd<descriptor_type>*>(
ih.get_native_mem<sycl::backend::ext_oneapi_cuda>(out_acc)) +
offsets[1]);
auto in_native =
reinterpret_cast<void*>(reinterpret_cast<bwd<descriptor_type>*>(
ih.get_native_mem<detail::sycl_cuda_backend>(in_acc)) +
offsets[0]);
auto out_native =
reinterpret_cast<void*>(reinterpret_cast<fwd<descriptor_type>*>(
ih.get_native_mem<detail::sycl_cuda_backend>(out_acc)) +
offsets[1]);
detail::cufft_execute<detail::Direction::Backward, fwd<descriptor_type>>(
func_name, stream, plan, in_native, out_native);
});
Expand Down
10 changes: 6 additions & 4 deletions src/dft/backends/cufft/commit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@
#include "oneapi/math/dft/detail/cufft/onemath_dft_cufft.hpp"
#include "oneapi/math/dft/types.hpp"

#include "execute_helper.hpp"
#include "../../execute_helper_generic.hpp"
#include "../stride_helper.hpp"

#include <cufft.h>
Expand Down Expand Up @@ -84,7 +86,7 @@ class cufft_commit final : public dft::detail::commit_impl<prec, dom> {
if (fix_context) {
// cufftDestroy changes the context so change it back.
CUdevice interopDevice =
sycl::get_native<sycl::backend::ext_oneapi_cuda>(this->get_queue().get_device());
sycl::get_native<sycl_cuda_backend>(this->get_queue().get_device());
CUcontext interopContext;
if (cuDevicePrimaryCtxRetain(&interopContext, interopDevice) != CUDA_SUCCESS) {
throw math::exception("dft/backends/cufft", __FUNCTION__,
Expand Down Expand Up @@ -353,16 +355,16 @@ class cufft_commit final : public dft::detail::commit_impl<prec, dom> {
.submit([&](sycl::handler& cgh) {
auto workspace_acc =
buffer_workspace.template get_access<sycl::access::mode::read_write>(cgh);
cgh.host_task([=](sycl::interop_handle ih) {
auto stream = ih.get_native_queue<sycl::backend::ext_oneapi_cuda>();
dft::detail::fft_enqueue_task(cgh, [=](sycl::interop_handle ih) {
auto stream = ih.get_native_queue<sycl_cuda_backend>();
auto result = cufftSetStream(plan, stream);
if (result != CUFFT_SUCCESS) {
throw oneapi::math::exception(
"dft/backends/cufft", "set_workspace",
"cufftSetStream returned " + std::to_string(result));
}
auto workspace_native = reinterpret_cast<scalar_type*>(
ih.get_native_mem<sycl::backend::ext_oneapi_cuda>(workspace_acc));
ih.get_native_mem<sycl_cuda_backend>(workspace_acc));
cufftSetWorkArea(plan, workspace_native);
});
})
Expand Down
8 changes: 7 additions & 1 deletion src/dft/backends/cufft/execute_helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@

namespace oneapi::math::dft::cufft::detail {

#if defined(__ADAPTIVECPP__) || defined(__HIPSYCL__)
constexpr auto sycl_cuda_backend{ sycl::backend::cuda };
#else // DPC++
constexpr auto sycl_cuda_backend{ sycl::backend::ext_oneapi_cuda };
#endif

template <dft::precision prec, dft::domain dom>
inline dft::detail::commit_impl<prec, dom>* checked_get_commit(
dft::detail::descriptor<prec, dom>& desc) {
Expand Down Expand Up @@ -142,7 +148,7 @@ void cufft_execute(const std::string& func, CUstream stream, cufftHandle plan, v
}

inline CUstream setup_stream(const std::string& func, sycl::interop_handle ih, cufftHandle plan) {
auto stream = ih.get_native_queue<sycl::backend::ext_oneapi_cuda>();
auto stream = ih.get_native_queue<sycl_cuda_backend>();
auto result = cufftSetStream(plan, stream);
if (result != CUFFT_SUCCESS) {
throw oneapi::math::exception("dft/backends/cufft", func,
Expand Down
18 changes: 9 additions & 9 deletions src/dft/backends/cufft/forward.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ ONEMATH_EXPORT void compute_forward(descriptor_type& desc,
auto stream = detail::setup_stream(func_name, ih, plan);

auto inout_native = reinterpret_cast<fwd<descriptor_type>*>(
ih.get_native_mem<sycl::backend::ext_oneapi_cuda>(inout_acc));
ih.get_native_mem<detail::sycl_cuda_backend>(inout_acc));
detail::cufft_execute<detail::Direction::Forward, fwd<descriptor_type>>(
func_name, stream, plan, reinterpret_cast<void*>(inout_native + offsets[0]),
reinterpret_cast<void*>(inout_native + offsets[1]));
Expand Down Expand Up @@ -124,14 +124,14 @@ ONEMATH_EXPORT void compute_forward(descriptor_type& desc,
dft::detail::fft_enqueue_task(cgh, [=](sycl::interop_handle ih) {
auto stream = detail::setup_stream(func_name, ih, plan);

auto in_native = reinterpret_cast<void*>(
reinterpret_cast<fwd<descriptor_type>*>(
ih.get_native_mem<sycl::backend::ext_oneapi_cuda>(in_acc)) +
offsets[0]);
auto out_native = reinterpret_cast<void*>(
reinterpret_cast<bwd<descriptor_type>*>(
ih.get_native_mem<sycl::backend::ext_oneapi_cuda>(out_acc)) +
offsets[1]);
auto in_native =
reinterpret_cast<void*>(reinterpret_cast<fwd<descriptor_type>*>(
ih.get_native_mem<detail::sycl_cuda_backend>(in_acc)) +
offsets[0]);
auto out_native =
reinterpret_cast<void*>(reinterpret_cast<bwd<descriptor_type>*>(
ih.get_native_mem<detail::sycl_cuda_backend>(out_acc)) +
offsets[1]);
detail::cufft_execute<detail::Direction::Forward, fwd<descriptor_type>>(
func_name, stream, plan, in_native, out_native);
});
Expand Down
6 changes: 4 additions & 2 deletions src/dft/backends/rocfft/commit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@
#include "oneapi/math/dft/detail/rocfft/onemath_dft_rocfft.hpp"
#include "oneapi/math/dft/types.hpp"

#include "execute_helper.hpp"
#include "../../execute_helper_generic.hpp"
#include "../stride_helper.hpp"

#include "rocfft_handle.hpp"
Expand Down Expand Up @@ -557,9 +559,9 @@ class rocfft_commit final : public dft::detail::commit_impl<prec, dom> {
this->get_queue().submit([&](sycl::handler& cgh) {
auto workspace_acc =
buffer_workspace.template get_access<sycl::access::mode::read_write>(cgh);
cgh.host_task([=](sycl::interop_handle ih) {
dft::detail::fft_enqueue_task(cgh, [=](sycl::interop_handle ih) {
auto workspace_native = reinterpret_cast<scalar_type*>(
ih.get_native_mem<sycl::backend::ext_oneapi_hip>(workspace_acc));
ih.get_native_mem<sycl_hip_backend>(workspace_acc));
set_workspace_impl(handle, workspace_native, workspace_bytes, "set_workspace");
});
});
Expand Down
10 changes: 8 additions & 2 deletions src/dft/backends/rocfft/execute_helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@

namespace oneapi::math::dft::rocfft::detail {

#if defined(__ADAPTIVECPP__) || defined(__HIPSYCL__)
constexpr auto sycl_hip_backend{ sycl::backend::hip };
#else // DPC++
constexpr auto sycl_hip_backend{ sycl::backend::ext_oneapi_hip };
#endif

template <dft::precision prec, dft::domain dom>
inline dft::detail::commit_impl<prec, dom>* checked_get_commit(
dft::detail::descriptor<prec, dom>& desc) {
Expand All @@ -60,12 +66,12 @@ inline auto expect_config(DescT& desc, const char* message) {

template <typename Acc>
inline void* native_mem(sycl::interop_handle& ih, Acc& buf) {
return ih.get_native_mem<sycl::backend::ext_oneapi_hip>(buf);
return ih.get_native_mem<sycl_hip_backend>(buf);
}

inline hipStream_t setup_stream(const std::string& func, sycl::interop_handle& ih,
rocfft_execution_info info) {
auto stream = ih.get_native_queue<sycl::backend::ext_oneapi_hip>();
auto stream = ih.get_native_queue<sycl_hip_backend>();
auto result = rocfft_execution_info_set_stream(info, stream);
if (result != rocfft_status_success) {
throw oneapi::math::exception(
Expand Down
6 changes: 5 additions & 1 deletion src/dft/execute_helper_generic.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,11 @@ namespace oneapi::math::dft::detail {
*/
template <typename HandlerT, typename FnT>
static inline void fft_enqueue_task(HandlerT&& cgh, FnT&& f) {
#ifdef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND
#if defined(__ADAPTIVECPP__)
cgh.AdaptiveCpp_enqueue_custom_operation([=](sycl::interop_handle ih) {
#elif defined(__HIPSYCL__)
cgh.hipSYCL_enqueue_custom_operation([=](sycl::interop_handle ih) {
#elif defined(SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND)
cgh.ext_codeplay_enqueue_native_command([=](sycl::interop_handle ih) {
#else
cgh.host_task([=](sycl::interop_handle ih) {
Expand Down
Loading
Loading