Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 21 additions & 7 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -68,17 +68,21 @@ find_package(Dpctl REQUIRED)
message(STATUS "Dpctl_INCLUDE_DIR=" ${Dpctl_INCLUDE_DIR})
message(STATUS "Dpctl_TENSOR_INCLUDE_DIR=" ${Dpctl_TENSOR_INCLUDE_DIR})

option(DPNP_TARGET_CUDA
"Build DPNP to target CUDA devices"
OFF
)
option(DPNP_USE_ONEMKL_INTERFACES
"Build DPNP with oneMKL Interfaces"
OFF
)
set(DPNP_TARGET_CUDA
""
CACHE STRING
"Build DPNP to target CUDA device. \
Set to a truthy value (e.g., ON, TRUE) to use default architecture (sm_50), \
or to a specific architecture like sm_80."
)
set(HIP_TARGETS "" CACHE STRING "HIP architecture for target")

set(_dpnp_sycl_targets)
set(_dpnp_cuda_arch)
set(_use_onemkl_interfaces OFF)
set(_use_onemkl_interfaces_cuda OFF)
set(_use_onemkl_interfaces_hip OFF)
Expand All @@ -87,8 +91,18 @@ set(_dpnp_sycl_target_compile_options)
set(_dpnp_sycl_target_link_options)

if ("x${DPNP_SYCL_TARGETS}" STREQUAL "x")
if(DPNP_TARGET_CUDA)
set(_dpnp_sycl_targets "nvptx64-nvidia-cuda,spir64-unknown-unknown")
if (DPNP_TARGET_CUDA)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is not OFF by default now. Should this be updated?

Suggested change
if (DPNP_TARGET_CUDA)
if (NOT "x${DPNP_TARGET_CUDA}" STREQUAL "x")

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

An empty string evaluates to FALSE in the check if (DPNP_TARGET_CUDA).
I kept this check to handle the case when DPNP_TARGET_CUDA is passed as 0, OFF, NO, FALSE, or N via the cmake-opts argument.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in case when DPNP_TARGET_CUDA is passed as 0, OFF, NO, FALSE, N via cmake-opts argument

That is not the case when DPNP_TARGET_CUDA is passed as an empty string, so it's still unclear to me.
As I understand it, the string can't be empty because of the check.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are right that --target-cuda= is checked in build_locally.py.
But if someone bypasses it via --cmake-opts="-DDPNP_TARGET_CUDA=", the empty string is still evaluated as FALSE in if(DPNP_TARGET_CUDA), so this condition safely handles both cases.
Using if (NOT "x${DPNP_TARGET_CUDA}" STREQUAL "x") would only check for non-empty strings and would still treat values like OFF or 0 as TRUE.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it sounds reasonable.
But what about the similar flag for the AMD build? Why don't we perform the same check there?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@antonwolfy it will be updated in the next PR.
Thank you

# Resolve the CUDA architecture requested through DPNP_TARGET_CUDA.
# The enclosing if(DPNP_TARGET_CUDA) treats boolean constants
# case-insensitively, so spellings such as "on" or "True" reach this point.
# Normalize the case before matching the boolean spellings so that they select
# the default architecture instead of being rejected as invalid.
string(TOUPPER "${DPNP_TARGET_CUDA}" _dpnp_target_cuda_upper)
if(DPNP_TARGET_CUDA MATCHES "^sm_")
    # An explicit architecture (e.g. sm_80) is used exactly as given.
    set(_dpnp_cuda_arch "${DPNP_TARGET_CUDA}")
elseif(_dpnp_target_cuda_upper MATCHES "^(ON|TRUE|YES|Y|1)$")
    # A plain truthy value selects the default architecture.
    set(_dpnp_cuda_arch "sm_50")
else()
    # Any other truthy value (e.g. a typo such as "sm80") is a hard error.
    message(FATAL_ERROR
        "Invalid value for DPNP_TARGET_CUDA: \"${DPNP_TARGET_CUDA}\". "
        "Expected 'ON', 'TRUE', 'YES', 'Y', '1', or a CUDA architecture like 'sm_80'."
    )
endif()
set(_dpnp_sycl_targets "nvidia_gpu_${_dpnp_cuda_arch},spir64-unknown-unknown")
set(_use_onemkl_interfaces_cuda ON)
endif()

Expand All @@ -104,7 +118,7 @@ if ("x${DPNP_SYCL_TARGETS}" STREQUAL "x")
else()
set(_dpnp_sycl_targets ${DPNP_SYCL_TARGETS})

if ("${DPNP_SYCL_TARGETS}" MATCHES "nvptx64-nvidia-cuda")
if("${DPNP_SYCL_TARGETS}" MATCHES "(nvidia_gpu_sm_|nvptx64-nvidia-cuda)")
set(_use_onemkl_interfaces_cuda ON)
endif()

Expand Down
28 changes: 24 additions & 4 deletions doc/quick_start_guide.rst
Original file line number Diff line number Diff line change
Expand Up @@ -144,13 +144,33 @@ installation layout of compatible version. The following plugins from CodePlay a
Building ``dpnp`` also requires `building Data Parallel Control Library for custom SYCL targets.
<https://intelpython.github.io/dpctl/latest/beginners_guides/installation.html#building-for-custom-sycl-targets>`_

``dpnp`` can be built for CUDA devices as follows:
``dpnp`` can be built for CUDA devices using the ``--target-cuda`` argument.

To target a specific architecture (e.g., ``sm_80``):

.. code-block:: bash

python scripts/build_locally.py --target-cuda=sm_80

To use the default architecture (``sm_50``), run:

.. code-block:: bash

python scripts/build_locally.py --target=cuda
python scripts/build_locally.py --target-cuda

Note that kernels are built for ``sm_50`` by default, allowing them to work on a wider
range of architectures, but limiting the usage of more recent CUDA features.

For reference, compute architecture strings like ``sm_80`` correspond to specific
CUDA Compute Capabilities (e.g., Compute Capability 8.0 corresponds to ``sm_80``).
A complete mapping between NVIDIA GPU models and their respective
Compute Capabilities can be found in the official
`CUDA GPU Compute Capability <https://developer.nvidia.com/cuda-gpus>`_ documentation.

A full list of supported SYCL alias targets can be found in the
`DPC++ Compiler User Manual <https://intel.github.io/llvm/UsersManual.html>`_.

And for AMD devices:
To build for AMD devices, use:

.. code-block:: bash

Expand Down Expand Up @@ -179,7 +199,7 @@ architecture all at once:

.. code-block:: bash

python scripts/build_locally.py --target=cuda --target-hip=gfx90a
python scripts/build_locally.py --target-cuda --target-hip=gfx90a


Testing
Expand Down
28 changes: 16 additions & 12 deletions scripts/build_locally.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def run(
cmake_executable=None,
verbose=False,
cmake_opts="",
target="intel",
target_cuda=None,
target_hip=None,
onemkl_interfaces=False,
onemkl_interfaces_dir=None,
Expand Down Expand Up @@ -98,12 +98,14 @@ def run(
if "DPL_ROOT" in os.environ:
os.environ["DPL_ROOT_HINT"] = os.environ["DPL_ROOT"]

if not target.strip():
target = "intel"

if target == "cuda":
if target_cuda is not None:
if not target_cuda.strip():
raise ValueError(
"--target-cuda can not be an empty string. "
"Use --target-cuda=<arch> or --target-cuda"
)
cmake_args += [
"-DDPNP_TARGET_CUDA=ON",
f"-DDPNP_TARGET_CUDA={target_cuda}",
]
# Always builds using oneMKL interfaces for the cuda target
onemkl_interfaces = True
Expand All @@ -129,7 +131,7 @@ def run(
f"-DDPNP_ONEMKL_INTERFACES_DIR={onemkl_interfaces_dir}",
]
elif onemkl_interfaces_dir:
RuntimeError("--onemkl-interfaces-dir option is not supported")
raise RuntimeError("--onemkl-interfaces-dir option is not supported")

subprocess.check_call(
cmake_args, shell=False, cwd=setup_dir, env=os.environ
Expand Down Expand Up @@ -186,10 +188,12 @@ def run(
type=str,
)
driver.add_argument(
"--target",
help="Target backend for build",
dest="target",
default="intel",
"--target-cuda",
nargs="?",
const="ON",
help="Enable CUDA target for build; "
"optionally specify architecture (e.g., --target-cuda=sm_80)",
default=None,
type=str,
)
driver.add_argument(
Expand Down Expand Up @@ -265,7 +269,7 @@ def run(
cmake_executable=args.cmake_executable,
verbose=args.verbose,
cmake_opts=args.cmake_opts,
target=args.target,
target_cuda=args.target_cuda,
target_hip=args.target_hip,
onemkl_interfaces=args.onemkl_interfaces,
onemkl_interfaces_dir=args.onemkl_interfaces_dir,
Expand Down
Loading