IntelPython · vlad-perevezentsev · Jun 16, 2025 · Jun 10, 2025 · Jun 10, 2025 · Jun 10, 2025
@@ -68,17 +68,21 @@ find_package(Dpctl REQUIRED)
 message(STATUS "Dpctl_INCLUDE_DIR=" ${Dpctl_INCLUDE_DIR})
 message(STATUS "Dpctl_TENSOR_INCLUDE_DIR=" ${Dpctl_TENSOR_INCLUDE_DIR})
 
-option(DPNP_TARGET_CUDA
-    "Build DPNP to target CUDA devices"
-    OFF
-)
 option(DPNP_USE_ONEMKL_INTERFACES
     "Build DPNP with oneMKL Interfaces"
     OFF
 )
+set(DPNP_TARGET_CUDA
+    ""
+    CACHE STRING
+    "Build DPNP to target CUDA device. \
+Set to a truthy value (e.g., ON, TRUE) to use default architecture (sm_50), \
+or to a specific architecture like sm_80."
+)
 set(HIP_TARGETS "" CACHE STRING "HIP architecture for target")
 
 set(_dpnp_sycl_targets)
+set(_dpnp_cuda_arch)
 set(_use_onemkl_interfaces OFF)
 set(_use_onemkl_interfaces_cuda OFF)
 set(_use_onemkl_interfaces_hip OFF)
@@ -87,8 +91,18 @@ set(_dpnp_sycl_target_compile_options)
 set(_dpnp_sycl_target_link_options)
 
 if ("x${DPNP_SYCL_TARGETS}" STREQUAL "x")
-    if(DPNP_TARGET_CUDA)
-        set(_dpnp_sycl_targets "nvptx64-nvidia-cuda,spir64-unknown-unknown")
+    if (DPNP_TARGET_CUDA)
-    if (DPNP_TARGET_CUDA)
+    if (NOT "x${DPNP_TARGET_CUDA}" STREQUAL "x")
-    if (DPNP_TARGET_CUDA)
+    if (NOT "x${DPNP_TARGET_CUDA}" STREQUAL "x")
+        if(DPNP_TARGET_CUDA MATCHES "^sm_")
+            set(_dpnp_cuda_arch ${DPNP_TARGET_CUDA})
+        elseif(DPNP_TARGET_CUDA MATCHES "^(ON|TRUE|YES|Y|1)$")
+            set(_dpnp_cuda_arch "sm_50")
+        else()
+            message(FATAL_ERROR
+                "Invalid value for DPNP_TARGET_CUDA: \"${DPNP_TARGET_CUDA}\". "
+                "Expected 'ON', 'TRUE', 'YES', 'Y', '1', or a CUDA architecture like 'sm_80'."
+            )
+        endif()
+        set(_dpnp_sycl_targets "nvidia_gpu_${_dpnp_cuda_arch},spir64-unknown-unknown")
         set(_use_onemkl_interfaces_cuda ON)
     endif()
 
@@ -104,7 +118,7 @@ if ("x${DPNP_SYCL_TARGETS}" STREQUAL "x")
 else()
     set(_dpnp_sycl_targets ${DPNP_SYCL_TARGETS})
 
-    if ("${DPNP_SYCL_TARGETS}" MATCHES "nvptx64-nvidia-cuda")
+    if("${DPNP_SYCL_TARGETS}" MATCHES "(nvidia_gpu_sm_|nvptx64-nvidia-cuda)")
         set(_use_onemkl_interfaces_cuda ON)
     endif()
 

@@ -144,13 +144,33 @@ installation layout of compatible version. The following plugins from CodePlay a
 Building ``dpnp`` also requires `building Data Parallel Control Library for custom SYCL targets.
 <https://intelpython.github.io/dpctl/latest/beginners_guides/installation.html#building-for-custom-sycl-targets>`_
 
-``dpnp`` can be built for CUDA devices as follows:
+``dpnp`` can be built for CUDA devices using the ``--target-cuda`` argument.
+
+To target a specific architecture (e.g., ``sm_80``):
+
+.. code-block:: bash
+
+    python scripts/build_locally.py --target-cuda=sm_80
+
+To use the default architecture (``sm_50``), run:
 
 .. code-block:: bash
 
-    python scripts/build_locally.py --target=cuda
+    python scripts/build_locally.py --target-cuda
+
+Note that kernels are built for ``sm_50`` by default, which lets them run on a wider
+range of architectures but limits the use of more recent CUDA features.
+
+For reference, compute architecture strings like ``sm_80`` correspond to specific
+CUDA Compute Capabilities (e.g., Compute Capability 8.0 corresponds to ``sm_80``).
+A complete mapping between NVIDIA GPU models and their respective
+Compute Capabilities can be found in the official
+`CUDA GPU Compute Capability <https://developer.nvidia.com/cuda-gpus>`_ documentation.
+
+A full list of SYCL alias targets is available in the
+`DPC++ Compiler User Manual <https://intel.github.io/llvm/UsersManual.html>`_.
 
-And for AMD devices:
+To build for AMD devices, use:
 
 .. code-block:: bash
 
@@ -179,7 +199,7 @@ architecture all at once:
 
 .. code-block:: bash
 
-    python scripts/build_locally.py --target=cuda --target-hip=gfx90a
+    python scripts/build_locally.py --target-cuda --target-hip=gfx90a
 
 
 Testing

@@ -38,7 +38,7 @@ def run(
     cmake_executable=None,
     verbose=False,
     cmake_opts="",
-    target="intel",
+    target_cuda=None,
     target_hip=None,
     onemkl_interfaces=False,
     onemkl_interfaces_dir=None,
@@ -98,12 +98,14 @@ def run(
         if "DPL_ROOT" in os.environ:
             os.environ["DPL_ROOT_HINT"] = os.environ["DPL_ROOT"]
 
-    if not target.strip():
-        target = "intel"
-
-    if target == "cuda":
+    if target_cuda is not None:
+        if not target_cuda.strip():
+            raise ValueError(
+                "--target-cuda can not be an empty string. "
+                "Use --target-cuda=<arch> or --target-cuda"
+            )
         cmake_args += [
-            "-DDPNP_TARGET_CUDA=ON",
+            f"-DDPNP_TARGET_CUDA={target_cuda}",
         ]
         # Always builds using oneMKL interfaces for the cuda target
         onemkl_interfaces = True
@@ -129,7 +131,7 @@ def run(
                 f"-DDPNP_ONEMKL_INTERFACES_DIR={onemkl_interfaces_dir}",
             ]
     elif onemkl_interfaces_dir:
-        RuntimeError("--onemkl-interfaces-dir option is not supported")
+        raise RuntimeError("--onemkl-interfaces-dir option is not supported")
 
     subprocess.check_call(
         cmake_args, shell=False, cwd=setup_dir, env=os.environ
@@ -186,10 +188,12 @@ def run(
         type=str,
     )
     driver.add_argument(
-        "--target",
-        help="Target backend for build",
-        dest="target",
-        default="intel",
+        "--target-cuda",
+        nargs="?",
+        const="ON",
+        help="Enable CUDA target for build; "
+        "optionally specify architecture (e.g., --target-cuda=sm_80)",
+        default=None,
         type=str,
     )
     driver.add_argument(
@@ -265,7 +269,7 @@ def run(
         cmake_executable=args.cmake_executable,
         verbose=args.verbose,
         cmake_opts=args.cmake_opts,
-        target=args.target,
+        target_cuda=args.target_cuda,
         target_hip=args.target_hip,
         onemkl_interfaces=args.onemkl_interfaces,
         onemkl_interfaces_dir=args.onemkl_interfaces_dir,