@@ -143,63 +143,6 @@ def sub_group_barrier_impl(context, builder, sig, args):
143
143
return _void_value
144
144
145
145
146
def insert_and_call_atomic_fn(
    context, builder, sig, fn_type, dtype, ptr, val, addrspace
):
    """Insert a declaration for a numba_dpex SPIR-V atomic helper and call it.

    Args:
        context: the numba lowering context.
        builder: the llvmlite IR builder.
        sig: signature of the atomic operation being lowered.
        fn_type: operation name, either "add" or "sub".
        dtype: numba dtype of the array element ("float32" or "float64").
        ptr: pointer to the element the atomic operates on.
        val: value operand of the atomic operation.
        addrspace: address space of *ptr* (address_space.LOCAL or GLOBAL).

    Returns:
        The llvm call instruction's result value.

    Raises:
        TypeError: if *fn_type* or *dtype* is not supported.
    """
    ll_p = None
    name = ""
    if dtype.name == "float32":
        ll_val = llvmir.FloatType()
        ll_p = ll_val.as_pointer()
        if fn_type == "add":
            name = "numba_dpex_atomic_add_f32"
        elif fn_type == "sub":
            name = "numba_dpex_atomic_sub_f32"
        else:
            raise TypeError("Operation type is not supported %s" % (fn_type))
    elif dtype.name == "float64":
        # NOTE: the original wrapped this branch in a redundant `if True:`;
        # removed as dead scaffolding — behavior is unchanged.
        ll_val = llvmir.DoubleType()
        ll_p = ll_val.as_pointer()
        if fn_type == "add":
            name = "numba_dpex_atomic_add_f64"
        elif fn_type == "sub":
            name = "numba_dpex_atomic_sub_f64"
        else:
            raise TypeError("Operation type is not supported %s" % (fn_type))
    else:
        raise TypeError(
            "Atomic operation is not supported for type %s" % (dtype.name)
        )

    # Helper symbols are suffixed with the memory scope they target.
    if addrspace == address_space.LOCAL:
        name = name + "_local"
    else:
        name = name + "_global"

    assert ll_p is not None
    assert name != ""
    ll_p.addrspace = address_space.GENERIC

    mod = builder.module
    if sig.return_type == types.void:
        llretty = llvmir.VoidType()
    else:
        llretty = context.get_value_type(sig.return_type)

    llargs = [ll_p, context.get_value_type(sig.args[2])]
    fnty = llvmir.FunctionType(llretty, llargs)

    fn = cgutils.get_or_insert_function(mod, fnty, name)
    fn.calling_convention = kernel_target.CC_SPIR_FUNC

    # The helpers take a generic-address-space pointer; cast before calling.
    generic_ptr = context.addrspacecast(builder, ptr, address_space.GENERIC)

    return builder.call(fn, [generic_ptr, val])
203
146
def native_atomic_add (context , builder , sig , args ):
204
147
aryty , indty , valty = sig .args
205
148
ary , inds , val = args
@@ -282,27 +225,29 @@ def native_atomic_add(context, builder, sig, args):
282
225
return builder .call (fn , fn_args )
283
226
284
227
228
def support_atomic(dtype: types.Type) -> bool:
    """Return True if *dtype* has native device atomic support.

    This check should be the same as described in the sycl documentation:
    https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#sec:atomic-references
    If atomic is not supported, it will be emulated by the sycl compiler.

    Args:
        dtype: the numba type of the array element being atomically updated.

    Returns:
        bool: whether native atomics exist for *dtype*.
    """
    # A single membership test replaces the original chain of six
    # `or`-joined equality comparisons; `in` uses `==` per element,
    # so the result is identical.
    return dtype in (
        types.int32,
        types.uint32,
        types.float32,
        types.int64,
        types.uint64,
        types.float64,
    )
285
242
@lower(stubs.atomic.add, types.Array, types.intp, types.Any)
@lower(stubs.atomic.add, types.Array, types.UniTuple, types.Any)
@lower(stubs.atomic.add, types.Array, types.Tuple, types.Any)
def atomic_add_tuple(context, builder, sig, args):
    """Lower ``atomic.add`` on an array element.

    Delegates to ``native_atomic_add`` when the element dtype has native
    atomic support, and rejects every other dtype with a ``TypeError``.
    """
    dtype = sig.args[0].dtype
    # Guard clause: bail out early on unsupported element types.
    if not support_atomic(dtype):
        raise TypeError(f"Atomic operation on unsupported type {dtype}")
    return native_atomic_add(context, builder, sig, args)
def atomic_sub_wrapper (context , builder , sig , args ):
@@ -337,81 +282,11 @@ def atomic_sub_wrapper(context, builder, sig, args):
337
282
@lower(stubs.atomic.sub, types.Array, types.UniTuple, types.Any)
@lower(stubs.atomic.sub, types.Array, types.Tuple, types.Any)
def atomic_sub_tuple(context, builder, sig, args):
    """Lower ``atomic.sub`` on an array element.

    Delegates to ``atomic_sub_wrapper`` when the element dtype has native
    atomic support, and rejects every other dtype with a ``TypeError``.
    """
    dtype = sig.args[0].dtype
    # Guard clause: bail out early on unsupported element types.
    if not support_atomic(dtype):
        raise TypeError(f"Atomic operation on unsupported type {dtype}")
    return atomic_sub_wrapper(context, builder, sig, args)
359
def atomic_add(context, builder, sig, args, name):
    """Lower an atomic add/sub through the linked numba_dpex atomic helpers.

    Args:
        context: the numba lowering context.
        builder: the llvmlite IR builder.
        sig: signature (array type, index type, value type).
        args: lowered (array, indices, value) values.
        name: operation name, "add" or "sub", forwarded to
            ``insert_and_call_atomic_fn``.

    Returns:
        The result of the generated helper call.

    Raises:
        ImportError: if the atomic helper library is not available.
        TypeError: on a dtype/value mismatch or a rank/index mismatch.
    """
    from .atomics import atomic_support_present

    # Guard clause instead of wrapping the whole body in the `if`.
    if not atomic_support_present():
        raise ImportError(
            "Atomic support is not present, can not perform atomic_add"
        )

    context.extra_compile_options[kernel_target.LINK_ATOMIC] = True
    aryty, indty, valty = sig.args
    ary, inds, val = args
    dtype = aryty.dtype

    if indty == types.intp:
        indices = [inds]  # just a single integer
        indty = [indty]
    else:
        indices = cgutils.unpack_tuple(builder, inds, count=len(indty))
        indices = [
            context.cast(builder, i, t, types.intp)
            for t, i in zip(indty, indices)
        ]

    if dtype != valty:
        raise TypeError("expecting %s but got %s" % (dtype, valty))

    if aryty.ndim != len(indty):
        raise TypeError(
            "indexing %d-D array with %d-D index" % (aryty.ndim, len(indty))
        )

    lary = context.make_array(aryty)(context, builder, ary)
    ptr = cgutils.get_item_pointer(context, builder, aryty, lary, indices)

    # The original duplicated the entire call in two branches that differed
    # only in the address-space argument; compute it once instead.
    if isinstance(aryty, Array) and aryty.addrspace == address_space.LOCAL:
        addrspace = address_space.LOCAL
    else:
        addrspace = address_space.GLOBAL

    return insert_and_call_atomic_fn(
        context, builder, sig, name, dtype, ptr, val, addrspace
    )
289
+ raise TypeError (f"Atomic operation on unsupported type { dtype } " )
415
290
416
291
417
292
@lower (stubs .private .array , types .IntegerLiteral , types .Any )
0 commit comments