IntelPython
diff --git a/‎.github/workflows/conda-package.yml
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/conda-package.yml
Lines changed: 1 addition & 1 deletion
diff --git a/‎numba_dpex/__init__.py
Lines changed: 0 additions & 3 deletions b/‎numba_dpex/__init__.py
Lines changed: 0 additions & 3 deletions
diff --git a/‎numba_dpex/_patches.py
Lines changed: 0 additions & 181 deletions b/‎numba_dpex/_patches.py
Lines changed: 0 additions & 181 deletions
diff --git a/‎numba_dpex/core/caching.py
Lines changed: 1 addition & 1 deletion b/‎numba_dpex/core/caching.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎numba_dpex/core/descriptor.py
Lines changed: 1 addition & 1 deletion b/‎numba_dpex/core/descriptor.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎numba_dpex/core/parfors/parfor_lowerer.py
Lines changed: 1 addition & 1 deletion b/‎numba_dpex/core/parfors/parfor_lowerer.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎numba_dpex/dpnp_iface/_intrinsic.py
Lines changed: 139 additions & 2 deletions b/‎numba_dpex/dpnp_iface/_intrinsic.py
Lines changed: 139 additions & 2 deletions
@@ -148,7 +148,7 @@ jobs:
           source $CONDA/etc/profile.d/conda.sh
           conda activate numba_dpex_env
           # echo "libintelocl.so" | tee /etc/OpenCL/vendors/intel-cpu.icd
-          for script in $(find . \( -not -name "_*" -not -name "vector_sum2D.py" -not -name "vectorize.py" -not -name "scan.py" -and -name "*.py" \))
+          for script in $(find . \( -not -name "_*" -not -name "side-by-side*" -not -name "vectorize.py" -not -name "scan.py" -and -name "*.py" \))
           do
             echo "Executing ${script}"
             python ${script} || exit 1
 
@@ -15,10 +15,8 @@
 import dpctl
 import llvmlite.binding as ll
 from numba import __version__ as numba_version
-from numba.np import arrayobj
 from numba.np.ufunc.decorators import Vectorize
 
-from numba_dpex._patches import _empty_nd_impl
 from numba_dpex.vectorizers import Vectorize as DpexVectorize
 
 from .numba_patches import (
@@ -31,7 +29,6 @@
 patch_is_ufunc.patch()
 patch_mk_alloc.patch()
 patch_arrayexpr_tree_to_ir.patch()
-arrayobj._empty_nd_impl = _empty_nd_impl
 
 
 def load_dpctl_sycl_interface():
 
@@ -31,7 +31,7 @@ def reduce(self, data):
     def rebuild(self, target_context, reduced_data):
         """Deserialize after unpickling from the cache.
         Args:
-            target_context (numba_dpex.core.target.DpexTargetContext):
+            target_context (numba_dpex.core.target.DpexKernelTargetContext):
                 The target context for the kernel.
             reduced_data (object): The data to be deserialzed after unpickling.
         """
 
@@ -4,7 +4,7 @@
 
 from functools import cached_property
 
-from numba.core import typing, utils
+from numba.core import typing
 from numba.core.cpu import CPUTargetOptions
 from numba.core.descriptors import TargetDescriptor
 
 
@@ -12,11 +12,11 @@
 )
 
 from numba_dpex import config
-from numba_dpex.core.utils.kernel_launcher import KernelLaunchIRBuilder
 from numba_dpex.core.parfors.reduction_helper import (
     ReductionHelper,
     ReductionKernelVariables,
 )
+from numba_dpex.core.utils.kernel_launcher import KernelLaunchIRBuilder
 
 from ..exceptions import UnsupportedParforError
 from ..types.dpnp_ndarray_type import DpnpNdArray
 
@@ -3,18 +3,155 @@
 # SPDX-License-Identifier: Apache-2.0
 
 from llvmlite import ir as llvmir
+from llvmlite.ir import Constant
 from llvmlite.ir.types import DoubleType, FloatType
 from numba import types
+from numba.core import cgutils
+from numba.core import config as numba_config
 from numba.core.typing import signature
-from numba.extending import intrinsic
+from numba.extending import intrinsic, overload_classmethod
 from numba.np.arrayobj import (
-    _empty_nd_impl,
     _parse_empty_args,
     _parse_empty_like_args,
     get_itemsize,
+    make_array,
+    populate_array,
 )
 
 from numba_dpex.core.runtime import context as dpexrt
+from numba_dpex.core.types import DpnpNdArray
+
+
+def _empty_nd_impl(context, builder, arrtype, shapes):
+    """Allocate a new USM-backed array during LLVM code generation (lowering).
+
+    Emits IR that computes the element count, the strides and the allocation
+    size for the requested shape, guards the multiplications against signed
+    overflow, obtains a meminfo through the ``_call_usm_allocator``
+    trampoline and fills in a new array structure.
+
+    Args:
+        context: The target context used for lowering.
+        builder: The LLVM IR builder the instructions are emitted into.
+        arrtype: The type of the array to allocate. Its ``dtype``, ``ndim``,
+            ``layout``, ``usm_type`` and ``device`` attributes are read.
+        shapes: A tuple or list of lowered dimension sizes.
+
+    Returns:
+        The array structure created from ``make_array(arrtype)``, populated
+        with the data pointer, shape, strides, itemsize and meminfo.
+
+    Raises:
+        NotImplementedError: While lowering, if ``arrtype.ndim`` is not 0 and
+            ``arrtype.layout`` is neither "C" nor "F".
+    """
+
+    arycls = make_array(arrtype)
+    ary = arycls(context, builder)
+
+    datatype = context.get_data_type(arrtype.dtype)
+    itemsize = context.get_constant(types.intp, get_itemsize(context, arrtype))
+
+    # compute array length
+    # `arrlen` is the running product of all dimension sizes; `overflow` is a
+    # 1-bit flag that accumulates the overflow bit of every signed multiply.
+    arrlen = context.get_constant(types.intp, 1)
+    overflow = Constant(llvmir.IntType(1), 0)
+    for s in shapes:
+        arrlen_mult = builder.smul_with_overflow(arrlen, s)
+        arrlen = builder.extract_value(arrlen_mult, 0)
+        overflow = builder.or_(overflow, builder.extract_value(arrlen_mult, 1))
+
+    # Each stride is `itemsize` multiplied by the sizes of the dimensions that
+    # vary faster: the last dimension gets `itemsize` for "C" layout, the
+    # first dimension gets it for "F" layout.
+    if arrtype.ndim == 0:
+        strides = ()
+    elif arrtype.layout == "C":
+        strides = [itemsize]
+        for dimension_size in reversed(shapes[1:]):
+            strides.append(builder.mul(strides[-1], dimension_size))
+        strides = tuple(reversed(strides))
+    elif arrtype.layout == "F":
+        strides = [itemsize]
+        for dimension_size in shapes[:-1]:
+            strides.append(builder.mul(strides[-1], dimension_size))
+        strides = tuple(strides)
+    else:
+        raise NotImplementedError(
+            "Don't know how to allocate array with layout '{0}'.".format(
+                arrtype.layout
+            )
+        )
+
+    # Check overflow, numpy also does this after checking order
+    allocsize_mult = builder.smul_with_overflow(arrlen, itemsize)
+    allocsize = builder.extract_value(allocsize_mult, 0)
+    overflow = builder.or_(overflow, builder.extract_value(allocsize_mult, 1))
+
+    # The generated code returns a ValueError through the calling convention
+    # when any of the multiplications above overflowed.
+    with builder.if_then(overflow, likely=False):
+        # Raise same error as numpy, see:
+        # https://github.com/numpy/numpy/blob/2a488fe76a0f732dc418d03b452caace161673da/numpy/core/src/multiarray/ctors.c#L1095-L1101    # noqa: E501
+        context.call_conv.return_user_exc(
+            builder,
+            ValueError,
+            (
+                "array is too big; `arr.size * arr.dtype.itemsize` is larger than"
+                " the maximum possible size.",
+            ),
+        )
+
+    # Encode the USM type name as the integer handed to the allocator:
+    # "device" -> 1, "shared" -> 2, "host" -> 3; any other value stays 0.
+    usm_ty = arrtype.usm_type
+    usm_ty_val = 0
+    if usm_ty == "device":
+        usm_ty_val = 1
+    elif usm_ty == "shared":
+        usm_ty_val = 2
+    elif usm_ty == "host":
+        usm_ty_val = 3
+    usm_type = context.get_constant(types.uint64, usm_ty_val)
+    device = context.insert_const_string(builder.module, arrtype.device)
+
+    # The first argument is a dummy value standing in for the array type
+    # reference (`arytypeclass` in the signature built below).
+    args = (
+        context.get_dummy_value(),
+        allocsize,
+        usm_type,
+        device,
+    )
+    mip = types.MemInfoPointer(types.voidptr)
+    arytypeclass = types.TypeRef(type(arrtype))
+    sig = signature(
+        mip,
+        arytypeclass,
+        types.intp,
+        types.uint64,
+        types.voidptr,
+    )
+    # NOTE(review): imported inside the function, presumably to avoid a
+    # circular import with numba_dpex.decorators -- confirm.
+    from numba_dpex.decorators import dpjit
+
+    op = dpjit(_call_usm_allocator)
+    fnop = context.typing_context.resolve_value_type(op)
+    # The _call_usm_allocator function will be compiled and added to registry
+    # when the get_call_type function is invoked.
+    fnop.get_call_type(context.typing_context, sig.args, {})
+    eqfn = context.get_function(fnop, sig)
+    meminfo = eqfn(builder, args)
+
+    # Data pointer belonging to the meminfo returned by the allocator call.
+    data = context.nrt.meminfo_data(builder, meminfo)
+
+    intp_t = context.get_value_type(types.intp)
+    shape_array = cgutils.pack_array(builder, shapes, ty=intp_t)
+    strides_array = cgutils.pack_array(builder, strides, ty=intp_t)
+
+    # The data pointer is bitcast to a pointer to the array's element type
+    # before it is stored in the array structure.
+    populate_array(
+        ary,
+        data=builder.bitcast(data, datatype.as_pointer()),
+        shape=shape_array,
+        strides=strides_array,
+        itemsize=itemsize,
+        meminfo=meminfo,
+    )
+
+    return ary
+
+
+# numba's DISABLE_PERFORMANCE_WARNINGS flag is set to 0 before the trampoline
+# is defined and to 1 right after it.
+# NOTE(review): the second assignment sets the flag to 1 unconditionally
+# instead of restoring the value it had before this module was imported --
+# confirm that this is intended.
+numba_config.DISABLE_PERFORMANCE_WARNINGS = 0
+
+
+def _call_usm_allocator(arrtype, size, usm_type, device):
+    """Trampoline to call the intrinsic used for allocation.
+
+    Forwards its arguments to the ``_usm_allocate`` classmethod of
+    ``arrtype`` and returns that call's result.
+
+    Args:
+        arrtype: The array type whose ``_usm_allocate`` classmethod is
+            called.
+        size: The allocation size, passed through unchanged.
+        usm_type: The USM type argument, passed through unchanged.
+        device: The device argument, passed through unchanged.
+
+    Returns:
+        Whatever ``arrtype._usm_allocate(size, usm_type, device)`` returns.
+    """
+    return arrtype._usm_allocate(size, usm_type, device)
+
+
+numba_config.DISABLE_PERFORMANCE_WARNINGS = 1
+
+
+@overload_classmethod(DpnpNdArray, "_usm_allocate", target="dpex")
+def _ol_array_allocate(cls, allocsize, usm_type, device):
+    """Implements an allocator for dpnp.ndarrays.
+
+    Registered as the ``_usm_allocate`` classmethod of ``DpnpNdArray`` for
+    the "dpex" target.
+
+    Args:
+        cls: The array type the classmethod is invoked on (not used by the
+            implementation).
+        allocsize: The allocation size forwarded to ``intrin_usm_alloc``.
+        usm_type: The USM type argument forwarded to ``intrin_usm_alloc``.
+        device: The device argument forwarded to ``intrin_usm_alloc``.
+
+    Returns:
+        The implementation function used for the overload.
+    """
+
+    # `intrin_usm_alloc` is not defined in the visible part of this diff;
+    # presumably it lives elsewhere in this module.
+    def impl(cls, allocsize, usm_type, device):
+        return intrin_usm_alloc(allocsize, usm_type, device)
+
+    return impl
 
 
 def alloc_empty_arrayobj(context, builder, sig, args, is_like=False):
Original file line number	Diff line number	Diff line change
`@@ -12,11 +12,11 @@`
`12`	`12`	`)`
`13`	`13`
`14`	`14`	`from numba_dpex import config`
`15`		`-from numba_dpex.core.utils.kernel_launcher import KernelLaunchIRBuilder`
`16`	`15`	`from numba_dpex.core.parfors.reduction_helper import (`
`17`	`16`	`ReductionHelper,`
`18`	`17`	`ReductionKernelVariables,`
`19`	`18`	`)`
	`19`	`+from numba_dpex.core.utils.kernel_launcher import KernelLaunchIRBuilder`
`20`	`20`
`21`	`21`	`from ..exceptions import UnsupportedParforError`
`22`	`22`	`from ..types.dpnp_ndarray_type import DpnpNdArray`