Skip to content

Commit 39f012d

Browse files
author
Diptorup Deb
authored
Merge pull request #1050 from IntelPython/update/move_empty_nd_impl_changes_intree
Update/move empty nd impl changes in-tree
2 parents e7a4420 + 60eaeb7 commit 39f012d

21 files changed

+240
-433
lines changed

.github/workflows/conda-package.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ jobs:
148148
source $CONDA/etc/profile.d/conda.sh
149149
conda activate numba_dpex_env
150150
# echo "libintelocl.so" | tee /etc/OpenCL/vendors/intel-cpu.icd
151-
for script in $(find . \( -not -name "_*" -not -name "vector_sum2D.py" -not -name "vectorize.py" -not -name "scan.py" -and -name "*.py" \))
151+
for script in $(find . \( -not -name "_*" -not -name "side-by-side*" -not -name "vectorize.py" -not -name "scan.py" -and -name "*.py" \))
152152
do
153153
echo "Executing ${script}"
154154
python ${script} || exit 1

numba_dpex/__init__.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,8 @@
1515
import dpctl
1616
import llvmlite.binding as ll
1717
from numba import __version__ as numba_version
18-
from numba.np import arrayobj
1918
from numba.np.ufunc.decorators import Vectorize
2019

21-
from numba_dpex._patches import _empty_nd_impl
2220
from numba_dpex.vectorizers import Vectorize as DpexVectorize
2321

2422
from .numba_patches import (
@@ -31,7 +29,6 @@
3129
patch_is_ufunc.patch()
3230
patch_mk_alloc.patch()
3331
patch_arrayexpr_tree_to_ir.patch()
34-
arrayobj._empty_nd_impl = _empty_nd_impl
3532

3633

3734
def load_dpctl_sycl_interface():

numba_dpex/_patches.py

Lines changed: 0 additions & 181 deletions
This file was deleted.

numba_dpex/core/caching.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def reduce(self, data):
3131
def rebuild(self, target_context, reduced_data):
3232
"""Deserialize after unpickling from the cache.
3333
Args:
34-
target_context (numba_dpex.core.target.DpexTargetContext):
34+
target_context (numba_dpex.core.target.DpexKernelTargetContext):
3535
The target context for the kernel.
3636
reduced_data (object): The data to be deserialized after unpickling.
3737
"""

numba_dpex/core/descriptor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
from functools import cached_property
66

7-
from numba.core import typing, utils
7+
from numba.core import typing
88
from numba.core.cpu import CPUTargetOptions
99
from numba.core.descriptors import TargetDescriptor
1010

numba_dpex/core/parfors/parfor_lowerer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,11 @@
1212
)
1313

1414
from numba_dpex import config
15-
from numba_dpex.core.utils.kernel_launcher import KernelLaunchIRBuilder
1615
from numba_dpex.core.parfors.reduction_helper import (
1716
ReductionHelper,
1817
ReductionKernelVariables,
1918
)
19+
from numba_dpex.core.utils.kernel_launcher import KernelLaunchIRBuilder
2020

2121
from ..exceptions import UnsupportedParforError
2222
from ..types.dpnp_ndarray_type import DpnpNdArray

numba_dpex/dpnp_iface/_intrinsic.py

Lines changed: 139 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,155 @@
33
# SPDX-License-Identifier: Apache-2.0
44

55
from llvmlite import ir as llvmir
6+
from llvmlite.ir import Constant
67
from llvmlite.ir.types import DoubleType, FloatType
78
from numba import types
9+
from numba.core import cgutils
10+
from numba.core import config as numba_config
811
from numba.core.typing import signature
9-
from numba.extending import intrinsic
12+
from numba.extending import intrinsic, overload_classmethod
1013
from numba.np.arrayobj import (
11-
_empty_nd_impl,
1214
_parse_empty_args,
1315
_parse_empty_like_args,
1416
get_itemsize,
17+
make_array,
18+
populate_array,
1519
)
1620

1721
from numba_dpex.core.runtime import context as dpexrt
22+
from numba_dpex.core.types import DpnpNdArray
23+
24+
25+
def _empty_nd_impl(context, builder, arrtype, shapes):
    """Emit the LLVM IR that allocates and fills in a new array of ``arrtype``.

    Used during lowering. The element count and byte size are computed with
    overflow-checked multiplications, the strides are derived from the array
    layout, and the memory is obtained by calling ``_call_usm_allocator``
    (wrapped with ``dpjit``) with the byte size, a numeric USM-type code and
    the device string taken from ``arrtype``.

    Args:
        context: Target context used for constants, types and function lookup.
        builder: LLVM IR builder that the generated instructions are added to.
        arrtype: Type of the array to allocate. Its ``dtype``, ``ndim``,
            ``layout``, ``usm_type`` and ``device`` attributes are read.
        shapes: Tuple or list of lowered dimension sizes.

    Returns:
        The array structure helper built from ``make_array(arrtype)`` after
        ``populate_array`` has set its data, shape, strides, itemsize and
        meminfo fields.

    Raises:
        NotImplementedError: If ``arrtype.ndim`` is non-zero and the layout is
            neither "C" nor "F".
    """

    array_struct = make_array(arrtype)(context, builder)

    llvm_dtype = context.get_data_type(arrtype.dtype)
    itemsize = context.get_constant(types.intp, get_itemsize(context, arrtype))

    # Number of elements: multiply all extents together and OR the overflow
    # flag of every multiplication into a single i1 value.
    n_items = context.get_constant(types.intp, 1)
    overflowed = Constant(llvmir.IntType(1), 0)
    for extent in shapes:
        product = builder.smul_with_overflow(n_items, extent)
        n_items = builder.extract_value(product, 0)
        carry = builder.extract_value(product, 1)
        overflowed = builder.or_(overflowed, carry)

    def running_products(extents):
        """Return ``[itemsize, itemsize*e0, itemsize*e0*e1, ...]`` IR values."""
        acc = [itemsize]
        for extent in extents:
            acc.append(builder.mul(acc[-1], extent))
        return acc

    if arrtype.ndim == 0:
        strides = ()
    elif arrtype.layout == "C":
        # Innermost dimension gets stride == itemsize, so accumulate from the
        # last extent backwards and flip the result.
        strides = tuple(reversed(running_products(reversed(shapes[1:]))))
    elif arrtype.layout == "F":
        # First dimension gets stride == itemsize.
        strides = tuple(running_products(shapes[:-1]))
    else:
        raise NotImplementedError(
            "Don't know how to allocate array with layout '{0}'.".format(
                arrtype.layout
            )
        )

    # Check overflow, numpy also does this after checking order
    size_product = builder.smul_with_overflow(n_items, itemsize)
    allocsize = builder.extract_value(size_product, 0)
    size_carry = builder.extract_value(size_product, 1)
    overflowed = builder.or_(overflowed, size_carry)

    with builder.if_then(overflowed, likely=False):
        # Raise same error as numpy, see:
        # https://github.com/numpy/numpy/blob/2a488fe76a0f732dc418d03b452caace161673da/numpy/core/src/multiarray/ctors.c#L1095-L1101 # noqa: E501
        context.call_conv.return_user_exc(
            builder,
            ValueError,
            (
                "array is too big; `arr.size * arr.dtype.itemsize` is larger than"
                " the maximum possible size.",
            ),
        )

    # Numeric code handed to the allocator; any other usm_type maps to 0.
    usm_type_codes = {"device": 1, "shared": 2, "host": 3}
    usm_type = context.get_constant(
        types.uint64, usm_type_codes.get(arrtype.usm_type, 0)
    )
    device = context.insert_const_string(builder.module, arrtype.device)

    alloc_args = (context.get_dummy_value(), allocsize, usm_type, device)
    alloc_sig = signature(
        types.MemInfoPointer(types.voidptr),
        types.TypeRef(type(arrtype)),
        types.intp,
        types.uint64,
        types.voidptr,
    )

    # Imported at call time rather than at module level.
    # NOTE(review): presumably to avoid a circular import - confirm.
    from numba_dpex.decorators import dpjit

    allocator = context.typing_context.resolve_value_type(
        dpjit(_call_usm_allocator)
    )
    # The _call_usm_allocator function will be compiled and added to registry
    # when the get_call_type function is invoked.
    allocator.get_call_type(context.typing_context, alloc_sig.args, {})
    meminfo = context.get_function(allocator, alloc_sig)(builder, alloc_args)

    data = context.nrt.meminfo_data(builder, meminfo)

    intp_t = context.get_value_type(types.intp)
    shape_array = cgutils.pack_array(builder, shapes, ty=intp_t)
    strides_array = cgutils.pack_array(builder, strides, ty=intp_t)
    typed_data = builder.bitcast(data, llvm_dtype.as_pointer())

    populate_array(
        array_struct,
        data=typed_data,
        shape=shape_array,
        strides=strides_array,
        itemsize=itemsize,
        meminfo=meminfo,
    )

    return array_struct
134+
135+
136+
# NOTE(review): the two assignments to DISABLE_PERFORMANCE_WARNINGS below
# bracket a plain ``def``; ``dpjit`` is only applied to the function later,
# inside ``_empty_nd_impl``. It is unclear which compilation these toggles are
# meant to affect, and this section leaves the flag set to 1 - confirm intent.
numba_config.DISABLE_PERFORMANCE_WARNINGS = 0


def _call_usm_allocator(arrtype, size, usm_type, device):
    """Trampoline to call the intrinsic used for allocation.

    Forwards ``size``, ``usm_type`` and ``device`` unchanged to
    ``arrtype._usm_allocate`` and returns whatever that call returns.
    ``_empty_nd_impl`` wraps this function with ``dpjit`` and calls it with a
    signature whose first argument is a ``types.TypeRef`` of the array type
    class.
    """
    return arrtype._usm_allocate(size, usm_type, device)


numba_config.DISABLE_PERFORMANCE_WARNINGS = 1
145+
146+
147+
@overload_classmethod(DpnpNdArray, "_usm_allocate", target="dpex")
def _ol_array_allocate(cls, allocsize, usm_type, device):
    """Implements an allocator for dpnp.ndarrays.

    Registered as the ``_usm_allocate`` classmethod overload of
    ``DpnpNdArray`` for the "dpex" target. The returned implementation
    ignores ``cls`` and passes ``allocsize``, ``usm_type`` and ``device``
    straight to ``intrin_usm_alloc``.
    """

    # NOTE(review): ``intrin_usm_alloc`` is not defined in the visible part of
    # this file - presumably it is declared elsewhere in this module; verify.
    def impl(cls, allocsize, usm_type, device):
        return intrin_usm_alloc(allocsize, usm_type, device)

    return impl
18155

19156

20157
def alloc_empty_arrayobj(context, builder, sig, args, is_like=False):

0 commit comments

Comments
 (0)