Skip to content

Commit b494492

Browse files
authored
Merge pull request #1280 from IntelPython/experimental/barrier_ols
Implementation of group barrier operation
2 parents b0ea6aa + 769d960 commit b494492

File tree

5 files changed

+212
-0
lines changed

5 files changed

+212
-0
lines changed

numba_dpex/experimental/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
from ._kernel_dpcpp_spirv_overloads import (
1515
_atomic_ref_overloads,
16+
_group_barrier_overloads,
1617
_index_space_id_overloads,
1718
)
1819
from .decorators import device_func, kernel
Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
# SPDX-FileCopyrightText: 2023 Intel Corporation
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
"""
6+
Provides overloads for functions included in kernel_iface.barrier that
7+
generate dpcpp SPIR-V LLVM IR intrinsic function calls.
8+
"""
9+
import warnings
10+
11+
from llvmlite import ir as llvmir
12+
from numba.core import cgutils, types
13+
from numba.extending import intrinsic, overload
14+
15+
from numba_dpex.core import itanium_mangler as ext_itanium_mangler
16+
from numba_dpex.experimental.target import DPEX_KERNEL_EXP_TARGET_NAME
17+
from numba_dpex.kernel_api import group_barrier
18+
from numba_dpex.kernel_api.memory_enums import MemoryOrder, MemoryScope
19+
20+
from ._spv_atomic_inst_helper import get_memory_semantics_mask, get_scope
21+
22+
_SUPPORT_CONVERGENT = True
23+
24+
try:
25+
llvmir.FunctionAttributes("convergent")
26+
except ValueError:
27+
warnings.warn(
28+
"convergent attribute is supported only starting llvmlite "
29+
+ "0.42. Not setting this attribute may result into unexpected behavior"
30+
+ "when using group_barrier"
31+
)
32+
_SUPPORT_CONVERGENT = False
33+
34+
35+
def _get_memory_scope(fence_scope):
36+
if isinstance(fence_scope, types.Literal):
37+
return get_scope(fence_scope.literal_value)
38+
return get_scope(fence_scope.value)
39+
40+
41+
@intrinsic
42+
def _intrinsic_barrier(
43+
ty_context, # pylint: disable=unused-argument
44+
ty_exec_scope, # pylint: disable=unused-argument
45+
ty_mem_scope, # pylint: disable=unused-argument
46+
ty_spirv_mem_sem_mask, # pylint: disable=unused-argument
47+
):
48+
# Signature of `__spirv_control_barrier` call that is
49+
# generated for group_barrier. It takes three arguments -
50+
# exec_scope, memory_scope and memory_semantics_mask.
51+
# All arguments have to be of type unsigned int32.
52+
sig = types.void(types.uint32, types.uint32, types.uint32)
53+
54+
def _intrinsic_barrier_codegen(
55+
context, builder, sig, args # pylint: disable=unused-argument
56+
):
57+
exec_scope_arg = builder.trunc(args[0], llvmir.IntType(32))
58+
mem_scope_arg = builder.trunc(args[1], llvmir.IntType(32))
59+
spirv_memory_semantics_mask_arg = builder.trunc(
60+
args[2], llvmir.IntType(32)
61+
)
62+
63+
fn_args = [
64+
exec_scope_arg,
65+
mem_scope_arg,
66+
spirv_memory_semantics_mask_arg,
67+
]
68+
69+
mangled_fn_name = ext_itanium_mangler.mangle_ext(
70+
"__spirv_ControlBarrier", [types.uint32, types.uint32, types.uint32]
71+
)
72+
73+
spirv_fn_arg_types = [
74+
llvmir.IntType(32),
75+
llvmir.IntType(32),
76+
llvmir.IntType(32),
77+
]
78+
79+
# TODO: split the function declaration from call
80+
fn = cgutils.get_or_insert_function(
81+
builder.module,
82+
llvmir.FunctionType(llvmir.VoidType(), spirv_fn_arg_types),
83+
mangled_fn_name,
84+
)
85+
86+
if _SUPPORT_CONVERGENT:
87+
fn.attributes.add("convergent")
88+
fn.attributes.add("nounwind")
89+
fn.calling_convention = "spir_func"
90+
91+
callinst = builder.call(fn, fn_args)
92+
93+
if _SUPPORT_CONVERGENT:
94+
callinst.attributes.add("convergent")
95+
callinst.attributes.add("nounwind")
96+
97+
return (
98+
sig,
99+
_intrinsic_barrier_codegen,
100+
)
101+
102+
103+
@overload(
104+
group_barrier,
105+
prefer_literal=True,
106+
target=DPEX_KERNEL_EXP_TARGET_NAME,
107+
)
108+
def ol_group_barrier(fence_scope=MemoryScope.WORK_GROUP):
109+
"""SPIR-V overload for
110+
:meth:`numba_dpex.kernel_api.group_barrier`.
111+
112+
Generates the same LLVM IR instruction as dpcpp for the
113+
`group_barrier` function.
114+
115+
Per SYCL spec, group_barrier must perform both control barrier and memory
116+
fence operations. Hence, group_barrier requires two scopes and memory
117+
consistency specification as three arguments.
118+
119+
mem_scope - scope of any memory consistency operations that are performed by
120+
the barrier. By default, mem_scope is set to `work_group`.
121+
exec_scope - scope that determines the set of work-items that synchronize at
122+
barrier. Set to `work_group` for group_barrier always.
123+
spirv_memory_semantics_mask - Based on sycl implementation.
124+
125+
Mask that is set to use sequential consistency memory order semantics
126+
always.
127+
"""
128+
129+
mem_scope = _get_memory_scope(fence_scope)
130+
exec_scope = get_scope(MemoryScope.WORK_GROUP.value)
131+
spirv_memory_semantics_mask = get_memory_semantics_mask(
132+
MemoryOrder.SEQ_CST.value
133+
)
134+
135+
def _ol_group_barrier_impl(
136+
fence_scope=MemoryScope.WORK_GROUP,
137+
): # pylint: disable=unused-argument
138+
# pylint: disable=no-value-for-parameter
139+
return _intrinsic_barrier(
140+
exec_scope, mem_scope, spirv_memory_semantics_mask
141+
)
142+
143+
return _ol_group_barrier_impl

numba_dpex/kernel_api/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
"""
1111

1212
from .atomic_ref import AtomicRef
13+
from .barrier import group_barrier
1314
from .index_space_ids import Item, NdItem
1415
from .memory_enums import AddressSpace, MemoryOrder, MemoryScope
1516
from .ranges import NdRange, Range
@@ -23,4 +24,5 @@
2324
"Range",
2425
"NdItem",
2526
"Item",
27+
"group_barrier",
2628
]

numba_dpex/kernel_api/barrier.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# SPDX-FileCopyrightText: 2023 Intel Corporation
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
"""Python functions that simulate SYCL's barrier primitives.
6+
"""
7+
8+
from .memory_enums import MemoryScope
9+
10+
11+
def group_barrier(fence_scope=MemoryScope.WORK_GROUP):
12+
"""Performs a barrier operation across all work-items in a work group.
13+
14+
The function is modeled after the ``sycl::group_barrier`` function. It
15+
synchronizes work within a group of work items. All the work-items
16+
of the group must execute the barrier construct before any work-item
17+
continues execution beyond the barrier. However, unlike
18+
``sycl::group_barrier`` the numba_dpex function implicitly synchronizes at
19+
the level of a work group and does not allow specifying the group as an
20+
argument. The :func:`sub_group_barrier` function should be used if
21+
synchronization has to be performed only across a sub-group.
22+
23+
The ``group_barrier`` performs mem-fence operations ensuring that memory
24+
accesses issued before the barrier are not re-ordered with those issued
25+
after the barrier: all work-items in group g execute a release fence prior
26+
to synchronizing at the barrier, all work-items in group g execute an
27+
acquire fence afterwards, and there is an implicit synchronization of these
28+
fences as if provided by an explicit atomic operation on an atomic object.
29+
30+
Args:
31+
fence_scope (optional): scope of any memory consistency
32+
operations that are performed by the barrier.
33+
"""
34+
35+
# TODO: A pure Python simulation of a group_barrier will be added later.
36+
raise NotImplementedError
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
import dpnp
2+
3+
import numba_dpex as dpex
4+
import numba_dpex.experimental as dpex_exp
5+
from numba_dpex.kernel_api import MemoryScope, NdItem, group_barrier
6+
from numba_dpex.tests._helper import skip_windows
7+
8+
9+
# TODO: https://github.com/IntelPython/numba-dpex/issues/1308
10+
@skip_windows
11+
def test_group_barrier():
12+
"""A test for group_barrier function."""
13+
14+
@dpex_exp.kernel
15+
def _kernel(nd_item: NdItem, a):
16+
i = nd_item.get_global_id(0)
17+
18+
a[i] += 1
19+
group_barrier(MemoryScope.DEVICE)
20+
21+
if i == 0:
22+
for idx in range(1, a.size):
23+
a[0] += a[idx]
24+
25+
N = 16
26+
a = dpnp.ones(N, dtype=dpnp.int32)
27+
28+
dpex_exp.call_kernel(_kernel, dpex.NdRange((N,), (N,)), a)
29+
30+
assert a[0] == N * 2

0 commit comments

Comments
 (0)