
Commit 81b9970

smazouz42, EmilyBourne, and bauom committed
Feature: Improve Kernel Decorator (#69)
This pull request addresses issue #68 by changing the implementation of the kernel decorator so that the decorated function runs multiple times, once for each thread of each block, as determined by the number of blocks and the number of threads per block.

---------

Co-authored-by: EmilyBourne <[email protected]>
Co-authored-by: bauom <[email protected]>
1 parent 698a293 commit 81b9970

File tree: 8 files changed (+181 additions, −3 deletions)


docs/cuda.md

Lines changed: 18 additions & 0 deletions
The following documentation is appended after the existing `my_kernel[1, 1]()` example:

## Cuda Device Methods

The following methods are available for CUDA devices in Pyccel and can be called from either kernels or device functions. Currently, the only import syntax supported is:

```python
from pyccel import cuda
```

Using an alias for the import is not supported, so this is not allowed:

```python
from pyccel import cuda as py_cu
```

| Method | Description |
|--------|-------------|

(The table of methods is left empty in this commit.)
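A short usage sketch of these device methods in pure Python mode, based on the test scripts added in this commit; the kernel name and launch configuration here are illustrative, not part of the documented API:

```python
from pyccel.decorators import kernel
from pyccel import cuda

@kernel
def show_indices():
    # Inside a kernel, the indexing methods are called on the cuda module.
    print(cuda.blockIdx(0), cuda.threadIdx(0), cuda.blockDim(0))

show_indices[2, 4]()  # launch 2 blocks of 4 threads: the body runs 8 times
cuda.synchronize()
```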

pyccel/cuda/cuda_thread_indexing.py (new file; path taken from the import added in pyccel/decorators.py below)

Lines changed: 88 additions & 0 deletions

```python
#------------------------------------------------------------------------------------------#
# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
#------------------------------------------------------------------------------------------#
"""
This module contains all the CUDA thread indexing methods
"""
class CudaThreadIndexing:
    """
    Class representing the CUDA thread indexing.

    Class representing the CUDA thread indexing.

    Parameters
    ----------
    block_idx : int
        The index of the block in the x-dimension.

    thread_idx : int
        The index of the thread in the x-dimension.
    """
    def __init__(self, block_idx, thread_idx):
        self._block_idx = block_idx
        self._thread_idx = thread_idx

    def threadIdx(self, dim):
        """
        Get the thread index.

        Get the thread index.

        Parameters
        ----------
        dim : int
            The dimension of the indexing. It can be:
            - 0 for the x-dimension
            - 1 for the y-dimension
            - 2 for the z-dimension

        Returns
        -------
        int
            The index of the thread in the specified dimension of its block.
        """
        return self._thread_idx

    def blockIdx(self, dim):
        """
        Get the block index.

        Get the block index.

        Parameters
        ----------
        dim : int
            The dimension of the indexing. It can be:
            - 0 for the x-dimension
            - 1 for the y-dimension
            - 2 for the z-dimension

        Returns
        -------
        int
            The index of the block in the specified dimension.
        """
        return self._block_idx

    def blockDim(self, dim):
        """
        Get the block dimension.

        Get the block dimension.

        Parameters
        ----------
        dim : int
            The dimension of the indexing. It can be:
            - 0 for the x-dimension
            - 1 for the y-dimension
            - 2 for the z-dimension

        Returns
        -------
        int
            The size of the block in the specified dimension.
        """
        return 0
```
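A minimal sketch of how this class behaves on its own; the index values are illustrative, and note that `blockDim` always returns 0 in this commit:

```python
from pyccel.cuda.cuda_thread_indexing import CudaThreadIndexing

# The kernel decorator creates one indexing object per (block, thread) pair.
cu = CudaThreadIndexing(block_idx=1, thread_idx=3)
print(cu.blockIdx(0))   # -> 1
print(cu.threadIdx(0))  # -> 3
print(cu.blockDim(0))   # -> 0 (the block size is not tracked yet)
```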

pyccel/decorators.py

Lines changed: 19 additions & 1 deletion
```diff
@@ -6,6 +6,7 @@
 """
 This module contains all the provided decorator methods.
 """
+from pyccel.cuda.cuda_thread_indexing import CudaThreadIndexing
 import warnings
 
 __all__ = (
@@ -139,7 +140,24 @@ class KernelAccessor:
         def __init__(self, f):
             self._f = f
         def __getitem__(self, args):
-            return self._f
+            num_blocks, num_threads = args
+            def internal_loop(*args, **kwargs):
+                """
+                The internal loop for kernel execution.
+
+                The internal loop for kernel execution.
+                """
+                for b in range(num_blocks):
+                    for t in range(num_threads):
+                        cu = CudaThreadIndexing(b, t)
+                        if 'cuda' in self._f.__globals__:
+                            self._f.__globals__['cuda'].threadIdx = cu.threadIdx
+                            self._f.__globals__['cuda'].blockIdx = cu.blockIdx
+                            self._f.__globals__['cuda'].blockDim = cu.blockDim
+                        else:
+                            self._f.__globals__['cuda'] = cu
+                        self._f(*args, **kwargs)
+            return internal_loop
 
     return KernelAccessor(f)
```
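With this change, `kernel[blocks, threads]()` in pure Python executes the decorated function once per (block, thread) pair. A hedged illustration; the function name and launch sizes are made up:

```python
from pyccel.decorators import kernel
from pyccel import cuda

@kernel
def count_calls():
    # Each execution sees the indices of its own (block, thread) pair.
    print(cuda.blockIdx(0), cuda.threadIdx(0))

# __getitem__ now returns internal_loop, so this runs the body 2 * 3 = 6 times.
count_calls[2, 3]()
```

The indices are made visible by patching the `cuda` name in the kernel's module globals, so the kernel body keeps calling `cuda.threadIdx(...)` exactly as it would in generated CUDA code.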

tests/pyccel/scripts/kernel/block_idx.py (new file; path taken from test_block_idx below)

Lines changed: 15 additions & 0 deletions

```python
# pylint: disable=missing-function-docstring, missing-module-docstring
from pyccel.decorators import kernel
from pyccel import cuda

@kernel
def print_block():
    print(cuda.blockIdx(0)) # pylint: disable=no-member

def f():
    print_block[5,5]()
    cuda.synchronize()

if __name__ == '__main__':
    f()
```

tests/pyccel/scripts/kernel/device_test.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -1,6 +1,6 @@
 # pylint: disable=missing-function-docstring, missing-module-docstring
 from pyccel.decorators import device, kernel
-from pyccel import cuda
+from pyccel import cuda
 
 @device
 def device_call():
```

(The changed import line differs only in whitespace, which is invisible in this view.)

tests/pyccel/scripts/kernel/hello_kernel.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -1,6 +1,6 @@
 # pylint: disable=missing-function-docstring, missing-module-docstring
 from pyccel.decorators import kernel
-from pyccel import cuda
+from pyccel import cuda
 
 @kernel
 def say_hello(its_morning : bool):
```

(As in device_test.py, the import line changes only in whitespace.)

tests/pyccel/scripts/kernel/thread_idx.py (new file; path taken from test_thread_idx below)

Lines changed: 15 additions & 0 deletions

```python
# pylint: disable=missing-function-docstring, missing-module-docstring
from pyccel.decorators import kernel
from pyccel import cuda

@kernel
def print_block():
    print(cuda.threadIdx(0)) # pylint: disable=no-member

def f():
    print_block[5,5]()
    cuda.synchronize()

if __name__ == '__main__':
    f()
```

tests/pyccel/test_pyccel.py

Lines changed: 24 additions & 0 deletions
```diff
@@ -730,6 +730,8 @@ def test_elemental(language):
     pyccel_test("scripts/decorators_elemental.py", language = language)
 
 #------------------------------------------------------------------------------
+
+
 @pytest.mark.cuda
 def test_hello_kernel(gpu_available):
     types = str
@@ -743,7 +745,29 @@ def test_kernel_collision(gpu_available):
                 language="cuda", execute_code=gpu_available)
 
 #------------------------------------------------------------------------------
+def test_block_idx():
+    test_file = get_abs_path("scripts/kernel/block_idx.py")
+    cwd = get_abs_path(os.path.dirname(test_file))
+
+    pyth_out = get_python_output(test_file, cwd)
+
+    python_block_idx = list(map(int, pyth_out.split()))
+
+    for i in range(5):
+        assert python_block_idx.count(i) == 5
+#------------------------------------------------------------------------------
+def test_thread_idx():
+    test_file = get_abs_path("scripts/kernel/thread_idx.py")
+    cwd = get_abs_path(os.path.dirname(test_file))
+
+    pyth_out = get_python_output(test_file, cwd)
 
+    python_idx = list(map(int, pyth_out.split()))
+
+    for i in range(5):
+        assert python_idx.count(i) == 5
+
+#------------------------------------------------------------------------------
 @pytest.mark.cuda
 def test_device_call(gpu_available):
     types = str
```
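Both new tests run the corresponding script with plain Python, collect the printed indices, and check that every value 0-4 appears exactly five times (5 blocks of 5 threads). A small self-contained sketch of that check; the sample output string is illustrative, since the actual print order is not asserted:

```python
# Hedged sketch of the counting assertion used by test_block_idx / test_thread_idx.
sample_output = "0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 3 3 3 3 3 4 4 4 4 4"
indices = list(map(int, sample_output.split()))
for i in range(5):
    assert indices.count(i) == 5
print("each index appears exactly 5 times")
```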
