Skip to content

Commit ef3e166

Browse files
committed
arch: Add Device.thread_group_size
1 parent 2b2fa02 commit ef3e166

File tree

1 file changed

+21
-0
lines changed

1 file changed

+21
-0
lines changed

devito/arch/archinfo.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -957,6 +957,23 @@ def _detect_isa(self):
957957

958958
class Device(Platform):
959959

960+
"""
961+
A generic Device is based on the SIMT (Single Instruction, Multiple Threads)
962+
programming model. In this execution model, threads are batched together and
963+
execute the same instruction at the same time, though each thread operates on
964+
its own data. Intel, AMD, and Nvidia GPUs are all based on this model.
965+
Unfortunately they use different terminology to refer to the same or at least
966+
very similar concepts. Throughout Devito, whenever possible, we attempt to
967+
adopt a neutral terminology -- the docstrings below provide some examples.
968+
"""
969+
970+
thread_group_size = None
971+
"""
972+
The number of threads in a group that executes the same instruction in lockstep.
973+
The group size is a hardware-specific property. For example, such a group is a
974+
"warp" on Nvidia GPUs and a "wavefront" on AMD GPUs.
975+
"""
976+
960977
def __init__(self, name, cores_logical=None, cores_physical=None, isa='cpp',
961978
max_threads_per_block=1024, max_threads_dimx=1024,
962979
max_threads_dimy=1024, max_threads_dimz=64,
@@ -1039,6 +1056,8 @@ def march(self):
10391056

10401057
class NvidiaDevice(Device):
10411058

1059+
thread_group_size = 32
1060+
10421061
max_mem_trans_nbytes = 128
10431062

10441063
@cached_property
@@ -1102,6 +1121,8 @@ class Blackwell(Hopper):
11021121

11031122
class AmdDevice(Device):
11041123

1124+
thread_group_size = 64
1125+
11051126
max_mem_trans_nbytes = 256
11061127

11071128
@cached_property

0 commit comments

Comments
 (0)