@@ -957,6 +957,23 @@ def _detect_isa(self):
957957
958958class Device (Platform ):
959959
960+ """
961+ A generic Device is based on the SIMT (Single Instruction, Multiple Threads)
962+ programming model. In this execution model, threads are batched together and
963+ execute the same instruction at the same time, though each thread operates on
964+ its own data. Intel, AMD, and Nvidia GPUs are all based on this model.
965+ Unfortunately, they use different terminology to refer to the same or at least
966+ very similar concepts. Throughout Devito, whenever possible, we attempt to
967+ adopt a neutral terminology -- the docstrings below provide some examples.
968+ """
969+
970+ thread_group_size = None
971+ """
972+ The number of threads in a thread group -- a collection of threads that
973+ execute the same instruction in lockstep. The size is hardware-specific. A
974+ thread group is called a "warp" on Nvidia GPUs and a "wavefront" on AMD GPUs.
975+ """
976+
960977 def __init__ (self , name , cores_logical = None , cores_physical = None , isa = 'cpp' ,
961978 max_threads_per_block = 1024 , max_threads_dimx = 1024 ,
962979 max_threads_dimy = 1024 , max_threads_dimz = 64 ,
@@ -1039,6 +1056,8 @@ def march(self):
10391056
10401057class NvidiaDevice (Device ):
10411058
1059+ thread_group_size = 32
1060+
10421061 max_mem_trans_nbytes = 128
10431062
10441063 @cached_property
@@ -1102,6 +1121,8 @@ class Blackwell(Hopper):
11021121
11031122class AmdDevice (Device ):
11041123
1124+ thread_group_size = 64
1125+
11051126 max_mem_trans_nbytes = 256
11061127
11071128 @cached_property
0 commit comments