@@ -275,7 +275,11 @@ class HipTargetInfo<string targetName, list<Aspect> aspectList, list<int> subGro
275275defvar HipSubgroupSizesGCN2 = [16]; // gfx7
276276defvar HipSubgroupSizesGCN3 = [16]; // gfx8, GCN 3rd gen and 4th gen have the same subgroup sizes
277277defvar HipSubgroupSizesGCN5 = [64]; // gfx900-gfx906 GCN5.0 (known as "Vega"), gfx90c GCN5.1 (known as "Vega 7nm")
278- defvar HipSubgroupSizesRDNA = [32, 64]; // gfxX10-gfx11 (encapsulates RDNA1..3), natively 32 (64-waves mode available)
278+ // According to the "Accelerator and GPU hardware specifications table" docs
279+ // (see: https://rocm.docs.amd.com/en/latest/reference/gpu-arch-specs.html),
280+ // the ROCm driver selects wave32 mode for the gfx10 and gfx11 families of GPUs.
281+ // See also the relevant ROCm issue: https://github.com/ROCm/hipamd/issues/59
282+ defvar HipSubgroupSizesRDNA = [32]; // gfx10-gfx11 (encapsulates RDNA1..3); wave64 mode is available but not used.
279283defvar HipSubgroupSizesCDNA = [64]; // gfx908, gfx90a, gfx940-gfx942 (encapsulates CDNA1..3)
280284
281285defvar HipMinAspects = [AspectGpu, AspectFp64, AspectOnline_compiler, AspectOnline_linker, AspectQueue_profiling,
@@ -290,9 +294,18 @@ def : HipTargetInfo<"amd_gpu_gfx908", !listconcat(HipMinAspects, AllUSMAspects,
290294def : HipTargetInfo<"amd_gpu_gfx90a", !listconcat(HipMinAspects, AllUSMAspects,
291295 [AspectExt_intel_device_info_uuid, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph, AspectExt_oneapi_native_assert]),
292296 HipSubgroupSizesCDNA>;
297+ // TODO: Verify whether device-side asserts (oneapi_native_assert) now work
298+ // on the new CDNA3 GPUs (gfx940, gfx941, gfx942) and have been fixed on the
299+ // other supported RDNA GPUs, gfx1030 (RDNA2) and gfx1100 (RDNA3).
293300def : HipTargetInfo<"amd_gpu_gfx940", !listconcat(HipMinAspects, AllUSMAspects,
294301 [AspectExt_intel_device_info_uuid, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph]),
295302 HipSubgroupSizesCDNA>;
303+ def : HipTargetInfo<"amd_gpu_gfx941", !listconcat(HipMinAspects, AllUSMAspects,
304+ [AspectExt_intel_device_info_uuid, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph]),
305+ HipSubgroupSizesCDNA>;
306+ def : HipTargetInfo<"amd_gpu_gfx942", !listconcat(HipMinAspects, AllUSMAspects,
307+ [AspectExt_intel_device_info_uuid, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph]),
308+ HipSubgroupSizesCDNA>;
296309def : HipTargetInfo<"amd_gpu_gfx1030", !listconcat(HipMinAspects, AllUSMAspects,
297310 [AspectExt_intel_device_info_uuid, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph]),
298311 HipSubgroupSizesRDNA>;
@@ -332,7 +345,5 @@ def : HipTargetInfo<"amd_gpu_gfx1103", !listconcat(HipMinAspects, AllUSMAspects)
332345def : HipTargetInfo<"amd_gpu_gfx1150", !listconcat(HipMinAspects, AllUSMAspects), HipSubgroupSizesRDNA>;
333346def : HipTargetInfo<"amd_gpu_gfx1151", !listconcat(HipMinAspects, AllUSMAspects), HipSubgroupSizesRDNA>;
334347// TBA
335- def : HipTargetInfo<"amd_gpu_gfx941", [], []>; // CDNA 3
336- def : HipTargetInfo<"amd_gpu_gfx942", [], []>; // CDNA 3
337348def : HipTargetInfo<"amd_gpu_gfx1200", [], []>; // RDNA 4
338349def : HipTargetInfo<"amd_gpu_gfx1201", [], []>; // RDNA 4
0 commit comments