diff --git a/clang/test/Driver/sycl-device-traits-macros-amdgcn.cpp b/clang/test/Driver/sycl-device-traits-macros-amdgcn.cpp index 59f73ded58f11..aa27306d0c3c2 100644 --- a/clang/test/Driver/sycl-device-traits-macros-amdgcn.cpp +++ b/clang/test/Driver/sycl-device-traits-macros-amdgcn.cpp @@ -50,6 +50,10 @@ // RUN: FileCheck %s --check-prefix=CHECK-SYCL-AMDGCN-AMD-AMDHSA-DEVICE-TRIPLE // RUN: %clangxx -fsycl -nogpulib -fsycl-targets=amd_gpu_gfx940 \ // RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | \ +// RUN: %clangxx -fsycl -nogpulib -fsycl-targets=amd_gpu_gfx941 \ +// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | \ +// RUN: %clangxx -fsycl -nogpulib -fsycl-targets=amd_gpu_gfx942 \ +// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | \ // RUN: FileCheck %s --check-prefix=CHECK-SYCL-AMDGCN-AMD-AMDHSA-DEVICE-TRIPLE // RUN: %clangxx -fsycl -nogpulib -fsycl-targets=amd_gpu_gfx1010 \ // RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | \ @@ -156,6 +160,12 @@ // RUN: %clangxx -fsycl -nogpulib -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx940 \ // RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | \ // RUN: FileCheck %s --check-prefix=CHECK-SYCL-AMDGCN-AMD-AMDHSA-OFFLOAD-ARCH +// RUN: %clangxx -fsycl -nogpulib -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx941 \ +// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CHECK-SYCL-AMDGCN-AMD-AMDHSA-OFFLOAD-ARCH +// RUN: %clangxx -fsycl -nogpulib -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx942 \ +// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CHECK-SYCL-AMDGCN-AMD-AMDHSA-OFFLOAD-ARCH // RUN: %clangxx -fsycl -nogpulib -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx1010 \ // RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | \ // RUN: FileCheck %s --check-prefix=CHECK-SYCL-AMDGCN-AMD-AMDHSA-OFFLOAD-ARCH diff --git a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td index cda1c77bd4b6c..233e703fbe250 100644 --- a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td +++ b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td @@ -275,7 +275,11 @@ class HipTargetInfo aspectList, list subGro defvar HipSubgroupSizesGCN2 = [16]; // gfx7 defvar HipSubgroupSizesGCN3 = [16]; // gfx8, GCN 3rd gen and 4th gen have the same subgroup sizes defvar HipSubgroupSizesGCN5 = [64]; // gfx900-gfx906 GCN5.0 (known as "Vega"), gfx90c GCN5.1 (known as "Vega 7nm") -defvar HipSubgroupSizesRDNA = [32, 64]; // gfxX10-gfx11 (encapsulates RDNA1..3), natively 32 (64-waves mode available) +// According to the "Accelerator and GPU hardware specifications table" docs, +// (see: https://rocm.docs.amd.com/en/latest/reference/gpu-arch-specs.html) +// the ROCm driver selects wave32 mode for the gfx10 and gfx11 family of GPUs. +// Also, see relevant ROCm issue: https://github.com/ROCm/hipamd/issues/59 +defvar HipSubgroupSizesRDNA = [32]; // gfxX10-gfx11 (encapsulates RDNA1..3), (wave64 mode available but not used). defvar HipSubgroupSizesCDNA = [64]; // gfx908, gfx90a (encapsulates CDNA1..2) defvar HipMinAspects = [AspectGpu, AspectFp64, AspectOnline_compiler, AspectOnline_linker, AspectQueue_profiling, @@ -290,9 +294,18 @@ def : HipTargetInfo<"amd_gpu_gfx908", !listconcat(HipMinAspects, AllUSMAspects, def : HipTargetInfo<"amd_gpu_gfx90a", !listconcat(HipMinAspects, AllUSMAspects, [AspectExt_intel_device_info_uuid, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph, AspectExt_oneapi_native_assert]), HipSubgroupSizesCDNA>; +// TODO: Need to verify whether device-side asserts (oneapi_native_assert) are +// now working for the new CDNA3 gfx940, gfx941, gfx942 GPUs and fixed for the +// other supported, gfx1030 and gfx1100, RDNA3 GPUs. def : HipTargetInfo<"amd_gpu_gfx940", !listconcat(HipMinAspects, AllUSMAspects, [AspectExt_intel_device_info_uuid, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph]), HipSubgroupSizesCDNA>; +def : HipTargetInfo<"amd_gpu_gfx941", !listconcat(HipMinAspects, AllUSMAspects, + [AspectExt_intel_device_info_uuid, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph]), + HipSubgroupSizesCDNA>; +def : HipTargetInfo<"amd_gpu_gfx942", !listconcat(HipMinAspects, AllUSMAspects, + [AspectExt_intel_device_info_uuid, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph]), + HipSubgroupSizesCDNA>; def : HipTargetInfo<"amd_gpu_gfx1030", !listconcat(HipMinAspects, AllUSMAspects, [AspectExt_intel_device_info_uuid, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph]), HipSubgroupSizesRDNA>; @@ -332,7 +345,5 @@ def : HipTargetInfo<"amd_gpu_gfx1103", !listconcat(HipMinAspects, AllUSMAspects) def : HipTargetInfo<"amd_gpu_gfx1150", !listconcat(HipMinAspects, AllUSMAspects), HipSubgroupSizesRDNA>; def : HipTargetInfo<"amd_gpu_gfx1151", !listconcat(HipMinAspects, AllUSMAspects), HipSubgroupSizesRDNA>; // TBA -def : HipTargetInfo<"amd_gpu_gfx941", [], []>; // CDNA 3 -def : HipTargetInfo<"amd_gpu_gfx942", [], []>; // CDNA 3 def : HipTargetInfo<"amd_gpu_gfx1200", [], []>; // RDNA 4 def : HipTargetInfo<"amd_gpu_gfx1201", [], []>; // RDNA 4