Skip to content

Commit fd46a46

Browse files
committed
Merge branch 'sycl' into multiple-build
2 parents 77821d4 + 1e1757b commit fd46a46

File tree

7 files changed

+47
-14
lines changed

7 files changed

+47
-14
lines changed

clang/test/Driver/sycl-device-traits-macros-amdgcn.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,10 @@
5050
// RUN: FileCheck %s --check-prefix=CHECK-SYCL-AMDGCN-AMD-AMDHSA-DEVICE-TRIPLE
5151
// RUN: %clangxx -fsycl -nogpulib -fsycl-targets=amd_gpu_gfx940 \
5252
// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | \
53+
// RUN: %clangxx -fsycl -nogpulib -fsycl-targets=amd_gpu_gfx941 \
54+
// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | \
55+
// RUN: %clangxx -fsycl -nogpulib -fsycl-targets=amd_gpu_gfx942 \
56+
// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | \
5357
// RUN: FileCheck %s --check-prefix=CHECK-SYCL-AMDGCN-AMD-AMDHSA-DEVICE-TRIPLE
5458
// RUN: %clangxx -fsycl -nogpulib -fsycl-targets=amd_gpu_gfx1010 \
5559
// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | \
@@ -156,6 +160,12 @@
156160
// RUN: %clangxx -fsycl -nogpulib -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx940 \
157161
// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | \
158162
// RUN: FileCheck %s --check-prefix=CHECK-SYCL-AMDGCN-AMD-AMDHSA-OFFLOAD-ARCH
163+
// RUN: %clangxx -fsycl -nogpulib -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx941 \
164+
// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | \
165+
// RUN: FileCheck %s --check-prefix=CHECK-SYCL-AMDGCN-AMD-AMDHSA-OFFLOAD-ARCH
166+
// RUN: %clangxx -fsycl -nogpulib -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx942 \
167+
// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | \
168+
// RUN: FileCheck %s --check-prefix=CHECK-SYCL-AMDGCN-AMD-AMDHSA-OFFLOAD-ARCH
159169
// RUN: %clangxx -fsycl -nogpulib -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx1010 \
160170
// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | \
161171
// RUN: FileCheck %s --check-prefix=CHECK-SYCL-AMDGCN-AMD-AMDHSA-OFFLOAD-ARCH

libdevice/imf_rounding_op.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -860,7 +860,7 @@ template <typename Ty> Ty __fp_div(Ty x, Ty y, int rd) {
860860
}
861861
}
862862

863-
unsigned get_grs_bits(uint64_t dbits, unsigned bit_num) {
863+
static unsigned get_grs_bits(uint64_t dbits, unsigned bit_num) {
864864
if (bit_num == 1)
865865
return (dbits & 0x1) << 2;
866866
else if (bit_num == 2)
@@ -873,7 +873,7 @@ unsigned get_grs_bits(uint64_t dbits, unsigned bit_num) {
873873
}
874874
}
875875

876-
unsigned get_grs_bits(__iml_ui128 dbits, unsigned bit_num) {
876+
static unsigned get_grs_bits(__iml_ui128 dbits, unsigned bit_num) {
877877
if (bit_num == 1)
878878
return static_cast<uint32_t>(dbits & 0x1) << 2;
879879
else if (bit_num == 2)

llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,11 @@ class HipTargetInfo<string targetName, list<Aspect> aspectList, list<int> subGro
275275
defvar HipSubgroupSizesGCN2 = [16]; // gfx7
276276
defvar HipSubgroupSizesGCN3 = [16]; // gfx8, GCN 3rd gen and 4th gen have the same subgroup sizes
277277
defvar HipSubgroupSizesGCN5 = [64]; // gfx900-gfx906 GCN5.0 (known as "Vega"), gfx90c GCN5.1 (known as "Vega 7nm")
278-
defvar HipSubgroupSizesRDNA = [32, 64]; // gfxX10-gfx11 (encapsulates RDNA1..3), natively 32 (64-waves mode available)
278+
// According to the "Accelerator and GPU hardware specifications table" docs,
279+
// (see: https://rocm.docs.amd.com/en/latest/reference/gpu-arch-specs.html)
280+
// the ROCm driver selects wave32 mode for the gfx10 and gfx11 family of GPUs.
281+
// Also, see relevant ROCm issue: https://github.com/ROCm/hipamd/issues/59
282+
defvar HipSubgroupSizesRDNA = [32]; // gfxX10-gfx11 (encapsulates RDNA1..3), (wave64 mode available but not used).
279283
defvar HipSubgroupSizesCDNA = [64]; // gfx908, gfx90a (encapsulates CDNA1..2)
280284

281285
defvar HipMinAspects = [AspectGpu, AspectFp64, AspectOnline_compiler, AspectOnline_linker, AspectQueue_profiling,
@@ -290,9 +294,18 @@ def : HipTargetInfo<"amd_gpu_gfx908", !listconcat(HipMinAspects, AllUSMAspects,
290294
def : HipTargetInfo<"amd_gpu_gfx90a", !listconcat(HipMinAspects, AllUSMAspects,
291295
[AspectExt_intel_device_info_uuid, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph, AspectExt_oneapi_native_assert]),
292296
HipSubgroupSizesCDNA>;
297+
// TODO: Need to verify whether device-side asserts (oneapi_native_assert) are
298+
// now working for the new CDNA3 gfx940, gfx941, gfx942 GPUs and fixed for the
299+
// other supported, gfx1030 and gfx1100, RDNA3 GPUs.
293300
def : HipTargetInfo<"amd_gpu_gfx940", !listconcat(HipMinAspects, AllUSMAspects,
294301
[AspectExt_intel_device_info_uuid, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph]),
295302
HipSubgroupSizesCDNA>;
303+
def : HipTargetInfo<"amd_gpu_gfx941", !listconcat(HipMinAspects, AllUSMAspects,
304+
[AspectExt_intel_device_info_uuid, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph]),
305+
HipSubgroupSizesCDNA>;
306+
def : HipTargetInfo<"amd_gpu_gfx942", !listconcat(HipMinAspects, AllUSMAspects,
307+
[AspectExt_intel_device_info_uuid, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph]),
308+
HipSubgroupSizesCDNA>;
296309
def : HipTargetInfo<"amd_gpu_gfx1030", !listconcat(HipMinAspects, AllUSMAspects,
297310
[AspectExt_intel_device_info_uuid, AspectExt_oneapi_graph, AspectExt_oneapi_limited_graph]),
298311
HipSubgroupSizesRDNA>;
@@ -332,7 +345,5 @@ def : HipTargetInfo<"amd_gpu_gfx1103", !listconcat(HipMinAspects, AllUSMAspects)
332345
def : HipTargetInfo<"amd_gpu_gfx1150", !listconcat(HipMinAspects, AllUSMAspects), HipSubgroupSizesRDNA>;
333346
def : HipTargetInfo<"amd_gpu_gfx1151", !listconcat(HipMinAspects, AllUSMAspects), HipSubgroupSizesRDNA>;
334347
// TBA
335-
def : HipTargetInfo<"amd_gpu_gfx941", [], []>; // CDNA 3
336-
def : HipTargetInfo<"amd_gpu_gfx942", [], []>; // CDNA 3
337348
def : HipTargetInfo<"amd_gpu_gfx1200", [], []>; // RDNA 4
338349
def : HipTargetInfo<"amd_gpu_gfx1201", [], []>; // RDNA 4

sycl/doc/extensions/proposed/sycl_ext_intel_cache_controls.asciidoc renamed to sycl/doc/extensions/experimental/sycl_ext_intel_cache_controls.asciidoc

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ https://github.com/intel/llvm/issues
3636

3737
== Dependencies
3838

39-
This extension is written against the SYCL 2020 revision 7 specification. All
39+
This extension is written against the SYCL 2020 revision 9 specification. All
4040
references below to the "core SYCL specification" or to section numbers in the
4141
SYCL specification refer to that revision.
4242

@@ -49,11 +49,12 @@ This extension depends on the following SYCL extensions:
4949

5050
== Status
5151

52-
This is a proposed extension specification, intended to gather community
53-
feedback. Interfaces defined in this specification may not be implemented yet
54-
or may be in a preliminary state. The specification itself may also change in
55-
incompatible ways before it is finalized. *Shipping software products should
56-
not rely on APIs defined in this specification.*
52+
This is an experimental extension specification, intended to provide early
53+
access to features and gather community feedback. Interfaces defined in this
54+
specification are implemented in {dpcpp}, but they are not finalized and may
55+
change incompatibly in future versions of {dpcpp} without prior notice.
56+
*Shipping software products should not rely on APIs defined in this
57+
specification.*
5758

5859

5960
== Overview

sycl/doc/extensions/proposed/sycl_ext_oneapi_free_function_kernels.asciidoc

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -705,12 +705,12 @@ static constexpr size_t WGSIZE = 16;
705705
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<1>))
706706
void iota(float start, float *ptr) {
707707
// Get the ID of this kernel iteration.
708-
size_t id = syclext::this_work_item::get_nd_item().get_global_linear_id();
708+
size_t id = syclext::this_work_item::get_nd_item<1>().get_global_linear_id();
709709

710710
ptr[id] = start + static_cast<float>(id);
711711
}
712712

713-
void main() {
713+
int main() {
714714
sycl::queue q;
715715
sycl::context ctxt = q.get_context();
716716

@@ -963,6 +963,13 @@ int main() {
963963
```
964964
+
965965
Where `kfp` would have some nicer name.
966+
+
967+
With either form above, it seems like we have enough type information for the
968+
header to check that the types of the actual kernel arguments are implicitly
969+
convertible to the types of the formal kernel parameters, and we can raise a
970+
compile-time error if they are not.
971+
In addition, the header can perform any necessary implicit conversions when
972+
setting the kernel argument values.
966973

967974
* We are debating whether we should allow a free function kernel to be defined
968975
with an initial "iteration index" parameter such as:

sycl/doc/extensions/proposed/sycl_ext_oneapi_work_group_memory.asciidoc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -457,7 +457,7 @@ constexpr size_t WGSIZE = 256;
457457
458458
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::nd_range_kernel<1>))
459459
void mykernel(syclexp::work_group_memory<int[WGSIZE]> mem) {
460-
size_t id = syclext::this_work_item::get_nd_item().get_local_linear_id();
460+
size_t id = syclext::this_work_item::get_nd_item<1>().get_local_linear_id();
461461
462462
// Each work-item has its own dedicated element of the device local memory
463463
// array.

sycl/test-e2e/AddressSanitizer/lit.local.cfg

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,7 @@ config.unsupported_features += ['cuda', 'hip']
1212

1313
# FIXME: Skip some of gpu devices, waiting for gfx driver uplifting
1414
config.unsupported_features += ['gpu-intel-gen9', 'gpu-intel-gen11', 'gpu-intel-gen12', 'gpu-intel-pvc']
15+
16+
# GPU testing requires level_zero
17+
if 'opencl:gpu' in config.sycl_devices:
18+
config.required_features += ['level_zero']

0 commit comments

Comments
 (0)