From 2a78464819f6be35be8981523d4c4ab2b1b7d4a7 Mon Sep 17 00:00:00 2001 From: Wenju He Date: Mon, 25 Aug 2025 04:28:43 +0200 Subject: [PATCH 1/2] [libspirv] Use clc workitem functions for libspirv workitem llvm-diff changes: * libspirv-amdgcn--amdhsa.bc has no change. * libspirv-nvptx64--nvidiacl.bc has instruction order changes. * libspirv-native_cpu.bc has new symbols _Z25__spirv_BuiltInSubgroupIdv and __mux_get_sub_group_id. --- libclc/libspirv/lib/amdgcn-amdhsa/SOURCES | 3 +++ .../workitem/get_local_linear_id.cl | 16 ++++++++++++++ .../workitem/get_num_sub_groups.cl | 18 +++++++++++++++ .../workitem/get_sub_group_size.cl | 22 +++++++++++++++++++ libclc/libspirv/lib/generic/SOURCES | 1 + .../generic/workitem/get_local_linear_id.cl | 8 +++---- .../generic/workitem/get_num_sub_groups.cl | 8 ++----- .../lib/generic/workitem/get_sub_group_id.cl | 14 ++++++++++++ libclc/libspirv/lib/native_cpu/SOURCES | 2 ++ .../workitem/get_local_linear_id.cl | 16 ++++++++++++++ .../native_cpu/workitem/get_sub_group_id.cl | 16 ++++++++++++++ libclc/libspirv/lib/ptx-nvidiacl/SOURCES | 1 - .../ptx-nvidiacl/workitem/get_global_id.cl | 10 +++------ .../lib/ptx-nvidiacl/workitem/get_group_id.cl | 12 ++-------- .../lib/ptx-nvidiacl/workitem/get_local_id.cl | 12 ++-------- .../ptx-nvidiacl/workitem/get_local_size.cl | 12 ++-------- .../workitem/get_max_sub_group_size.cl | 5 ++--- .../ptx-nvidiacl/workitem/get_num_groups.cl | 12 ++-------- .../workitem/get_sub_group_local_id.cl | 3 ++- libclc/libspirv/lib/r600/SOURCES | 4 ++++ .../lib/r600/workitem/get_local_linear_id.cl | 16 ++++++++++++++ .../lib/r600/workitem/get_num_sub_groups.cl | 18 +++++++++++++++ .../workitem/get_sub_group_id.cl | 3 --- .../lib/r600/workitem/get_sub_group_size.cl | 22 +++++++++++++++++++ 24 files changed, 188 insertions(+), 66 deletions(-) create mode 100644 libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_local_linear_id.cl create mode 100644 libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_num_sub_groups.cl create mode 100644 libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_sub_group_size.cl create mode 100644 libclc/libspirv/lib/generic/workitem/get_sub_group_id.cl create mode 100644 libclc/libspirv/lib/native_cpu/workitem/get_local_linear_id.cl create mode 100644 libclc/libspirv/lib/native_cpu/workitem/get_sub_group_id.cl create mode 100644 libclc/libspirv/lib/r600/workitem/get_local_linear_id.cl create mode 100644 libclc/libspirv/lib/r600/workitem/get_num_sub_groups.cl rename libclc/libspirv/lib/{ptx-nvidiacl => r600}/workitem/get_sub_group_id.cl (81%) create mode 100644 libclc/libspirv/lib/r600/workitem/get_sub_group_size.cl diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/SOURCES b/libclc/libspirv/lib/amdgcn-amdhsa/SOURCES index 9444083daeca2..20952d424adf9 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/SOURCES +++ b/libclc/libspirv/lib/amdgcn-amdhsa/SOURCES @@ -58,11 +58,14 @@ math/sinh.cl math/tan.cl math/tanh.cl workitem/get_global_size.cl +workitem/get_local_linear_id.cl workitem/get_local_size.cl workitem/get_num_groups.cl +workitem/get_num_sub_groups.cl workitem/get_max_sub_group_size.cl workitem/get_sub_group_id.cl workitem/get_sub_group_local_id.cl +workitem/get_sub_group_size.cl misc/sub_group_shuffle.cl async/wait_group_events.cl assert/__assert_fail.ll diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_local_linear_id.cl b/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_local_linear_id.cl new file mode 100644 index 0000000000000..44aa37f011777 --- /dev/null +++ b/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_local_linear_id.cl @@ -0,0 +1,16 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +_CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInLocalInvocationIndex() { + return __spirv_BuiltInLocalInvocationId(2) * __spirv_BuiltInWorkgroupSize(1) * + __spirv_BuiltInWorkgroupSize(0) + + __spirv_BuiltInLocalInvocationId(1) * __spirv_BuiltInWorkgroupSize(0) + + __spirv_BuiltInLocalInvocationId(0); +} diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_num_sub_groups.cl b/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_num_sub_groups.cl new file mode 100644 index 0000000000000..63f7c3a93ca90 --- /dev/null +++ b/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_num_sub_groups.cl @@ -0,0 +1,18 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +_CLC_DEF _CLC_OVERLOAD uint __spirv_BuiltInNumSubgroups() { + size_t size_x = __spirv_BuiltInWorkgroupSize(0); + size_t size_y = __spirv_BuiltInWorkgroupSize(1); + size_t size_z = __spirv_BuiltInWorkgroupSize(2); + uint sg_size = __spirv_BuiltInSubgroupMaxSize(); + size_t linear_size = size_z * size_y * size_x; + return (uint)((linear_size + sg_size - 1) / sg_size); +} diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_sub_group_size.cl b/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_sub_group_size.cl new file mode 100644 index 0000000000000..d0cb4acc8a60f --- /dev/null +++ b/libclc/libspirv/lib/amdgcn-amdhsa/workitem/get_sub_group_size.cl @@ -0,0 +1,22 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +_CLC_DEF _CLC_OVERLOAD uint __spirv_BuiltInSubgroupSize() { + if (__spirv_BuiltInSubgroupId() != __spirv_BuiltInNumSubgroups() - 1) { + return __spirv_BuiltInSubgroupMaxSize(); + } + size_t size_x = __spirv_BuiltInWorkgroupSize(0); + size_t size_y = __spirv_BuiltInWorkgroupSize(1); + size_t size_z = __spirv_BuiltInWorkgroupSize(2); + size_t linear_size = size_z * size_y * size_x; + size_t uniform_groups = __spirv_BuiltInNumSubgroups() - 1; + size_t uniform_size = __spirv_BuiltInSubgroupMaxSize() * uniform_groups; + return linear_size - uniform_size; +} diff --git a/libclc/libspirv/lib/generic/SOURCES b/libclc/libspirv/lib/generic/SOURCES index 5959660a19b28..8a5d0d279c464 100644 --- a/libclc/libspirv/lib/generic/SOURCES +++ b/libclc/libspirv/lib/generic/SOURCES @@ -179,4 +179,5 @@ workitem/get_global_id.cl workitem/get_global_size.cl workitem/get_local_linear_id.cl workitem/get_num_sub_groups.cl +workitem/get_sub_group_id.cl workitem/get_sub_group_size.cl diff --git a/libclc/libspirv/lib/generic/workitem/get_local_linear_id.cl b/libclc/libspirv/lib/generic/workitem/get_local_linear_id.cl index 44aa37f011777..888b160df3b80 100644 --- a/libclc/libspirv/lib/generic/workitem/get_local_linear_id.cl +++ b/libclc/libspirv/lib/generic/workitem/get_local_linear_id.cl @@ -6,11 +6,9 @@ // //===----------------------------------------------------------------------===// +#include #include -_CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInLocalInvocationIndex() { - return __spirv_BuiltInLocalInvocationId(2) * __spirv_BuiltInWorkgroupSize(1) * - __spirv_BuiltInWorkgroupSize(0) + - __spirv_BuiltInLocalInvocationId(1) * __spirv_BuiltInWorkgroupSize(0) + - __spirv_BuiltInLocalInvocationId(0); +_CLC_OVERLOAD _CLC_DEF size_t __spirv_BuiltInLocalInvocationIndex() { + return __clc_get_local_linear_id(); } diff --git a/libclc/libspirv/lib/generic/workitem/get_num_sub_groups.cl b/libclc/libspirv/lib/generic/workitem/get_num_sub_groups.cl index 63f7c3a93ca90..cf357cb6c193e 100644 --- a/libclc/libspirv/lib/generic/workitem/get_num_sub_groups.cl +++ b/libclc/libspirv/lib/generic/workitem/get_num_sub_groups.cl @@ -6,13 +6,9 @@ // //===----------------------------------------------------------------------===// +#include #include _CLC_DEF _CLC_OVERLOAD uint __spirv_BuiltInNumSubgroups() { - size_t size_x = __spirv_BuiltInWorkgroupSize(0); - size_t size_y = __spirv_BuiltInWorkgroupSize(1); - size_t size_z = __spirv_BuiltInWorkgroupSize(2); - uint sg_size = __spirv_BuiltInSubgroupMaxSize(); - size_t linear_size = size_z * size_y * size_x; - return (uint)((linear_size + sg_size - 1) / sg_size); + return __clc_get_num_sub_groups(); } diff --git a/libclc/libspirv/lib/generic/workitem/get_sub_group_id.cl b/libclc/libspirv/lib/generic/workitem/get_sub_group_id.cl new file mode 100644 index 0000000000000..00377d557dd4b --- /dev/null +++ b/libclc/libspirv/lib/generic/workitem/get_sub_group_id.cl @@ -0,0 +1,14 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +_CLC_OVERLOAD _CLC_DEF uint __spirv_BuiltInSubgroupId() { + return __clc_get_sub_group_id(); +} diff --git a/libclc/libspirv/lib/native_cpu/SOURCES b/libclc/libspirv/lib/native_cpu/SOURCES index a8564e60b99e7..78c3aaecb0ff7 100644 --- a/libclc/libspirv/lib/native_cpu/SOURCES +++ b/libclc/libspirv/lib/native_cpu/SOURCES @@ -15,6 +15,8 @@ atomic/atomic_max.ll atomic/atomic_min.ll workitem/get_global_id.cl workitem/get_global_size.cl +workitem/get_local_linear_id.cl workitem/get_num_sub_groups.cl +workitem/get_sub_group_id.cl workitem/get_sub_group_size.cl cl_khr_int64_extended_atomics/minmax_helpers.ll diff --git a/libclc/libspirv/lib/native_cpu/workitem/get_local_linear_id.cl b/libclc/libspirv/lib/native_cpu/workitem/get_local_linear_id.cl new file mode 100644 index 0000000000000..44aa37f011777 --- /dev/null +++ b/libclc/libspirv/lib/native_cpu/workitem/get_local_linear_id.cl @@ -0,0 +1,16 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +_CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInLocalInvocationIndex() { + return __spirv_BuiltInLocalInvocationId(2) * __spirv_BuiltInWorkgroupSize(1) * + __spirv_BuiltInWorkgroupSize(0) + + __spirv_BuiltInLocalInvocationId(1) * __spirv_BuiltInWorkgroupSize(0) + + __spirv_BuiltInLocalInvocationId(0); +} diff --git a/libclc/libspirv/lib/native_cpu/workitem/get_sub_group_id.cl b/libclc/libspirv/lib/native_cpu/workitem/get_sub_group_id.cl new file mode 100644 index 0000000000000..067be07aceff3 --- /dev/null +++ b/libclc/libspirv/lib/native_cpu/workitem/get_sub_group_id.cl @@ -0,0 +1,16 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +uint __mux_get_sub_group_id(); + +_CLC_OVERLOAD _CLC_DEF uint __spirv_BuiltInSubgroupId() { + return __mux_get_sub_group_id(); +} diff --git a/libclc/libspirv/lib/ptx-nvidiacl/SOURCES b/libclc/libspirv/lib/ptx-nvidiacl/SOURCES index 74466057d6e90..00907f6470a62 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/SOURCES +++ b/libclc/libspirv/lib/ptx-nvidiacl/SOURCES @@ -82,7 +82,6 @@ workitem/get_local_id.cl workitem/get_local_size.cl workitem/get_max_sub_group_size.cl workitem/get_num_groups.cl -workitem/get_sub_group_id.cl workitem/get_sub_group_local_id.cl images/image_helpers.ll images/image.cl diff --git a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_global_id.cl b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_global_id.cl index d617971832405..247adf9022a11 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_global_id.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_global_id.cl @@ -6,18 +6,14 @@ // //===----------------------------------------------------------------------===// +#include #include extern int __nvvm_reflect_ocl(constant char *); _CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInGlobalInvocationId(int dim) { if (__nvvm_reflect_ocl("__CUDA_ID_QUERIES_FIT_IN_INT")) { - return (uint)__spirv_BuiltInWorkgroupId(dim) * - (uint)__spirv_BuiltInWorkgroupSize(dim) + - (uint)__spirv_BuiltInLocalInvocationId(dim) + - (uint)__spirv_BuiltInGlobalOffset(dim); + return (uint)__clc_get_global_id(dim); } - return __spirv_BuiltInWorkgroupId(dim) * __spirv_BuiltInWorkgroupSize(dim) + - __spirv_BuiltInLocalInvocationId(dim) + - __spirv_BuiltInGlobalOffset(dim); + return __clc_get_global_id(dim); } diff --git a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_group_id.cl b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_group_id.cl index 7ea58306e8b36..b92d1fd119392 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_group_id.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_group_id.cl @@ -6,17 +6,9 @@ // //===----------------------------------------------------------------------===// +#include #include _CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInWorkgroupId(int dim) { - switch (dim) { - case 0: - return __nvvm_read_ptx_sreg_ctaid_x(); - case 1: - return __nvvm_read_ptx_sreg_ctaid_y(); - case 2: - return __nvvm_read_ptx_sreg_ctaid_z(); - default: - return 0; - } + return __clc_get_group_id(dim); } diff --git a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_local_id.cl b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_local_id.cl index 2b184d4501b15..c1358c74d8968 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_local_id.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_local_id.cl @@ -6,17 +6,9 @@ // //===----------------------------------------------------------------------===// +#include #include _CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInLocalInvocationId(int dim) { - switch (dim) { - case 0: - return __nvvm_read_ptx_sreg_tid_x(); - case 1: - return __nvvm_read_ptx_sreg_tid_y(); - case 2: - return __nvvm_read_ptx_sreg_tid_z(); - default: - return 0; - } + return __clc_get_local_id(dim); } diff --git a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_local_size.cl b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_local_size.cl index b0b108d897267..9404346e162f4 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_local_size.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_local_size.cl @@ -6,17 +6,9 @@ // //===----------------------------------------------------------------------===// +#include #include _CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInWorkgroupSize(int dim) { - switch (dim) { - case 0: - return __nvvm_read_ptx_sreg_ntid_x(); - case 1: - return __nvvm_read_ptx_sreg_ntid_y(); - case 2: - return __nvvm_read_ptx_sreg_ntid_z(); - default: - return 1; - } + return __clc_get_local_size(dim); } diff --git a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_max_sub_group_size.cl b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_max_sub_group_size.cl index c94c57e64293c..89ce28a21a774 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_max_sub_group_size.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_max_sub_group_size.cl @@ -6,10 +6,9 @@ // //===----------------------------------------------------------------------===// +#include #include _CLC_DEF _CLC_OVERLOAD uint __spirv_BuiltInSubgroupMaxSize() { - return 32; - // FIXME: warpsize is defined by NVVM IR but doesn't compile if used here - // return __nvvm_read_ptx_sreg_warpsize(); + return __clc_get_max_sub_group_size(); } diff --git a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_num_groups.cl b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_num_groups.cl index 273329b9f82e5..81d416867abf7 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_num_groups.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_num_groups.cl @@ -6,17 +6,9 @@ // //===----------------------------------------------------------------------===// +#include #include _CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInNumWorkgroups(int dim) { - switch (dim) { - case 0: - return __nvvm_read_ptx_sreg_nctaid_x(); - case 1: - return __nvvm_read_ptx_sreg_nctaid_y(); - case 2: - return __nvvm_read_ptx_sreg_nctaid_z(); - default: - return 1; - } + return __clc_get_num_groups(dim); } diff --git a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_sub_group_local_id.cl b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_sub_group_local_id.cl index 7d690e9fa1201..a47ed57015c95 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_sub_group_local_id.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_sub_group_local_id.cl @@ -6,8 +6,9 @@ // //===----------------------------------------------------------------------===// +#include #include _CLC_DEF _CLC_OVERLOAD uint __spirv_BuiltInSubgroupLocalInvocationId() { - return __nvvm_read_ptx_sreg_laneid(); + return __clc_get_sub_group_local_id(); } diff --git a/libclc/libspirv/lib/r600/SOURCES b/libclc/libspirv/lib/r600/SOURCES index 300e54c4769e3..8a2df507982f9 100644 --- a/libclc/libspirv/lib/r600/SOURCES +++ b/libclc/libspirv/lib/r600/SOURCES @@ -2,6 +2,10 @@ workitem/get_global_offset.cl workitem/get_group_id.cl workitem/get_global_size.cl workitem/get_local_id.cl +workitem/get_local_linear_id.cl workitem/get_local_size.cl workitem/get_num_groups.cl +workitem/get_num_sub_groups.cl +workitem/get_sub_group_id.cl +workitem/get_sub_group_size.cl workitem/get_work_dim.cl diff --git a/libclc/libspirv/lib/r600/workitem/get_local_linear_id.cl b/libclc/libspirv/lib/r600/workitem/get_local_linear_id.cl new file mode 100644 index 0000000000000..44aa37f011777 --- /dev/null +++ b/libclc/libspirv/lib/r600/workitem/get_local_linear_id.cl @@ -0,0 +1,16 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +_CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInLocalInvocationIndex() { + return __spirv_BuiltInLocalInvocationId(2) * __spirv_BuiltInWorkgroupSize(1) * + __spirv_BuiltInWorkgroupSize(0) + + __spirv_BuiltInLocalInvocationId(1) * __spirv_BuiltInWorkgroupSize(0) + + __spirv_BuiltInLocalInvocationId(0); +} diff --git a/libclc/libspirv/lib/r600/workitem/get_num_sub_groups.cl b/libclc/libspirv/lib/r600/workitem/get_num_sub_groups.cl new file mode 100644 index 0000000000000..63f7c3a93ca90 --- /dev/null +++ b/libclc/libspirv/lib/r600/workitem/get_num_sub_groups.cl @@ -0,0 +1,18 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +_CLC_DEF _CLC_OVERLOAD uint __spirv_BuiltInNumSubgroups() { + size_t size_x = __spirv_BuiltInWorkgroupSize(0); + size_t size_y = __spirv_BuiltInWorkgroupSize(1); + size_t size_z = __spirv_BuiltInWorkgroupSize(2); + uint sg_size = __spirv_BuiltInSubgroupMaxSize(); + size_t linear_size = size_z * size_y * size_x; + return (uint)((linear_size + sg_size - 1) / sg_size); +} diff --git a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_sub_group_id.cl b/libclc/libspirv/lib/r600/workitem/get_sub_group_id.cl similarity index 81% rename from libclc/libspirv/lib/ptx-nvidiacl/workitem/get_sub_group_id.cl rename to libclc/libspirv/lib/r600/workitem/get_sub_group_id.cl index 658c90a8139af..440fd91b3f4a3 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_sub_group_id.cl +++ b/libclc/libspirv/lib/r600/workitem/get_sub_group_id.cl @@ -9,14 +9,11 @@ #include _CLC_DEF _CLC_OVERLOAD uint __spirv_BuiltInSubgroupId() { - // sreg.warpid is volatile and doesn't represent virtual warp index - // see https://docs.nvidia.com/cuda/parallel-thread-execution/index.html size_t id_x = __spirv_BuiltInLocalInvocationId(0); size_t id_y = __spirv_BuiltInLocalInvocationId(1); size_t id_z = __spirv_BuiltInLocalInvocationId(2); size_t size_x = __spirv_BuiltInWorkgroupSize(0); size_t size_y = __spirv_BuiltInWorkgroupSize(1); - size_t size_z = __spirv_BuiltInWorkgroupSize(2); uint sg_size = __spirv_BuiltInSubgroupMaxSize(); return (id_z * size_y * size_x + id_y * size_x + id_x) / sg_size; } diff --git a/libclc/libspirv/lib/r600/workitem/get_sub_group_size.cl b/libclc/libspirv/lib/r600/workitem/get_sub_group_size.cl new file mode 100644 index 0000000000000..d0cb4acc8a60f --- /dev/null +++ b/libclc/libspirv/lib/r600/workitem/get_sub_group_size.cl @@ -0,0 +1,22 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +_CLC_DEF _CLC_OVERLOAD uint __spirv_BuiltInSubgroupSize() { + if (__spirv_BuiltInSubgroupId() != __spirv_BuiltInNumSubgroups() - 1) { + return __spirv_BuiltInSubgroupMaxSize(); + } + size_t size_x = __spirv_BuiltInWorkgroupSize(0); + size_t size_y = __spirv_BuiltInWorkgroupSize(1); + size_t size_z = __spirv_BuiltInWorkgroupSize(2); + size_t linear_size = size_z * size_y * size_x; + size_t uniform_groups = __spirv_BuiltInNumSubgroups() - 1; + size_t uniform_size = __spirv_BuiltInSubgroupMaxSize() * uniform_groups; + return linear_size - uniform_size; +} From ef183ba8cb84ff04e7784edae991c920c94ed255 Mon Sep 17 00:00:00 2001 From: Wenju He Date: Mon, 25 Aug 2025 05:29:59 +0200 Subject: [PATCH 2/2] undo change to libspirv/lib/ptx-nvidiacl/workitem/get_global_id.cl --- .../lib/generic/workitem/get_num_sub_groups.cl | 2 +- .../lib/ptx-nvidiacl/workitem/get_global_id.cl | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/libclc/libspirv/lib/generic/workitem/get_num_sub_groups.cl b/libclc/libspirv/lib/generic/workitem/get_num_sub_groups.cl index cf357cb6c193e..27ee86000f5f9 100644 --- a/libclc/libspirv/lib/generic/workitem/get_num_sub_groups.cl +++ b/libclc/libspirv/lib/generic/workitem/get_num_sub_groups.cl @@ -9,6 +9,6 @@ #include #include -_CLC_DEF _CLC_OVERLOAD uint __spirv_BuiltInNumSubgroups() { +_CLC_OVERLOAD _CLC_DEF uint __spirv_BuiltInNumSubgroups() { return __clc_get_num_sub_groups(); } diff --git a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_global_id.cl b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_global_id.cl index 247adf9022a11..d617971832405 100644 --- a/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_global_id.cl +++ b/libclc/libspirv/lib/ptx-nvidiacl/workitem/get_global_id.cl @@ -6,14 +6,18 @@ // //===----------------------------------------------------------------------===// -#include #include extern int __nvvm_reflect_ocl(constant char *); _CLC_DEF _CLC_OVERLOAD size_t __spirv_BuiltInGlobalInvocationId(int dim) { if (__nvvm_reflect_ocl("__CUDA_ID_QUERIES_FIT_IN_INT")) { - return (uint)__clc_get_global_id(dim); + return (uint)__spirv_BuiltInWorkgroupId(dim) * + (uint)__spirv_BuiltInWorkgroupSize(dim) + + (uint)__spirv_BuiltInLocalInvocationId(dim) + + (uint)__spirv_BuiltInGlobalOffset(dim); } - return __clc_get_global_id(dim); + return __spirv_BuiltInWorkgroupId(dim) * __spirv_BuiltInWorkgroupSize(dim) + + __spirv_BuiltInLocalInvocationId(dim) + + __spirv_BuiltInGlobalOffset(dim); }