Skip to content

Commit 08bbd1c

Browse files
committed
[SYCL] Add esimd device descriptor for 2d load/store/prefetch
1 parent 795ff19 commit 08bbd1c

File tree

10 files changed

+127
-11
lines changed

10 files changed

+127
-11
lines changed

sycl/cmake/modules/FetchUnifiedRuntime.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ if(SYCL_UR_USE_FETCH_CONTENT)
116116
CACHE PATH "Path to external '${name}' adapter source dir" FORCE)
117117
endfunction()
118118

119-
set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
119+
set(UNIFIED_RUNTIME_REPO "https://github.com/againull/unified-runtime")
120120
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/UnifiedRuntimeTag.cmake)
121121

122122
set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES")
Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1 @@
1-
# commit 9937d029c7fdcbf101e89f8515f640c145e059c5
2-
# Merge: 9ac6d5d9 10b0e101
3-
# Author: Callum Fare <[email protected]>
4-
# Date: Wed Nov 20 14:49:17 2024 +0000
5-
# Merge pull request #2258 from aarongreig/aaron/tryUseExtensionSubgroupInfo
6-
# Use extension version of clGetKernelSubGroupInfo when necessary.
7-
set(UNIFIED_RUNTIME_TAG 9937d029c7fdcbf101e89f8515f640c145e059c5)
1+
set(UNIFIED_RUNTIME_TAG 0c814b6e8ff2b87b54286d2b77269ebff480adea)

sycl/doc/extensions/supported/sycl_ext_intel_esimd/sycl_ext_intel_esimd.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
- [__regcall Calling convention](#__regcall-calling-convention)
3232
- [Inline assembly](#inline-assembly)
3333
- [Device aspect](#device-aspect)
34+
- [Device Information Descriptors](#device-information-descriptors)
3435
- [Device queries and conditional dispatching of the code](#device-queries-and-conditional-dispatching-of-the-code)
3536
- [Implementation restrictions](#implementation-restrictions)
3637
- [Features not supported with the ESIMD extension](#features-not-supported-with-the-esimd-extension)
@@ -1018,6 +1019,11 @@ The new aspect has the following behavior when queried via `device::has()`:
10181019
|--------|-------------|
10191020
|`aspect::ext_intel_esimd` | Indicates that the device supports the `sycl_ext_intel_esimd` extension as defined in this document. |
10201021

1022+
## Device Information Descriptors
1023+
| Device Descriptors | Return Type | Description |
1024+
| ------------------ | ----------- | ----------- |
1025+
| `ext::intel::esimd::info::device::has_2d_block_io_support` | bool | Returns the boolean indicating whether 2D load/store/prefetch instructions are supported by the device. |
1026+
10211027
## Examples
10221028
### Vector addition (USM)
10231029
```cpp

sycl/doc/extensions/supported/sycl_ext_intel_esimd/sycl_ext_intel_esimd_functions.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -552,7 +552,7 @@ Loads and returns a vector `simd<T, N>` where `N` is `BlockWidth * BlockHeight *
552552
`props` - The optional compile-time properties. Only cache hint properties are used.
553553
554554
### Restrictions
555-
* This function is available only for Intel® Data Center GPU Max Series (aka PVC).
555+
* This function is available only for devices with the `ext::intel::esimd::info::device::has_2d_block_io_support` information descriptor value equal to `true`.
556556
* `Cache-hint` properties, if passed, must follow the [rules](#valid-combinations-of-l1-and-l2-cache-hints-for-load-functions) for `load` functions.
557557
* `Transformed` and `Transposed` cannot be set to true at the same time.
558558
* `BlockWidth` * `BlockHeight` * `NBlocks` * sizeof(`T`) must not exceed 2048.
@@ -598,7 +598,7 @@ Prefetches elements from a memory block of the size `BlockWidth * BlockHeight *
598598
`props` - The compile-time properties, which must specify cache-hints.
599599

600600
### Restrictions
601-
* This function is available only for Intel® Data Center GPU Max Series (aka PVC).
601+
* This function is available only for devices with the `ext::intel::esimd::info::device::has_2d_block_io_support` information descriptor value equal to `true`.
602602
* `Cache-hint` properties must follow the [rules](#valid-combinations-of-l1-and-l2-cache-hints-for-prefetch-functions) for `prefetch` functions.
603603
* `BlockWidth` * `BlockHeight` * `NBlocks` * sizeof(`T`) must not exceed 2048.
604604
* `NBlocks` must be {1,2,4} for `bytes` and `words`, {1,2} for `dwords`, 1 for `qwords`.
@@ -630,7 +630,7 @@ Stores the vector `Vals` of the type `simd<T, N>` to 2D memory block where `N` i
630630
`props` - The optional compile-time properties. Only cache hint properties are used.
631631
632632
### Restrictions
633-
* This function is available only for Intel® Data Center GPU Max Series (aka PVC).
633+
* This function is available only for devices with the `ext::intel::esimd::info::device::has_2d_block_io_support` information descriptor value equal to `true`.
634634
* `Cache-hint` properties, if passed, must follow the [rules](#valid-combinations-of-l1-and-l2-cache-hints-for-store-functions) for `store` functions.
635635
* `BlockWidth` * `BlockHeight` * sizeof(`T`) must not exceed 512.
636636
* `BlockHeight` must not exceed 8.

sycl/include/sycl/info/ext_intel_device_traits.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ __SYCL_PARAM_TRAITS_SPEC(ext::intel, device, free_memory, uint64_t, UR_DEVICE_IN
1616
__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, memory_clock_rate, uint32_t, UR_DEVICE_INFO_MEMORY_CLOCK_RATE)
1717
__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, memory_bus_width, uint32_t, UR_DEVICE_INFO_MEMORY_BUS_WIDTH)
1818
__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, max_compute_queue_indices, int32_t, UR_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES)
19+
__SYCL_PARAM_TRAITS_SPEC(ext::intel::esimd, device, has_2d_block_io_support, bool, UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP)
1920
#ifdef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC_NEEDS_UNDEF
2021
#undef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC
2122
#undef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC_NEEDS_UNDEF

sycl/source/detail/device_info.hpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1563,6 +1563,25 @@ get_device_info<ext::intel::info::device::memory_bus_width>(
15631563
return get_device_info_impl<Param::return_type, Param>::get(Dev);
15641564
}
15651565

1566+
template <>
1567+
inline ext::intel::esimd::info::device::has_2d_block_io_support::return_type
1568+
get_device_info<ext::intel::esimd::info::device::has_2d_block_io_support>(
1569+
const DeviceImplPtr &Dev) {
1570+
if (!Dev->has(aspect::ext_intel_esimd))
1571+
return false;
1572+
1573+
ur_exp_device_2d_block_array_capability_flags_t BlockArrayCapabilities;
1574+
Dev->getAdapter()->call<UrApiKind::urDeviceGetInfo>(
1575+
Dev->getHandleRef(),
1576+
UrInfoCode<
1577+
ext::intel::esimd::info::device::has_2d_block_io_support>::value,
1578+
sizeof(BlockArrayCapabilities), &BlockArrayCapabilities, nullptr);
1579+
return (BlockArrayCapabilities &
1580+
UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_LOAD) &&
1581+
(BlockArrayCapabilities &
1582+
UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_STORE);
1583+
}
1584+
15661585
// Returns the list of all progress guarantees that can be requested for
15671586
// work_groups from the coordination level of root_group when using the device
15681587
// given by Dev. First it calls getProgressGuarantee to get the strongest
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
4+
// Test has_2d_block_io_supported device descriptor for some known
5+
// architectures.
6+
7+
#include <sycl/detail/core.hpp>
8+
#include <sycl/ext/intel/esimd.hpp>
9+
10+
namespace syclex = sycl::ext::oneapi::experimental;
11+
12+
int main() {
13+
sycl::queue Q;
14+
auto Arch = Q.get_device().get_info<syclex::info::device::architecture>();
15+
bool Has2DBlockIOSupport =
16+
Q.get_device()
17+
.get_info<
18+
sycl::ext::intel::esimd::info::device::has_2d_block_io_support>();
19+
if (Arch == syclex::architecture::intel_gpu_pvc) {
20+
if (!Has2DBlockIOSupport) {
21+
std::cerr << "Error: has_2d_block_io_support is expected to be true for "
22+
"PVC architecture"
23+
<< std::endl;
24+
return 1;
25+
}
26+
}
27+
if (Arch == syclex::architecture::intel_gpu_tgllp ||
28+
Arch == syclex::architecture::intel_gpu_dg2_g10 ||
29+
Arch == syclex::architecture::intel_gpu_dg2_g11 ||
30+
Arch == syclex::architecture::intel_gpu_dg2_g12) {
31+
if (Has2DBlockIOSupport) {
32+
std::cerr << "Error: has_2d_block_io_support is expected to be false for "
33+
"Tiger Lake and DG2"
34+
<< std::endl;
35+
return 1;
36+
}
37+
}
38+
return 0;
39+
}

sycl/test/abi/sycl_symbols_linux.dump

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3675,6 +3675,7 @@ _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device25gpu_eu_co
36753675
_ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device25max_compute_queue_indicesEEENT_11return_typeEv
36763676
_ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device4uuidEEENT_11return_typeEv
36773677
_ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device9device_idEEENT_11return_typeEv
3678+
_ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel5esimd4info6device23has_2d_block_io_supportEEENT_11return_typeEv
36783679
_ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext6oneapi12experimental4info6device12architectureEEENT_11return_typeEv
36793680
_ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext6oneapi12experimental4info6device15max_work_groupsILi1EEEEENT_11return_typeEv
36803681
_ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext6oneapi12experimental4info6device15max_work_groupsILi2EEEEENT_11return_typeEv
@@ -3780,6 +3781,7 @@ _ZNK4sycl3_V16device13get_info_implINS0_3ext5intel4info6device25gpu_eu_count_per
37803781
_ZNK4sycl3_V16device13get_info_implINS0_3ext5intel4info6device25max_compute_queue_indicesEEENS0_6detail11ABINeutralTINS8_19is_device_info_descIT_E11return_typeEE4typeEv
37813782
_ZNK4sycl3_V16device13get_info_implINS0_3ext5intel4info6device4uuidEEENS0_6detail11ABINeutralTINS8_19is_device_info_descIT_E11return_typeEE4typeEv
37823783
_ZNK4sycl3_V16device13get_info_implINS0_3ext5intel4info6device9device_idEEENS0_6detail11ABINeutralTINS8_19is_device_info_descIT_E11return_typeEE4typeEv
3784+
_ZNK4sycl3_V16device13get_info_implINS0_3ext5intel5esimd4info6device23has_2d_block_io_supportEEENS0_6detail11ABINeutralTINS9_19is_device_info_descIT_E11return_typeEE4typeEv
37833785
_ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device12architectureEEENS0_6detail11ABINeutralTINS9_19is_device_info_descIT_E11return_typeEE4typeEv
37843786
_ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device15max_work_groupsILi1EEEEENS0_6detail11ABINeutralTINSA_19is_device_info_descIT_E11return_typeEE4typeEv
37853787
_ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device15max_work_groupsILi2EEEEENS0_6detail11ABINeutralTINSA_19is_device_info_descIT_E11return_typeEE4typeEv

sycl/test/abi/sycl_symbols_windows.dump

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767
??$get_info@Ugpu_hw_threads_per_eu@device@info@intel@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBAIXZ
6868
??$get_info@Ugpu_slices@device@info@intel@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBAIXZ
6969
??$get_info@Ugpu_subslices_per_slice@device@info@intel@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBAIXZ
70+
??$get_info@Uhas_2d_block_io_support@device@info@esimd@intel@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA_NXZ
7071
??$get_info@Uimage_row_pitch_align@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBAIXZ
7172
??$get_info@Umatrix_combinations@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$vector@Ucombination@matrix@experimental@oneapi@ext@_V1@sycl@@V?$allocator@Ucombination@matrix@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ
7273
??$get_info@Umax_compute_queue_indices@device@info@intel@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBAHXZ
@@ -156,6 +157,7 @@
156157
??$get_info_impl@Ugpu_slices@device@info@intel@ext@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ
157158
??$get_info_impl@Ugpu_subslices_per_slice@device@info@intel@ext@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ
158159
??$get_info_impl@Uhalf_fp_config@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4fp_config@info@_V1@sycl@@V?$allocator@W4fp_config@info@_V1@sycl@@@std@@@std@@XZ
160+
??$get_info_impl@Uhas_2d_block_io_support@device@info@esimd@intel@ext@_V1@sycl@@@device@_V1@sycl@@AEBA_NXZ
159161
??$get_info_impl@Uhost_unified_memory@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA_NXZ
160162
??$get_info_impl@Uimage2d_max_height@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA_KXZ
161163
??$get_info_impl@Uimage2d_max_width@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA_KXZ

sycl/unittests/kernel-and-program/DeviceInfo.cpp

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ struct TestCtx {
2727

2828
static std::unique_ptr<TestCtx> TestContext;
2929

30+
ur_exp_device_2d_block_array_capability_flags_t Flags2DBlockIO = 0;
31+
bool HasESIMDSupport = false;
32+
3033
static ur_result_t redefinedDeviceGetInfo(void *pParams) {
3134
auto params = *static_cast<ur_device_get_info_params_t *>(pParams);
3235
if (*params.ppropName == UR_DEVICE_INFO_UUID) {
@@ -72,6 +75,20 @@ static ur_result_t redefinedDeviceGetInfo(void *pParams) {
7275
}
7376
}
7477

78+
if (*params.ppropName == UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP) {
79+
assert(*params.ppropSize ==
80+
sizeof(ur_exp_device_2d_block_array_capability_flags_t));
81+
if (*params.ppPropValue) {
82+
*static_cast<ur_exp_device_2d_block_array_capability_flags_t *>(
83+
*params.ppPropValue) = Flags2DBlockIO;
84+
}
85+
}
86+
87+
if (*params.ppropName == UR_DEVICE_INFO_ESIMD_SUPPORT) {
88+
assert(*params.ppropSize == sizeof(bool));
89+
if (*params.ppPropValue)
90+
*static_cast<bool *>(*params.ppPropValue) = HasESIMDSupport;
91+
}
7592
return UR_RESULT_SUCCESS;
7693
}
7794

@@ -185,6 +202,42 @@ TEST_F(DeviceInfoTest, GetDeviceMemoryBusWidth) {
185202
<< "Expect memory_bus_width to be of uint32_t size";
186203
}
187204

205+
TEST_F(DeviceInfoTest, GetDeviceESIMD2DBlockIOSupport) {
206+
context Ctx{Plt.get_devices()[0]};
207+
TestContext.reset(new TestCtx(Ctx));
208+
209+
device Dev = Ctx.get_devices()[0];
210+
211+
HasESIMDSupport = true;
212+
Flags2DBlockIO = UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_LOAD |
213+
UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_STORE;
214+
auto HasSupport =
215+
Dev.get_info<ext::intel::esimd::info::device::has_2d_block_io_support>();
216+
EXPECT_TRUE(HasSupport);
217+
218+
Flags2DBlockIO = UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_LOAD;
219+
HasSupport =
220+
Dev.get_info<ext::intel::esimd::info::device::has_2d_block_io_support>();
221+
EXPECT_FALSE(HasSupport);
222+
223+
Flags2DBlockIO = UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_STORE;
224+
HasSupport =
225+
Dev.get_info<ext::intel::esimd::info::device::has_2d_block_io_support>();
226+
EXPECT_FALSE(HasSupport);
227+
228+
Flags2DBlockIO = 0;
229+
HasSupport =
230+
Dev.get_info<ext::intel::esimd::info::device::has_2d_block_io_support>();
231+
EXPECT_FALSE(HasSupport);
232+
233+
Flags2DBlockIO = UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_LOAD |
234+
UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_STORE;
235+
HasESIMDSupport = false;
236+
HasSupport =
237+
Dev.get_info<ext::intel::esimd::info::device::has_2d_block_io_support>();
238+
EXPECT_FALSE(HasSupport);
239+
}
240+
188241
TEST_F(DeviceInfoTest, BuiltInKernelIDs) {
189242
context Ctx{Plt.get_devices()[0]};
190243
TestContext.reset(new TestCtx(Ctx));

0 commit comments

Comments
 (0)