Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion sycl/cmake/modules/FetchUnifiedRuntime.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ if(SYCL_UR_USE_FETCH_CONTENT)
CACHE PATH "Path to external '${name}' adapter source dir" FORCE)
endfunction()

set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
set(UNIFIED_RUNTIME_REPO "https://github.com/againull/unified-runtime")
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/UnifiedRuntimeTag.cmake)

set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES")
Expand Down
8 changes: 1 addition & 7 deletions sycl/cmake/modules/UnifiedRuntimeTag.cmake
Original file line number Diff line number Diff line change
@@ -1,7 +1 @@
# commit 9937d029c7fdcbf101e89f8515f640c145e059c5
# Merge: 9ac6d5d9 10b0e101
# Author: Callum Fare <[email protected]>
# Date: Wed Nov 20 14:49:17 2024 +0000
# Merge pull request #2258 from aarongreig/aaron/tryUseExtensionSubgroupInfo
# Use extension version of clGetKernelSubGroupInfo when necessary.
set(UNIFIED_RUNTIME_TAG 9937d029c7fdcbf101e89f8515f640c145e059c5)
set(UNIFIED_RUNTIME_TAG 0c814b6e8ff2b87b54286d2b77269ebff480adea)
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
- [__regcall Calling convention](#__regcall-calling-convention)
- [Inline assembly](#inline-assembly)
- [Device aspect](#device-aspect)
- [Device Information Descriptors](#device-information-descriptors)
- [Device queries and conditional dispatching of the code](#device-queries-and-conditional-dispatching-of-the-code)
- [Implementation restrictions](#implementation-restrictions)
- [Features not supported with the ESIMD extension](#features-not-supported-with-the-esimd-extension)
Expand Down Expand Up @@ -1018,6 +1019,11 @@ The new aspect has the following behavior when queried via `device::has()`:
|--------|-------------|
|`aspect::ext_intel_esimd` | Indicates that the device supports the `sycl_ext_intel_esimd` extension as defined in this document. |

## Device Information Descriptors
| Device Descriptors | Return Type | Description |
| ------------------ | ----------- | ----------- |
| `ext::intel::esimd::info::device::has_2d_block_io_support` | bool | Returns the boolean indicating whether 2D load/store/prefetch instructions are supported by the device. |

## Examples
### Vector addition (USM)
```cpp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -552,7 +552,7 @@ Loads and returns a vector `simd<T, N>` where `N` is `BlockWidth * BlockHeight *
`props` - The optional compile-time properties. Only cache hint properties are used.

### Restrictions
* This function is available only for Intel® Data Center GPU Max Series (aka PVC).
* This function is available only for devices with the `ext::intel::esimd::info::device::has_2d_block_io_support` information descriptor value equal to `true`.
* `Cache-hint` properties, if passed, must follow the [rules](#valid-combinations-of-l1-and-l2-cache-hints-for-load-functions) for `load` functions.
* `Transformed` and `Transposed` cannot be set to true at the same time.
* `BlockWidth` * `BlockHeight` * `NBlocks` * sizeof(`T`) must not exceed 2048.
Expand Down Expand Up @@ -598,7 +598,7 @@ Prefetches elements from a memory block of the size `BlockWidth * BlockHeight *
`props` - The compile-time properties, which must specify cache-hints.

### Restrictions
* This function is available only for Intel® Data Center GPU Max Series (aka PVC).
* This function is available only for devices with the `ext::intel::esimd::info::device::has_2d_block_io_support` information descriptor value equal to `true`.
* `Cache-hint` properties must follow the [rules](#valid-combinations-of-l1-and-l2-cache-hints-for-prefetch-functions) for `prefetch` functions.
* `BlockWidth` * `BlockHeight` * `NBlocks` * sizeof(`T`) must not exceed 2048.
* `NBlocks` must be {1,2,4} for `bytes` and `words`, {1,2} for `dwords`, 1 for `qwords`.
Expand Down Expand Up @@ -630,7 +630,7 @@ Stores the vector `Vals` of the type `simd<T, N>` to 2D memory block where `N` i
`props` - The optional compile-time properties. Only cache hint properties are used.

### Restrictions
* This function is available only for Intel® Data Center GPU Max Series (aka PVC).
* This function is available only for devices with the `ext::intel::esimd::info::device::has_2d_block_io_support` information descriptor value equal to `true`.
* `Cache-hint` properties, if passed, must follow the [rules](#valid-combinations-of-l1-and-l2-cache-hints-for-store-functions) for `store` functions.
* `BlockWidth` * `BlockHeight` * sizeof(`T`) must not exceed 512.
* `BlockHeight` must not exceed 8.
Expand Down
1 change: 1 addition & 0 deletions sycl/include/sycl/info/ext_intel_device_traits.def
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ __SYCL_PARAM_TRAITS_SPEC(ext::intel, device, free_memory, uint64_t, UR_DEVICE_IN
__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, memory_clock_rate, uint32_t, UR_DEVICE_INFO_MEMORY_CLOCK_RATE)
__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, memory_bus_width, uint32_t, UR_DEVICE_INFO_MEMORY_BUS_WIDTH)
__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, max_compute_queue_indices, int32_t, UR_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES)
__SYCL_PARAM_TRAITS_SPEC(ext::intel::esimd, device, has_2d_block_io_support, bool, UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP)
#ifdef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC_NEEDS_UNDEF
#undef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC
#undef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC_NEEDS_UNDEF
Expand Down
19 changes: 19 additions & 0 deletions sycl/source/detail/device_info.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1563,6 +1563,25 @@ get_device_info<ext::intel::info::device::memory_bus_width>(
return get_device_info_impl<Param::return_type, Param>::get(Dev);
}

template <>
inline ext::intel::esimd::info::device::has_2d_block_io_support::return_type
get_device_info<ext::intel::esimd::info::device::has_2d_block_io_support>(
const DeviceImplPtr &Dev) {
if (!Dev->has(aspect::ext_intel_esimd))
return false;

ur_exp_device_2d_block_array_capability_flags_t BlockArrayCapabilities;
Dev->getAdapter()->call<UrApiKind::urDeviceGetInfo>(
Dev->getHandleRef(),
UrInfoCode<
ext::intel::esimd::info::device::has_2d_block_io_support>::value,
sizeof(BlockArrayCapabilities), &BlockArrayCapabilities, nullptr);
return (BlockArrayCapabilities &
UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_LOAD) &&
(BlockArrayCapabilities &
UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_STORE);
}

// Returns the list of all progress guarantees that can be requested for
// work_groups from the coordination level of root_group when using the device
// given by Dev. First it calls getProgressGuarantee to get the strongest
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

// Test has_2d_block_io_supported device descriptor for some known
// architectures.

#include <sycl/detail/core.hpp>
#include <sycl/ext/intel/esimd.hpp>

namespace syclex = sycl::ext::oneapi::experimental;

int main() {
sycl::queue Q;
auto Arch = Q.get_device().get_info<syclex::info::device::architecture>();
bool Has2DBlockIOSupport =
Q.get_device()
.get_info<
sycl::ext::intel::esimd::info::device::has_2d_block_io_support>();
if (Arch == syclex::architecture::intel_gpu_pvc) {
if (!Has2DBlockIOSupport) {
std::cerr << "Error: has_2d_block_io_support is expected to be true for "
"PVC architecture"
<< std::endl;
return 1;
}
}
if (Arch == syclex::architecture::intel_gpu_tgllp ||
Arch == syclex::architecture::intel_gpu_dg2_g10 ||
Arch == syclex::architecture::intel_gpu_dg2_g11 ||
Arch == syclex::architecture::intel_gpu_dg2_g12) {
if (Has2DBlockIOSupport) {
std::cerr << "Error: has_2d_block_io_support is expected to be false for "
"Tiger Lake and DG2"
<< std::endl;
return 1;
}
}
return 0;
}
2 changes: 2 additions & 0 deletions sycl/test/abi/sycl_symbols_linux.dump
Original file line number Diff line number Diff line change
Expand Up @@ -3675,6 +3675,7 @@ _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device25gpu_eu_co
_ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device25max_compute_queue_indicesEEENT_11return_typeEv
_ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device4uuidEEENT_11return_typeEv
_ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device9device_idEEENT_11return_typeEv
_ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel5esimd4info6device23has_2d_block_io_supportEEENT_11return_typeEv
_ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext6oneapi12experimental4info6device12architectureEEENT_11return_typeEv
_ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext6oneapi12experimental4info6device15max_work_groupsILi1EEEEENT_11return_typeEv
_ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext6oneapi12experimental4info6device15max_work_groupsILi2EEEEENT_11return_typeEv
Expand Down Expand Up @@ -3780,6 +3781,7 @@ _ZNK4sycl3_V16device13get_info_implINS0_3ext5intel4info6device25gpu_eu_count_per
_ZNK4sycl3_V16device13get_info_implINS0_3ext5intel4info6device25max_compute_queue_indicesEEENS0_6detail11ABINeutralTINS8_19is_device_info_descIT_E11return_typeEE4typeEv
_ZNK4sycl3_V16device13get_info_implINS0_3ext5intel4info6device4uuidEEENS0_6detail11ABINeutralTINS8_19is_device_info_descIT_E11return_typeEE4typeEv
_ZNK4sycl3_V16device13get_info_implINS0_3ext5intel4info6device9device_idEEENS0_6detail11ABINeutralTINS8_19is_device_info_descIT_E11return_typeEE4typeEv
_ZNK4sycl3_V16device13get_info_implINS0_3ext5intel5esimd4info6device23has_2d_block_io_supportEEENS0_6detail11ABINeutralTINS9_19is_device_info_descIT_E11return_typeEE4typeEv
_ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device12architectureEEENS0_6detail11ABINeutralTINS9_19is_device_info_descIT_E11return_typeEE4typeEv
_ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device15max_work_groupsILi1EEEEENS0_6detail11ABINeutralTINSA_19is_device_info_descIT_E11return_typeEE4typeEv
_ZNK4sycl3_V16device13get_info_implINS0_3ext6oneapi12experimental4info6device15max_work_groupsILi2EEEEENS0_6detail11ABINeutralTINSA_19is_device_info_descIT_E11return_typeEE4typeEv
Expand Down
2 changes: 2 additions & 0 deletions sycl/test/abi/sycl_symbols_windows.dump
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
??$get_info@Ugpu_hw_threads_per_eu@device@info@intel@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBAIXZ
??$get_info@Ugpu_slices@device@info@intel@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBAIXZ
??$get_info@Ugpu_subslices_per_slice@device@info@intel@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBAIXZ
??$get_info@Uhas_2d_block_io_support@device@info@esimd@intel@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA_NXZ
??$get_info@Uimage_row_pitch_align@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBAIXZ
??$get_info@Umatrix_combinations@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AV?$vector@Ucombination@matrix@experimental@oneapi@ext@_V1@sycl@@V?$allocator@Ucombination@matrix@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ
??$get_info@Umax_compute_queue_indices@device@info@intel@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBAHXZ
Expand Down Expand Up @@ -156,6 +157,7 @@
??$get_info_impl@Ugpu_slices@device@info@intel@ext@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ
??$get_info_impl@Ugpu_subslices_per_slice@device@info@intel@ext@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ
??$get_info_impl@Uhalf_fp_config@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4fp_config@info@_V1@sycl@@V?$allocator@W4fp_config@info@_V1@sycl@@@std@@@std@@XZ
??$get_info_impl@Uhas_2d_block_io_support@device@info@esimd@intel@ext@_V1@sycl@@@device@_V1@sycl@@AEBA_NXZ
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this shouldnt be abi breaking right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, adding a new symbol is not a breaking change.

??$get_info_impl@Uhost_unified_memory@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA_NXZ
??$get_info_impl@Uimage2d_max_height@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA_KXZ
??$get_info_impl@Uimage2d_max_width@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA_KXZ
Expand Down
53 changes: 53 additions & 0 deletions sycl/unittests/kernel-and-program/DeviceInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ struct TestCtx {

static std::unique_ptr<TestCtx> TestContext;

ur_exp_device_2d_block_array_capability_flags_t Flags2DBlockIO = 0;
bool HasESIMDSupport = false;

static ur_result_t redefinedDeviceGetInfo(void *pParams) {
auto params = *static_cast<ur_device_get_info_params_t *>(pParams);
if (*params.ppropName == UR_DEVICE_INFO_UUID) {
Expand Down Expand Up @@ -72,6 +75,20 @@ static ur_result_t redefinedDeviceGetInfo(void *pParams) {
}
}

if (*params.ppropName == UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP) {
assert(*params.ppropSize ==
sizeof(ur_exp_device_2d_block_array_capability_flags_t));
if (*params.ppPropValue) {
*static_cast<ur_exp_device_2d_block_array_capability_flags_t *>(
*params.ppPropValue) = Flags2DBlockIO;
}
}

if (*params.ppropName == UR_DEVICE_INFO_ESIMD_SUPPORT) {
assert(*params.ppropSize == sizeof(bool));
if (*params.ppPropValue)
*static_cast<bool *>(*params.ppPropValue) = HasESIMDSupport;
}
return UR_RESULT_SUCCESS;
}

Expand Down Expand Up @@ -185,6 +202,42 @@ TEST_F(DeviceInfoTest, GetDeviceMemoryBusWidth) {
<< "Expect memory_bus_width to be of uint32_t size";
}

TEST_F(DeviceInfoTest, GetDeviceESIMD2DBlockIOSupport) {
context Ctx{Plt.get_devices()[0]};
TestContext.reset(new TestCtx(Ctx));

device Dev = Ctx.get_devices()[0];

HasESIMDSupport = true;
Flags2DBlockIO = UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_LOAD |
UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_STORE;
auto HasSupport =
Dev.get_info<ext::intel::esimd::info::device::has_2d_block_io_support>();
EXPECT_TRUE(HasSupport);

Flags2DBlockIO = UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_LOAD;
HasSupport =
Dev.get_info<ext::intel::esimd::info::device::has_2d_block_io_support>();
EXPECT_FALSE(HasSupport);

Flags2DBlockIO = UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_STORE;
HasSupport =
Dev.get_info<ext::intel::esimd::info::device::has_2d_block_io_support>();
EXPECT_FALSE(HasSupport);

Flags2DBlockIO = 0;
HasSupport =
Dev.get_info<ext::intel::esimd::info::device::has_2d_block_io_support>();
EXPECT_FALSE(HasSupport);

Flags2DBlockIO = UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_LOAD |
UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_STORE;
HasESIMDSupport = false;
HasSupport =
Dev.get_info<ext::intel::esimd::info::device::has_2d_block_io_support>();
EXPECT_FALSE(HasSupport);
}

TEST_F(DeviceInfoTest, BuiltInKernelIDs) {
context Ctx{Plt.get_devices()[0]};
TestContext.reset(new TestCtx(Ctx));
Expand Down
Loading