Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 26 additions & 1 deletion cmake/FetchLevelZero.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ set(UR_LEVEL_ZERO_LOADER_LIBRARY "" CACHE FILEPATH "Path of the Level Zero Loade
# User-overridable cache entries for locating / fetching Level Zero related sources.
# An empty UR_COMPUTE_RUNTIME_* value is replaced by a built-in default later in this file.
#
# The include location is a directory, so it is declared with cache type PATH
# (FILEPATH is meant for a single file and makes GUIs offer a file chooser).
set(UR_LEVEL_ZERO_INCLUDE_DIR "" CACHE PATH "Directory containing the Level Zero Headers")
set(UR_LEVEL_ZERO_LOADER_REPO "" CACHE STRING "Github repo to get the Level Zero loader sources from")
set(UR_LEVEL_ZERO_LOADER_TAG "" CACHE STRING " GIT tag of the Level Zero Loader taken from github repo")
set(UR_COMPUTE_RUNTIME_REPO "" CACHE STRING "Github repo to get the compute runtime sources from")
set(UR_COMPUTE_RUNTIME_TAG "" CACHE STRING " GIT tag of the compute runtime taken from github repo")

# Copy Level Zero loader/headers locally to the build to avoid leaking their path.
set(LEVEL_ZERO_COPY_DIR ${CMAKE_CURRENT_BINARY_DIR}/level_zero_loader)
Expand Down Expand Up @@ -87,8 +89,31 @@ target_link_libraries(LevelZeroLoader
INTERFACE "${LEVEL_ZERO_LIB_NAME}"
)

# Collect the *.h files that sit directly in the Level Zero include directory ...
file(GLOB LEVEL_ZERO_LOADER_API_HEADERS "${LEVEL_ZERO_INCLUDE_DIR}/*.h")
# ... and copy them into a level_zero/ subdirectory of that same include directory.
file(COPY ${LEVEL_ZERO_LOADER_API_HEADERS} DESTINATION ${LEVEL_ZERO_INCLUDE_DIR}/level_zero)
# Interface-only target: it compiles nothing and only carries include paths for its consumers.
add_library(LevelZeroLoader-Headers INTERFACE)
# Build-tree consumers get the include directory and its level_zero/ subdirectory;
# installed consumers get ${CMAKE_INSTALL_INCLUDEDIR}.
# NOTE(review): there are two BUILD_INTERFACE entries and the path in the first is repeated
# inside the second — this looks like the old and new line of a diff shown together; confirm
# which one the real file keeps.
target_include_directories(LevelZeroLoader-Headers
INTERFACE "$<BUILD_INTERFACE:${LEVEL_ZERO_INCLUDE_DIR}>"
INTERFACE "$<BUILD_INTERFACE:${LEVEL_ZERO_INCLUDE_DIR};${LEVEL_ZERO_INCLUDE_DIR}/level_zero>"
"$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>"
)

include(FetchContent)

# Use the upstream compute-runtime repository and a pinned release tag unless
# the user supplied non-empty overrides.
if(UR_COMPUTE_RUNTIME_REPO STREQUAL "")
    set(UR_COMPUTE_RUNTIME_REPO "https://github.com/intel/compute-runtime.git")
endif()
if(UR_COMPUTE_RUNTIME_TAG STREQUAL "")
    set(UR_COMPUTE_RUNTIME_TAG "24.39.31294.12")
endif()

# Sparse fetch: only the directory holding the Level Zero headers is requested,
# so the rest of compute-runtime is not pulled in.
FetchContentSparse_Declare(
    compute-runtime-level-zero-headers
    ${UR_COMPUTE_RUNTIME_REPO}
    "${UR_COMPUTE_RUNTIME_TAG}"
    "level_zero/include"
)
FetchContent_GetProperties(compute-runtime-level-zero-headers)
if(NOT compute-runtime-level-zero-headers_POPULATED)
    FetchContent_Populate(compute-runtime-level-zero-headers)
endif()

# The include root is taken two directory levels above the populated source dir.
# NOTE(review): presumably the source dir ends in level_zero/include, so that
# sources can write #include <level_zero/include/...> — confirm against
# FetchContentSparse_Declare.
set(COMPUTE_RUNTIME_LEVEL_ZERO_INCLUDE
    "${compute-runtime-level-zero-headers_SOURCE_DIR}/../..")
message(STATUS "Level Zero Adapter: Using Level Zero headers from ${COMPUTE_RUNTIME_LEVEL_ZERO_INCLUDE}")

# Interface-only target that exposes the fetched headers to its consumers.
add_library(ComputeRuntimeLevelZero-Headers INTERFACE)
target_include_directories(ComputeRuntimeLevelZero-Headers INTERFACE
    "$<BUILD_INTERFACE:${COMPUTE_RUNTIME_LEVEL_ZERO_INCLUDE}>"
    "$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>"
)
25 changes: 24 additions & 1 deletion include/ur_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -1705,6 +1705,8 @@ typedef enum ur_device_info_t {
UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP = 0x2020, ///< [::ur_bool_t] returns true if the device supports enqueueing of native
///< work
UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP = 0x2021, ///< [::ur_bool_t] returns true if the device supports low-power events.
UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP = 0x2022, ///< [::ur_exp_device_2d_block_array_capability_flags_t] return a bit-field
///< of Intel GPU 2D block array capabilities
/// @cond
UR_DEVICE_INFO_FORCE_UINT32 = 0x7fffffff
/// @endcond
Expand All @@ -1730,7 +1732,7 @@ typedef enum ur_device_info_t {
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
/// + `NULL == hDevice`
/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION
/// + `::UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP < propName`
/// + `::UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP < propName`
/// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION
/// + If `propName` is not supported by the adapter.
/// - ::UR_RESULT_ERROR_INVALID_SIZE
Expand Down Expand Up @@ -7428,6 +7430,27 @@ urEnqueueWriteHostPipe(
///< an element of the phEventWaitList array.
);

#if !defined(__GNUC__)
#pragma endregion
#endif
// Intel 'oneAPI' Unified Runtime Experimental device descriptor for querying Intel device 2D block array capabilities
#if !defined(__GNUC__)
#pragma region 2d_block_array_capabilities_(experimental)
#endif
///////////////////////////////////////////////////////////////////////////////
/// @brief Intel GPU 2D block array capabilities
/// @details 32-bit bit-field type; the individual bits are the enumerators of
///          ::ur_exp_device_2d_block_array_capability_flag_t declared below.
typedef uint32_t ur_exp_device_2d_block_array_capability_flags_t;
typedef enum ur_exp_device_2d_block_array_capability_flag_t {
UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_LOAD = UR_BIT(0), ///< Load instructions are supported
UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_STORE = UR_BIT(1), ///< Store instructions are supported
/// @cond
// NOTE(review): 0x7fffffff sentinel — presumably there to force a 32-bit wide enum; confirm.
UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_FORCE_UINT32 = 0x7fffffff
/// @endcond

} ur_exp_device_2d_block_array_capability_flag_t;
/// @brief Bit Mask for validating ur_exp_device_2d_block_array_capability_flags_t
/// @details 0xfffffffc has every bit set except bits 0 and 1, i.e. exactly the
///          bits that are not assigned to LOAD or STORE above.
///          NOTE(review): presumably a flags value is rejected when
///          (value & MASK) != 0 — confirm against the validation code.
#define UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAGS_MASK 0xfffffffc

#if !defined(__GNUC__)
#pragma endregion
#endif
Expand Down
8 changes: 8 additions & 0 deletions include/ur_print.h
Original file line number Diff line number Diff line change
Expand Up @@ -874,6 +874,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintMapFlags(enum ur_map_flag_t value, ch
/// - `buff_size < out_size`
UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmMigrationFlags(enum ur_usm_migration_flag_t value, char *buffer, const size_t buff_size, size_t *out_size);

///////////////////////////////////////////////////////////////////////////////
/// @brief Print ur_exp_device_2d_block_array_capability_flag_t enum
/// @returns
/// - ::UR_RESULT_SUCCESS
/// - ::UR_RESULT_ERROR_INVALID_SIZE
/// - `buff_size < out_size`
UR_APIEXPORT ur_result_t UR_APICALL urPrintExpDevice_2dBlockArrayCapabilityFlags(enum ur_exp_device_2d_block_array_capability_flag_t value, char *buffer, const size_t buff_size, size_t *out_size);

///////////////////////////////////////////////////////////////////////////////
/// @brief Print ur_exp_image_copy_flag_t enum
/// @returns
Expand Down
78 changes: 78 additions & 0 deletions include/ur_print.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,9 @@ inline ur_result_t printFlag<ur_map_flag_t>(std::ostream &os, uint32_t flag);
template <>
inline ur_result_t printFlag<ur_usm_migration_flag_t>(std::ostream &os, uint32_t flag);

template <>
inline ur_result_t printFlag<ur_exp_device_2d_block_array_capability_flag_t>(std::ostream &os, uint32_t flag);

template <>
inline ur_result_t printFlag<ur_exp_image_copy_flag_t>(std::ostream &os, uint32_t flag);

Expand Down Expand Up @@ -328,6 +331,7 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct
inline std::ostream &operator<<(std::ostream &os, enum ur_execution_info_t value);
inline std::ostream &operator<<(std::ostream &os, enum ur_map_flag_t value);
inline std::ostream &operator<<(std::ostream &os, enum ur_usm_migration_flag_t value);
inline std::ostream &operator<<(std::ostream &os, enum ur_exp_device_2d_block_array_capability_flag_t value);
inline std::ostream &operator<<(std::ostream &os, enum ur_exp_image_copy_flag_t value);
inline std::ostream &operator<<(std::ostream &os, enum ur_exp_sampler_cubemap_filter_mode_t value);
inline std::ostream &operator<<(std::ostream &os, enum ur_exp_external_mem_type_t value);
Expand Down Expand Up @@ -2665,6 +2669,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) {
case UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP:
os << "UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP";
break;
case UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP:
os << "UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP";
break;
default:
os << "unknown enumerator";
break;
Expand Down Expand Up @@ -4472,6 +4479,19 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_device_info

os << ")";
} break;
case UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP: {
const ur_exp_device_2d_block_array_capability_flags_t *tptr = (const ur_exp_device_2d_block_array_capability_flags_t *)ptr;
if (sizeof(ur_exp_device_2d_block_array_capability_flags_t) > size) {
os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_exp_device_2d_block_array_capability_flags_t) << ")";
return UR_RESULT_ERROR_INVALID_SIZE;
}
os << (const void *)(tptr) << " (";

ur::details::printFlag<ur_exp_device_2d_block_array_capability_flag_t>(os,
*tptr);

os << ")";
} break;
default:
os << "unknown enumerator";
return UR_RESULT_ERROR_INVALID_ENUMERATION;
Expand Down Expand Up @@ -9455,6 +9475,64 @@ inline ur_result_t printFlag<ur_usm_migration_flag_t>(std::ostream &os, uint32_t
}
} // namespace ur::details
///////////////////////////////////////////////////////////////////////////////
/// @brief Stream the symbolic name of a ur_exp_device_2d_block_array_capability_flag_t value
/// @details Any value other than the LOAD and STORE enumerators is written as
///          "unknown enumerator".
/// @returns
///     std::ostream &
inline std::ostream &operator<<(std::ostream &os, enum ur_exp_device_2d_block_array_capability_flag_t value) {
    // Start from the fallback text and overwrite it when the value is recognised.
    const char *name = "unknown enumerator";
    if (value == UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_LOAD) {
        name = "UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_LOAD";
    } else if (value == UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_STORE) {
        name = "UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_STORE";
    }
    os << name;
    return os;
}

namespace ur::details {
///////////////////////////////////////////////////////////////////////////////
/// @brief Print a ur_exp_device_2d_block_array_capability_flags_t bit-field
/// @details Known flags are written by name, separated by " | ". Bits that do
///          not correspond to a known flag are written as
///          "unknown bit flags <32-digit binary>". A value of zero prints "0".
/// @returns
///     - ::UR_RESULT_SUCCESS (always)
template <>
inline ur_result_t printFlag<ur_exp_device_2d_block_array_capability_flag_t>(std::ostream &os, uint32_t flag) {
    // Flags are tested in declaration order; each recognised bit is cleared
    // from `remaining` so that only unrecognised bits are left at the end.
    const ur_exp_device_2d_block_array_capability_flag_t knownFlags[] = {
        UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_LOAD,
        UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_STORE,
    };

    uint32_t remaining = flag;
    bool printedAny = false;

    for (const auto known : knownFlags) {
        const uint32_t bit = static_cast<uint32_t>(known);
        if ((remaining & bit) != bit) {
            continue;
        }
        remaining ^= bit;
        if (printedAny) {
            os << " | ";
        }
        printedAny = true;
        os << known;
    }

    if (remaining != 0) {
        if (printedAny) {
            os << " | ";
        }
        os << "unknown bit flags " << std::bitset<32>(remaining);
    } else if (!printedAny) {
        os << "0";
    }
    return UR_RESULT_SUCCESS;
}
} // namespace ur::details
///////////////////////////////////////////////////////////////////////////////
/// @brief Print operator for the ur_exp_image_copy_flag_t type
/// @returns
/// std::ostream &
Expand Down
62 changes: 62 additions & 0 deletions scripts/core/EXP-2D-BLOCK-ARRAY-CAPABILITIES.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
<%
OneApi=tags['$OneApi']
x=tags['$x']
X=x.upper()
%>

.. _experimental-2D-block-array-capabilities:

================================================================================
2D Block Array Capabilities
================================================================================

.. warning::

Experimental features:

* May be replaced, updated, or removed at any time.
* Do not require maintaining API/ABI stability of their own additions over
time.
* Do not require conformance testing of their own additions.


Motivation
--------------------------------------------------------------------------------
Some Intel GPU devices support 2D block array operations, which can be used to optimize applications running on those devices.
This extension provides a device descriptor for querying the 2D block array capabilities of a device.

API
--------------------------------------------------------------------------------

Enums
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

* ${x}_device_info_t
* ${X}_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP

* ${x}_exp_device_2d_block_array_capability_flags_t
* ${X}_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_LOAD
* ${X}_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_STORE

Changelog
--------------------------------------------------------------------------------

+-----------+------------------------+
| Revision | Changes |
+===========+========================+
| 1.0 | Initial Draft |
+-----------+------------------------+


Support
--------------------------------------------------------------------------------

Adapters which support this experimental feature *must* return ${X}_RESULT_SUCCESS from
the ${x}DeviceGetInfo call with the new ${X}_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP
device descriptor.


Contributors
--------------------------------------------------------------------------------

* Artur Gainullin `[email protected] <[email protected]>`_
36 changes: 36 additions & 0 deletions scripts/core/exp-2d-block-array-capabilities.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#
# Copyright (C) 2024 Intel Corporation
#
# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
# See LICENSE.TXT
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# See YaML.md for syntax definition
#
--- #--------------------------------------------------------------------------
type: header
desc: "Intel $OneApi Unified Runtime Experimental device descriptor for querying Intel device 2D block array capabilities"
ordinal: "99"
--- #--------------------------------------------------------------------------
type: enum
extend: true
typed_etors: true
desc: "Extension enum to $x_device_info_t to query Intel device 2D block array capabilities."
name: $x_device_info_t
etors:
- name: 2D_BLOCK_ARRAY_CAPABILITIES_EXP
value: "0x2022"
desc: "[$x_exp_device_2d_block_array_capability_flags_t] return a bit-field of Intel GPU 2D block array capabilities"
--- #--------------------------------------------------------------------------
type: enum
desc: "Intel GPU 2D block array capabilities"
class: $xDevice
name: $x_exp_device_2d_block_array_capability_flags_t
etors:
- name: LOAD
desc: "Load instructions are supported"
value: "$X_BIT(0)"
- name: STORE
desc: "Store instructions are supported"
value: "$X_BIT(1)"

4 changes: 3 additions & 1 deletion source/adapters/cuda/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1088,7 +1088,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
case UR_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE:
case UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU:
return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;

case UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP:
return ReturnValue(
static_cast<ur_exp_device_2d_block_array_capability_flags_t>(0));
case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP:
case UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP:
return ReturnValue(true);
Expand Down
3 changes: 3 additions & 0 deletions source/adapters/hip/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -905,6 +905,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
case UR_DEVICE_INFO_IL_VERSION:
case UR_DEVICE_INFO_ASYNC_BARRIER:
return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
case UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP:
return ReturnValue(
static_cast<ur_exp_device_2d_block_array_capability_flags_t>(0));
case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: {
int DriverVersion = 0;
UR_CHECK_ERROR(hipDriverGetVersion(&DriverVersion));
Expand Down
4 changes: 3 additions & 1 deletion source/adapters/level_zero/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ if(UR_BUILD_ADAPTER_L0)
# 'utils' target from 'level-zero-loader' includes path which is prefixed
# in the source directory, this breaks the installation of 'utils' target.
set_target_properties(utils PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "")
install(TARGETS ur_umf LevelZeroLoader LevelZeroLoader-Headers ze_loader utils
install(TARGETS ur_umf LevelZeroLoader LevelZeroLoader-Headers ComputeRuntimeLevelZero-Headers ze_loader utils
EXPORT ${PROJECT_NAME}-targets
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
Expand Down Expand Up @@ -109,6 +109,7 @@ if(UR_BUILD_ADAPTER_L0)
${PROJECT_NAME}::umf
LevelZeroLoader
LevelZeroLoader-Headers
ComputeRuntimeLevelZero-Headers
)

target_include_directories(ur_adapter_level_zero PRIVATE
Expand Down Expand Up @@ -203,6 +204,7 @@ if(UR_BUILD_ADAPTER_L0_V2)
${PROJECT_NAME}::umf
LevelZeroLoader
LevelZeroLoader-Headers
ComputeRuntimeLevelZero-Headers
)

target_include_directories(ur_adapter_level_zero_v2 PRIVATE
Expand Down
9 changes: 9 additions & 0 deletions source/adapters/level_zero/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "common.hpp"
#include "logger/ur_logger.hpp"
#include "usm.hpp"
#include <level_zero/include/ze_intel_gpu.h>

ur_result_t ze2urResult(ze_result_t ZeResult) {
if (ZeResult == ZE_RESULT_SUCCESS)
Expand Down Expand Up @@ -330,6 +331,14 @@ template <> zes_structure_type_t getZesStructureType<zes_mem_properties_t>() {
return ZES_STRUCTURE_TYPE_MEM_PROPERTIES;
}

// Compiled only when ZE_INTEL_DEVICE_BLOCK_ARRAY_EXP_NAME is defined.
// NOTE(review): presumably that macro comes from <level_zero/include/ze_intel_gpu.h>
// when the headers provide the block-array extension — confirm.
#ifdef ZE_INTEL_DEVICE_BLOCK_ARRAY_EXP_NAME
// Specialization that maps ze_intel_device_block_array_exp_properties_t to its
// structure-type tag, ZE_INTEL_DEVICE_BLOCK_ARRAY_EXP_PROPERTIES.
template <>
ze_structure_type_t
getZeStructureType<ze_intel_device_block_array_exp_properties_t>() {
return ZE_INTEL_DEVICE_BLOCK_ARRAY_EXP_PROPERTIES;
}
#endif // ZE_INTEL_DEVICE_BLOCK_ARRAY_EXP_NAME

// Global variables for ZER_EXT_RESULT_ADAPTER_SPECIFIC_ERROR
thread_local ur_result_t ErrorMessageCode = UR_RESULT_SUCCESS;
thread_local char ErrorMessage[MaxMessageSize];
Expand Down
Loading
Loading