From 1433f042f17d2a3f1986bf7b677cae3e0bfba5a0 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Tue, 2 Jul 2024 18:02:46 +0100 Subject: [PATCH 01/12] Add initial spec for tensor map APIs --- include/ur_api.h | 242 ++++++ include/ur_ddi.h | 66 ++ include/ur_print.h | 56 ++ include/ur_print.hpp | 717 ++++++++++++++++++ scripts/core/EXP-TENSOR-MAP.rst | 69 ++ scripts/core/exp-tensor-map.yml | 207 +++++ scripts/core/registry.yml | 6 + source/adapters/adapter.def.in | 1 + source/adapters/adapter.map.in | 1 + source/adapters/mock/ur_mockddi.cpp | 198 +++++ source/loader/layers/tracing/ur_trcddi.cpp | 194 +++++ source/loader/layers/validation/ur_valddi.cpp | 259 +++++++ source/loader/loader.def.in | 10 + source/loader/loader.map.in | 10 + source/loader/ur_ldrddi.cpp | 200 +++++ source/loader/ur_ldrddi.hpp | 5 + source/loader/ur_libapi.cpp | 148 ++++ source/loader/ur_libddi.cpp | 5 + source/loader/ur_print.cpp | 59 ++ source/ur_api.cpp | 127 ++++ 20 files changed, 2580 insertions(+) create mode 100644 scripts/core/EXP-TENSOR-MAP.rst create mode 100644 scripts/core/exp-tensor-map.yml diff --git a/include/ur_api.h b/include/ur_api.h index eb8b07221c..13334a9c8e 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -231,6 +231,8 @@ typedef enum ur_function_t { UR_FUNCTION_COMMAND_BUFFER_UPDATE_WAIT_EVENTS_EXP = 244, ///< Enumerator for ::urCommandBufferUpdateWaitEventsExp UR_FUNCTION_BINDLESS_IMAGES_MAP_EXTERNAL_LINEAR_MEMORY_EXP = 245, ///< Enumerator for ::urBindlessImagesMapExternalLinearMemoryExp UR_FUNCTION_ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT = 246, ///< Enumerator for ::urEnqueueEventsWaitWithBarrierExt + UR_FUNCTION_TENSOR_MAP_ENCODE_IM_2_COL_EXP = 230, ///< Enumerator for ::urTensorMapEncodeIm2ColExp + UR_FUNCTION_TENSOR_MAP_ENCODE_TILED_EXP = 231, ///< Enumerator for ::urTensorMapEncodeTiledExp /// @cond UR_FUNCTION_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -10161,6 +10163,203 @@ urEnqueueNativeCommandExp( ///< not NULL, phEvent must not refer to an element of the 
phEventWaitList array. ); +#if !defined(__GNUC__) +#pragma endregion +#endif +// Intel 'oneAPI' Unified Runtime Experimental API for mapping tensor objects +#if !defined(__GNUC__) +#pragma region tensor map(experimental) +#endif +/////////////////////////////////////////////////////////////////////////////// +/// @brief Handle of tensor map object +typedef struct ur_exp_tensor_map_handle_t_ *ur_exp_tensor_map_handle_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Tensor map data type +typedef uint32_t ur_exp_tensor_map_data_type_flags_t; +typedef enum ur_exp_tensor_map_data_type_flag_t { + UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT8 = UR_BIT(0), ///< 1 byte + UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT16 = UR_BIT(1), ///< 2 bytes + UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT32 = UR_BIT(2), ///< 4 bytes + UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT32 = UR_BIT(3), ///< 4 bytes + UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT64 = UR_BIT(4), ///< 8 bytes + UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT64 = UR_BIT(5), ///< 8 bytes + UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT16 = UR_BIT(6), ///< 2 bytes + UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32 = UR_BIT(7), ///< 4 bytes + UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT64 = UR_BIT(8), ///< 8 bytes + UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_BFLOAT16 = UR_BIT(9), ///< 2 bytes + UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32_FTZ = UR_BIT(10), ///< 4 bytes + UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32 = UR_BIT(11), ///< 4 bytes + UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32_FTZ = UR_BIT(12), ///< 4 bytes + /// @cond + UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FORCE_UINT32 = 0x7fffffff + /// @endcond + +} ur_exp_tensor_map_data_type_flag_t; +/// @brief Bit Mask for validating ur_exp_tensor_map_data_type_flags_t +#define UR_EXP_TENSOR_MAP_DATA_TYPE_FLAGS_MASK 0xffffe000 + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Tensor map interleave +typedef uint32_t ur_exp_tensor_map_interleave_flags_t; +typedef 
enum ur_exp_tensor_map_interleave_flag_t { + UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_NONE = UR_BIT(0), ///< No interleave + UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_16B = UR_BIT(1), ///< 16B interleave + UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_32B = UR_BIT(2), ///< 32B interleave + /// @cond + UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_FORCE_UINT32 = 0x7fffffff + /// @endcond + +} ur_exp_tensor_map_interleave_flag_t; +/// @brief Bit Mask for validating ur_exp_tensor_map_interleave_flags_t +#define UR_EXP_TENSOR_MAP_INTERLEAVE_FLAGS_MASK 0xfffffff8 + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Tensor map l2 promotion +typedef uint32_t ur_exp_tensor_map_l2_promotion_flags_t; +typedef enum ur_exp_tensor_map_l2_promotion_flag_t { + UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_NONE = UR_BIT(0), ///< No promotion type + UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_64B = UR_BIT(1), ///< 64B promotion type + UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_128B = UR_BIT(2), ///< 128B promotion type + UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_256B = UR_BIT(3), ///< 256B promotion type + /// @cond + UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_FORCE_UINT32 = 0x7fffffff + /// @endcond + +} ur_exp_tensor_map_l2_promotion_flag_t; +/// @brief Bit Mask for validating ur_exp_tensor_map_l2_promotion_flags_t +#define UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAGS_MASK 0xfffffff0 + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Tensor map swizzle +typedef uint32_t ur_exp_tensor_map_swizzle_flags_t; +typedef enum ur_exp_tensor_map_swizzle_flag_t { + UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_NONE = UR_BIT(0), ///< No swizzle + UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_32B = UR_BIT(1), ///< 32B swizzle + UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_64B = UR_BIT(2), ///< 64B swizzle + UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_128B = UR_BIT(3), ///< 128B swizzle + /// @cond + UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_FORCE_UINT32 = 0x7fffffff + /// @endcond + +} ur_exp_tensor_map_swizzle_flag_t; +/// @brief Bit Mask for validating 
ur_exp_tensor_map_swizzle_flags_t +#define UR_EXP_TENSOR_MAP_SWIZZLE_FLAGS_MASK 0xfffffff0 + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Tensor map OOB fill +typedef uint32_t ur_exp_tensor_map_oob_fill_flags_t; +typedef enum ur_exp_tensor_map_oob_fill_flag_t { + UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_NONE = UR_BIT(0), ///< No OOB fill + UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_REQUEST_ZERO_FMA = UR_BIT(1), ///< Refer to NVIDIA docs + /// @cond + UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_FORCE_UINT32 = 0x7fffffff + /// @endcond + +} ur_exp_tensor_map_oob_fill_flag_t; +/// @brief Bit Mask for validating ur_exp_tensor_map_oob_fill_flags_t +#define UR_EXP_TENSOR_MAP_OOB_FILL_FLAGS_MASK 0xfffffffc + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Encode tensor map with image data +/// +/// @details +/// - Map encode using im2col. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_EXP_TENSOR_MAP_DATA_TYPE_FLAGS_MASK & TensorMapType` +/// + `::UR_EXP_TENSOR_MAP_INTERLEAVE_FLAGS_MASK & Interleave` +/// + `::UR_EXP_TENSOR_MAP_SWIZZLE_FLAGS_MASK & Swizzle` +/// + `::UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAGS_MASK & L2Promotion` +/// + `::UR_EXP_TENSOR_MAP_OOB_FILL_FLAGS_MASK & OobFill` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == GlobalAddress` +/// + `NULL == GlobalDim` +/// + `NULL == GlobalStrides` +/// + `NULL == PixelBoxLowerCorner` +/// + `NULL == PixelBoxUpperCorner` +/// + `NULL == ElementStrides` +/// + `NULL == hTensorMap` +UR_APIEXPORT ur_result_t UR_APICALL +urTensorMapEncodeIm2ColExp( + ur_device_handle_t hDevice, ///< [in] Handle of the device object. + ur_exp_tensor_map_data_type_flags_t TensorMapType, ///< [in] Data type of the tensor object. 
+ uint32_t TensorRank, ///< [in] Dimensionality of tensor; must be at least 3. + void *GlobalAddress, ///< [in] Starting address of memory region described by tensor. + const uint64_t *GlobalDim, ///< [in] Array containing tensor size (number of elements) along each of + ///< the TensorRank dimensions. + const uint64_t *GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the + ///< tensorRank - 1 dimensions. + const int *PixelBoxLowerCorner, ///< [in] Array containing DHW dimensions of lower box corner. + const int *PixelBoxUpperCorner, ///< [in] Array containing DHW dimensions of upper box corner. + uint32_t ChannelsPerPixel, ///< [in] Number of channels per pixel. + uint32_t PixelsPerColumn, ///< [in] Number of pixels per column. + const uint32_t *ElementStrides, ///< [in] Array containing traversal stride in each of the tensorRank + ///< dimensions. + ur_exp_tensor_map_interleave_flags_t Interleave, ///< [in] Type of interleaved layout the tensor addresses + ur_exp_tensor_map_swizzle_flags_t Swizzle, ///< [in] Bank swizzling pattern inside shared memory + ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, ///< [in] L2 promotion size. + ur_exp_tensor_map_oob_fill_flags_t OobFill, ///< [in] Indicate whether zero or special NaN constant will be used to + ///< fill out-of-bound elements. + ur_exp_tensor_map_handle_t *hTensorMap ///< [out] Handle of the tensor map object. +); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Encode tensor map with tiled data +/// +/// @details +/// - Tiled map encode. 
+/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_EXP_TENSOR_MAP_DATA_TYPE_FLAGS_MASK & TensorMapType` +/// + `::UR_EXP_TENSOR_MAP_INTERLEAVE_FLAGS_MASK & Interleave` +/// + `::UR_EXP_TENSOR_MAP_SWIZZLE_FLAGS_MASK & Swizzle` +/// + `::UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAGS_MASK & L2Promotion` +/// + `::UR_EXP_TENSOR_MAP_OOB_FILL_FLAGS_MASK & OobFill` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == GlobalAddress` +/// + `NULL == GlobalDim` +/// + `NULL == GlobalStrides` +/// + `NULL == BoxDim` +/// + `NULL == ElementStrides` +/// + `NULL == hTensorMap` +UR_APIEXPORT ur_result_t UR_APICALL +urTensorMapEncodeTiledExp( + ur_device_handle_t hDevice, ///< [in] Handle of the device object. + ur_exp_tensor_map_data_type_flags_t TensorMapType, ///< [in] Data type of the tensor object. + uint32_t TensorRank, ///< [in] Dimensionality of tensor; must be at least 3. + void *GlobalAddress, ///< [in] Starting address of memory region described by tensor. + const uint64_t *GlobalDim, ///< [in] Array containing tensor size (number of elements) along each of + ///< the TensorRank dimensions. + const uint64_t *GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the + ///< tensorRank - 1 dimensions. + const uint32_t *BoxDim, ///< [in] Array containing traversal box size (number of elements) along + ///< each of the tensorRank dimensions. Specifies how many elements to be + ///< traversed along each tensor dimension. + const uint32_t *ElementStrides, ///< [in] Array containing traversal stride in each of the tensorRank + ///< dimensions. 
+ ur_exp_tensor_map_interleave_flags_t Interleave, ///< [in] Type of interleaved layout the tensor addresses + ur_exp_tensor_map_swizzle_flags_t Swizzle, ///< [in] Bank swizzling pattern inside shared memory + ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, ///< [in] L2 promotion size. + ur_exp_tensor_map_oob_fill_flags_t OobFill, ///< [in] Indicate whether zero or special NaN constant will be used to + ///< fill out-of-bound elements. + ur_exp_tensor_map_handle_t *hTensorMap ///< [out] Handle of the tensor map object. +); + #if !defined(__GNUC__) #pragma endregion #endif @@ -12333,6 +12532,49 @@ typedef struct ur_command_buffer_command_get_info_exp_params_t { size_t **ppPropSizeRet; } ur_command_buffer_command_get_info_exp_params_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urTensorMapEncodeIm2ColExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_tensor_map_encode_im_2_col_exp_params_t { + ur_device_handle_t *phDevice; + ur_exp_tensor_map_data_type_flags_t *pTensorMapType; + uint32_t *pTensorRank; + void **pGlobalAddress; + const uint64_t **pGlobalDim; + const uint64_t **pGlobalStrides; + const int **pPixelBoxLowerCorner; + const int **pPixelBoxUpperCorner; + uint32_t *pChannelsPerPixel; + uint32_t *pPixelsPerColumn; + const uint32_t **pElementStrides; + ur_exp_tensor_map_interleave_flags_t *pInterleave; + ur_exp_tensor_map_swizzle_flags_t *pSwizzle; + ur_exp_tensor_map_l2_promotion_flags_t *pL2Promotion; + ur_exp_tensor_map_oob_fill_flags_t *pOobFill; + ur_exp_tensor_map_handle_t **phTensorMap; +} ur_tensor_map_encode_im_2_col_exp_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urTensorMapEncodeTiledExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// 
allowing the callback the ability to modify the parameter's value +typedef struct ur_tensor_map_encode_tiled_exp_params_t { + ur_device_handle_t *phDevice; + ur_exp_tensor_map_data_type_flags_t *pTensorMapType; + uint32_t *pTensorRank; + void **pGlobalAddress; + const uint64_t **pGlobalDim; + const uint64_t **pGlobalStrides; + const uint32_t **pBoxDim; + const uint32_t **pElementStrides; + ur_exp_tensor_map_interleave_flags_t *pInterleave; + ur_exp_tensor_map_swizzle_flags_t *pSwizzle; + ur_exp_tensor_map_l2_promotion_flags_t *pL2Promotion; + ur_exp_tensor_map_oob_fill_flags_t *pOobFill; + ur_exp_tensor_map_handle_t **phTensorMap; +} ur_tensor_map_encode_tiled_exp_params_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for urUsmP2PEnablePeerAccessExp /// @details Each entry is a pointer to the parameter passed to the function; diff --git a/include/ur_ddi.h b/include/ur_ddi.h index 40a6c5c269..695c1885b0 100644 --- a/include/ur_ddi.h +++ b/include/ur_ddi.h @@ -2248,6 +2248,71 @@ typedef ur_result_t(UR_APICALL *ur_pfnGetCommandBufferExpProcAddrTable_t)( ur_api_version_t, ur_command_buffer_exp_dditable_t *); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urTensorMapEncodeIm2ColExp +typedef ur_result_t(UR_APICALL *ur_pfnTensorMapEncodeIm2ColExp_t)( + ur_device_handle_t, + ur_exp_tensor_map_data_type_flags_t, + uint32_t, + void *, + const uint64_t *, + const uint64_t *, + const int *, + const int *, + uint32_t, + uint32_t, + const uint32_t *, + ur_exp_tensor_map_interleave_flags_t, + ur_exp_tensor_map_swizzle_flags_t, + ur_exp_tensor_map_l2_promotion_flags_t, + ur_exp_tensor_map_oob_fill_flags_t, + ur_exp_tensor_map_handle_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urTensorMapEncodeTiledExp +typedef ur_result_t(UR_APICALL *ur_pfnTensorMapEncodeTiledExp_t)( + 
ur_device_handle_t, + ur_exp_tensor_map_data_type_flags_t, + uint32_t, + void *, + const uint64_t *, + const uint64_t *, + const uint32_t *, + const uint32_t *, + ur_exp_tensor_map_interleave_flags_t, + ur_exp_tensor_map_swizzle_flags_t, + ur_exp_tensor_map_l2_promotion_flags_t, + ur_exp_tensor_map_oob_fill_flags_t, + ur_exp_tensor_map_handle_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Table of TensorMapExp functions pointers +typedef struct ur_tensor_map_exp_dditable_t { + ur_pfnTensorMapEncodeIm2ColExp_t pfnEncodeIm2ColExp; + ur_pfnTensorMapEncodeTiledExp_t pfnEncodeTiledExp; +} ur_tensor_map_exp_dditable_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's TensorMapExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +UR_DLLEXPORT ur_result_t UR_APICALL +urGetTensorMapExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_tensor_map_exp_dditable_t *pDdiTable ///< [in,out] pointer to table of DDI function pointers +); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urGetTensorMapExpProcAddrTable +typedef ur_result_t(UR_APICALL *ur_pfnGetTensorMapExpProcAddrTable_t)( + ur_api_version_t, + ur_tensor_map_exp_dditable_t *); + /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urUsmP2PEnablePeerAccessExp typedef ur_result_t(UR_APICALL *ur_pfnUsmP2PEnablePeerAccessExp_t)( @@ -2515,6 +2580,7 @@ typedef struct ur_dditable_t { ur_usm_dditable_t USM; ur_usm_exp_dditable_t USMExp; ur_command_buffer_exp_dditable_t CommandBufferExp; + ur_tensor_map_exp_dditable_t TensorMapExp; ur_usm_p2p_exp_dditable_t 
UsmP2PExp; ur_virtual_mem_dditable_t VirtualMem; ur_device_dditable_t Device; diff --git a/include/ur_print.h b/include/ur_print.h index c2adb18067..3782ffb5ce 100644 --- a/include/ur_print.h +++ b/include/ur_print.h @@ -1098,6 +1098,46 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintExpEnqueueNativeCommandFlags(enum ur_ /// - `buff_size < out_size` UR_APIEXPORT ur_result_t UR_APICALL urPrintExpEnqueueNativeCommandProperties(const struct ur_exp_enqueue_native_command_properties_t params, char *buffer, const size_t buff_size, size_t *out_size); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_tensor_map_data_type_flag_t enum +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintExpTensorMapDataTypeFlags(enum ur_exp_tensor_map_data_type_flag_t value, char *buffer, const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_tensor_map_interleave_flag_t enum +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintExpTensorMapInterleaveFlags(enum ur_exp_tensor_map_interleave_flag_t value, char *buffer, const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_tensor_map_l2_promotion_flag_t enum +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintExpTensorMapL2PromotionFlags(enum ur_exp_tensor_map_l2_promotion_flag_t value, char *buffer, const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_tensor_map_swizzle_flag_t enum +/// @returns +/// - 
::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintExpTensorMapSwizzleFlags(enum ur_exp_tensor_map_swizzle_flag_t value, char *buffer, const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_tensor_map_oob_fill_flag_t enum +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintExpTensorMapOobFillFlags(enum ur_exp_tensor_map_oob_fill_flag_t value, char *buffer, const size_t buff_size, size_t *out_size); + /////////////////////////////////////////////////////////////////////////////// /// @brief Print ur_loader_config_create_params_t struct /// @returns @@ -2522,6 +2562,22 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintCommandBufferGetInfoExpParams(const s /// - `buff_size < out_size` UR_APIEXPORT ur_result_t UR_APICALL urPrintCommandBufferCommandGetInfoExpParams(const struct ur_command_buffer_command_get_info_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_tensor_map_encode_im_2_col_exp_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintTensorMapEncodeIm_2ColExpParams(const struct ur_tensor_map_encode_im_2_col_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_tensor_map_encode_tiled_exp_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintTensorMapEncodeTiledExpParams(const struct 
ur_tensor_map_encode_tiled_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); + /////////////////////////////////////////////////////////////////////////////// /// @brief Print ur_usm_p2p_enable_peer_access_exp_params_t struct /// @returns diff --git a/include/ur_print.hpp b/include/ur_print.hpp index 8888a74f91..dafe882726 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -56,6 +56,8 @@ template <> struct is_handle : std::true_type {}; template <> struct is_handle : std::true_type {}; +template <> +struct is_handle<ur_exp_tensor_map_handle_t> : std::true_type {}; template inline constexpr bool is_handle_v = is_handle::value; template @@ -222,6 +224,21 @@ inline ur_result_t printFlag(std::ostream &os, uint32 template <> inline ur_result_t printFlag(std::ostream &os, uint32_t flag); +template <> +inline ur_result_t printFlag<ur_exp_tensor_map_data_type_flag_t>(std::ostream &os, uint32_t flag); + +template <> +inline ur_result_t printFlag<ur_exp_tensor_map_interleave_flag_t>(std::ostream &os, uint32_t flag); + +template <> +inline ur_result_t printFlag<ur_exp_tensor_map_l2_promotion_flag_t>(std::ostream &os, uint32_t flag); + +template <> +inline ur_result_t printFlag<ur_exp_tensor_map_swizzle_flag_t>(std::ostream &os, uint32_t flag); + +template <> +inline ur_result_t printFlag<ur_exp_tensor_map_oob_fill_flag_t>(std::ostream &os, uint32_t flag); + } // namespace ur::details inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value); @@ -359,6 +376,11 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_exp_enqueue_ext_flag_t inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_enqueue_ext_properties_t params); inline std::ostream &operator<<(std::ostream &os, enum ur_exp_enqueue_native_command_flag_t value); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_enqueue_native_command_properties_t params); +inline std::ostream &operator<<(std::ostream &os, enum ur_exp_tensor_map_data_type_flag_t value); +inline std::ostream &operator<<(std::ostream &os, enum ur_exp_tensor_map_interleave_flag_t value); +inline std::ostream 
&operator<<(std::ostream &os, enum ur_exp_tensor_map_l2_promotion_flag_t value); +inline std::ostream &operator<<(std::ostream &os, enum ur_exp_tensor_map_swizzle_flag_t value); +inline std::ostream &operator<<(std::ostream &os, enum ur_exp_tensor_map_oob_fill_flag_t value); /////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_function_t type @@ -965,6 +987,12 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value) { break; case UR_FUNCTION_ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT: os << "UR_FUNCTION_ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT"; + break; + case UR_FUNCTION_TENSOR_MAP_ENCODE_IM_2_COL_EXP: + os << "UR_FUNCTION_TENSOR_MAP_ENCODE_IM_2_COL_EXP"; + break; + case UR_FUNCTION_TENSOR_MAP_ENCODE_TILED_EXP: + os << "UR_FUNCTION_TENSOR_MAP_ENCODE_TILED_EXP"; break; default: os << "unknown enumerator"; @@ -10662,6 +10690,504 @@ inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_enqueue_na os << "}"; return os; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_tensor_map_data_type_flag_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, enum ur_exp_tensor_map_data_type_flag_t value) { + switch (value) { + case UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT8: + os << "UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT8"; + break; + case UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT16: + os << "UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT16"; + break; + case UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT32: + os << "UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT32"; + break; + case UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT32: + os << "UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT32"; + break; + case UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT64: + os << "UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT64"; + break; + case UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT64: + os << "UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT64"; + break; + case 
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT16: + os << "UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT16"; + break; + case UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32: + os << "UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32"; + break; + case UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT64: + os << "UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT64"; + break; + case UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_BFLOAT16: + os << "UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_BFLOAT16"; + break; + case UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32_FTZ: + os << "UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32_FTZ"; + break; + case UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32: + os << "UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32"; + break; + case UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32_FTZ: + os << "UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32_FTZ"; + break; + default: + os << "unknown enumerator"; + break; + } + return os; +} + +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_tensor_map_data_type_flag_t flag +template <> +inline ur_result_t printFlag<ur_exp_tensor_map_data_type_flag_t>(std::ostream &os, uint32_t flag) { + uint32_t val = flag; + bool first = true; + + if ((val & UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT8) == (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT8) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT8; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT8; + } + + if ((val & UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT16) == (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT16) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT16; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT16; + } + + if ((val & UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT32) == (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT32) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT32; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT32; 
+ } + + if ((val & UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT32) == (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT32) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT32; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT32; + } + + if ((val & UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT64) == (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT64) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT64; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT64; + } + + if ((val & UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT64) == (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT64) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT64; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT64; + } + + if ((val & UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT16) == (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT16) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT16; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT16; + } + + if ((val & UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32) == (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32; + } + + if ((val & UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT64) == (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT64) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT64; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT64; + } + + if ((val & UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_BFLOAT16) == (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_BFLOAT16) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_BFLOAT16; + if (!first) { + os << " | "; + } else { + first = false; + } + os << 
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_BFLOAT16; + } + + if ((val & UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32_FTZ) == (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32_FTZ) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32_FTZ; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32_FTZ; + } + + if ((val & UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32) == (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32; + } + + if ((val & UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32_FTZ) == (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32_FTZ) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32_FTZ; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32_FTZ; + } + if (val != 0) { + std::bitset<32> bits(val); + if (!first) { + os << " | "; + } + os << "unknown bit flags " << bits; + } else if (first) { + os << "0"; + } + return UR_RESULT_SUCCESS; +} +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_tensor_map_interleave_flag_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, enum ur_exp_tensor_map_interleave_flag_t value) { + switch (value) { + case UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_NONE: + os << "UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_NONE"; + break; + case UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_16B: + os << "UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_16B"; + break; + case UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_32B: + os << "UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_32B"; + break; + default: + os << "unknown enumerator"; + break; + } + return os; +} + +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief 
Print ur_exp_tensor_map_interleave_flag_t flag +template <> +inline ur_result_t printFlag<ur_exp_tensor_map_interleave_flag_t>(std::ostream &os, uint32_t flag) { + uint32_t val = flag; + bool first = true; + + if ((val & UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_NONE) == (uint32_t)UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_NONE) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_NONE; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_NONE; + } + + if ((val & UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_16B) == (uint32_t)UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_16B) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_16B; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_16B; + } + + if ((val & UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_32B) == (uint32_t)UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_32B) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_32B; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_32B; + } + if (val != 0) { + std::bitset<32> bits(val); + if (!first) { + os << " | "; + } + os << "unknown bit flags " << bits; + } else if (first) { + os << "0"; + } + return UR_RESULT_SUCCESS; +} +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_tensor_map_l2_promotion_flag_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, enum ur_exp_tensor_map_l2_promotion_flag_t value) { + switch (value) { + case UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_NONE: + os << "UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_NONE"; + break; + case UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_64B: + os << "UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_64B"; + break; + case UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_128B: + os << "UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_128B"; + break; + case UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_256B: + os << "UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_256B"; + break; + default: + os 
<< "unknown enumerator"; + break; + } + return os; +} + +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_tensor_map_l2_promotion_flag_t flag +template <> +inline ur_result_t printFlag(std::ostream &os, uint32_t flag) { + uint32_t val = flag; + bool first = true; + + if ((val & UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_NONE) == (uint32_t)UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_NONE) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_NONE; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_NONE; + } + + if ((val & UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_64B) == (uint32_t)UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_64B) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_64B; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_64B; + } + + if ((val & UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_128B) == (uint32_t)UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_128B) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_128B; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_128B; + } + + if ((val & UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_256B) == (uint32_t)UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_256B) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_256B; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_256B; + } + if (val != 0) { + std::bitset<32> bits(val); + if (!first) { + os << " | "; + } + os << "unknown bit flags " << bits; + } else if (first) { + os << "0"; + } + return UR_RESULT_SUCCESS; +} +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_tensor_map_swizzle_flag_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, enum 
ur_exp_tensor_map_swizzle_flag_t value) { + switch (value) { + case UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_NONE: + os << "UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_NONE"; + break; + case UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_32B: + os << "UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_32B"; + break; + case UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_64B: + os << "UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_64B"; + break; + case UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_128B: + os << "UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_128B"; + break; + default: + os << "unknown enumerator"; + break; + } + return os; +} + +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_tensor_map_swizzle_flag_t flag +template <> +inline ur_result_t printFlag(std::ostream &os, uint32_t flag) { + uint32_t val = flag; + bool first = true; + + if ((val & UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_NONE) == (uint32_t)UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_NONE) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_NONE; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_NONE; + } + + if ((val & UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_32B) == (uint32_t)UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_32B) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_32B; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_32B; + } + + if ((val & UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_64B) == (uint32_t)UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_64B) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_64B; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_64B; + } + + if ((val & UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_128B) == (uint32_t)UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_128B) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_128B; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_128B; + } + if (val != 0) { + std::bitset<32> bits(val); + if (!first) { + os << " | "; + } + os << "unknown bit 
flags " << bits; + } else if (first) { + os << "0"; + } + return UR_RESULT_SUCCESS; +} +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_tensor_map_oob_fill_flag_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, enum ur_exp_tensor_map_oob_fill_flag_t value) { + switch (value) { + case UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_NONE: + os << "UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_NONE"; + break; + case UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_REQUEST_ZERO_FMA: + os << "UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_REQUEST_ZERO_FMA"; + break; + default: + os << "unknown enumerator"; + break; + } + return os; +} + +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_tensor_map_oob_fill_flag_t flag +template <> +inline ur_result_t printFlag(std::ostream &os, uint32_t flag) { + uint32_t val = flag; + bool first = true; + + if ((val & UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_NONE) == (uint32_t)UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_NONE) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_NONE; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_NONE; + } + + if ((val & UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_REQUEST_ZERO_FMA) == (uint32_t)UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_REQUEST_ZERO_FMA) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_REQUEST_ZERO_FMA; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_REQUEST_ZERO_FMA; + } + if (val != 0) { + std::bitset<32> bits(val); + if (!first) { + os << " | "; + } + os << "unknown bit flags " << bits; + } else if (first) { + os << "0"; + } + return UR_RESULT_SUCCESS; +} +} // namespace ur::details /////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_loader_config_create_params_t type @@ -17932,6 +18457,192 
@@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct return os; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_tensor_map_encode_im_2_col_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_tensor_map_encode_im_2_col_exp_params_t *params) { + + os << ".hDevice = "; + + ur::details::printPtr(os, + *(params->phDevice)); + + os << ", "; + os << ".TensorMapType = "; + + ur::details::printFlag(os, + *(params->pTensorMapType)); + + os << ", "; + os << ".TensorRank = "; + + os << *(params->pTensorRank); + + os << ", "; + os << ".GlobalAddress = "; + + ur::details::printPtr(os, + *(params->pGlobalAddress)); + + os << ", "; + os << ".GlobalDim = "; + + ur::details::printPtr(os, + *(params->pGlobalDim)); + + os << ", "; + os << ".GlobalStrides = "; + + ur::details::printPtr(os, + *(params->pGlobalStrides)); + + os << ", "; + os << ".PixelBoxLowerCorner = "; + + ur::details::printPtr(os, + *(params->pPixelBoxLowerCorner)); + + os << ", "; + os << ".PixelBoxUpperCorner = "; + + ur::details::printPtr(os, + *(params->pPixelBoxUpperCorner)); + + os << ", "; + os << ".ChannelsPerPixel = "; + + os << *(params->pChannelsPerPixel); + + os << ", "; + os << ".PixelsPerColumn = "; + + os << *(params->pPixelsPerColumn); + + os << ", "; + os << ".ElementStrides = "; + + ur::details::printPtr(os, + *(params->pElementStrides)); + + os << ", "; + os << ".Interleave = "; + + ur::details::printFlag(os, + *(params->pInterleave)); + + os << ", "; + os << ".Swizzle = "; + + ur::details::printFlag(os, + *(params->pSwizzle)); + + os << ", "; + os << ".L2Promotion = "; + + ur::details::printFlag(os, + *(params->pL2Promotion)); + + os << ", "; + os << ".OobFill = "; + + ur::details::printFlag(os, + *(params->pOobFill)); + + os << ", "; + os << ".hTensorMap = "; + + ur::details::printPtr(os, + 
*(params->phTensorMap)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_tensor_map_encode_tiled_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_tensor_map_encode_tiled_exp_params_t *params) { + + os << ".hDevice = "; + + ur::details::printPtr(os, + *(params->phDevice)); + + os << ", "; + os << ".TensorMapType = "; + + ur::details::printFlag(os, + *(params->pTensorMapType)); + + os << ", "; + os << ".TensorRank = "; + + os << *(params->pTensorRank); + + os << ", "; + os << ".GlobalAddress = "; + + ur::details::printPtr(os, + *(params->pGlobalAddress)); + + os << ", "; + os << ".GlobalDim = "; + + ur::details::printPtr(os, + *(params->pGlobalDim)); + + os << ", "; + os << ".GlobalStrides = "; + + ur::details::printPtr(os, + *(params->pGlobalStrides)); + + os << ", "; + os << ".BoxDim = "; + + ur::details::printPtr(os, + *(params->pBoxDim)); + + os << ", "; + os << ".ElementStrides = "; + + ur::details::printPtr(os, + *(params->pElementStrides)); + + os << ", "; + os << ".Interleave = "; + + ur::details::printFlag(os, + *(params->pInterleave)); + + os << ", "; + os << ".Swizzle = "; + + ur::details::printFlag(os, + *(params->pSwizzle)); + + os << ", "; + os << ".L2Promotion = "; + + ur::details::printFlag(os, + *(params->pL2Promotion)); + + os << ", "; + os << ".OobFill = "; + + ur::details::printFlag(os, + *(params->pOobFill)); + + os << ", "; + os << ".hTensorMap = "; + + ur::details::printPtr(os, + *(params->phTensorMap)); + + return os; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_usm_p2p_enable_peer_access_exp_params_t type /// @returns @@ -19170,6 +19881,12 @@ inline ur_result_t UR_APICALL printFunctionParams(std::ostream &os, ur_function_ case UR_FUNCTION_COMMAND_BUFFER_COMMAND_GET_INFO_EXP: { os << (const 
struct ur_command_buffer_command_get_info_exp_params_t *)params; } break; + case UR_FUNCTION_TENSOR_MAP_ENCODE_IM_2_COL_EXP: { + os << (const struct ur_tensor_map_encode_im_2_col_exp_params_t *)params; + } break; + case UR_FUNCTION_TENSOR_MAP_ENCODE_TILED_EXP: { + os << (const struct ur_tensor_map_encode_tiled_exp_params_t *)params; + } break; case UR_FUNCTION_USM_P2P_ENABLE_PEER_ACCESS_EXP: { os << (const struct ur_usm_p2p_enable_peer_access_exp_params_t *)params; } break; diff --git a/scripts/core/EXP-TENSOR-MAP.rst b/scripts/core/EXP-TENSOR-MAP.rst new file mode 100644 index 0000000000..3679f3cfd1 --- /dev/null +++ b/scripts/core/EXP-TENSOR-MAP.rst @@ -0,0 +1,69 @@ +<% + OneApi=tags['$OneApi'] + x=tags['$x'] + X=x.upper() +%> + +.. _experimental-tensor-map: + +================================================================================ +Tensor Mapping APIs +================================================================================ + +.. warning:: + + Experimental features: + + * May be replaced, updated, or removed at any time. + * Do not require maintaining API/ABI stability of their own additions over + time. + * Do not require conformance testing of their own additions. + + +Motivation +-------------------------------------------------------------------------------- + +Used to target the CUDA entry points cuTensorMapEncodeIm2col and +cuTensorMapEncodeTiled. 
+ +API +-------------------------------------------------------------------------------- + +Enums +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* ${x}_exp_tensor_map_data_type_flags_t +* ${x}_exp_tensor_map_interleave_flags_t +* ${x}_exp_tensor_map_l2_promotion_flags_t +* ${x}_exp_tensor_map_swizzle_flags_t +* ${x}_exp_tensor_map_oob_fill_flags_t + +Types +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* ${x}_exp_tensor_map_handle_t + +Functions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +* ${x}TensorMapEncodeIm2ColExp +* ${x}TensorMapEncodeTiledExp + +Changelog +-------------------------------------------------------------------------------- + ++-----------+------------------------+ +| Revision | Changes | ++===========+========================+ +| 1.0 | Initial Draft | ++-----------+------------------------+ + + +Support +-------------------------------------------------------------------------------- + +This is only supported in the CUDA adapter. + +Contributors +-------------------------------------------------------------------------------- + +* Hugh Delaney `hugh.delaney@codeplay.com <hugh.delaney@codeplay.com>`_ diff --git a/scripts/core/exp-tensor-map.yml b/scripts/core/exp-tensor-map.yml new file mode 100644 index 0000000000..258a2403f0 --- /dev/null +++ b/scripts/core/exp-tensor-map.yml @@ -0,0 +1,207 @@ +# +# Copyright (C) 2024 Intel Corporation +# +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+# See LICENSE.TXT +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# See YaML.md for syntax definition +# +--- #-------------------------------------------------------------------------- +type: header +desc: "Intel $OneApi Unified Runtime Experimental API for mapping tensor objects" +ordinal: "100" + +--- #-------------------------------------------------------------------------- +type: handle +desc: "Handle of tensor map object" +class: $xTensorMap +name: "$x_exp_tensor_map_handle_t" + +--- #-------------------------------------------------------------------------- +type: enum +desc: "Tensor map data type" +name: $x_exp_tensor_map_data_type_flags_t +etors: + - name: UINT8 + desc: "1 byte" + - name: UINT16 + desc: "2 bytes" + - name: UINT32 + desc: "4 bytes" + - name: INT32 + desc: "4 bytes" + - name: UINT64 + desc: "8 bytes" + - name: INT64 + desc: "8 bytes" + - name: FLOAT16 + desc: "2 bytes" + - name: FLOAT32 + desc: "4 bytes" + - name: FLOAT64 + desc: "8 bytes" + - name: BFLOAT16 + desc: "2 bytes" + - name: FLOAT32_FTZ + desc: "4 bytes" + - name: TFLOAT32 + desc: "4 bytes" + - name: TFLOAT32_FTZ + desc: "4 bytes" + +--- #-------------------------------------------------------------------------- +type: enum +desc: "Tensor map interleave" +name: $x_exp_tensor_map_interleave_flags_t +etors: + - name: NONE + desc: "No interleave" + - name: 16B + desc: "16B interleave" + - name: 32B + desc: "32B interleave" + +--- #-------------------------------------------------------------------------- +type: enum +desc: "Tensor map l2 promotion" +name: $x_exp_tensor_map_l2_promotion_flags_t +etors: + - name: NONE + desc: "No promotion type" + - name: 64B + desc: "64B promotion type" + - name: 128B + desc: "128B promotion type" + - name: 256B + desc: "256B promotion type" + +--- #-------------------------------------------------------------------------- +type: enum +desc: "Tensor map swizzle" +name: $x_exp_tensor_map_swizzle_flags_t +etors: + - name: 
NONE + desc: "No swizzle" + - name: 32B + desc: "32B swizzle" + - name: 64B + desc: "64B swizzle" + - name: 128B + desc: "128B swizzle" + +--- #-------------------------------------------------------------------------- +type: enum +desc: "Tensor map OOB fill" +name: $x_exp_tensor_map_oob_fill_flags_t +etors: + - name: NONE + desc: "No OOB fill" + - name: REQUEST_ZERO_FMA + desc: "Refer to NVIDIA docs" + +--- #-------------------------------------------------------------------------- +type: function +desc: "Encode tensor map with image data" +class: $xTensorMap +name: EncodeIm2ColExp +details: + - "Map encode using im2col." +params: + - type: $x_device_handle_t + name: hDevice + desc: "[in] Handle of the device object." + - type: $x_exp_tensor_map_data_type_flags_t + name: TensorMapType + desc: "[in] Data type of the tensor object." + - type: uint32_t + name: TensorRank + desc: "[in] Dimensionality of tensor; must be at least 3." + - type: void* + name: GlobalAddress + desc: "[in] Starting address of memory region described by tensor." + - type: const uint64_t* + name: GlobalDim + desc: "[in] Array containing tensor size (number of elements) along each of the TensorRank dimensions." + - type: const uint64_t* + name: GlobalStrides + desc: "[in] Array containing stride size (in bytes) along each of the tensorRank - 1 dimensions." + - type: const int* + name: PixelBoxLowerCorner + desc: "[in] Array containing DHW dimensions of lower box corner." + - type: const int* + name: PixelBoxUpperCorner + desc: "[in] Array containing DHW dimensions of upper box corner." + - type: uint32_t + name: ChannelsPerPixel + desc: "[in] Number of channels per pixel." + - type: uint32_t + name: PixelsPerColumn + desc: "[in] Number of pixels per column." + - type: const uint32_t* + name: ElementStrides + desc: "[in] Array containing traversal stride in each of the tensorRank dimensions." 
+ - type: $x_exp_tensor_map_interleave_flags_t + name: Interleave + desc: "[in] Type of interleaved layout the tensor addresses" + - type: $x_exp_tensor_map_swizzle_flags_t + name: Swizzle + desc: "[in] Bank swizzling pattern inside shared memory" + - type: $x_exp_tensor_map_l2_promotion_flags_t + name: L2Promotion + desc: "[in] L2 promotion size." + - type: $x_exp_tensor_map_oob_fill_flags_t + name: OobFill + desc: "[in] Indicate whether zero or special NaN constant will be used to fill out-of-bound elements." + - type: $x_exp_tensor_map_handle_t* + name: hTensorMap + desc: "[out] Handle of the tensor map object." + +--- #-------------------------------------------------------------------------- +type: function +desc: "Encode tensor map with tiled data" +class: $xTensorMap +name: EncodeTiledExp +details: + - "Tiled map encode." +params: + - type: $x_device_handle_t + name: hDevice + desc: "[in] Handle of the device object." + - type: $x_exp_tensor_map_data_type_flags_t + name: TensorMapType + desc: "[in] Data type of the tensor object." + - type: uint32_t + name: TensorRank + desc: "[in] Dimensionality of tensor; must be at least 3." + - type: void* + name: GlobalAddress + desc: "[in] Starting address of memory region described by tensor." + - type: const uint64_t* + name: GlobalDim + desc: "[in] Array containing tensor size (number of elements) along each of the TensorRank dimensions." + - type: const uint64_t* + name: GlobalStrides + desc: "[in] Array containing stride size (in bytes) along each of the tensorRank - 1 dimensions." + - type: const uint32_t* + name: BoxDim + desc: "[in] Array containing traversal box size (number of elements) along each of the tensorRank dimensions. Specifies how many elements to be traversed along each tensor dimension." + - type: const uint32_t* + name: ElementStrides + desc: "[in] Array containing traversal stride in each of the tensorRank dimensions." 
+ - type: $x_exp_tensor_map_interleave_flags_t + name: Interleave + desc: "[in] Type of interleaved layout the tensor addresses" + - type: $x_exp_tensor_map_swizzle_flags_t + name: Swizzle + desc: "[in] Bank swizzling pattern inside shared memory" + - type: $x_exp_tensor_map_l2_promotion_flags_t + name: L2Promotion + desc: "[in] L2 promotion size." + - type: $x_exp_tensor_map_oob_fill_flags_t + name: OobFill + desc: "[in] Indicate whether zero or special NaN constant will be used to fill out-of-bound elements." + - type: $x_exp_tensor_map_handle_t* + name: hTensorMap + desc: "[out] Handle of the tensor map object." + diff --git a/scripts/core/registry.yml b/scripts/core/registry.yml index 2133e1c889..059e23c2a0 100644 --- a/scripts/core/registry.yml +++ b/scripts/core/registry.yml @@ -607,6 +607,12 @@ etors: - name: ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT desc: Enumerator for $xEnqueueEventsWaitWithBarrierExt value: '246' +- name: TENSOR_MAP_ENCODE_IM_2_COL_EXP + desc: Enumerator for $xTensorMapEncodeIm2ColExp + value: '230' +- name: TENSOR_MAP_ENCODE_TILED_EXP + desc: Enumerator for $xTensorMapEncodeTiledExp + value: '231' --- type: enum desc: Defines structure types diff --git a/source/adapters/adapter.def.in b/source/adapters/adapter.def.in index 3c18c78bd1..fd37178966 100644 --- a/source/adapters/adapter.def.in +++ b/source/adapters/adapter.def.in @@ -16,6 +16,7 @@ EXPORTS urGetProgramExpProcAddrTable urGetQueueProcAddrTable urGetSamplerProcAddrTable + urGetTensorMapExpProcAddrTable urGetUSMProcAddrTable urGetUSMExpProcAddrTable urGetUsmP2PExpProcAddrTable diff --git a/source/adapters/adapter.map.in b/source/adapters/adapter.map.in index bb08ae7d88..50db54ef40 100644 --- a/source/adapters/adapter.map.in +++ b/source/adapters/adapter.map.in @@ -16,6 +16,7 @@ urGetProgramExpProcAddrTable; urGetQueueProcAddrTable; urGetSamplerProcAddrTable; + urGetTensorMapExpProcAddrTable; urGetUSMProcAddrTable; urGetUSMExpProcAddrTable; urGetUsmP2PExpProcAddrTable; diff --git 
a/source/adapters/mock/ur_mockddi.cpp b/source/adapters/mock/ur_mockddi.cpp index 42c342444d..f2849e73ff 100644 --- a/source/adapters/mock/ur_mockddi.cpp +++ b/source/adapters/mock/ur_mockddi.cpp @@ -10731,6 +10731,172 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueNativeCommandExp( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urTensorMapEncodeIm2ColExp +__urdlllocal ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( + ur_device_handle_t hDevice, ///< [in] Handle of the device object. + ur_exp_tensor_map_data_type_flags_t + TensorMapType, ///< [in] Data type of the tensor object. + uint32_t TensorRank, ///< [in] Dimensionality of tensor; must be at least 3. + void * + GlobalAddress, ///< [in] Starting address of memory region described by tensor. + const uint64_t * + GlobalDim, ///< [in] Array containing tensor size (number of elements) along each of + ///< the TensorRank dimensions. + const uint64_t * + GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the + ///< tensorRank - 1 dimensions. + const int * + PixelBoxLowerCorner, ///< [in] Array containing DHW dimensions of lower box corner. + const int * + PixelBoxUpperCorner, ///< [in] Array containing DHW dimensions of upper box corner. + uint32_t ChannelsPerPixel, ///< [in] Number of channels per pixel. + uint32_t PixelsPerColumn, ///< [in] Number of pixels per column. + const uint32_t * + ElementStrides, ///< [in] Array containing traversal stride in each of the tensorRank + ///< dimensions. + ur_exp_tensor_map_interleave_flags_t + Interleave, ///< [in] Type of interleaved layout the tensor addresses + ur_exp_tensor_map_swizzle_flags_t + Swizzle, ///< [in] Bank swizzling pattern inside shared memory + ur_exp_tensor_map_l2_promotion_flags_t + L2Promotion, ///< [in] L2 promotion size. 
+ ur_exp_tensor_map_oob_fill_flags_t + OobFill, ///< [in] Indicate whether zero or special NaN constant will be used to + ///< fill out-of-bound elements. + ur_exp_tensor_map_handle_t + *hTensorMap ///< [out] Handle of the tensor map object. + ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + ur_tensor_map_encode_im_2_col_exp_params_t params = {&hDevice, + &TensorMapType, + &TensorRank, + &GlobalAddress, + &GlobalDim, + &GlobalStrides, + &PixelBoxLowerCorner, + &PixelBoxUpperCorner, + &ChannelsPerPixel, + &PixelsPerColumn, + &ElementStrides, + &Interleave, + &Swizzle, + &L2Promotion, + &OobFill, + &hTensorMap}; + + auto beforeCallback = reinterpret_cast( + mock::getCallbacks().get_before_callback("urTensorMapEncodeIm2ColExp")); + if (beforeCallback) { + result = beforeCallback(¶ms); + if (result != UR_RESULT_SUCCESS) { + return result; + } + } + + auto replaceCallback = reinterpret_cast( + mock::getCallbacks().get_replace_callback( + "urTensorMapEncodeIm2ColExp")); + if (replaceCallback) { + result = replaceCallback(¶ms); + } else { + + *hTensorMap = mock::createDummyHandle(); + result = UR_RESULT_SUCCESS; + } + + if (result != UR_RESULT_SUCCESS) { + return result; + } + + auto afterCallback = reinterpret_cast( + mock::getCallbacks().get_after_callback("urTensorMapEncodeIm2ColExp")); + if (afterCallback) { + return afterCallback(¶ms); + } + + return result; +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urTensorMapEncodeTiledExp +__urdlllocal ur_result_t UR_APICALL urTensorMapEncodeTiledExp( + ur_device_handle_t hDevice, ///< [in] Handle of the device object. + ur_exp_tensor_map_data_type_flags_t + TensorMapType, ///< [in] Data type of the tensor object. + uint32_t TensorRank, ///< [in] Dimensionality of tensor; must be at least 3. 
+ void * + GlobalAddress, ///< [in] Starting address of memory region described by tensor. + const uint64_t * + GlobalDim, ///< [in] Array containing tensor size (number of elements) along each of + ///< the TensorRank dimensions. + const uint64_t * + GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the + ///< tensorRank - 1 dimensions. + const uint32_t * + BoxDim, ///< [in] Array containing traversal box size (number of elments) along + ///< each of the tensorRank dimensions. Specifies how many elements to be + ///< traversed along each tensor dimension. + const uint32_t * + ElementStrides, ///< [in] Array containing traversal stride in each of the tensorRank + ///< dimensions. + ur_exp_tensor_map_interleave_flags_t + Interleave, ///< [in] Type of interleaved layout the tensor addresses + ur_exp_tensor_map_swizzle_flags_t + Swizzle, ///< [in] Bank swizzling pattern inside shared memory + ur_exp_tensor_map_l2_promotion_flags_t + L2Promotion, ///< [in] L2 promotion size. + ur_exp_tensor_map_oob_fill_flags_t + OobFill, ///< [in] Indicate whether zero or special NaN constant will be used to + ///< fill out-of-bound elements. + ur_exp_tensor_map_handle_t + *hTensorMap ///< [out] Handle of the tensor map object. 
+ ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + ur_tensor_map_encode_tiled_exp_params_t params = { + &hDevice, &TensorMapType, &TensorRank, &GlobalAddress, + &GlobalDim, &GlobalStrides, &BoxDim, &ElementStrides, + &Interleave, &Swizzle, &L2Promotion, &OobFill, + &hTensorMap}; + + auto beforeCallback = reinterpret_cast( + mock::getCallbacks().get_before_callback("urTensorMapEncodeTiledExp")); + if (beforeCallback) { + result = beforeCallback(¶ms); + if (result != UR_RESULT_SUCCESS) { + return result; + } + } + + auto replaceCallback = reinterpret_cast( + mock::getCallbacks().get_replace_callback("urTensorMapEncodeTiledExp")); + if (replaceCallback) { + result = replaceCallback(¶ms); + } else { + + *hTensorMap = mock::createDummyHandle(); + result = UR_RESULT_SUCCESS; + } + + if (result != UR_RESULT_SUCCESS) { + return result; + } + + auto afterCallback = reinterpret_cast( + mock::getCallbacks().get_after_callback("urTensorMapEncodeTiledExp")); + if (afterCallback) { + return afterCallback(¶ms); + } + + return result; +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + } // namespace driver #if defined(__cplusplus) @@ -11550,6 +11716,38 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetSamplerProcAddrTable( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's TensorMapExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +UR_DLLEXPORT ur_result_t UR_APICALL urGetTensorMapExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_tensor_map_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers + ) try { + if (nullptr == pDdiTable) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (driver::d_context.version < version) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + + ur_result_t result = UR_RESULT_SUCCESS; + + pDdiTable->pfnEncodeIm2ColExp = driver::urTensorMapEncodeIm2ColExp; + + pDdiTable->pfnEncodeTiledExp = driver::urTensorMapEncodeTiledExp; + + return result; +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's USM table /// with current process' addresses diff --git a/source/loader/layers/tracing/ur_trcddi.cpp b/source/loader/layers/tracing/ur_trcddi.cpp index 64489c39ac..5b28fd9f30 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -9221,6 +9221,158 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueNativeCommandExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urTensorMapEncodeIm2ColExp +__urdlllocal ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( + ur_device_handle_t hDevice, ///< [in] Handle of the device object. + ur_exp_tensor_map_data_type_flags_t + TensorMapType, ///< [in] Data type of the tensor object. + uint32_t TensorRank, ///< [in] Dimensionality of tensor; must be at least 3. + void * + GlobalAddress, ///< [in] Starting address of memory region described by tensor. + const uint64_t * + GlobalDim, ///< [in] Array containing tensor size (number of elements) along each of + ///< the TensorRank dimensions. + const uint64_t * + GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the + ///< tensorRank - 1 dimensions. + const int * + PixelBoxLowerCorner, ///< [in] Array containing DHW dimensions of lower box corner. + const int * + PixelBoxUpperCorner, ///< [in] Array containing DHW dimensions of upper box corner. + uint32_t ChannelsPerPixel, ///< [in] Number of channels per pixel. + uint32_t PixelsPerColumn, ///< [in] Number of pixels per column. + const uint32_t * + ElementStrides, ///< [in] Array containing traversal stride in each of the tensorRank + ///< dimensions. 
+ ur_exp_tensor_map_interleave_flags_t + Interleave, ///< [in] Type of interleaved layout the tensor addresses + ur_exp_tensor_map_swizzle_flags_t + Swizzle, ///< [in] Bank swizzling pattern inside shared memory + ur_exp_tensor_map_l2_promotion_flags_t + L2Promotion, ///< [in] L2 promotion size. + ur_exp_tensor_map_oob_fill_flags_t + OobFill, ///< [in] Indicate whether zero or special NaN constant will be used to + ///< fill out-of-bound elements. + ur_exp_tensor_map_handle_t + *hTensorMap ///< [out] Handle of the tensor map object. +) { + auto pfnEncodeIm2ColExp = + getContext()->urDdiTable.TensorMapExp.pfnEncodeIm2ColExp; + + if (nullptr == pfnEncodeIm2ColExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_tensor_map_encode_im_2_col_exp_params_t params = {&hDevice, + &TensorMapType, + &TensorRank, + &GlobalAddress, + &GlobalDim, + &GlobalStrides, + &PixelBoxLowerCorner, + &PixelBoxUpperCorner, + &ChannelsPerPixel, + &PixelsPerColumn, + &ElementStrides, + &Interleave, + &Swizzle, + &L2Promotion, + &OobFill, + &hTensorMap}; + uint64_t instance = + getContext()->notify_begin(UR_FUNCTION_TENSOR_MAP_ENCODE_IM_2_COL_EXP, + "urTensorMapEncodeIm2ColExp", ¶ms); + + getContext()->logger.info("---> urTensorMapEncodeIm2ColExp"); + + ur_result_t result = pfnEncodeIm2ColExp( + hDevice, TensorMapType, TensorRank, GlobalAddress, GlobalDim, + GlobalStrides, PixelBoxLowerCorner, PixelBoxUpperCorner, + ChannelsPerPixel, PixelsPerColumn, ElementStrides, Interleave, Swizzle, + L2Promotion, OobFill, hTensorMap); + + getContext()->notify_end(UR_FUNCTION_TENSOR_MAP_ENCODE_IM_2_COL_EXP, + "urTensorMapEncodeIm2ColExp", ¶ms, &result, + instance); + + std::ostringstream args_str; + ur::extras::printFunctionParams( + args_str, UR_FUNCTION_TENSOR_MAP_ENCODE_IM_2_COL_EXP, ¶ms); + getContext()->logger.info("({}) -> {};\n", args_str.str(), result); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function 
for urTensorMapEncodeTiledExp +__urdlllocal ur_result_t UR_APICALL urTensorMapEncodeTiledExp( + ur_device_handle_t hDevice, ///< [in] Handle of the device object. + ur_exp_tensor_map_data_type_flags_t + TensorMapType, ///< [in] Data type of the tensor object. + uint32_t TensorRank, ///< [in] Dimensionality of tensor; must be at least 3. + void * + GlobalAddress, ///< [in] Starting address of memory region described by tensor. + const uint64_t * + GlobalDim, ///< [in] Array containing tensor size (number of elements) along each of + ///< the TensorRank dimensions. + const uint64_t * + GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the + ///< tensorRank - 1 dimensions. + const uint32_t * + BoxDim, ///< [in] Array containing traversal box size (number of elments) along + ///< each of the tensorRank dimensions. Specifies how many elements to be + ///< traversed along each tensor dimension. + const uint32_t * + ElementStrides, ///< [in] Array containing traversal stride in each of the tensorRank + ///< dimensions. + ur_exp_tensor_map_interleave_flags_t + Interleave, ///< [in] Type of interleaved layout the tensor addresses + ur_exp_tensor_map_swizzle_flags_t + Swizzle, ///< [in] Bank swizzling pattern inside shared memory + ur_exp_tensor_map_l2_promotion_flags_t + L2Promotion, ///< [in] L2 promotion size. + ur_exp_tensor_map_oob_fill_flags_t + OobFill, ///< [in] Indicate whether zero or special NaN constant will be used to + ///< fill out-of-bound elements. + ur_exp_tensor_map_handle_t + *hTensorMap ///< [out] Handle of the tensor map object. 
+) { + auto pfnEncodeTiledExp = + getContext()->urDdiTable.TensorMapExp.pfnEncodeTiledExp; + + if (nullptr == pfnEncodeTiledExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_tensor_map_encode_tiled_exp_params_t params = { + &hDevice, &TensorMapType, &TensorRank, &GlobalAddress, + &GlobalDim, &GlobalStrides, &BoxDim, &ElementStrides, + &Interleave, &Swizzle, &L2Promotion, &OobFill, + &hTensorMap}; + uint64_t instance = + getContext()->notify_begin(UR_FUNCTION_TENSOR_MAP_ENCODE_TILED_EXP, + "urTensorMapEncodeTiledExp", ¶ms); + + getContext()->logger.info("---> urTensorMapEncodeTiledExp"); + + ur_result_t result = pfnEncodeTiledExp( + hDevice, TensorMapType, TensorRank, GlobalAddress, GlobalDim, + GlobalStrides, BoxDim, ElementStrides, Interleave, Swizzle, L2Promotion, + OobFill, hTensorMap); + + getContext()->notify_end(UR_FUNCTION_TENSOR_MAP_ENCODE_TILED_EXP, + "urTensorMapEncodeTiledExp", ¶ms, &result, + instance); + + std::ostringstream args_str; + ur::extras::printFunctionParams( + args_str, UR_FUNCTION_TENSOR_MAP_ENCODE_TILED_EXP, ¶ms); + getContext()->logger.info("({}) -> {};\n", args_str.str(), result); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's Global table /// with current process' addresses @@ -10266,6 +10418,43 @@ __urdlllocal ur_result_t UR_APICALL urGetSamplerProcAddrTable( return result; } /////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's TensorMapExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +__urdlllocal ur_result_t UR_APICALL urGetTensorMapExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_tensor_map_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI 
function pointers +) { + auto &dditable = ur_tracing_layer::getContext()->urDdiTable.TensorMapExp; + + if (nullptr == pDdiTable) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (UR_MAJOR_VERSION(ur_tracing_layer::getContext()->version) != + UR_MAJOR_VERSION(version) || + UR_MINOR_VERSION(ur_tracing_layer::getContext()->version) > + UR_MINOR_VERSION(version)) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + + ur_result_t result = UR_RESULT_SUCCESS; + + dditable.pfnEncodeIm2ColExp = pDdiTable->pfnEncodeIm2ColExp; + pDdiTable->pfnEncodeIm2ColExp = + ur_tracing_layer::urTensorMapEncodeIm2ColExp; + + dditable.pfnEncodeTiledExp = pDdiTable->pfnEncodeTiledExp; + pDdiTable->pfnEncodeTiledExp = ur_tracing_layer::urTensorMapEncodeTiledExp; + + return result; +} +/////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's USM table /// with current process' addresses /// @@ -10610,6 +10799,11 @@ ur_result_t context_t::init(ur_dditable_t *dditable, UR_API_VERSION_CURRENT, &dditable->Sampler); } + if (UR_RESULT_SUCCESS == result) { + result = ur_tracing_layer::urGetTensorMapExpProcAddrTable( + UR_API_VERSION_CURRENT, &dditable->TensorMapExp); + } + if (UR_RESULT_SUCCESS == result) { result = ur_tracing_layer::urGetUSMProcAddrTable(UR_API_VERSION_CURRENT, &dditable->USM); diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index b3969de10f..a46da9af2b 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -10274,6 +10274,221 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueNativeCommandExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urTensorMapEncodeIm2ColExp +__urdlllocal ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( + ur_device_handle_t hDevice, ///< [in] Handle of the device 
object. + ur_exp_tensor_map_data_type_flags_t + TensorMapType, ///< [in] Data type of the tensor object. + uint32_t TensorRank, ///< [in] Dimensionality of tensor; must be at least 3. + void * + GlobalAddress, ///< [in] Starting address of memory region described by tensor. + const uint64_t * + GlobalDim, ///< [in] Array containing tensor size (number of elements) along each of + ///< the TensorRank dimensions. + const uint64_t * + GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the + ///< tensorRank - 1 dimensions. + const int * + PixelBoxLowerCorner, ///< [in] Array containing DHW dimensions of lower box corner. + const int * + PixelBoxUpperCorner, ///< [in] Array containing DHW dimensions of upper box corner. + uint32_t ChannelsPerPixel, ///< [in] Number of channels per pixel. + uint32_t PixelsPerColumn, ///< [in] Number of pixels per column. + const uint32_t * + ElementStrides, ///< [in] Array containing traversal stride in each of the tensorRank + ///< dimensions. + ur_exp_tensor_map_interleave_flags_t + Interleave, ///< [in] Type of interleaved layout the tensor addresses + ur_exp_tensor_map_swizzle_flags_t + Swizzle, ///< [in] Bank swizzling pattern inside shared memory + ur_exp_tensor_map_l2_promotion_flags_t + L2Promotion, ///< [in] L2 promotion size. + ur_exp_tensor_map_oob_fill_flags_t + OobFill, ///< [in] Indicate whether zero or special NaN constant will be used to + ///< fill out-of-bound elements. + ur_exp_tensor_map_handle_t + *hTensorMap ///< [out] Handle of the tensor map object. 
+) { + auto pfnEncodeIm2ColExp = + getContext()->urDdiTable.TensorMapExp.pfnEncodeIm2ColExp; + + if (nullptr == pfnEncodeIm2ColExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (getContext()->enableParameterValidation) { + if (NULL == hDevice) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL == GlobalAddress) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (NULL == GlobalDim) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (NULL == GlobalStrides) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (NULL == PixelBoxLowerCorner) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (NULL == PixelBoxUpperCorner) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (NULL == ElementStrides) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (NULL == hTensorMap) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (UR_EXP_TENSOR_MAP_DATA_TYPE_FLAGS_MASK & TensorMapType) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + if (UR_EXP_TENSOR_MAP_INTERLEAVE_FLAGS_MASK & Interleave) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + if (UR_EXP_TENSOR_MAP_SWIZZLE_FLAGS_MASK & Swizzle) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + if (UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAGS_MASK & L2Promotion) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + if (UR_EXP_TENSOR_MAP_OOB_FILL_FLAGS_MASK & OobFill) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + } + + if (getContext()->enableLifetimeValidation && + !getContext()->refCountContext->isReferenceValid(hDevice)) { + getContext()->refCountContext->logInvalidReference(hDevice); + } + + ur_result_t result = pfnEncodeIm2ColExp( + hDevice, TensorMapType, TensorRank, GlobalAddress, GlobalDim, + GlobalStrides, PixelBoxLowerCorner, PixelBoxUpperCorner, + ChannelsPerPixel, PixelsPerColumn, ElementStrides, Interleave, Swizzle, + L2Promotion, OobFill, hTensorMap); + + return result; +} + 
+/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urTensorMapEncodeTiledExp +__urdlllocal ur_result_t UR_APICALL urTensorMapEncodeTiledExp( + ur_device_handle_t hDevice, ///< [in] Handle of the device object. + ur_exp_tensor_map_data_type_flags_t + TensorMapType, ///< [in] Data type of the tensor object. + uint32_t TensorRank, ///< [in] Dimensionality of tensor; must be at least 3. + void * + GlobalAddress, ///< [in] Starting address of memory region described by tensor. + const uint64_t * + GlobalDim, ///< [in] Array containing tensor size (number of elements) along each of + ///< the TensorRank dimensions. + const uint64_t * + GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the + ///< tensorRank - 1 dimensions. + const uint32_t * + BoxDim, ///< [in] Array containing traversal box size (number of elements) along + ///< each of the tensorRank dimensions. Specifies how many elements to be + ///< traversed along each tensor dimension. + const uint32_t * + ElementStrides, ///< [in] Array containing traversal stride in each of the tensorRank + ///< dimensions. + ur_exp_tensor_map_interleave_flags_t + Interleave, ///< [in] Type of interleaved layout the tensor addresses + ur_exp_tensor_map_swizzle_flags_t + Swizzle, ///< [in] Bank swizzling pattern inside shared memory + ur_exp_tensor_map_l2_promotion_flags_t + L2Promotion, ///< [in] L2 promotion size. + ur_exp_tensor_map_oob_fill_flags_t + OobFill, ///< [in] Indicate whether zero or special NaN constant will be used to + ///< fill out-of-bound elements. + ur_exp_tensor_map_handle_t + *hTensorMap ///< [out] Handle of the tensor map object.
+) { + auto pfnEncodeTiledExp = + getContext()->urDdiTable.TensorMapExp.pfnEncodeTiledExp; + + if (nullptr == pfnEncodeTiledExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (getContext()->enableParameterValidation) { + if (NULL == hDevice) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL == GlobalAddress) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (NULL == GlobalDim) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (NULL == GlobalStrides) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (NULL == BoxDim) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (NULL == ElementStrides) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (NULL == hTensorMap) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (UR_EXP_TENSOR_MAP_DATA_TYPE_FLAGS_MASK & TensorMapType) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + if (UR_EXP_TENSOR_MAP_INTERLEAVE_FLAGS_MASK & Interleave) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + if (UR_EXP_TENSOR_MAP_SWIZZLE_FLAGS_MASK & Swizzle) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + if (UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAGS_MASK & L2Promotion) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + if (UR_EXP_TENSOR_MAP_OOB_FILL_FLAGS_MASK & OobFill) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + } + + if (getContext()->enableLifetimeValidation && + !getContext()->refCountContext->isReferenceValid(hDevice)) { + getContext()->refCountContext->logInvalidReference(hDevice); + } + + ur_result_t result = pfnEncodeTiledExp( + hDevice, TensorMapType, TensorRank, GlobalAddress, GlobalDim, + GlobalStrides, BoxDim, ElementStrides, Interleave, Swizzle, L2Promotion, + OobFill, hTensorMap); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's Global table /// with current process' addresses @@ -11344,6 +11559,45 @@ UR_DLLEXPORT 
ur_result_t UR_APICALL urGetSamplerProcAddrTable( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's TensorMapExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +UR_DLLEXPORT ur_result_t UR_APICALL urGetTensorMapExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_tensor_map_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + auto &dditable = ur_validation_layer::getContext()->urDdiTable.TensorMapExp; + + if (nullptr == pDdiTable) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (UR_MAJOR_VERSION(ur_validation_layer::getContext()->version) != + UR_MAJOR_VERSION(version) || + UR_MINOR_VERSION(ur_validation_layer::getContext()->version) > + UR_MINOR_VERSION(version)) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + + ur_result_t result = UR_RESULT_SUCCESS; + + dditable.pfnEncodeIm2ColExp = pDdiTable->pfnEncodeIm2ColExp; + pDdiTable->pfnEncodeIm2ColExp = + ur_validation_layer::urTensorMapEncodeIm2ColExp; + + dditable.pfnEncodeTiledExp = pDdiTable->pfnEncodeTiledExp; + pDdiTable->pfnEncodeTiledExp = + ur_validation_layer::urTensorMapEncodeTiledExp; + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's USM table /// with current process' addresses @@ -11711,6 +11965,11 @@ ur_result_t context_t::init(ur_dditable_t *dditable, UR_API_VERSION_CURRENT, &dditable->Sampler); } + if (UR_RESULT_SUCCESS == result) { + result = ur_validation_layer::urGetTensorMapExpProcAddrTable( + UR_API_VERSION_CURRENT, &dditable->TensorMapExp); + } + if (UR_RESULT_SUCCESS == result) { result = ur_validation_layer::urGetUSMProcAddrTable( UR_API_VERSION_CURRENT, &dditable->USM); 
diff --git a/source/loader/loader.def.in b/source/loader/loader.def.in index a336da153d..5ca6d99113 100644 --- a/source/loader/loader.def.in +++ b/source/loader/loader.def.in @@ -119,6 +119,7 @@ EXPORTS urGetProgramProcAddrTable urGetQueueProcAddrTable urGetSamplerProcAddrTable + urGetTensorMapExpProcAddrTable urGetUSMExpProcAddrTable urGetUSMProcAddrTable urGetUsmP2PExpProcAddrTable @@ -331,6 +332,11 @@ EXPORTS urPrintExpSamplerCubemapFilterMode urPrintExpSamplerCubemapProperties urPrintExpSamplerMipProperties + urPrintExpTensorMapDataTypeFlags + urPrintExpTensorMapInterleaveFlags + urPrintExpTensorMapL2PromotionFlags + urPrintExpTensorMapOobFillFlags + urPrintExpTensorMapSwizzleFlags urPrintExpWin32Handle urPrintFunction urPrintFunctionParams @@ -465,6 +471,8 @@ EXPORTS urPrintSamplerRetainParams urPrintSpecializationConstantInfo urPrintStructureType + urPrintTensorMapEncodeIm_2ColExpParams + urPrintTensorMapEncodeTiledExpParams urPrintUsmAdviceFlags urPrintUsmAllocInfo urPrintUsmAllocLocationDesc @@ -535,6 +543,8 @@ EXPORTS urSamplerGetNativeHandle urSamplerRelease urSamplerRetain + urTensorMapEncodeIm2ColExp + urTensorMapEncodeTiledExp urUSMDeviceAlloc urUSMFree urUSMGetMemAllocInfo diff --git a/source/loader/loader.map.in b/source/loader/loader.map.in index 59a8a8d107..706d28dd01 100644 --- a/source/loader/loader.map.in +++ b/source/loader/loader.map.in @@ -119,6 +119,7 @@ urGetProgramProcAddrTable; urGetQueueProcAddrTable; urGetSamplerProcAddrTable; + urGetTensorMapExpProcAddrTable; urGetUSMExpProcAddrTable; urGetUSMProcAddrTable; urGetUsmP2PExpProcAddrTable; @@ -331,6 +332,11 @@ urPrintExpSamplerCubemapFilterMode; urPrintExpSamplerCubemapProperties; urPrintExpSamplerMipProperties; + urPrintExpTensorMapDataTypeFlags; + urPrintExpTensorMapInterleaveFlags; + urPrintExpTensorMapL2PromotionFlags; + urPrintExpTensorMapOobFillFlags; + urPrintExpTensorMapSwizzleFlags; urPrintExpWin32Handle; urPrintFunction; urPrintFunctionParams; @@ -465,6 +471,8 @@ 
urPrintSamplerRetainParams; urPrintSpecializationConstantInfo; urPrintStructureType; + urPrintTensorMapEncodeIm_2ColExpParams; + urPrintTensorMapEncodeTiledExpParams; urPrintUsmAdviceFlags; urPrintUsmAllocInfo; urPrintUsmAllocLocationDesc; @@ -535,6 +543,8 @@ urSamplerGetNativeHandle; urSamplerRelease; urSamplerRetain; + urTensorMapEncodeIm2ColExp; + urTensorMapEncodeTiledExp; urUSMDeviceAlloc; urUSMFree; urUSMGetMemAllocInfo; diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index 86a6ad95a0..598e92c311 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -9364,6 +9364,149 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueNativeCommandExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urTensorMapEncodeIm2ColExp +__urdlllocal ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( + ur_device_handle_t hDevice, ///< [in] Handle of the device object. + ur_exp_tensor_map_data_type_flags_t + TensorMapType, ///< [in] Data type of the tensor object. + uint32_t TensorRank, ///< [in] Dimensionality of tensor; must be at least 3. + void * + GlobalAddress, ///< [in] Starting address of memory region described by tensor. + const uint64_t * + GlobalDim, ///< [in] Array containing tensor size (number of elements) along each of + ///< the TensorRank dimensions. + const uint64_t * + GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the + ///< tensorRank - 1 dimensions. + const int * + PixelBoxLowerCorner, ///< [in] Array containing DHW dimensions of lower box corner. + const int * + PixelBoxUpperCorner, ///< [in] Array containing DHW dimensions of upper box corner. + uint32_t ChannelsPerPixel, ///< [in] Number of channels per pixel. + uint32_t PixelsPerColumn, ///< [in] Number of pixels per column. + const uint32_t * + ElementStrides, ///< [in] Array containing traversal stride in each of the tensorRank + ///< dimensions. 
+ ur_exp_tensor_map_interleave_flags_t + Interleave, ///< [in] Type of interleaved layout the tensor addresses + ur_exp_tensor_map_swizzle_flags_t + Swizzle, ///< [in] Bank swizzling pattern inside shared memory + ur_exp_tensor_map_l2_promotion_flags_t + L2Promotion, ///< [in] L2 promotion size. + ur_exp_tensor_map_oob_fill_flags_t + OobFill, ///< [in] Indicate whether zero or special NaN constant will be used to + ///< fill out-of-bound elements. + ur_exp_tensor_map_handle_t + *hTensorMap ///< [out] Handle of the tensor map object. +) { + ur_result_t result = UR_RESULT_SUCCESS; + + [[maybe_unused]] auto context = getContext(); + + // extract platform's function pointer table + auto dditable = reinterpret_cast<ur_device_object_t *>(hDevice)->dditable; + auto pfnEncodeIm2ColExp = dditable->ur.TensorMapExp.pfnEncodeIm2ColExp; + if (nullptr == pfnEncodeIm2ColExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hDevice = reinterpret_cast<ur_device_object_t *>(hDevice)->handle; + + // forward to device-platform + result = pfnEncodeIm2ColExp( + hDevice, TensorMapType, TensorRank, GlobalAddress, GlobalDim, + GlobalStrides, PixelBoxLowerCorner, PixelBoxUpperCorner, + ChannelsPerPixel, PixelsPerColumn, ElementStrides, Interleave, Swizzle, + L2Promotion, OobFill, hTensorMap); + + if (UR_RESULT_SUCCESS != result) { + return result; + } + + try { + // convert platform handle to loader handle + *hTensorMap = reinterpret_cast<ur_exp_tensor_map_handle_t>( + context->factories.ur_exp_tensor_map_factory.getInstance( + *hTensorMap, dditable)); + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urTensorMapEncodeTiledExp +__urdlllocal ur_result_t UR_APICALL urTensorMapEncodeTiledExp( + ur_device_handle_t hDevice, ///< [in] Handle of the device object.
+ ur_exp_tensor_map_data_type_flags_t + TensorMapType, ///< [in] Data type of the tensor object. + uint32_t TensorRank, ///< [in] Dimensionality of tensor; must be at least 3. + void * + GlobalAddress, ///< [in] Starting address of memory region described by tensor. + const uint64_t * + GlobalDim, ///< [in] Array containing tensor size (number of elements) along each of + ///< the TensorRank dimensions. + const uint64_t * + GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the + ///< tensorRank - 1 dimensions. + const uint32_t * + BoxDim, ///< [in] Array containing traversal box size (number of elements) along + ///< each of the tensorRank dimensions. Specifies how many elements to be + ///< traversed along each tensor dimension. + const uint32_t * + ElementStrides, ///< [in] Array containing traversal stride in each of the tensorRank + ///< dimensions. + ur_exp_tensor_map_interleave_flags_t + Interleave, ///< [in] Type of interleaved layout the tensor addresses + ur_exp_tensor_map_swizzle_flags_t + Swizzle, ///< [in] Bank swizzling pattern inside shared memory + ur_exp_tensor_map_l2_promotion_flags_t + L2Promotion, ///< [in] L2 promotion size. + ur_exp_tensor_map_oob_fill_flags_t + OobFill, ///< [in] Indicate whether zero or special NaN constant will be used to + ///< fill out-of-bound elements. + ur_exp_tensor_map_handle_t + *hTensorMap ///< [out] Handle of the tensor map object.
+) { + ur_result_t result = UR_RESULT_SUCCESS; + + [[maybe_unused]] auto context = getContext(); + + // extract platform's function pointer table + auto dditable = reinterpret_cast<ur_device_object_t *>(hDevice)->dditable; + auto pfnEncodeTiledExp = dditable->ur.TensorMapExp.pfnEncodeTiledExp; + if (nullptr == pfnEncodeTiledExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hDevice = reinterpret_cast<ur_device_object_t *>(hDevice)->handle; + + // forward to device-platform + result = pfnEncodeTiledExp(hDevice, TensorMapType, TensorRank, + GlobalAddress, GlobalDim, GlobalStrides, BoxDim, + ElementStrides, Interleave, Swizzle, L2Promotion, + OobFill, hTensorMap); + + if (UR_RESULT_SUCCESS != result) { + return result; + } + + try { + // convert platform handle to loader handle + *hTensorMap = reinterpret_cast<ur_exp_tensor_map_handle_t>( + context->factories.ur_exp_tensor_map_factory.getInstance( + *hTensorMap, dditable)); + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + return result; +} + } // namespace ur_loader #if defined(__cplusplus) @@ -10524,6 +10667,63 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetSamplerProcAddrTable( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's TensorMapExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +UR_DLLEXPORT ur_result_t UR_APICALL urGetTensorMapExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_tensor_map_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + if (nullptr == pDdiTable) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (ur_loader::getContext()->version < version) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + + ur_result_t result =
UR_RESULT_SUCCESS; + + // Load the device-platform DDI tables + for (auto &platform : ur_loader::getContext()->platforms) { + if (platform.initStatus != UR_RESULT_SUCCESS) { + continue; + } + auto getTable = reinterpret_cast<ur_pfnGetTensorMapExpProcAddrTable_t>( + ur_loader::LibLoader::getFunctionPtr( + platform.handle.get(), "urGetTensorMapExpProcAddrTable")); + if (!getTable) { + continue; + } + platform.initStatus = + getTable(version, &platform.dditable.ur.TensorMapExp); + } + + if (UR_RESULT_SUCCESS == result) { + if (ur_loader::getContext()->platforms.size() != 1 || + ur_loader::getContext()->forceIntercept) { + // return pointers to loader's DDIs + pDdiTable->pfnEncodeIm2ColExp = + ur_loader::urTensorMapEncodeIm2ColExp; + pDdiTable->pfnEncodeTiledExp = ur_loader::urTensorMapEncodeTiledExp; + } else { + // return pointers directly to platform's DDIs + *pDdiTable = ur_loader::getContext() + ->platforms.front() + .dditable.ur.TensorMapExp; + } + } + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's USM table /// with current process' addresses diff --git a/source/loader/ur_ldrddi.hpp b/source/loader/ur_ldrddi.hpp index f748500c73..309fb6cc65 100644 --- a/source/loader/ur_ldrddi.hpp +++ b/source/loader/ur_ldrddi.hpp @@ -87,6 +87,10 @@ using ur_exp_command_buffer_command_factory_t = singleton_factory_t<ur_exp_command_buffer_command_object_t, ur_exp_command_buffer_command_handle_t>; +using ur_exp_tensor_map_object_t = object_t<ur_exp_tensor_map_handle_t>; +using ur_exp_tensor_map_factory_t = + singleton_factory_t<ur_exp_tensor_map_object_t, ur_exp_tensor_map_handle_t>; + struct handle_factories { ur_adapter_factory_t ur_adapter_factory; ur_platform_factory_t ur_platform_factory; @@ -105,6 +109,7 @@ struct handle_factories { ur_exp_command_buffer_factory_t ur_exp_command_buffer_factory; ur_exp_command_buffer_command_factory_t ur_exp_command_buffer_command_factory; + ur_exp_tensor_map_factory_t ur_exp_tensor_map_factory; }; } // namespace ur_loader diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index 3340363737..3129eec2f7 100644 ---
a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -9551,4 +9551,152 @@ ur_result_t UR_APICALL urEnqueueNativeCommandExp( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Encode tensor map with image data +/// +/// @details +/// - Map encode using im2col. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_EXP_TENSOR_MAP_DATA_TYPE_FLAGS_MASK & TensorMapType` +/// + `::UR_EXP_TENSOR_MAP_INTERLEAVE_FLAGS_MASK & Interleave` +/// + `::UR_EXP_TENSOR_MAP_SWIZZLE_FLAGS_MASK & Swizzle` +/// + `::UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAGS_MASK & L2Promotion` +/// + `::UR_EXP_TENSOR_MAP_OOB_FILL_FLAGS_MASK & OobFill` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == GlobalAddress` +/// + `NULL == GlobalDim` +/// + `NULL == GlobalStrides` +/// + `NULL == PixelBoxLowerCorner` +/// + `NULL == PixelBoxUpperCorner` +/// + `NULL == ElementStrides` +/// + `NULL == hTensorMap` +ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( + ur_device_handle_t hDevice, ///< [in] Handle of the device object. + ur_exp_tensor_map_data_type_flags_t + TensorMapType, ///< [in] Data type of the tensor object. + uint32_t TensorRank, ///< [in] Dimensionality of tensor; must be at least 3. + void * + GlobalAddress, ///< [in] Starting address of memory region described by tensor. + const uint64_t * + GlobalDim, ///< [in] Array containing tensor size (number of elements) along each of + ///< the TensorRank dimensions. + const uint64_t * + GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the + ///< tensorRank - 1 dimensions. + const int * + PixelBoxLowerCorner, ///< [in] Array containing DHW dimensions of lower box corner. 
+ const int * + PixelBoxUpperCorner, ///< [in] Array containing DHW dimensions of upper box corner. + uint32_t ChannelsPerPixel, ///< [in] Number of channels per pixel. + uint32_t PixelsPerColumn, ///< [in] Number of pixels per column. + const uint32_t * + ElementStrides, ///< [in] Array containing traversal stride in each of the tensorRank + ///< dimensions. + ur_exp_tensor_map_interleave_flags_t + Interleave, ///< [in] Type of interleaved layout the tensor addresses + ur_exp_tensor_map_swizzle_flags_t + Swizzle, ///< [in] Bank swizzling pattern inside shared memory + ur_exp_tensor_map_l2_promotion_flags_t + L2Promotion, ///< [in] L2 promotion size. + ur_exp_tensor_map_oob_fill_flags_t + OobFill, ///< [in] Indicate whether zero or special NaN constant will be used to + ///< fill out-of-bound elements. + ur_exp_tensor_map_handle_t + *hTensorMap ///< [out] Handle of the tensor map object. + ) try { + auto pfnEncodeIm2ColExp = + ur_lib::getContext()->urDdiTable.TensorMapExp.pfnEncodeIm2ColExp; + if (nullptr == pfnEncodeIm2ColExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnEncodeIm2ColExp(hDevice, TensorMapType, TensorRank, GlobalAddress, + GlobalDim, GlobalStrides, PixelBoxLowerCorner, + PixelBoxUpperCorner, ChannelsPerPixel, + PixelsPerColumn, ElementStrides, Interleave, + Swizzle, L2Promotion, OobFill, hTensorMap); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Encode tensor map with tiled data +/// +/// @details +/// - Tiled map encode. 
+/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_EXP_TENSOR_MAP_DATA_TYPE_FLAGS_MASK & TensorMapType` +/// + `::UR_EXP_TENSOR_MAP_INTERLEAVE_FLAGS_MASK & Interleave` +/// + `::UR_EXP_TENSOR_MAP_SWIZZLE_FLAGS_MASK & Swizzle` +/// + `::UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAGS_MASK & L2Promotion` +/// + `::UR_EXP_TENSOR_MAP_OOB_FILL_FLAGS_MASK & OobFill` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == GlobalAddress` +/// + `NULL == GlobalDim` +/// + `NULL == GlobalStrides` +/// + `NULL == BoxDim` +/// + `NULL == ElementStrides` +/// + `NULL == hTensorMap` +ur_result_t UR_APICALL urTensorMapEncodeTiledExp( + ur_device_handle_t hDevice, ///< [in] Handle of the device object. + ur_exp_tensor_map_data_type_flags_t + TensorMapType, ///< [in] Data type of the tensor object. + uint32_t TensorRank, ///< [in] Dimensionality of tensor; must be at least 3. + void * + GlobalAddress, ///< [in] Starting address of memory region described by tensor. + const uint64_t * + GlobalDim, ///< [in] Array containing tensor size (number of elements) along each of + ///< the TensorRank dimensions. + const uint64_t * + GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the + ///< tensorRank - 1 dimensions. + const uint32_t * + BoxDim, ///< [in] Array containing traversal box size (number of elements) along + ///< each of the tensorRank dimensions. Specifies how many elements to be + ///< traversed along each tensor dimension. + const uint32_t * + ElementStrides, ///< [in] Array containing traversal stride in each of the tensorRank + ///< dimensions.
+ ur_exp_tensor_map_interleave_flags_t + Interleave, ///< [in] Type of interleaved layout the tensor addresses + ur_exp_tensor_map_swizzle_flags_t + Swizzle, ///< [in] Bank swizzling pattern inside shared memory + ur_exp_tensor_map_l2_promotion_flags_t + L2Promotion, ///< [in] L2 promotion size. + ur_exp_tensor_map_oob_fill_flags_t + OobFill, ///< [in] Indicate whether zero or special NaN constant will be used to + ///< fill out-of-bound elements. + ur_exp_tensor_map_handle_t + *hTensorMap ///< [out] Handle of the tensor map object. + ) try { + auto pfnEncodeTiledExp = + ur_lib::getContext()->urDdiTable.TensorMapExp.pfnEncodeTiledExp; + if (nullptr == pfnEncodeTiledExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnEncodeTiledExp(hDevice, TensorMapType, TensorRank, GlobalAddress, + GlobalDim, GlobalStrides, BoxDim, ElementStrides, + Interleave, Swizzle, L2Promotion, OobFill, + hTensorMap); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + } // extern "C" diff --git a/source/loader/ur_libddi.cpp b/source/loader/ur_libddi.cpp index 4d88bb2044..910cbfe607 100644 --- a/source/loader/ur_libddi.cpp +++ b/source/loader/ur_libddi.cpp @@ -99,6 +99,11 @@ __urdlllocal ur_result_t context_t::ddiInit() { &urDdiTable.Sampler); } + if (UR_RESULT_SUCCESS == result) { + result = urGetTensorMapExpProcAddrTable(UR_API_VERSION_CURRENT, + &urDdiTable.TensorMapExp); + } + if (UR_RESULT_SUCCESS == result) { result = urGetUSMProcAddrTable(UR_API_VERSION_CURRENT, &urDdiTable.USM); } diff --git a/source/loader/ur_print.cpp b/source/loader/ur_print.cpp index 6b1cbfd5ee..690f562af4 100644 --- a/source/loader/ur_print.cpp +++ b/source/loader/ur_print.cpp @@ -1109,6 +1109,49 @@ ur_result_t urPrintExpEnqueueNativeCommandProperties( return str_copy(&ss, buffer, buff_size, out_size); } +ur_result_t +urPrintExpTensorMapDataTypeFlags(enum ur_exp_tensor_map_data_type_flag_t value, + char *buffer, const size_t buff_size, + size_t *out_size) { + std::stringstream 
ss; + ss << value; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t urPrintExpTensorMapInterleaveFlags( + enum ur_exp_tensor_map_interleave_flag_t value, char *buffer, + const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << value; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t urPrintExpTensorMapL2PromotionFlags( + enum ur_exp_tensor_map_l2_promotion_flag_t value, char *buffer, + const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << value; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t +urPrintExpTensorMapSwizzleFlags(enum ur_exp_tensor_map_swizzle_flag_t value, + char *buffer, const size_t buff_size, + size_t *out_size) { + std::stringstream ss; + ss << value; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t +urPrintExpTensorMapOobFillFlags(enum ur_exp_tensor_map_oob_fill_flag_t value, + char *buffer, const size_t buff_size, + size_t *out_size) { + std::stringstream ss; + ss << value; + return str_copy(&ss, buffer, buff_size, out_size); +} + ur_result_t urPrintAdapterGetParams(const struct ur_adapter_get_params_t *params, char *buffer, const size_t buff_size, @@ -2508,6 +2551,22 @@ ur_result_t urPrintSamplerCreateWithNativeHandleParams( return str_copy(&ss, buffer, buff_size, out_size); } +ur_result_t urPrintTensorMapEncodeIm_2ColExpParams( + const struct ur_tensor_map_encode_im_2_col_exp_params_t *params, + char *buffer, const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t urPrintTensorMapEncodeTiledExpParams( + const struct ur_tensor_map_encode_tiled_exp_params_t *params, char *buffer, + const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + ur_result_t urPrintUsmHostAllocParams(const struct ur_usm_host_alloc_params_t *params, char *buffer, 
const size_t buff_size, diff --git a/source/ur_api.cpp b/source/ur_api.cpp index 853d61472e..5d1632ce18 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -8099,3 +8099,130 @@ ur_result_t UR_APICALL urEnqueueNativeCommandExp( ur_result_t result = UR_RESULT_SUCCESS; return result; } + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Encode tensor map with image data +/// +/// @details +/// - Map encode using im2col. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_EXP_TENSOR_MAP_DATA_TYPE_FLAGS_MASK & TensorMapType` +/// + `::UR_EXP_TENSOR_MAP_INTERLEAVE_FLAGS_MASK & Interleave` +/// + `::UR_EXP_TENSOR_MAP_SWIZZLE_FLAGS_MASK & Swizzle` +/// + `::UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAGS_MASK & L2Promotion` +/// + `::UR_EXP_TENSOR_MAP_OOB_FILL_FLAGS_MASK & OobFill` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == GlobalAddress` +/// + `NULL == GlobalDim` +/// + `NULL == GlobalStrides` +/// + `NULL == PixelBoxLowerCorner` +/// + `NULL == PixelBoxUpperCorner` +/// + `NULL == ElementStrides` +/// + `NULL == hTensorMap` +ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( + ur_device_handle_t hDevice, ///< [in] Handle of the device object. + ur_exp_tensor_map_data_type_flags_t + TensorMapType, ///< [in] Data type of the tensor object. + uint32_t TensorRank, ///< [in] Dimensionality of tensor; must be at least 3. + void * + GlobalAddress, ///< [in] Starting address of memory region described by tensor. + const uint64_t * + GlobalDim, ///< [in] Array containing tensor size (number of elements) along each of + ///< the TensorRank dimensions. + const uint64_t * + GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the + ///< tensorRank - 1 dimensions. 
+ const int * + PixelBoxLowerCorner, ///< [in] Array containing DHW dimensions of lower box corner. + const int * + PixelBoxUpperCorner, ///< [in] Array containing DHW dimensions of upper box corner. + uint32_t ChannelsPerPixel, ///< [in] Number of channels per pixel. + uint32_t PixelsPerColumn, ///< [in] Number of pixels per column. + const uint32_t * + ElementStrides, ///< [in] Array containing traversal stride in each of the tensorRank + ///< dimensions. + ur_exp_tensor_map_interleave_flags_t + Interleave, ///< [in] Type of interleaved layout the tensor addresses + ur_exp_tensor_map_swizzle_flags_t + Swizzle, ///< [in] Bank swizzling pattern inside shared memory + ur_exp_tensor_map_l2_promotion_flags_t + L2Promotion, ///< [in] L2 promotion size. + ur_exp_tensor_map_oob_fill_flags_t + OobFill, ///< [in] Indicate whether zero or special NaN constant will be used to + ///< fill out-of-bound elements. + ur_exp_tensor_map_handle_t + *hTensorMap ///< [out] Handle of the tensor map object. +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Encode tensor map with tiled data +/// +/// @details +/// - Tiled map encode. 
+/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_EXP_TENSOR_MAP_DATA_TYPE_FLAGS_MASK & TensorMapType` +/// + `::UR_EXP_TENSOR_MAP_INTERLEAVE_FLAGS_MASK & Interleave` +/// + `::UR_EXP_TENSOR_MAP_SWIZZLE_FLAGS_MASK & Swizzle` +/// + `::UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAGS_MASK & L2Promotion` +/// + `::UR_EXP_TENSOR_MAP_OOB_FILL_FLAGS_MASK & OobFill` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == GlobalAddress` +/// + `NULL == GlobalDim` +/// + `NULL == GlobalStrides` +/// + `NULL == BoxDim` +/// + `NULL == ElementStrides` +/// + `NULL == hTensorMap` +ur_result_t UR_APICALL urTensorMapEncodeTiledExp( + ur_device_handle_t hDevice, ///< [in] Handle of the device object. + ur_exp_tensor_map_data_type_flags_t + TensorMapType, ///< [in] Data type of the tensor object. + uint32_t TensorRank, ///< [in] Dimensionality of tensor; must be at least 3. + void * + GlobalAddress, ///< [in] Starting address of memory region described by tensor. + const uint64_t * + GlobalDim, ///< [in] Array containing tensor size (number of elements) along each of + ///< the TensorRank dimensions. + const uint64_t * + GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the + ///< tensorRank - 1 dimensions. + const uint32_t * + BoxDim, ///< [in] Array containing traversal box size (number of elments) along + ///< each of the tensorRank dimensions. Specifies how many elements to be + ///< traversed along each tensor dimension. + const uint32_t * + ElementStrides, ///< [in] Array containing traversal stride in each of the tensorRank + ///< dimensions. 
+ ur_exp_tensor_map_interleave_flags_t + Interleave, ///< [in] Type of interleaved layout the tensor addresses + ur_exp_tensor_map_swizzle_flags_t + Swizzle, ///< [in] Bank swizzling pattern inside shared memory + ur_exp_tensor_map_l2_promotion_flags_t + L2Promotion, ///< [in] L2 promotion size. + ur_exp_tensor_map_oob_fill_flags_t + OobFill, ///< [in] Indicate whether zero or special NaN constant will be used to + ///< fill out-of-bound elements. + ur_exp_tensor_map_handle_t + *hTensorMap ///< [out] Handle of the tensor map object. +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} From 69038e603e9589f89fc4596b6c3e5c6a9139eab8 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Wed, 3 Jul 2024 11:07:18 +0100 Subject: [PATCH 02/12] Add CUDA impl --- source/adapters/cuda/CMakeLists.txt | 1 + source/adapters/cuda/tensor_map.cpp | 142 ++++++++++++++++++++++++++++ 2 files changed, 143 insertions(+) create mode 100644 source/adapters/cuda/tensor_map.cpp diff --git a/source/adapters/cuda/CMakeLists.txt b/source/adapters/cuda/CMakeLists.txt index b6b153a5d8..3d0418fd07 100644 --- a/source/adapters/cuda/CMakeLists.txt +++ b/source/adapters/cuda/CMakeLists.txt @@ -38,6 +38,7 @@ add_ur_adapter(${TARGET_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/queue.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler.hpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/tensor_map.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tracing.cpp ${CMAKE_CURRENT_SOURCE_DIR}/usm.cpp ${CMAKE_CURRENT_SOURCE_DIR}/usm_p2p.cpp diff --git a/source/adapters/cuda/tensor_map.cpp b/source/adapters/cuda/tensor_map.cpp new file mode 100644 index 0000000000..9d9559fd09 --- /dev/null +++ b/source/adapters/cuda/tensor_map.cpp @@ -0,0 +1,142 @@ +//===--------- tensor_map.cpp - CUDA Adapter ------------------------------===// +// +// Copyright (C) 2024 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. 
See LICENSE.TXT
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <cuda.h>
+#include <ur_api.h>
+
+#include "context.hpp"
+
+/// Object behind ur_exp_tensor_map_handle_t for the CUDA adapter: it wraps
+/// the CUDA driver's CUtensorMap descriptor that the encode entry points
+/// below fill in.
+struct ur_exp_tensor_map_handle_t_ {
+  CUtensorMap Map;
+};
+
+// Helper for the convertUrTo* functions below. Expands to an early return of
+// CUTYPE when the URTYPE bit is set in the enclosing function's `UrType`
+// parameter, so the first matching CONVERT line decides the result.
+#define CONVERT(URTYPE, CUTYPE)                                                \
+  if (URTYPE & UrType)                                                         \
+    return CUTYPE;
+
+/// Translate a UR tensor-map data type flag into the CUDA driver enum.
+///
+/// Throws UR_RESULT_ERROR_INVALID_ENUMERATION when none of the known flag
+/// bits is set. The value is a ur_result_t so that the `catch (ur_result_t)`
+/// handlers in the entry points turn it into a return code instead of letting
+/// an exception escape the exported API.
+inline CUtensorMapDataType
+convertUrToCuDataType(ur_exp_tensor_map_data_type_flags_t UrType) {
+  CONVERT(UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT8,
+          CU_TENSOR_MAP_DATA_TYPE_UINT8);
+  CONVERT(UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT16,
+          CU_TENSOR_MAP_DATA_TYPE_UINT16);
+  CONVERT(UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT32,
+          CU_TENSOR_MAP_DATA_TYPE_UINT32);
+  CONVERT(UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT32,
+          CU_TENSOR_MAP_DATA_TYPE_INT32);
+  CONVERT(UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT64,
+          CU_TENSOR_MAP_DATA_TYPE_UINT64);
+  CONVERT(UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT64,
+          CU_TENSOR_MAP_DATA_TYPE_INT64);
+  CONVERT(UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT16,
+          CU_TENSOR_MAP_DATA_TYPE_FLOAT16);
+  CONVERT(UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32,
+          CU_TENSOR_MAP_DATA_TYPE_FLOAT32);
+  CONVERT(UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT64,
+          CU_TENSOR_MAP_DATA_TYPE_FLOAT64);
+  CONVERT(UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_BFLOAT16,
+          CU_TENSOR_MAP_DATA_TYPE_BFLOAT16);
+  CONVERT(UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32_FTZ,
+          CU_TENSOR_MAP_DATA_TYPE_FLOAT32_FTZ);
+  CONVERT(UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32,
+          CU_TENSOR_MAP_DATA_TYPE_TFLOAT32);
+  CONVERT(UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32_FTZ,
+          CU_TENSOR_MAP_DATA_TYPE_TFLOAT32_FTZ);
+  throw UR_RESULT_ERROR_INVALID_ENUMERATION;
+}
+
+/// Translate a UR interleave flag into the CUDA driver enum.
+/// Throws UR_RESULT_ERROR_INVALID_ENUMERATION when no known flag bit is set.
+CUtensorMapInterleave
+convertUrToCuInterleave(ur_exp_tensor_map_interleave_flags_t UrType) {
+  CONVERT(UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_NONE,
+          CU_TENSOR_MAP_INTERLEAVE_NONE);
+  CONVERT(UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_16B, CU_TENSOR_MAP_INTERLEAVE_16B);
+  CONVERT(UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_32B, CU_TENSOR_MAP_INTERLEAVE_32B);
+  throw UR_RESULT_ERROR_INVALID_ENUMERATION;
+}
+
+/// Translate a UR swizzle flag into the CUDA driver enum.
+/// Throws UR_RESULT_ERROR_INVALID_ENUMERATION when no known flag bit is set.
+CUtensorMapSwizzle
+convertUrToCuSwizzle(ur_exp_tensor_map_swizzle_flags_t UrType) {
+  CONVERT(UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_NONE, CU_TENSOR_MAP_SWIZZLE_NONE);
+  CONVERT(UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_32B, CU_TENSOR_MAP_SWIZZLE_32B);
+  CONVERT(UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_64B, CU_TENSOR_MAP_SWIZZLE_64B);
+  CONVERT(UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_128B, CU_TENSOR_MAP_SWIZZLE_128B);
+  throw UR_RESULT_ERROR_INVALID_ENUMERATION;
+}
+
+/// Translate a UR L2 promotion flag into the CUDA driver enum.
+/// Throws UR_RESULT_ERROR_INVALID_ENUMERATION when no known flag bit is set.
+CUtensorMapL2promotion
+convertUrToL2promotion(ur_exp_tensor_map_l2_promotion_flags_t UrType) {
+  CONVERT(UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_NONE,
+          CU_TENSOR_MAP_L2_PROMOTION_NONE);
+  CONVERT(UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_64B,
+          CU_TENSOR_MAP_L2_PROMOTION_L2_64B);
+  CONVERT(UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_128B,
+          CU_TENSOR_MAP_L2_PROMOTION_L2_128B);
+  CONVERT(UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_256B,
+          CU_TENSOR_MAP_L2_PROMOTION_L2_256B);
+  throw UR_RESULT_ERROR_INVALID_ENUMERATION;
+}
+
+/// Translate a UR out-of-bounds fill flag into the CUDA driver enum.
+/// Throws UR_RESULT_ERROR_INVALID_ENUMERATION when no known flag bit is set.
+CUtensorMapFloatOOBfill
+convertUrToCuOOBfill(ur_exp_tensor_map_oob_fill_flags_t UrType) {
+  CONVERT(UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_NONE,
+          CU_TENSOR_MAP_FLOAT_OOB_FILL_NONE);
+  CONVERT(UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_REQUEST_ZERO_FMA,
+          CU_TENSOR_MAP_FLOAT_OOB_FILL_NAN_REQUEST_ZERO_FMA);
+  throw UR_RESULT_ERROR_INVALID_ENUMERATION;
+}
+
+/// CUDA implementation of urTensorMapEncodeIm2ColExp.
+///
+/// Converts the UR flag arguments to their CUDA driver equivalents and
+/// forwards every argument to cuTensorMapEncodeIm2col, writing the encoded
+/// descriptor into the `Map` member of the object `*hTensorMap` points to.
+///
+/// Returns UR_RESULT_SUCCESS on success; otherwise the ur_result_t thrown by
+/// UR_CHECK_ERROR or by one of the convertUrTo* helpers
+/// (UR_RESULT_ERROR_INVALID_ENUMERATION for an unrecognised flag value).
+UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp(
+    ur_device_handle_t hDevice,
+    ur_exp_tensor_map_data_type_flags_t TensorMapType, uint32_t TensorRank,
+    void *GlobalAddress, const uint64_t *GlobalDim,
+    const uint64_t *GlobalStrides, const int *PixelBoxLowerCorner,
+    const int *PixelBoxUpperCorner, uint32_t ChannelsPerPixel,
+    uint32_t PixelsPerColumn, const uint32_t *ElementStrides,
+    ur_exp_tensor_map_interleave_flags_t Interleave,
+    ur_exp_tensor_map_swizzle_flags_t Swizzle,
+    ur_exp_tensor_map_l2_promotion_flags_t L2Promotion,
+    ur_exp_tensor_map_oob_fill_flags_t OobFill,
+    ur_exp_tensor_map_handle_t *hTensorMap) {
+  try {
+    // Activated inside the try block so that a ur_result_t raised while
+    // switching context is returned as well.
+    // NOTE(review): assumes ScopedContext reports failure by throwing
+    // ur_result_t - confirm against context.hpp.
+    ScopedContext Active(hDevice);
+    // NOTE(review): this reads *hTensorMap, so the caller must already have
+    // stored a valid handle there even though the parameter is documented as
+    // [out] - confirm who is meant to own/allocate the handle object.
+    UR_CHECK_ERROR(cuTensorMapEncodeIm2col(
+        &(*hTensorMap)->Map, convertUrToCuDataType(TensorMapType), TensorRank,
+        GlobalAddress, GlobalDim, GlobalStrides, PixelBoxLowerCorner,
+        PixelBoxUpperCorner, ChannelsPerPixel, PixelsPerColumn, ElementStrides,
+        convertUrToCuInterleave(Interleave), convertUrToCuSwizzle(Swizzle),
+        convertUrToL2promotion(L2Promotion), convertUrToCuOOBfill(OobFill)));
+  } catch (ur_result_t Err) {
+    return Err;
+  }
+  return UR_RESULT_SUCCESS;
+}
+
+/// CUDA implementation of urTensorMapEncodeTiledExp.
+///
+/// Converts the UR flag arguments to their CUDA driver equivalents and
+/// forwards every argument to cuTensorMapEncodeTiled, writing the encoded
+/// descriptor into the `Map` member of the object `*hTensorMap` points to.
+///
+/// Returns UR_RESULT_SUCCESS on success; otherwise the ur_result_t thrown by
+/// UR_CHECK_ERROR or by one of the convertUrTo* helpers
+/// (UR_RESULT_ERROR_INVALID_ENUMERATION for an unrecognised flag value).
+UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeTiledExp(
+    ur_device_handle_t hDevice,
+    ur_exp_tensor_map_data_type_flags_t TensorMapType, uint32_t TensorRank,
+    void *GlobalAddress, const uint64_t *GlobalDim,
+    const uint64_t *GlobalStrides, const uint32_t *BoxDim,
+    const uint32_t *ElementStrides,
+    ur_exp_tensor_map_interleave_flags_t Interleave,
+    ur_exp_tensor_map_swizzle_flags_t Swizzle,
+    ur_exp_tensor_map_l2_promotion_flags_t L2Promotion,
+    ur_exp_tensor_map_oob_fill_flags_t OobFill,
+    ur_exp_tensor_map_handle_t *hTensorMap) {
+  try {
+    // Activated inside the try block so that a ur_result_t raised while
+    // switching context is returned as well.
+    // NOTE(review): assumes ScopedContext reports failure by throwing
+    // ur_result_t - confirm against context.hpp.
+    ScopedContext Active(hDevice);
+    // NOTE(review): this reads *hTensorMap, so the caller must already have
+    // stored a valid handle there even though the parameter is documented as
+    // [out] - confirm who is meant to own/allocate the handle object.
+    UR_CHECK_ERROR(cuTensorMapEncodeTiled(
+        &(*hTensorMap)->Map, convertUrToCuDataType(TensorMapType), TensorRank,
+        GlobalAddress, GlobalDim, GlobalStrides, BoxDim, ElementStrides,
+        convertUrToCuInterleave(Interleave), convertUrToCuSwizzle(Swizzle),
+        convertUrToL2promotion(L2Promotion), convertUrToCuOOBfill(OobFill)));
+  } catch (ur_result_t Err) {
+    return Err;
+  }
+  return UR_RESULT_SUCCESS;
+}
From 05492e66beea3e0f2218e6c2c63d2241e1404c1e Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Thu, 4 Jul 2024 15:27:34 +0100 Subject: [PATCH 03/12] Respond to comments - Check that TensorDim < 3 using yaml returns: .
- Rename some things and remove copypasta --- include/ur_api.h | 24 ++++++++++------- scripts/core/exp-tensor-map.yml | 22 ++++++++++------ source/adapters/cuda/tensor_map.cpp | 12 ++++----- source/adapters/mock/ur_mockddi.cpp | 18 ++++++------- source/loader/layers/tracing/ur_trcddi.cpp | 18 ++++++------- source/loader/layers/validation/ur_valddi.cpp | 26 ++++++++++++------- source/loader/ur_ldrddi.cpp | 18 ++++++------- source/loader/ur_libapi.cpp | 22 +++++++++------- source/ur_api.cpp | 22 +++++++++------- 9 files changed, 104 insertions(+), 78 deletions(-) diff --git a/include/ur_api.h b/include/ur_api.h index 13334a9c8e..8d4e6e5972 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -10166,7 +10166,7 @@ urEnqueueNativeCommandExp( #if !defined(__GNUC__) #pragma endregion #endif -// Intel 'oneAPI' Unified Runtime Experimental API for enqueuing work through native APIs +// Intel 'oneAPI' Unified Runtime Experimental API for mapping tensor objects #if !defined(__GNUC__) #pragma region tensor map(experimental) #endif @@ -10287,6 +10287,8 @@ typedef enum ur_exp_tensor_map_oob_fill_flag_t { /// + `NULL == PixelBoxUpperCorner` /// + `NULL == ElementStrides` /// + `NULL == hTensorMap` +/// - ::UR_RESULT_ERROR_INVALID_ARGUMENT +/// + `TensorRank < 3` UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( ur_device_handle_t hDevice, ///< [in] Handle of the device object. @@ -10296,18 +10298,18 @@ urTensorMapEncodeIm2ColExp( const uint64_t *GlobalDim, ///< [in] Array containing tensor size (number of elements) along each of ///< the TensorRank dimensions. const uint64_t *GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the - ///< tensorRank - 1 dimensions. + ///< TensorRank - 1 dimensions. const int *PixelBoxLowerCorner, ///< [in] Array containing DHW dimensions of lower box corner. const int *PixelBoxUpperCorner, ///< [in] Array containing DHW dimensions of upper box corner. 
uint32_t ChannelsPerPixel, ///< [in] Number of channels per pixel. uint32_t PixelsPerColumn, ///< [in] Number of pixels per column. - const uint32_t *ElementStrides, ///< [in] Array containing traversal stride in each of the tensorRank + const uint32_t *ElementStrides, ///< [in] Array containing traversal stride in each of the TensorRank ///< dimensions. ur_exp_tensor_map_interleave_flags_t Interleave, ///< [in] Type of interleaved layout the tensor addresses ur_exp_tensor_map_swizzle_flags_t Swizzle, ///< [in] Bank swizzling pattern inside shared memory ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, ///< [in] L2 promotion size. - ur_exp_tensor_map_oob_fill_flags_t OobFill, ///< [in] Indicate whether zero or special NaN constant will be used to - ///< fill out-of-bound elements. + ur_exp_tensor_map_oob_fill_flags_t OobFill, ///< [in] Indicates whether zero or special NaN constant will be used to + ///< fill out-of-bounds elements. ur_exp_tensor_map_handle_t *hTensorMap ///< [out] Handle of the tensor map object. ); @@ -10337,6 +10339,8 @@ urTensorMapEncodeIm2ColExp( /// + `NULL == BoxDim` /// + `NULL == ElementStrides` /// + `NULL == hTensorMap` +/// - ::UR_RESULT_ERROR_INVALID_ARGUMENT +/// + `TensorRank < 3` UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeTiledExp( ur_device_handle_t hDevice, ///< [in] Handle of the device object. @@ -10346,17 +10350,17 @@ urTensorMapEncodeTiledExp( const uint64_t *GlobalDim, ///< [in] Array containing tensor size (number of elements) along each of ///< the TensorRank dimensions. const uint64_t *GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the - ///< tensorRank - 1 dimensions. + ///< TensorRank - 1 dimensions. const uint32_t *BoxDim, ///< [in] Array containing traversal box size (number of elments) along - ///< each of the tensorRank dimensions. Specifies how many elements to be + ///< each of the TensorRank dimensions. 
Specifies how many elements to be ///< traversed along each tensor dimension. - const uint32_t *ElementStrides, ///< [in] Array containing traversal stride in each of the tensorRank + const uint32_t *ElementStrides, ///< [in] Array containing traversal stride in each of the TensorRank ///< dimensions. ur_exp_tensor_map_interleave_flags_t Interleave, ///< [in] Type of interleaved layout the tensor addresses ur_exp_tensor_map_swizzle_flags_t Swizzle, ///< [in] Bank swizzling pattern inside shared memory ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, ///< [in] L2 promotion size. - ur_exp_tensor_map_oob_fill_flags_t OobFill, ///< [in] Indicate whether zero or special NaN constant will be used to - ///< fill out-of-bound elements. + ur_exp_tensor_map_oob_fill_flags_t OobFill, ///< [in] Indicates whether zero or special NaN constant will be used to + ///< fill out-of-bounds elements. ur_exp_tensor_map_handle_t *hTensorMap ///< [out] Handle of the tensor map object. ); diff --git a/scripts/core/exp-tensor-map.yml b/scripts/core/exp-tensor-map.yml index 258a2403f0..fa1e8c1898 100644 --- a/scripts/core/exp-tensor-map.yml +++ b/scripts/core/exp-tensor-map.yml @@ -9,7 +9,7 @@ # --- #-------------------------------------------------------------------------- type: header -desc: "Intel $OneApi Unified Runtime Experimental API for enqueuing work through native APIs" +desc: "Intel $OneApi Unified Runtime Experimental API for mapping tensor objects" ordinal: "100" --- #-------------------------------------------------------------------------- @@ -125,7 +125,7 @@ params: desc: "[in] Array containing tensor size (number of elements) along each of the TensorRank dimensions." - type: const uint64_t* name: GlobalStrides - desc: "[in] Array containing stride size (in bytes) along each of the tensorRank - 1 dimensions." + desc: "[in] Array containing stride size (in bytes) along each of the TensorRank - 1 dimensions." 
- type: const int* name: PixelBoxLowerCorner desc: "[in] Array containing DHW dimensions of lower box corner." @@ -140,7 +140,7 @@ params: desc: "[in] Number of pixels per column." - type: const uint32_t* name: ElementStrides - desc: "[in] Array containing traversal stride in each of the tensorRank dimensions." + desc: "[in] Array containing traversal stride in each of the TensorRank dimensions." - type: $x_exp_tensor_map_interleave_flags_t name: Interleave desc: "[in] Type of interleaved layout the tensor addresses" @@ -152,10 +152,13 @@ params: desc: "[in] L2 promotion size." - type: $x_exp_tensor_map_oob_fill_flags_t name: OobFill - desc: "[in] Indicate whether zero or special NaN constant will be used to fill out-of-bound elements." + desc: "[in] Indicates whether zero or special NaN constant will be used to fill out-of-bounds elements." - type: $x_exp_tensor_map_handle_t* name: hTensorMap desc: "[out] Handle of the tensor map object." +returns: + - $X_RESULT_ERROR_INVALID_ARGUMENT: + - "`TensorRank < 3`" --- #-------------------------------------------------------------------------- type: function @@ -182,13 +185,13 @@ params: desc: "[in] Array containing tensor size (number of elements) along each of the TensorRank dimensions." - type: const uint64_t* name: GlobalStrides - desc: "[in] Array containing stride size (in bytes) along each of the tensorRank - 1 dimensions." + desc: "[in] Array containing stride size (in bytes) along each of the TensorRank - 1 dimensions." - type: const uint32_t* name: BoxDim - desc: "[in] Array containing traversal box size (number of elments) along each of the tensorRank dimensions. Specifies how many elements to be traversed along each tensor dimension." + desc: "[in] Array containing traversal box size (number of elments) along each of the TensorRank dimensions. Specifies how many elements to be traversed along each tensor dimension." 
- type: const uint32_t* name: ElementStrides - desc: "[in] Array containing traversal stride in each of the tensorRank dimensions." + desc: "[in] Array containing traversal stride in each of the TensorRank dimensions." - type: $x_exp_tensor_map_interleave_flags_t name: Interleave desc: "[in] Type of interleaved layout the tensor addresses" @@ -200,8 +203,11 @@ params: desc: "[in] L2 promotion size." - type: $x_exp_tensor_map_oob_fill_flags_t name: OobFill - desc: "[in] Indicate whether zero or special NaN constant will be used to fill out-of-bound elements." + desc: "[in] Indicates whether zero or special NaN constant will be used to fill out-of-bounds elements." - type: $x_exp_tensor_map_handle_t* name: hTensorMap desc: "[out] Handle of the tensor map object." +returns: + - $X_RESULT_ERROR_INVALID_ARGUMENT: + - "`TensorRank < 3`" diff --git a/source/adapters/cuda/tensor_map.cpp b/source/adapters/cuda/tensor_map.cpp index 9d9559fd09..da8e4f8f8c 100644 --- a/source/adapters/cuda/tensor_map.cpp +++ b/source/adapters/cuda/tensor_map.cpp @@ -18,8 +18,8 @@ struct ur_exp_tensor_map_handle_t_ { }; #define CONVERT(URTYPE, CUTYPE) \ - if (URTYPE & UrType) \ - return CUTYPE; + if ((URTYPE)&UrType) \ + return (CUTYPE); inline CUtensorMapDataType convertUrToCuDataType(ur_exp_tensor_map_data_type_flags_t UrType) { @@ -71,7 +71,7 @@ convertUrToCuSwizzle(ur_exp_tensor_map_swizzle_flags_t UrType) { } CUtensorMapL2promotion -convertUrToL2promotion(ur_exp_tensor_map_l2_promotion_flags_t UrType) { +convertUrToCuL2Promotion(ur_exp_tensor_map_l2_promotion_flags_t UrType) { CONVERT(UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_NONE, CU_TENSOR_MAP_L2_PROMOTION_NONE); CONVERT(UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_64B, @@ -84,7 +84,7 @@ convertUrToL2promotion(ur_exp_tensor_map_l2_promotion_flags_t UrType) { } CUtensorMapFloatOOBfill -convertUrToCuOOBfill(ur_exp_tensor_map_oob_fill_flags_t UrType) { +convertUrToCuOobFill(ur_exp_tensor_map_oob_fill_flags_t UrType) { 
CONVERT(UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_NONE, CU_TENSOR_MAP_FLOAT_OOB_FILL_NONE); CONVERT(UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_REQUEST_ZERO_FMA, @@ -111,7 +111,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( GlobalAddress, GlobalDim, GlobalStrides, PixelBoxLowerCorner, PixelBoxUpperCorner, ChannelsPerPixel, PixelsPerColumn, ElementStrides, convertUrToCuInterleave(Interleave), convertUrToCuSwizzle(Swizzle), - convertUrToL2promotion(L2Promotion), convertUrToCuOOBfill(OobFill))); + convertUrToCuL2Promotion(L2Promotion), convertUrToCuOobFill(OobFill))); } catch (ur_result_t Err) { return Err; } @@ -134,7 +134,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeTiledExp( &(*hTensorMap)->Map, convertUrToCuDataType(TensorMapType), TensorRank, GlobalAddress, GlobalDim, GlobalStrides, BoxDim, ElementStrides, convertUrToCuInterleave(Interleave), convertUrToCuSwizzle(Swizzle), - convertUrToL2promotion(L2Promotion), convertUrToCuOOBfill(OobFill))); + convertUrToCuL2Promotion(L2Promotion), convertUrToCuOobFill(OobFill))); } catch (ur_result_t Err) { return Err; } diff --git a/source/adapters/mock/ur_mockddi.cpp b/source/adapters/mock/ur_mockddi.cpp index f2849e73ff..ec0be3890f 100644 --- a/source/adapters/mock/ur_mockddi.cpp +++ b/source/adapters/mock/ur_mockddi.cpp @@ -10745,7 +10745,7 @@ __urdlllocal ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( ///< the TensorRank dimensions. const uint64_t * GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the - ///< tensorRank - 1 dimensions. + ///< TensorRank - 1 dimensions. const int * PixelBoxLowerCorner, ///< [in] Array containing DHW dimensions of lower box corner. const int * @@ -10753,7 +10753,7 @@ __urdlllocal ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( uint32_t ChannelsPerPixel, ///< [in] Number of channels per pixel. uint32_t PixelsPerColumn, ///< [in] Number of pixels per column. 
const uint32_t * - ElementStrides, ///< [in] Array containing traversal stride in each of the tensorRank + ElementStrides, ///< [in] Array containing traversal stride in each of the TensorRank ///< dimensions. ur_exp_tensor_map_interleave_flags_t Interleave, ///< [in] Type of interleaved layout the tensor addresses @@ -10762,8 +10762,8 @@ __urdlllocal ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, ///< [in] L2 promotion size. ur_exp_tensor_map_oob_fill_flags_t - OobFill, ///< [in] Indicate whether zero or special NaN constant will be used to - ///< fill out-of-bound elements. + OobFill, ///< [in] Indicates whether zero or special NaN constant will be used to + ///< fill out-of-bounds elements. ur_exp_tensor_map_handle_t *hTensorMap ///< [out] Handle of the tensor map object. ) try { @@ -10835,13 +10835,13 @@ __urdlllocal ur_result_t UR_APICALL urTensorMapEncodeTiledExp( ///< the TensorRank dimensions. const uint64_t * GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the - ///< tensorRank - 1 dimensions. + ///< TensorRank - 1 dimensions. const uint32_t * BoxDim, ///< [in] Array containing traversal box size (number of elments) along - ///< each of the tensorRank dimensions. Specifies how many elements to be + ///< each of the TensorRank dimensions. Specifies how many elements to be ///< traversed along each tensor dimension. const uint32_t * - ElementStrides, ///< [in] Array containing traversal stride in each of the tensorRank + ElementStrides, ///< [in] Array containing traversal stride in each of the TensorRank ///< dimensions. ur_exp_tensor_map_interleave_flags_t Interleave, ///< [in] Type of interleaved layout the tensor addresses @@ -10850,8 +10850,8 @@ __urdlllocal ur_result_t UR_APICALL urTensorMapEncodeTiledExp( ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, ///< [in] L2 promotion size. 
ur_exp_tensor_map_oob_fill_flags_t - OobFill, ///< [in] Indicate whether zero or special NaN constant will be used to - ///< fill out-of-bound elements. + OobFill, ///< [in] Indicates whether zero or special NaN constant will be used to + ///< fill out-of-bounds elements. ur_exp_tensor_map_handle_t *hTensorMap ///< [out] Handle of the tensor map object. ) try { diff --git a/source/loader/layers/tracing/ur_trcddi.cpp b/source/loader/layers/tracing/ur_trcddi.cpp index 5b28fd9f30..b6be9b242f 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -9235,7 +9235,7 @@ __urdlllocal ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( ///< the TensorRank dimensions. const uint64_t * GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the - ///< tensorRank - 1 dimensions. + ///< TensorRank - 1 dimensions. const int * PixelBoxLowerCorner, ///< [in] Array containing DHW dimensions of lower box corner. const int * @@ -9243,7 +9243,7 @@ __urdlllocal ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( uint32_t ChannelsPerPixel, ///< [in] Number of channels per pixel. uint32_t PixelsPerColumn, ///< [in] Number of pixels per column. const uint32_t * - ElementStrides, ///< [in] Array containing traversal stride in each of the tensorRank + ElementStrides, ///< [in] Array containing traversal stride in each of the TensorRank ///< dimensions. ur_exp_tensor_map_interleave_flags_t Interleave, ///< [in] Type of interleaved layout the tensor addresses @@ -9252,8 +9252,8 @@ __urdlllocal ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, ///< [in] L2 promotion size. ur_exp_tensor_map_oob_fill_flags_t - OobFill, ///< [in] Indicate whether zero or special NaN constant will be used to - ///< fill out-of-bound elements. + OobFill, ///< [in] Indicates whether zero or special NaN constant will be used to + ///< fill out-of-bounds elements. 
ur_exp_tensor_map_handle_t *hTensorMap ///< [out] Handle of the tensor map object. ) { @@ -9318,13 +9318,13 @@ __urdlllocal ur_result_t UR_APICALL urTensorMapEncodeTiledExp( ///< the TensorRank dimensions. const uint64_t * GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the - ///< tensorRank - 1 dimensions. + ///< TensorRank - 1 dimensions. const uint32_t * BoxDim, ///< [in] Array containing traversal box size (number of elments) along - ///< each of the tensorRank dimensions. Specifies how many elements to be + ///< each of the TensorRank dimensions. Specifies how many elements to be ///< traversed along each tensor dimension. const uint32_t * - ElementStrides, ///< [in] Array containing traversal stride in each of the tensorRank + ElementStrides, ///< [in] Array containing traversal stride in each of the TensorRank ///< dimensions. ur_exp_tensor_map_interleave_flags_t Interleave, ///< [in] Type of interleaved layout the tensor addresses @@ -9333,8 +9333,8 @@ __urdlllocal ur_result_t UR_APICALL urTensorMapEncodeTiledExp( ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, ///< [in] L2 promotion size. ur_exp_tensor_map_oob_fill_flags_t - OobFill, ///< [in] Indicate whether zero or special NaN constant will be used to - ///< fill out-of-bound elements. + OobFill, ///< [in] Indicates whether zero or special NaN constant will be used to + ///< fill out-of-bounds elements. ur_exp_tensor_map_handle_t *hTensorMap ///< [out] Handle of the tensor map object. ) { diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index a46da9af2b..1701ee4725 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -10288,7 +10288,7 @@ __urdlllocal ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( ///< the TensorRank dimensions. 
const uint64_t * GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the - ///< tensorRank - 1 dimensions. + ///< TensorRank - 1 dimensions. const int * PixelBoxLowerCorner, ///< [in] Array containing DHW dimensions of lower box corner. const int * @@ -10296,7 +10296,7 @@ __urdlllocal ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( uint32_t ChannelsPerPixel, ///< [in] Number of channels per pixel. uint32_t PixelsPerColumn, ///< [in] Number of pixels per column. const uint32_t * - ElementStrides, ///< [in] Array containing traversal stride in each of the tensorRank + ElementStrides, ///< [in] Array containing traversal stride in each of the TensorRank ///< dimensions. ur_exp_tensor_map_interleave_flags_t Interleave, ///< [in] Type of interleaved layout the tensor addresses @@ -10305,8 +10305,8 @@ __urdlllocal ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, ///< [in] L2 promotion size. ur_exp_tensor_map_oob_fill_flags_t - OobFill, ///< [in] Indicate whether zero or special NaN constant will be used to - ///< fill out-of-bound elements. + OobFill, ///< [in] Indicates whether zero or special NaN constant will be used to + ///< fill out-of-bounds elements. ur_exp_tensor_map_handle_t *hTensorMap ///< [out] Handle of the tensor map object. ) { @@ -10369,6 +10369,10 @@ __urdlllocal ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( if (UR_EXP_TENSOR_MAP_OOB_FILL_FLAGS_MASK & OobFill) { return UR_RESULT_ERROR_INVALID_ENUMERATION; } + + if (TensorRank < 3) { + return UR_RESULT_ERROR_INVALID_ARGUMENT; + } } if (getContext()->enableLifetimeValidation && @@ -10399,13 +10403,13 @@ __urdlllocal ur_result_t UR_APICALL urTensorMapEncodeTiledExp( ///< the TensorRank dimensions. const uint64_t * GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the - ///< tensorRank - 1 dimensions. + ///< TensorRank - 1 dimensions. 
const uint32_t * BoxDim, ///< [in] Array containing traversal box size (number of elments) along - ///< each of the tensorRank dimensions. Specifies how many elements to be + ///< each of the TensorRank dimensions. Specifies how many elements to be ///< traversed along each tensor dimension. const uint32_t * - ElementStrides, ///< [in] Array containing traversal stride in each of the tensorRank + ElementStrides, ///< [in] Array containing traversal stride in each of the TensorRank ///< dimensions. ur_exp_tensor_map_interleave_flags_t Interleave, ///< [in] Type of interleaved layout the tensor addresses @@ -10414,8 +10418,8 @@ __urdlllocal ur_result_t UR_APICALL urTensorMapEncodeTiledExp( ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, ///< [in] L2 promotion size. ur_exp_tensor_map_oob_fill_flags_t - OobFill, ///< [in] Indicate whether zero or special NaN constant will be used to - ///< fill out-of-bound elements. + OobFill, ///< [in] Indicates whether zero or special NaN constant will be used to + ///< fill out-of-bounds elements. ur_exp_tensor_map_handle_t *hTensorMap ///< [out] Handle of the tensor map object. ) { @@ -10474,6 +10478,10 @@ __urdlllocal ur_result_t UR_APICALL urTensorMapEncodeTiledExp( if (UR_EXP_TENSOR_MAP_OOB_FILL_FLAGS_MASK & OobFill) { return UR_RESULT_ERROR_INVALID_ENUMERATION; } + + if (TensorRank < 3) { + return UR_RESULT_ERROR_INVALID_ARGUMENT; + } } if (getContext()->enableLifetimeValidation && diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index 598e92c311..2409738fbf 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -9378,7 +9378,7 @@ __urdlllocal ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( ///< the TensorRank dimensions. const uint64_t * GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the - ///< tensorRank - 1 dimensions. + ///< TensorRank - 1 dimensions. 
const int * PixelBoxLowerCorner, ///< [in] Array containing DHW dimensions of lower box corner. const int * @@ -9386,7 +9386,7 @@ __urdlllocal ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( uint32_t ChannelsPerPixel, ///< [in] Number of channels per pixel. uint32_t PixelsPerColumn, ///< [in] Number of pixels per column. const uint32_t * - ElementStrides, ///< [in] Array containing traversal stride in each of the tensorRank + ElementStrides, ///< [in] Array containing traversal stride in each of the TensorRank ///< dimensions. ur_exp_tensor_map_interleave_flags_t Interleave, ///< [in] Type of interleaved layout the tensor addresses @@ -9395,8 +9395,8 @@ __urdlllocal ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, ///< [in] L2 promotion size. ur_exp_tensor_map_oob_fill_flags_t - OobFill, ///< [in] Indicate whether zero or special NaN constant will be used to - ///< fill out-of-bound elements. + OobFill, ///< [in] Indicates whether zero or special NaN constant will be used to + ///< fill out-of-bounds elements. ur_exp_tensor_map_handle_t *hTensorMap ///< [out] Handle of the tensor map object. ) { @@ -9451,13 +9451,13 @@ __urdlllocal ur_result_t UR_APICALL urTensorMapEncodeTiledExp( ///< the TensorRank dimensions. const uint64_t * GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the - ///< tensorRank - 1 dimensions. + ///< TensorRank - 1 dimensions. const uint32_t * BoxDim, ///< [in] Array containing traversal box size (number of elments) along - ///< each of the tensorRank dimensions. Specifies how many elements to be + ///< each of the TensorRank dimensions. Specifies how many elements to be ///< traversed along each tensor dimension. const uint32_t * - ElementStrides, ///< [in] Array containing traversal stride in each of the tensorRank + ElementStrides, ///< [in] Array containing traversal stride in each of the TensorRank ///< dimensions. 
ur_exp_tensor_map_interleave_flags_t Interleave, ///< [in] Type of interleaved layout the tensor addresses @@ -9466,8 +9466,8 @@ __urdlllocal ur_result_t UR_APICALL urTensorMapEncodeTiledExp( ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, ///< [in] L2 promotion size. ur_exp_tensor_map_oob_fill_flags_t - OobFill, ///< [in] Indicate whether zero or special NaN constant will be used to - ///< fill out-of-bound elements. + OobFill, ///< [in] Indicates whether zero or special NaN constant will be used to + ///< fill out-of-bounds elements. ur_exp_tensor_map_handle_t *hTensorMap ///< [out] Handle of the tensor map object. ) { diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index 3129eec2f7..d83ec2e829 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -9578,6 +9578,8 @@ ur_result_t UR_APICALL urEnqueueNativeCommandExp( /// + `NULL == PixelBoxUpperCorner` /// + `NULL == ElementStrides` /// + `NULL == hTensorMap` +/// - ::UR_RESULT_ERROR_INVALID_ARGUMENT +/// + `TensorRank < 3` ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( ur_device_handle_t hDevice, ///< [in] Handle of the device object. ur_exp_tensor_map_data_type_flags_t @@ -9590,7 +9592,7 @@ ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( ///< the TensorRank dimensions. const uint64_t * GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the - ///< tensorRank - 1 dimensions. + ///< TensorRank - 1 dimensions. const int * PixelBoxLowerCorner, ///< [in] Array containing DHW dimensions of lower box corner. const int * @@ -9598,7 +9600,7 @@ ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( uint32_t ChannelsPerPixel, ///< [in] Number of channels per pixel. uint32_t PixelsPerColumn, ///< [in] Number of pixels per column. const uint32_t * - ElementStrides, ///< [in] Array containing traversal stride in each of the tensorRank + ElementStrides, ///< [in] Array containing traversal stride in each of the TensorRank ///< dimensions. 
ur_exp_tensor_map_interleave_flags_t Interleave, ///< [in] Type of interleaved layout the tensor addresses @@ -9607,8 +9609,8 @@ ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, ///< [in] L2 promotion size. ur_exp_tensor_map_oob_fill_flags_t - OobFill, ///< [in] Indicate whether zero or special NaN constant will be used to - ///< fill out-of-bound elements. + OobFill, ///< [in] Indicates whether zero or special NaN constant will be used to + ///< fill out-of-bounds elements. ur_exp_tensor_map_handle_t *hTensorMap ///< [out] Handle of the tensor map object. ) try { @@ -9653,6 +9655,8 @@ ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( /// + `NULL == BoxDim` /// + `NULL == ElementStrides` /// + `NULL == hTensorMap` +/// - ::UR_RESULT_ERROR_INVALID_ARGUMENT +/// + `TensorRank < 3` ur_result_t UR_APICALL urTensorMapEncodeTiledExp( ur_device_handle_t hDevice, ///< [in] Handle of the device object. ur_exp_tensor_map_data_type_flags_t @@ -9665,13 +9669,13 @@ ur_result_t UR_APICALL urTensorMapEncodeTiledExp( ///< the TensorRank dimensions. const uint64_t * GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the - ///< tensorRank - 1 dimensions. + ///< TensorRank - 1 dimensions. const uint32_t * BoxDim, ///< [in] Array containing traversal box size (number of elments) along - ///< each of the tensorRank dimensions. Specifies how many elements to be + ///< each of the TensorRank dimensions. Specifies how many elements to be ///< traversed along each tensor dimension. const uint32_t * - ElementStrides, ///< [in] Array containing traversal stride in each of the tensorRank + ElementStrides, ///< [in] Array containing traversal stride in each of the TensorRank ///< dimensions. 
ur_exp_tensor_map_interleave_flags_t Interleave, ///< [in] Type of interleaved layout the tensor addresses @@ -9680,8 +9684,8 @@ ur_result_t UR_APICALL urTensorMapEncodeTiledExp( ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, ///< [in] L2 promotion size. ur_exp_tensor_map_oob_fill_flags_t - OobFill, ///< [in] Indicate whether zero or special NaN constant will be used to - ///< fill out-of-bound elements. + OobFill, ///< [in] Indicates whether zero or special NaN constant will be used to + ///< fill out-of-bounds elements. ur_exp_tensor_map_handle_t *hTensorMap ///< [out] Handle of the tensor map object. ) try { diff --git a/source/ur_api.cpp b/source/ur_api.cpp index 5d1632ce18..7be7628651 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -8127,6 +8127,8 @@ ur_result_t UR_APICALL urEnqueueNativeCommandExp( /// + `NULL == PixelBoxUpperCorner` /// + `NULL == ElementStrides` /// + `NULL == hTensorMap` +/// - ::UR_RESULT_ERROR_INVALID_ARGUMENT +/// + `TensorRank < 3` ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( ur_device_handle_t hDevice, ///< [in] Handle of the device object. ur_exp_tensor_map_data_type_flags_t @@ -8139,7 +8141,7 @@ ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( ///< the TensorRank dimensions. const uint64_t * GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the - ///< tensorRank - 1 dimensions. + ///< TensorRank - 1 dimensions. const int * PixelBoxLowerCorner, ///< [in] Array containing DHW dimensions of lower box corner. const int * @@ -8147,7 +8149,7 @@ ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( uint32_t ChannelsPerPixel, ///< [in] Number of channels per pixel. uint32_t PixelsPerColumn, ///< [in] Number of pixels per column. const uint32_t * - ElementStrides, ///< [in] Array containing traversal stride in each of the tensorRank + ElementStrides, ///< [in] Array containing traversal stride in each of the TensorRank ///< dimensions. 
ur_exp_tensor_map_interleave_flags_t Interleave, ///< [in] Type of interleaved layout the tensor addresses @@ -8156,8 +8158,8 @@ ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, ///< [in] L2 promotion size. ur_exp_tensor_map_oob_fill_flags_t - OobFill, ///< [in] Indicate whether zero or special NaN constant will be used to - ///< fill out-of-bound elements. + OobFill, ///< [in] Indicates whether zero or special NaN constant will be used to + ///< fill out-of-bounds elements. ur_exp_tensor_map_handle_t *hTensorMap ///< [out] Handle of the tensor map object. ) { @@ -8191,6 +8193,8 @@ ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( /// + `NULL == BoxDim` /// + `NULL == ElementStrides` /// + `NULL == hTensorMap` +/// - ::UR_RESULT_ERROR_INVALID_ARGUMENT +/// + `TensorRank < 3` ur_result_t UR_APICALL urTensorMapEncodeTiledExp( ur_device_handle_t hDevice, ///< [in] Handle of the device object. ur_exp_tensor_map_data_type_flags_t @@ -8203,13 +8207,13 @@ ur_result_t UR_APICALL urTensorMapEncodeTiledExp( ///< the TensorRank dimensions. const uint64_t * GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the - ///< tensorRank - 1 dimensions. + ///< TensorRank - 1 dimensions. const uint32_t * BoxDim, ///< [in] Array containing traversal box size (number of elments) along - ///< each of the tensorRank dimensions. Specifies how many elements to be + ///< each of the TensorRank dimensions. Specifies how many elements to be ///< traversed along each tensor dimension. const uint32_t * - ElementStrides, ///< [in] Array containing traversal stride in each of the tensorRank + ElementStrides, ///< [in] Array containing traversal stride in each of the TensorRank ///< dimensions. 
ur_exp_tensor_map_interleave_flags_t Interleave, ///< [in] Type of interleaved layout the tensor addresses @@ -8218,8 +8222,8 @@ ur_result_t UR_APICALL urTensorMapEncodeTiledExp( ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, ///< [in] L2 promotion size. ur_exp_tensor_map_oob_fill_flags_t - OobFill, ///< [in] Indicate whether zero or special NaN constant will be used to - ///< fill out-of-bound elements. + OobFill, ///< [in] Indicates whether zero or special NaN constant will be used to + ///< fill out-of-bounds elements. ur_exp_tensor_map_handle_t *hTensorMap ///< [out] Handle of the tensor map object. ) { From e0635975b3861fe3dd5a034e5d0089e53586f1a5 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Mon, 15 Jul 2024 15:10:14 +0100 Subject: [PATCH 04/12] Add unsupported entry points to other adapters --- source/adapters/hip/CMakeLists.txt | 1 + source/adapters/hip/tensor_map.cpp | 29 +++++++++++++++++++++++ source/adapters/level_zero/CMakeLists.txt | 1 + source/adapters/level_zero/tensor_map.cpp | 28 ++++++++++++++++++++++ source/adapters/native_cpu/CMakeLists.txt | 1 + source/adapters/native_cpu/tensor_map.cpp | 29 +++++++++++++++++++++++ source/adapters/opencl/CMakeLists.txt | 1 + source/adapters/opencl/tensor_map.cpp | 29 +++++++++++++++++++++++ 8 files changed, 119 insertions(+) create mode 100644 source/adapters/hip/tensor_map.cpp create mode 100644 source/adapters/level_zero/tensor_map.cpp create mode 100644 source/adapters/native_cpu/tensor_map.cpp create mode 100644 source/adapters/opencl/tensor_map.cpp diff --git a/source/adapters/hip/CMakeLists.txt b/source/adapters/hip/CMakeLists.txt index 9113d7b1ca..36222907c6 100644 --- a/source/adapters/hip/CMakeLists.txt +++ b/source/adapters/hip/CMakeLists.txt @@ -86,6 +86,7 @@ add_ur_adapter(${TARGET_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/queue.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler.hpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/tensor_map.cpp ${CMAKE_CURRENT_SOURCE_DIR}/usm.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/usm_p2p.cpp ${CMAKE_CURRENT_SOURCE_DIR}/virtual_mem.cpp diff --git a/source/adapters/hip/tensor_map.cpp b/source/adapters/hip/tensor_map.cpp new file mode 100644 index 0000000000..59ab4932e5 --- /dev/null +++ b/source/adapters/hip/tensor_map.cpp @@ -0,0 +1,29 @@ +//===--------- tensor_map.cpp - HIP Adapter -------------------------------===// +// +// Copyright (C) 2024 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <ur_api.h> + +UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( + ur_device_handle_t, ur_exp_tensor_map_data_type_flags_t, uint32_t, void *, + const uint64_t *, const uint64_t *, const int *, const int *, uint32_t, + uint32_t, const uint32_t *, ur_exp_tensor_map_interleave_flags_t, + ur_exp_tensor_map_swizzle_flags_t, ur_exp_tensor_map_l2_promotion_flags_t, + ur_exp_tensor_map_oob_fill_flags_t, ur_exp_tensor_map_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} +UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeTiledExp( + ur_device_handle_t, ur_exp_tensor_map_data_type_flags_t, uint32_t, void *, + const uint64_t *, const uint64_t *, const uint32_t *, const uint32_t *, + ur_exp_tensor_map_interleave_flags_t, ur_exp_tensor_map_swizzle_flags_t, + ur_exp_tensor_map_l2_promotion_flags_t, ur_exp_tensor_map_oob_fill_flags_t, + ur_exp_tensor_map_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + diff --git a/source/adapters/level_zero/CMakeLists.txt b/source/adapters/level_zero/CMakeLists.txt index 05a33c1224..4e81bbd738 100644 --- a/source/adapters/level_zero/CMakeLists.txt +++ b/source/adapters/level_zero/CMakeLists.txt @@ -45,6 +45,7 @@ if(UR_BUILD_ADAPTER_L0) ${CMAKE_CURRENT_SOURCE_DIR}/program.cpp ${CMAKE_CURRENT_SOURCE_DIR}/queue.cpp
${CMAKE_CURRENT_SOURCE_DIR}/sampler.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/tensor_map.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image.cpp ${CMAKE_CURRENT_SOURCE_DIR}/helpers/kernel_helpers.cpp ${CMAKE_CURRENT_SOURCE_DIR}/helpers/memory_helpers.cpp diff --git a/source/adapters/level_zero/tensor_map.cpp b/source/adapters/level_zero/tensor_map.cpp new file mode 100644 index 0000000000..60625cec94 --- /dev/null +++ b/source/adapters/level_zero/tensor_map.cpp @@ -0,0 +1,28 @@ +//===--------- tensor_map.cpp - L0 Adapter --------------------------------===// +// +// Copyright (C) 2024 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <ur_api.h> + +UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( + ur_device_handle_t, ur_exp_tensor_map_data_type_flags_t, uint32_t, void *, + const uint64_t *, const uint64_t *, const int *, const int *, uint32_t, + uint32_t, const uint32_t *, ur_exp_tensor_map_interleave_flags_t, + ur_exp_tensor_map_swizzle_flags_t, ur_exp_tensor_map_l2_promotion_flags_t, + ur_exp_tensor_map_oob_fill_flags_t, ur_exp_tensor_map_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} +UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeTiledExp( + ur_device_handle_t, ur_exp_tensor_map_data_type_flags_t, uint32_t, void *, + const uint64_t *, const uint64_t *, const uint32_t *, const uint32_t *, + ur_exp_tensor_map_interleave_flags_t, ur_exp_tensor_map_swizzle_flags_t, + ur_exp_tensor_map_l2_promotion_flags_t, ur_exp_tensor_map_oob_fill_flags_t, + ur_exp_tensor_map_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/native_cpu/CMakeLists.txt b/source/adapters/native_cpu/CMakeLists.txt index 56cfc577d8..69f7fff6bd 100644 --- a/source/adapters/native_cpu/CMakeLists.txt +++
b/source/adapters/native_cpu/CMakeLists.txt @@ -34,6 +34,7 @@ add_ur_adapter(${TARGET_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/queue.cpp ${CMAKE_CURRENT_SOURCE_DIR}/queue.hpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/tensor_map.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ur_interface_loader.cpp ${CMAKE_CURRENT_SOURCE_DIR}/usm_p2p.cpp ${CMAKE_CURRENT_SOURCE_DIR}/virtual_mem.cpp diff --git a/source/adapters/native_cpu/tensor_map.cpp b/source/adapters/native_cpu/tensor_map.cpp new file mode 100644 index 0000000000..288d748ab6 --- /dev/null +++ b/source/adapters/native_cpu/tensor_map.cpp @@ -0,0 +1,29 @@ +//===--------- tensor_map.cpp - Native CPU Adapter ------------------------===// +// +// Copyright (C) 2024 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <ur_api.h> + +UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( + ur_device_handle_t, ur_exp_tensor_map_data_type_flags_t, uint32_t, void *, + const uint64_t *, const uint64_t *, const int *, const int *, uint32_t, + uint32_t, const uint32_t *, ur_exp_tensor_map_interleave_flags_t, + ur_exp_tensor_map_swizzle_flags_t, ur_exp_tensor_map_l2_promotion_flags_t, + ur_exp_tensor_map_oob_fill_flags_t, ur_exp_tensor_map_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} +UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeTiledExp( + ur_device_handle_t, ur_exp_tensor_map_data_type_flags_t, uint32_t, void *, + const uint64_t *, const uint64_t *, const uint32_t *, const uint32_t *, + ur_exp_tensor_map_interleave_flags_t, ur_exp_tensor_map_swizzle_flags_t, + ur_exp_tensor_map_l2_promotion_flags_t, ur_exp_tensor_map_oob_fill_flags_t, + ur_exp_tensor_map_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + diff --git
a/source/adapters/opencl/CMakeLists.txt b/source/adapters/opencl/CMakeLists.txt index a7e91f75e5..e091012bab 100644 --- a/source/adapters/opencl/CMakeLists.txt +++ b/source/adapters/opencl/CMakeLists.txt @@ -38,6 +38,7 @@ add_ur_adapter(${TARGET_NAME} SHARED ${CMAKE_CURRENT_SOURCE_DIR}/queue.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/usm.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/tensor_map.cpp ${CMAKE_CURRENT_SOURCE_DIR}/usm.cpp ${CMAKE_CURRENT_SOURCE_DIR}/usm_p2p.cpp ${CMAKE_CURRENT_SOURCE_DIR}/virtual_mem.cpp diff --git a/source/adapters/opencl/tensor_map.cpp b/source/adapters/opencl/tensor_map.cpp new file mode 100644 index 0000000000..b39aaf800a --- /dev/null +++ b/source/adapters/opencl/tensor_map.cpp @@ -0,0 +1,29 @@ +//===--------- tensor_map.cpp - OpenCL Adapter ----------------------------===// +// +// Copyright (C) 2024 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <ur_api.h> + +UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( + ur_device_handle_t, ur_exp_tensor_map_data_type_flags_t, uint32_t, void *, + const uint64_t *, const uint64_t *, const int *, const int *, uint32_t, + uint32_t, const uint32_t *, ur_exp_tensor_map_interleave_flags_t, + ur_exp_tensor_map_swizzle_flags_t, ur_exp_tensor_map_l2_promotion_flags_t, + ur_exp_tensor_map_oob_fill_flags_t, ur_exp_tensor_map_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} +UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeTiledExp( + ur_device_handle_t, ur_exp_tensor_map_data_type_flags_t, uint32_t, void *, + const uint64_t *, const uint64_t *, const uint32_t *, const uint32_t *, + ur_exp_tensor_map_interleave_flags_t, ur_exp_tensor_map_swizzle_flags_t, + ur_exp_tensor_map_l2_promotion_flags_t,
ur_exp_tensor_map_oob_fill_flags_t, + ur_exp_tensor_map_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + From 6e6059c20a307df0f14fd6b975dfd3e206c27923 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Mon, 15 Jul 2024 16:09:17 +0100 Subject: [PATCH 05/12] Clang format --- source/adapters/hip/tensor_map.cpp | 1 - source/adapters/native_cpu/tensor_map.cpp | 1 - source/adapters/opencl/tensor_map.cpp | 1 - 3 files changed, 3 deletions(-) diff --git a/source/adapters/hip/tensor_map.cpp b/source/adapters/hip/tensor_map.cpp index 59ab4932e5..348c4c9d05 100644 --- a/source/adapters/hip/tensor_map.cpp +++ b/source/adapters/hip/tensor_map.cpp @@ -26,4 +26,3 @@ UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeTiledExp( ur_exp_tensor_map_handle_t *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } - diff --git a/source/adapters/native_cpu/tensor_map.cpp b/source/adapters/native_cpu/tensor_map.cpp index 288d748ab6..eb9f01b318 100644 --- a/source/adapters/native_cpu/tensor_map.cpp +++ b/source/adapters/native_cpu/tensor_map.cpp @@ -26,4 +26,3 @@ UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeTiledExp( ur_exp_tensor_map_handle_t *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } - diff --git a/source/adapters/opencl/tensor_map.cpp b/source/adapters/opencl/tensor_map.cpp index b39aaf800a..ea2a009f88 100644 --- a/source/adapters/opencl/tensor_map.cpp +++ b/source/adapters/opencl/tensor_map.cpp @@ -26,4 +26,3 @@ UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeTiledExp( ur_exp_tensor_map_handle_t *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } - From ccde31ec616fa51b29e0d6f123bc9ad15bf9f0c6 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Fri, 25 Oct 2024 16:38:46 +0100 Subject: [PATCH 06/12] Put UR entry points in ur::level_zero Fixes missing symbol at linking for static build of L0 adapter. 
--- source/adapters/level_zero/tensor_map.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/source/adapters/level_zero/tensor_map.cpp b/source/adapters/level_zero/tensor_map.cpp index 60625cec94..91d6498540 100644 --- a/source/adapters/level_zero/tensor_map.cpp +++ b/source/adapters/level_zero/tensor_map.cpp @@ -10,7 +10,9 @@ #include <ur_api.h> -UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( +namespace ur::level_zero { + +ur_result_t urTensorMapEncodeIm2ColExp( ur_device_handle_t, ur_exp_tensor_map_data_type_flags_t, uint32_t, void *, const uint64_t *, const uint64_t *, const int *, const int *, uint32_t, uint32_t, const uint32_t *, ur_exp_tensor_map_interleave_flags_t, @@ -18,7 +20,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( ur_exp_tensor_map_oob_fill_flags_t, ur_exp_tensor_map_handle_t *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeTiledExp( + +ur_result_t urTensorMapEncodeTiledExp( ur_device_handle_t, ur_exp_tensor_map_data_type_flags_t, uint32_t, void *, const uint64_t *, const uint64_t *, const uint32_t *, const uint32_t *, ur_exp_tensor_map_interleave_flags_t, ur_exp_tensor_map_swizzle_flags_t, @@ -26,3 +29,4 @@ UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeTiledExp( ur_exp_tensor_map_handle_t *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } +} // namespace ur::level_zero From 837aa279cc3415f9f6ef3481e9c80bdec5078cd9 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Thu, 7 Nov 2024 17:40:34 +0000 Subject: [PATCH 07/12] Add ProcAddrTable Entry points --- source/adapters/cuda/ur_interface_loader.cpp | 13 +++++++++++++ source/adapters/hip/ur_interface_loader.cpp | 13 +++++++++++++ source/adapters/native_cpu/ur_interface_loader.cpp | 13 +++++++++++++ source/adapters/opencl/ur_interface_loader.cpp | 13 +++++++++++++ 4 files changed, 52 insertions(+) diff --git a/source/adapters/cuda/ur_interface_loader.cpp
b/source/adapters/cuda/ur_interface_loader.cpp index 4b13e6669c..cea4707a05 100644 --- a/source/adapters/cuda/ur_interface_loader.cpp +++ b/source/adapters/cuda/ur_interface_loader.cpp @@ -434,6 +434,19 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable( return UR_RESULT_SUCCESS; } +UR_APIEXPORT ur_result_t UR_APICALL urGetTensorMapExpProcAddrTable( + ur_api_version_t version, ur_tensor_map_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; + } + + pDdiTable->pfnEncodeIm2ColExp = urTensorMapEncodeIm2ColExp; + pDdiTable->pfnEncodeTiledExp = urTensorMapEncodeTiledExp; + + return result; +} + UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( ur_api_version_t version, ur_program_exp_dditable_t *pDdiTable) { auto result = validateProcInputs(version, pDdiTable); diff --git a/source/adapters/hip/ur_interface_loader.cpp b/source/adapters/hip/ur_interface_loader.cpp index f7ec09188f..2c9df55bb6 100644 --- a/source/adapters/hip/ur_interface_loader.cpp +++ b/source/adapters/hip/ur_interface_loader.cpp @@ -400,6 +400,19 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable( return UR_RESULT_SUCCESS; } +UR_APIEXPORT ur_result_t UR_APICALL urGetTensorMapExpProcAddrTable( + ur_api_version_t version, ur_tensor_map_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; + } + + pDdiTable->pfnEncodeIm2ColExp = urTensorMapEncodeIm2ColExp; + pDdiTable->pfnEncodeTiledExp = urTensorMapEncodeTiledExp; + + return result; +} + UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( ur_api_version_t version, ur_program_exp_dditable_t *pDdiTable) { auto result = validateProcInputs(version, pDdiTable); diff --git a/source/adapters/native_cpu/ur_interface_loader.cpp b/source/adapters/native_cpu/ur_interface_loader.cpp index 9717f020c3..55b1e6a568 100644 --- 
a/source/adapters/native_cpu/ur_interface_loader.cpp +++ b/source/adapters/native_cpu/ur_interface_loader.cpp @@ -418,6 +418,19 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable( return UR_RESULT_SUCCESS; } +UR_APIEXPORT ur_result_t UR_APICALL urGetTensorMapExpProcAddrTable( + ur_api_version_t version, ur_tensor_map_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; + } + + pDdiTable->pfnEncodeIm2ColExp = urTensorMapEncodeIm2ColExp; + pDdiTable->pfnEncodeTiledExp = urTensorMapEncodeTiledExp; + + return result; +} + UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( ur_api_version_t version, ur_program_exp_dditable_t *pDdiTable) { auto result = validateProcInputs(version, pDdiTable); diff --git a/source/adapters/opencl/ur_interface_loader.cpp b/source/adapters/opencl/ur_interface_loader.cpp index 46d2bf6cdd..d51c27f6cc 100644 --- a/source/adapters/opencl/ur_interface_loader.cpp +++ b/source/adapters/opencl/ur_interface_loader.cpp @@ -426,6 +426,19 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable( return UR_RESULT_SUCCESS; } +UR_APIEXPORT ur_result_t UR_APICALL urGetTensorMapExpProcAddrTable( + ur_api_version_t version, ur_tensor_map_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; + } + + pDdiTable->pfnEncodeIm2ColExp = urTensorMapEncodeIm2ColExp; + pDdiTable->pfnEncodeTiledExp = urTensorMapEncodeTiledExp; + + return result; +} + UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( ur_api_version_t version, ur_program_exp_dditable_t *pDdiTable) { auto result = validateProcInputs(version, pDdiTable); From b1a32860fd8ad8ded8656d6b5956e5a35aa38b98 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Wed, 20 Nov 2024 14:47:20 +0000 Subject: [PATCH 08/12] Fix bad merge conflicts resolution --- include/ur_api.h | 4 ++-- 
include/ur_print.hpp | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/include/ur_api.h b/include/ur_api.h index 8d4e6e5972..bb1a1bed3f 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -231,8 +231,8 @@ typedef enum ur_function_t { UR_FUNCTION_COMMAND_BUFFER_UPDATE_WAIT_EVENTS_EXP = 244, ///< Enumerator for ::urCommandBufferUpdateWaitEventsExp UR_FUNCTION_BINDLESS_IMAGES_MAP_EXTERNAL_LINEAR_MEMORY_EXP = 245, ///< Enumerator for ::urBindlessImagesMapExternalLinearMemoryExp UR_FUNCTION_ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT = 246, ///< Enumerator for ::urEnqueueEventsWaitWithBarrierExt - UR_FUNCTION_TENSOR_MAP_ENCODE_IM_2_COL_EXP = 230, ///< Enumerator for ::urTensorMapEncodeIm2ColExp - UR_FUNCTION_TENSOR_MAP_ENCODE_TILED_EXP = 231, ///< Enumerator for ::urTensorMapEncodeTiledExp + UR_FUNCTION_TENSOR_MAP_ENCODE_IM_2_COL_EXP = 247, ///< Enumerator for ::urTensorMapEncodeIm2ColExp + UR_FUNCTION_TENSOR_MAP_ENCODE_TILED_EXP = 248, ///< Enumerator for ::urTensorMapEncodeTiledExp /// @cond UR_FUNCTION_FORCE_UINT32 = 0x7fffffff /// @endcond diff --git a/include/ur_print.hpp b/include/ur_print.hpp index dafe882726..1acde66f4f 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -987,6 +987,7 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value) { break; case UR_FUNCTION_ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT: os << "UR_FUNCTION_ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT"; + break; case UR_FUNCTION_TENSOR_MAP_ENCODE_IM_2_COL_EXP: os << "UR_FUNCTION_TENSOR_MAP_ENCODE_IM_2_COL_EXP"; break; From 2f5ff276a9f47e7d08995079b9f8fcf13469264c Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Tue, 3 Dec 2024 14:53:25 +0000 Subject: [PATCH 09/12] Add clarifications in extension documentation --- scripts/core/EXP-TENSOR-MAP.rst | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/scripts/core/EXP-TENSOR-MAP.rst b/scripts/core/EXP-TENSOR-MAP.rst index 3679f3cfd1..15a6802363 100644 --- 
a/scripts/core/EXP-TENSOR-MAP.rst +++ b/scripts/core/EXP-TENSOR-MAP.rst @@ -23,8 +23,14 @@ Tensor Mapping APIs Motivation -------------------------------------------------------------------------------- -Used to target the CUDA entry points cuTensorMapEncodeIm2col and -cuTensorMapEncodeTiled. +Used to target the CUDA entry points ``cuTensorMapEncodeIm2col`` and +``cuTensorMapEncodeTiled``. + +For some tensor core operations on ``sm_90+`` Nvidia devices, a tensor +descriptor must be built on the host and passed to the kernel. The interfaces +mentioned above, and mapped to UR in this extension, provide the APIs necessary +to create these tensor descriptor objects, which can then be passed to the +kernels. API -------------------------------------------------------------------------------- @@ -61,7 +67,7 @@ Changelog Support -------------------------------------------------------------------------------- -This is only supported in the CUDA adapter. +This extension is only supported on the ``UR_PLATFORM_BACKEND_CUDA`` backend.
Contributors -------------------------------------------------------------------------------- From 8c4366f13af4ee4b0a37169132cd79a91a3775cd Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Tue, 3 Dec 2024 15:31:43 +0000 Subject: [PATCH 10/12] Fix formatting --- include/ur_api.h | 6 ++-- include/ur_api_funcs.def | 2 ++ include/ur_print.hpp | 12 ++++---- scripts/core/registry.yml | 12 ++++---- .../level_zero/ur_interface_loader.cpp | 17 +++++++++++ .../level_zero/ur_interface_loader.hpp | 24 ++++++++++++++++ source/loader/layers/tracing/ur_trcddi.cpp | 28 ++++++++++++------- source/loader/ur_ldrddi.cpp | 5 ++++ 8 files changed, 81 insertions(+), 25 deletions(-) diff --git a/include/ur_api.h b/include/ur_api.h index bb1a1bed3f..68c5032460 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -215,7 +215,9 @@ typedef enum ur_function_t { UR_FUNCTION_ENQUEUE_NATIVE_COMMAND_EXP = 228, ///< Enumerator for ::urEnqueueNativeCommandExp UR_FUNCTION_LOADER_CONFIG_SET_MOCKING_ENABLED = 229, ///< Enumerator for ::urLoaderConfigSetMockingEnabled UR_FUNCTION_BINDLESS_IMAGES_RELEASE_EXTERNAL_MEMORY_EXP = 230, ///< Enumerator for ::urBindlessImagesReleaseExternalMemoryExp + UR_FUNCTION_TENSOR_MAP_ENCODE_IM_2_COL_EXP = 230, ///< Enumerator for ::urTensorMapEncodeIm2ColExp UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_MEMCPY_EXP = 231, ///< Enumerator for ::urCommandBufferAppendUSMMemcpyExp + UR_FUNCTION_TENSOR_MAP_ENCODE_TILED_EXP = 231, ///< Enumerator for ::urTensorMapEncodeTiledExp UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_FILL_EXP = 232, ///< Enumerator for ::urCommandBufferAppendUSMFillExp UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_EXP = 233, ///< Enumerator for ::urCommandBufferAppendMemBufferCopyExp UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_EXP = 234, ///< Enumerator for ::urCommandBufferAppendMemBufferWriteExp @@ -231,8 +233,6 @@ typedef enum ur_function_t { UR_FUNCTION_COMMAND_BUFFER_UPDATE_WAIT_EVENTS_EXP = 244, ///< Enumerator for 
::urCommandBufferUpdateWaitEventsExp UR_FUNCTION_BINDLESS_IMAGES_MAP_EXTERNAL_LINEAR_MEMORY_EXP = 245, ///< Enumerator for ::urBindlessImagesMapExternalLinearMemoryExp UR_FUNCTION_ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT = 246, ///< Enumerator for ::urEnqueueEventsWaitWithBarrierExt - UR_FUNCTION_TENSOR_MAP_ENCODE_IM_2_COL_EXP = 247, ///< Enumerator for ::urTensorMapEncodeIm2ColExp - UR_FUNCTION_TENSOR_MAP_ENCODE_TILED_EXP = 248, ///< Enumerator for ::urTensorMapEncodeTiledExp /// @cond UR_FUNCTION_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -10168,7 +10168,7 @@ urEnqueueNativeCommandExp( #endif // Intel 'oneAPI' Unified Runtime Experimental API for mapping tensor objects #if !defined(__GNUC__) -#pragma region tensor map(experimental) +#pragma region tensor_map_(experimental) #endif /////////////////////////////////////////////////////////////////////////////// /// @brief Handle of tensor map object diff --git a/include/ur_api_funcs.def b/include/ur_api_funcs.def index 4920245369..5279534547 100644 --- a/include/ur_api_funcs.def +++ b/include/ur_api_funcs.def @@ -185,6 +185,8 @@ _UR_API(urCommandBufferUpdateSignalEventExp) _UR_API(urCommandBufferUpdateWaitEventsExp) _UR_API(urCommandBufferGetInfoExp) _UR_API(urCommandBufferCommandGetInfoExp) +_UR_API(urTensorMapEncodeIm2ColExp) +_UR_API(urTensorMapEncodeTiledExp) _UR_API(urUsmP2PEnablePeerAccessExp) _UR_API(urUsmP2PDisablePeerAccessExp) _UR_API(urUsmP2PPeerAccessGetInfoExp) diff --git a/include/ur_print.hpp b/include/ur_print.hpp index 1acde66f4f..cd6bc2ffe0 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -940,9 +940,15 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value) { case UR_FUNCTION_BINDLESS_IMAGES_RELEASE_EXTERNAL_MEMORY_EXP: os << "UR_FUNCTION_BINDLESS_IMAGES_RELEASE_EXTERNAL_MEMORY_EXP"; break; + case UR_FUNCTION_TENSOR_MAP_ENCODE_IM_2_COL_EXP: + os << "UR_FUNCTION_TENSOR_MAP_ENCODE_IM_2_COL_EXP"; + break; case UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_MEMCPY_EXP: os << 
"UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_MEMCPY_EXP"; break; + case UR_FUNCTION_TENSOR_MAP_ENCODE_TILED_EXP: + os << "UR_FUNCTION_TENSOR_MAP_ENCODE_TILED_EXP"; + break; case UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_FILL_EXP: os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_FILL_EXP"; break; @@ -988,12 +994,6 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value) { case UR_FUNCTION_ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT: os << "UR_FUNCTION_ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT"; break; - case UR_FUNCTION_TENSOR_MAP_ENCODE_IM_2_COL_EXP: - os << "UR_FUNCTION_TENSOR_MAP_ENCODE_IM_2_COL_EXP"; - break; - case UR_FUNCTION_TENSOR_MAP_ENCODE_TILED_EXP: - os << "UR_FUNCTION_TENSOR_MAP_ENCODE_TILED_EXP"; - break; default: os << "unknown enumerator"; break; diff --git a/scripts/core/registry.yml b/scripts/core/registry.yml index 059e23c2a0..6d7eaef77c 100644 --- a/scripts/core/registry.yml +++ b/scripts/core/registry.yml @@ -559,9 +559,15 @@ etors: - name: BINDLESS_IMAGES_RELEASE_EXTERNAL_MEMORY_EXP desc: Enumerator for $xBindlessImagesReleaseExternalMemoryExp value: '230' +- name: TENSOR_MAP_ENCODE_IM_2_COL_EXP + desc: Enumerator for $xTensorMapEncodeIm2ColExp + value: '230' - name: COMMAND_BUFFER_APPEND_USM_MEMCPY_EXP desc: Enumerator for $xCommandBufferAppendUSMMemcpyExp value: '231' +- name: TENSOR_MAP_ENCODE_TILED_EXP + desc: Enumerator for $xTensorMapEncodeTiledExp + value: '231' - name: COMMAND_BUFFER_APPEND_USM_FILL_EXP desc: Enumerator for $xCommandBufferAppendUSMFillExp value: '232' @@ -607,12 +613,6 @@ etors: - name: ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT desc: Enumerator for $xEnqueueEventsWaitWithBarrierExt value: '246' -- name: TENSOR_MAP_ENCODE_IM_2_COL_EXP - desc: Enumerator for $xTensorMapEncodeIm2ColExp - value: '230' -- name: TENSOR_MAP_ENCODE_TILED_EXP - desc: Enumerator for $xTensorMapEncodeTiledExp - value: '231' --- type: enum desc: Defines structure types diff --git a/source/adapters/level_zero/ur_interface_loader.cpp 
b/source/adapters/level_zero/ur_interface_loader.cpp index 0a36b3ecad..1d9c8d5c37 100644 --- a/source/adapters/level_zero/ur_interface_loader.cpp +++ b/source/adapters/level_zero/ur_interface_loader.cpp @@ -423,6 +423,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urGetSamplerProcAddrTable( return result; } +UR_APIEXPORT ur_result_t UR_APICALL urGetTensorMapExpProcAddrTable( + ur_api_version_t version, ur_tensor_map_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; + } + + pDdiTable->pfnEncodeIm2ColExp = ur::level_zero::urTensorMapEncodeIm2ColExp; + pDdiTable->pfnEncodeTiledExp = ur::level_zero::urTensorMapEncodeTiledExp; + + return result; +} + UR_APIEXPORT ur_result_t UR_APICALL urGetUSMProcAddrTable(ur_api_version_t version, ur_usm_dditable_t *pDdiTable) { auto result = validateProcInputs(version, pDdiTable); @@ -594,6 +607,10 @@ ur_result_t urAdapterGetDdiTables(ur_dditable_t *ddi) { &ddi->Sampler); if (result != UR_RESULT_SUCCESS) return result; + result = ur::level_zero::urGetTensorMapExpProcAddrTable( + UR_API_VERSION_CURRENT, &ddi->TensorMapExp); + if (result != UR_RESULT_SUCCESS) + return result; result = ur::level_zero::urGetUSMProcAddrTable(UR_API_VERSION_CURRENT, &ddi->USM); if (result != UR_RESULT_SUCCESS) diff --git a/source/adapters/level_zero/ur_interface_loader.hpp b/source/adapters/level_zero/ur_interface_loader.hpp index 1215d6449e..bebba18e6d 100644 --- a/source/adapters/level_zero/ur_interface_loader.hpp +++ b/source/adapters/level_zero/ur_interface_loader.hpp @@ -735,6 +735,30 @@ ur_result_t urEnqueueNativeCommandExp( const ur_exp_enqueue_native_command_properties_t *pProperties, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t urTensorMapEncodeIm2ColExp( + ur_device_handle_t hDevice, + ur_exp_tensor_map_data_type_flags_t TensorMapType, uint32_t TensorRank, + void *GlobalAddress, const uint64_t 
*GlobalDim, + const uint64_t *GlobalStrides, const int *PixelBoxLowerCorner, + const int *PixelBoxUpperCorner, uint32_t ChannelsPerPixel, + uint32_t PixelsPerColumn, const uint32_t *ElementStrides, + ur_exp_tensor_map_interleave_flags_t Interleave, + ur_exp_tensor_map_swizzle_flags_t Swizzle, + ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, + ur_exp_tensor_map_oob_fill_flags_t OobFill, + ur_exp_tensor_map_handle_t *hTensorMap); +ur_result_t +urTensorMapEncodeTiledExp(ur_device_handle_t hDevice, + ur_exp_tensor_map_data_type_flags_t TensorMapType, + uint32_t TensorRank, void *GlobalAddress, + const uint64_t *GlobalDim, + const uint64_t *GlobalStrides, const uint32_t *BoxDim, + const uint32_t *ElementStrides, + ur_exp_tensor_map_interleave_flags_t Interleave, + ur_exp_tensor_map_swizzle_flags_t Swizzle, + ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, + ur_exp_tensor_map_oob_fill_flags_t OobFill, + ur_exp_tensor_map_handle_t *hTensorMap); #ifdef UR_STATIC_ADAPTER_LEVEL_ZERO ur_result_t urAdapterGetDdiTables(ur_dditable_t *ddi); #endif diff --git a/source/loader/layers/tracing/ur_trcddi.cpp b/source/loader/layers/tracing/ur_trcddi.cpp index b6be9b242f..5c3d67dbdc 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -9284,7 +9284,8 @@ __urdlllocal ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( getContext()->notify_begin(UR_FUNCTION_TENSOR_MAP_ENCODE_IM_2_COL_EXP, "urTensorMapEncodeIm2ColExp", &params); - getContext()->logger.info("---> urTensorMapEncodeIm2ColExp"); + auto &logger = getContext()->logger; + logger.info(" ---> urTensorMapEncodeIm2ColExp\n"); ur_result_t result = pfnEncodeIm2ColExp( hDevice, TensorMapType, TensorRank, GlobalAddress, GlobalDim, @@ -9296,10 +9297,13 @@ __urdlllocal ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( "urTensorMapEncodeIm2ColExp", &params, &result, instance); - std::ostringstream args_str; - ur::extras::printFunctionParams( - args_str, 
UR_FUNCTION_TENSOR_MAP_ENCODE_IM_2_COL_EXP, &params); - getContext()->logger.info("({}) -> {};\n", args_str.str(), result); + if (logger.getLevel() <= logger::Level::INFO) { + std::ostringstream args_str; + ur::extras::printFunctionParams( + args_str, UR_FUNCTION_TENSOR_MAP_ENCODE_IM_2_COL_EXP, &params); + logger.info(" <--- urTensorMapEncodeIm2ColExp({}) -> {};\n", + args_str.str(), result); + } return result; } @@ -9354,7 +9358,8 @@ __urdlllocal ur_result_t UR_APICALL urTensorMapEncodeTiledExp( getContext()->notify_begin(UR_FUNCTION_TENSOR_MAP_ENCODE_TILED_EXP, "urTensorMapEncodeTiledExp", &params); - getContext()->logger.info("---> urTensorMapEncodeTiledExp"); + auto &logger = getContext()->logger; + logger.info(" ---> urTensorMapEncodeTiledExp\n"); ur_result_t result = pfnEncodeTiledExp( hDevice, TensorMapType, TensorRank, GlobalAddress, GlobalDim, @@ -9365,10 +9370,13 @@ __urdlllocal ur_result_t UR_APICALL urTensorMapEncodeTiledExp( "urTensorMapEncodeTiledExp", &params, &result, instance); - std::ostringstream args_str; - ur::extras::printFunctionParams( - args_str, UR_FUNCTION_TENSOR_MAP_ENCODE_TILED_EXP, &params); - getContext()->logger.info("({}) -> {};\n", args_str.str(), result); + if (logger.getLevel() <= logger::Level::INFO) { + std::ostringstream args_str; + ur::extras::printFunctionParams( + args_str, UR_FUNCTION_TENSOR_MAP_ENCODE_TILED_EXP, &params); + logger.info(" <--- urTensorMapEncodeTiledExp({}) -> {};\n", + args_str.str(), result); + } return result; } diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index 2409738fbf..d152e63dc8 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -10693,6 +10693,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetTensorMapExpProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { 
continue; } From 32cc0d9fb11427f402ff87d971f929e5e49f8f93 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Tue, 3 Dec 2024 15:44:55 +0000 Subject: [PATCH 11/12] Fix enum ordering for tensor map --- include/ur_api.h | 4 ++-- include/ur_print.hpp | 12 ++++++------ scripts/core/registry.yml | 12 ++++++------ 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/include/ur_api.h b/include/ur_api.h index 68c5032460..2f3d535610 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -215,9 +215,7 @@ typedef enum ur_function_t { UR_FUNCTION_ENQUEUE_NATIVE_COMMAND_EXP = 228, ///< Enumerator for ::urEnqueueNativeCommandExp UR_FUNCTION_LOADER_CONFIG_SET_MOCKING_ENABLED = 229, ///< Enumerator for ::urLoaderConfigSetMockingEnabled UR_FUNCTION_BINDLESS_IMAGES_RELEASE_EXTERNAL_MEMORY_EXP = 230, ///< Enumerator for ::urBindlessImagesReleaseExternalMemoryExp - UR_FUNCTION_TENSOR_MAP_ENCODE_IM_2_COL_EXP = 230, ///< Enumerator for ::urTensorMapEncodeIm2ColExp UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_MEMCPY_EXP = 231, ///< Enumerator for ::urCommandBufferAppendUSMMemcpyExp - UR_FUNCTION_TENSOR_MAP_ENCODE_TILED_EXP = 231, ///< Enumerator for ::urTensorMapEncodeTiledExp UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_FILL_EXP = 232, ///< Enumerator for ::urCommandBufferAppendUSMFillExp UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_COPY_EXP = 233, ///< Enumerator for ::urCommandBufferAppendMemBufferCopyExp UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_EXP = 234, ///< Enumerator for ::urCommandBufferAppendMemBufferWriteExp @@ -233,6 +231,8 @@ typedef enum ur_function_t { UR_FUNCTION_COMMAND_BUFFER_UPDATE_WAIT_EVENTS_EXP = 244, ///< Enumerator for ::urCommandBufferUpdateWaitEventsExp UR_FUNCTION_BINDLESS_IMAGES_MAP_EXTERNAL_LINEAR_MEMORY_EXP = 245, ///< Enumerator for ::urBindlessImagesMapExternalLinearMemoryExp UR_FUNCTION_ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT = 246, ///< Enumerator for ::urEnqueueEventsWaitWithBarrierExt + UR_FUNCTION_TENSOR_MAP_ENCODE_IM_2_COL_EXP = 247, ///< 
Enumerator for ::urTensorMapEncodeIm2ColExp + UR_FUNCTION_TENSOR_MAP_ENCODE_TILED_EXP = 248, ///< Enumerator for ::urTensorMapEncodeTiledExp /// @cond UR_FUNCTION_FORCE_UINT32 = 0x7fffffff /// @endcond diff --git a/include/ur_print.hpp b/include/ur_print.hpp index cd6bc2ffe0..1acde66f4f 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -940,15 +940,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value) { case UR_FUNCTION_BINDLESS_IMAGES_RELEASE_EXTERNAL_MEMORY_EXP: os << "UR_FUNCTION_BINDLESS_IMAGES_RELEASE_EXTERNAL_MEMORY_EXP"; break; - case UR_FUNCTION_TENSOR_MAP_ENCODE_IM_2_COL_EXP: - os << "UR_FUNCTION_TENSOR_MAP_ENCODE_IM_2_COL_EXP"; - break; case UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_MEMCPY_EXP: os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_MEMCPY_EXP"; break; - case UR_FUNCTION_TENSOR_MAP_ENCODE_TILED_EXP: - os << "UR_FUNCTION_TENSOR_MAP_ENCODE_TILED_EXP"; - break; case UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_FILL_EXP: os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_FILL_EXP"; break; @@ -994,6 +988,12 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value) { case UR_FUNCTION_ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT: os << "UR_FUNCTION_ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT"; break; + case UR_FUNCTION_TENSOR_MAP_ENCODE_IM_2_COL_EXP: + os << "UR_FUNCTION_TENSOR_MAP_ENCODE_IM_2_COL_EXP"; + break; + case UR_FUNCTION_TENSOR_MAP_ENCODE_TILED_EXP: + os << "UR_FUNCTION_TENSOR_MAP_ENCODE_TILED_EXP"; + break; default: os << "unknown enumerator"; break; diff --git a/scripts/core/registry.yml b/scripts/core/registry.yml index 6d7eaef77c..f1a5d9199f 100644 --- a/scripts/core/registry.yml +++ b/scripts/core/registry.yml @@ -559,15 +559,9 @@ etors: - name: BINDLESS_IMAGES_RELEASE_EXTERNAL_MEMORY_EXP desc: Enumerator for $xBindlessImagesReleaseExternalMemoryExp value: '230' -- name: TENSOR_MAP_ENCODE_IM_2_COL_EXP - desc: Enumerator for $xTensorMapEncodeIm2ColExp - value: '230' - name: COMMAND_BUFFER_APPEND_USM_MEMCPY_EXP 
desc: Enumerator for $xCommandBufferAppendUSMMemcpyExp value: '231' -- name: TENSOR_MAP_ENCODE_TILED_EXP - desc: Enumerator for $xTensorMapEncodeTiledExp - value: '231' - name: COMMAND_BUFFER_APPEND_USM_FILL_EXP desc: Enumerator for $xCommandBufferAppendUSMFillExp value: '232' @@ -613,6 +607,12 @@ etors: - name: ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT desc: Enumerator for $xEnqueueEventsWaitWithBarrierExt value: '246' +- name: TENSOR_MAP_ENCODE_IM_2_COL_EXP + desc: Enumerator for $xTensorMapEncodeIm2ColExp + value: '247' +- name: TENSOR_MAP_ENCODE_TILED_EXP + desc: Enumerator for $xTensorMapEncodeTiledExp + value: '248' --- type: enum desc: Defines structure types From 72b5730061b86024078e9440c7e645e7a3904c24 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Wed, 4 Dec 2024 10:48:43 +0000 Subject: [PATCH 12/12] Add tensormap stubs to L0 v2 --- source/adapters/level_zero/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/source/adapters/level_zero/CMakeLists.txt b/source/adapters/level_zero/CMakeLists.txt index 4e81bbd738..cb7e0281af 100644 --- a/source/adapters/level_zero/CMakeLists.txt +++ b/source/adapters/level_zero/CMakeLists.txt @@ -140,6 +140,7 @@ if(UR_BUILD_ADAPTER_L0_V2) ${CMAKE_CURRENT_SOURCE_DIR}/helpers/memory_helpers.cpp ${CMAKE_CURRENT_SOURCE_DIR}/usm_p2p.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/tensor_map.cpp # v2-only sources ${CMAKE_CURRENT_SOURCE_DIR}/v2/command_list_cache.hpp ${CMAKE_CURRENT_SOURCE_DIR}/v2/context.hpp