diff --git a/include/ur_api.h b/include/ur_api.h index eb8b07221c..2f3d535610 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -231,6 +231,8 @@ typedef enum ur_function_t { UR_FUNCTION_COMMAND_BUFFER_UPDATE_WAIT_EVENTS_EXP = 244, ///< Enumerator for ::urCommandBufferUpdateWaitEventsExp UR_FUNCTION_BINDLESS_IMAGES_MAP_EXTERNAL_LINEAR_MEMORY_EXP = 245, ///< Enumerator for ::urBindlessImagesMapExternalLinearMemoryExp UR_FUNCTION_ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT = 246, ///< Enumerator for ::urEnqueueEventsWaitWithBarrierExt + UR_FUNCTION_TENSOR_MAP_ENCODE_IM_2_COL_EXP = 247, ///< Enumerator for ::urTensorMapEncodeIm2ColExp + UR_FUNCTION_TENSOR_MAP_ENCODE_TILED_EXP = 248, ///< Enumerator for ::urTensorMapEncodeTiledExp /// @cond UR_FUNCTION_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -10161,6 +10163,207 @@ urEnqueueNativeCommandExp( ///< not NULL, phEvent must not refer to an element of the phEventWaitList array. ); +#if !defined(__GNUC__) +#pragma endregion +#endif +// Intel 'oneAPI' Unified Runtime Experimental API for mapping tensor objects +#if !defined(__GNUC__) +#pragma region tensor_map_(experimental) +#endif +/////////////////////////////////////////////////////////////////////////////// +/// @brief Handle of tensor map object +typedef struct ur_exp_tensor_map_handle_t_ *ur_exp_tensor_map_handle_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Tensor map data type +typedef uint32_t ur_exp_tensor_map_data_type_flags_t; +typedef enum ur_exp_tensor_map_data_type_flag_t { + UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT8 = UR_BIT(0), ///< 1 byte + UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT16 = UR_BIT(1), ///< 2 bytes + UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT32 = UR_BIT(2), ///< 4 bytes + UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT32 = UR_BIT(3), ///< 4 bytes + UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT64 = UR_BIT(4), ///< 8 bytes + UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT64 = UR_BIT(5), ///< 8 bytes + 
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT16 = UR_BIT(6), ///< 2 bytes + UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32 = UR_BIT(7), ///< 4 bytes + UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT64 = UR_BIT(8), ///< 8 bytes + UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_BFLOAT16 = UR_BIT(9), ///< 2 bytes + UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32_FTZ = UR_BIT(10), ///< 4 bytes + UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32 = UR_BIT(11), ///< 4 bytes + UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32_FTZ = UR_BIT(12), ///< 4 bytes + /// @cond + UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FORCE_UINT32 = 0x7fffffff + /// @endcond + +} ur_exp_tensor_map_data_type_flag_t; +/// @brief Bit Mask for validating ur_exp_tensor_map_data_type_flags_t +#define UR_EXP_TENSOR_MAP_DATA_TYPE_FLAGS_MASK 0xffffe000 + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Tensor map interleave +typedef uint32_t ur_exp_tensor_map_interleave_flags_t; +typedef enum ur_exp_tensor_map_interleave_flag_t { + UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_NONE = UR_BIT(0), ///< No interleave + UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_16B = UR_BIT(1), ///< 16B interleave + UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_32B = UR_BIT(2), ///< 32B interleave + /// @cond + UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_FORCE_UINT32 = 0x7fffffff + /// @endcond + +} ur_exp_tensor_map_interleave_flag_t; +/// @brief Bit Mask for validating ur_exp_tensor_map_interleave_flags_t +#define UR_EXP_TENSOR_MAP_INTERLEAVE_FLAGS_MASK 0xfffffff8 + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Tensor map l2 promotion +typedef uint32_t ur_exp_tensor_map_l2_promotion_flags_t; +typedef enum ur_exp_tensor_map_l2_promotion_flag_t { + UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_NONE = UR_BIT(0), ///< No promotion type + UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_64B = UR_BIT(1), ///< 64B promotion type + UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_128B = UR_BIT(2), ///< 128B promotion type + UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_256B = UR_BIT(3), ///< 256B 
promotion type + /// @cond + UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_FORCE_UINT32 = 0x7fffffff + /// @endcond + +} ur_exp_tensor_map_l2_promotion_flag_t; +/// @brief Bit Mask for validating ur_exp_tensor_map_l2_promotion_flags_t +#define UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAGS_MASK 0xfffffff0 + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Tensor map swizzle +typedef uint32_t ur_exp_tensor_map_swizzle_flags_t; +typedef enum ur_exp_tensor_map_swizzle_flag_t { + UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_NONE = UR_BIT(0), ///< No swizzle + UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_32B = UR_BIT(1), ///< 32B swizzle + UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_64B = UR_BIT(2), ///< 64B swizzle + UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_128B = UR_BIT(3), ///< 128B swizzle + /// @cond + UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_FORCE_UINT32 = 0x7fffffff + /// @endcond + +} ur_exp_tensor_map_swizzle_flag_t; +/// @brief Bit Mask for validating ur_exp_tensor_map_swizzle_flags_t +#define UR_EXP_TENSOR_MAP_SWIZZLE_FLAGS_MASK 0xfffffff0 + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Tensor map OOB fill +typedef uint32_t ur_exp_tensor_map_oob_fill_flags_t; +typedef enum ur_exp_tensor_map_oob_fill_flag_t { + UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_NONE = UR_BIT(0), ///< No OOB fill + UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_REQUEST_ZERO_FMA = UR_BIT(1), ///< Refer to NVIDIA docs + /// @cond + UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_FORCE_UINT32 = 0x7fffffff + /// @endcond + +} ur_exp_tensor_map_oob_fill_flag_t; +/// @brief Bit Mask for validating ur_exp_tensor_map_oob_fill_flags_t +#define UR_EXP_TENSOR_MAP_OOB_FILL_FLAGS_MASK 0xfffffffc + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Encode tensor map with image data +/// +/// @details +/// - Map encode using im2col. 
+/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_EXP_TENSOR_MAP_DATA_TYPE_FLAGS_MASK & TensorMapType` +/// + `::UR_EXP_TENSOR_MAP_INTERLEAVE_FLAGS_MASK & Interleave` +/// + `::UR_EXP_TENSOR_MAP_SWIZZLE_FLAGS_MASK & Swizzle` +/// + `::UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAGS_MASK & L2Promotion` +/// + `::UR_EXP_TENSOR_MAP_OOB_FILL_FLAGS_MASK & OobFill` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == GlobalAddress` +/// + `NULL == GlobalDim` +/// + `NULL == GlobalStrides` +/// + `NULL == PixelBoxLowerCorner` +/// + `NULL == PixelBoxUpperCorner` +/// + `NULL == ElementStrides` +/// + `NULL == hTensorMap` +/// - ::UR_RESULT_ERROR_INVALID_ARGUMENT +/// + `TensorRank < 3` +UR_APIEXPORT ur_result_t UR_APICALL +urTensorMapEncodeIm2ColExp( + ur_device_handle_t hDevice, ///< [in] Handle of the device object. + ur_exp_tensor_map_data_type_flags_t TensorMapType, ///< [in] Data type of the tensor object. + uint32_t TensorRank, ///< [in] Dimensionality of tensor; must be at least 3. + void *GlobalAddress, ///< [in] Starting address of memory region described by tensor. + const uint64_t *GlobalDim, ///< [in] Array containing tensor size (number of elements) along each of + ///< the TensorRank dimensions. + const uint64_t *GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the + ///< TensorRank - 1 dimensions. + const int *PixelBoxLowerCorner, ///< [in] Array containing DHW dimensions of lower box corner. + const int *PixelBoxUpperCorner, ///< [in] Array containing DHW dimensions of upper box corner. + uint32_t ChannelsPerPixel, ///< [in] Number of channels per pixel. + uint32_t PixelsPerColumn, ///< [in] Number of pixels per column. 
+ const uint32_t *ElementStrides, ///< [in] Array containing traversal stride in each of the TensorRank + ///< dimensions. + ur_exp_tensor_map_interleave_flags_t Interleave, ///< [in] Type of interleaved layout the tensor addresses + ur_exp_tensor_map_swizzle_flags_t Swizzle, ///< [in] Bank swizzling pattern inside shared memory + ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, ///< [in] L2 promotion size. + ur_exp_tensor_map_oob_fill_flags_t OobFill, ///< [in] Indicates whether zero or special NaN constant will be used to + ///< fill out-of-bounds elements. + ur_exp_tensor_map_handle_t *hTensorMap ///< [out] Handle of the tensor map object. +); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Encode tensor map with tiled data +/// +/// @details +/// - Tiled map encode. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_EXP_TENSOR_MAP_DATA_TYPE_FLAGS_MASK & TensorMapType` +/// + `::UR_EXP_TENSOR_MAP_INTERLEAVE_FLAGS_MASK & Interleave` +/// + `::UR_EXP_TENSOR_MAP_SWIZZLE_FLAGS_MASK & Swizzle` +/// + `::UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAGS_MASK & L2Promotion` +/// + `::UR_EXP_TENSOR_MAP_OOB_FILL_FLAGS_MASK & OobFill` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == GlobalAddress` +/// + `NULL == GlobalDim` +/// + `NULL == GlobalStrides` +/// + `NULL == BoxDim` +/// + `NULL == ElementStrides` +/// + `NULL == hTensorMap` +/// - ::UR_RESULT_ERROR_INVALID_ARGUMENT +/// + `TensorRank < 3` +UR_APIEXPORT ur_result_t UR_APICALL +urTensorMapEncodeTiledExp( + ur_device_handle_t hDevice, ///< [in] Handle of the device object. + ur_exp_tensor_map_data_type_flags_t TensorMapType, ///< [in] Data type of the tensor object. 
+ uint32_t TensorRank, ///< [in] Dimensionality of tensor; must be at least 3. + void *GlobalAddress, ///< [in] Starting address of memory region described by tensor. + const uint64_t *GlobalDim, ///< [in] Array containing tensor size (number of elements) along each of + ///< the TensorRank dimensions. + const uint64_t *GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the + ///< TensorRank - 1 dimensions. + const uint32_t *BoxDim, ///< [in] Array containing traversal box size (number of elements) along + ///< each of the TensorRank dimensions. Specifies how many elements to be + ///< traversed along each tensor dimension. + const uint32_t *ElementStrides, ///< [in] Array containing traversal stride in each of the TensorRank + ///< dimensions. + ur_exp_tensor_map_interleave_flags_t Interleave, ///< [in] Type of interleaved layout the tensor addresses + ur_exp_tensor_map_swizzle_flags_t Swizzle, ///< [in] Bank swizzling pattern inside shared memory + ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, ///< [in] L2 promotion size. + ur_exp_tensor_map_oob_fill_flags_t OobFill, ///< [in] Indicates whether zero or special NaN constant will be used to + ///< fill out-of-bounds elements. + ur_exp_tensor_map_handle_t *hTensorMap ///< [out] Handle of the tensor map object. 
+); + #if !defined(__GNUC__) #pragma endregion #endif @@ -12333,6 +12536,49 @@ typedef struct ur_command_buffer_command_get_info_exp_params_t { size_t **ppPropSizeRet; } ur_command_buffer_command_get_info_exp_params_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urTensorMapEncodeIm2ColExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_tensor_map_encode_im_2_col_exp_params_t { + ur_device_handle_t *phDevice; + ur_exp_tensor_map_data_type_flags_t *pTensorMapType; + uint32_t *pTensorRank; + void **pGlobalAddress; + const uint64_t **pGlobalDim; + const uint64_t **pGlobalStrides; + const int **pPixelBoxLowerCorner; + const int **pPixelBoxUpperCorner; + uint32_t *pChannelsPerPixel; + uint32_t *pPixelsPerColumn; + const uint32_t **pElementStrides; + ur_exp_tensor_map_interleave_flags_t *pInterleave; + ur_exp_tensor_map_swizzle_flags_t *pSwizzle; + ur_exp_tensor_map_l2_promotion_flags_t *pL2Promotion; + ur_exp_tensor_map_oob_fill_flags_t *pOobFill; + ur_exp_tensor_map_handle_t **phTensorMap; +} ur_tensor_map_encode_im_2_col_exp_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urTensorMapEncodeTiledExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_tensor_map_encode_tiled_exp_params_t { + ur_device_handle_t *phDevice; + ur_exp_tensor_map_data_type_flags_t *pTensorMapType; + uint32_t *pTensorRank; + void **pGlobalAddress; + const uint64_t **pGlobalDim; + const uint64_t **pGlobalStrides; + const uint32_t **pBoxDim; + const uint32_t **pElementStrides; + ur_exp_tensor_map_interleave_flags_t *pInterleave; + ur_exp_tensor_map_swizzle_flags_t *pSwizzle; + 
ur_exp_tensor_map_l2_promotion_flags_t *pL2Promotion; + ur_exp_tensor_map_oob_fill_flags_t *pOobFill; + ur_exp_tensor_map_handle_t **phTensorMap; +} ur_tensor_map_encode_tiled_exp_params_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for urUsmP2PEnablePeerAccessExp /// @details Each entry is a pointer to the parameter passed to the function; diff --git a/include/ur_api_funcs.def b/include/ur_api_funcs.def index 4920245369..5279534547 100644 --- a/include/ur_api_funcs.def +++ b/include/ur_api_funcs.def @@ -185,6 +185,8 @@ _UR_API(urCommandBufferUpdateSignalEventExp) _UR_API(urCommandBufferUpdateWaitEventsExp) _UR_API(urCommandBufferGetInfoExp) _UR_API(urCommandBufferCommandGetInfoExp) +_UR_API(urTensorMapEncodeIm2ColExp) +_UR_API(urTensorMapEncodeTiledExp) _UR_API(urUsmP2PEnablePeerAccessExp) _UR_API(urUsmP2PDisablePeerAccessExp) _UR_API(urUsmP2PPeerAccessGetInfoExp) diff --git a/include/ur_ddi.h b/include/ur_ddi.h index 40a6c5c269..695c1885b0 100644 --- a/include/ur_ddi.h +++ b/include/ur_ddi.h @@ -2248,6 +2248,71 @@ typedef ur_result_t(UR_APICALL *ur_pfnGetCommandBufferExpProcAddrTable_t)( ur_api_version_t, ur_command_buffer_exp_dditable_t *); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urTensorMapEncodeIm2ColExp +typedef ur_result_t(UR_APICALL *ur_pfnTensorMapEncodeIm2ColExp_t)( + ur_device_handle_t, + ur_exp_tensor_map_data_type_flags_t, + uint32_t, + void *, + const uint64_t *, + const uint64_t *, + const int *, + const int *, + uint32_t, + uint32_t, + const uint32_t *, + ur_exp_tensor_map_interleave_flags_t, + ur_exp_tensor_map_swizzle_flags_t, + ur_exp_tensor_map_l2_promotion_flags_t, + ur_exp_tensor_map_oob_fill_flags_t, + ur_exp_tensor_map_handle_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urTensorMapEncodeTiledExp +typedef 
ur_result_t(UR_APICALL *ur_pfnTensorMapEncodeTiledExp_t)( + ur_device_handle_t, + ur_exp_tensor_map_data_type_flags_t, + uint32_t, + void *, + const uint64_t *, + const uint64_t *, + const uint32_t *, + const uint32_t *, + ur_exp_tensor_map_interleave_flags_t, + ur_exp_tensor_map_swizzle_flags_t, + ur_exp_tensor_map_l2_promotion_flags_t, + ur_exp_tensor_map_oob_fill_flags_t, + ur_exp_tensor_map_handle_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Table of TensorMapExp functions pointers +typedef struct ur_tensor_map_exp_dditable_t { + ur_pfnTensorMapEncodeIm2ColExp_t pfnEncodeIm2ColExp; + ur_pfnTensorMapEncodeTiledExp_t pfnEncodeTiledExp; +} ur_tensor_map_exp_dditable_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's TensorMapExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +UR_DLLEXPORT ur_result_t UR_APICALL +urGetTensorMapExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_tensor_map_exp_dditable_t *pDdiTable ///< [in,out] pointer to table of DDI function pointers +); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urGetTensorMapExpProcAddrTable +typedef ur_result_t(UR_APICALL *ur_pfnGetTensorMapExpProcAddrTable_t)( + ur_api_version_t, + ur_tensor_map_exp_dditable_t *); + /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urUsmP2PEnablePeerAccessExp typedef ur_result_t(UR_APICALL *ur_pfnUsmP2PEnablePeerAccessExp_t)( @@ -2515,6 +2580,7 @@ typedef struct ur_dditable_t { ur_usm_dditable_t USM; ur_usm_exp_dditable_t USMExp; ur_command_buffer_exp_dditable_t CommandBufferExp; + 
ur_tensor_map_exp_dditable_t TensorMapExp; ur_usm_p2p_exp_dditable_t UsmP2PExp; ur_virtual_mem_dditable_t VirtualMem; ur_device_dditable_t Device; diff --git a/include/ur_print.h b/include/ur_print.h index c2adb18067..3782ffb5ce 100644 --- a/include/ur_print.h +++ b/include/ur_print.h @@ -1098,6 +1098,46 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintExpEnqueueNativeCommandFlags(enum ur_ /// - `buff_size < out_size` UR_APIEXPORT ur_result_t UR_APICALL urPrintExpEnqueueNativeCommandProperties(const struct ur_exp_enqueue_native_command_properties_t params, char *buffer, const size_t buff_size, size_t *out_size); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_tensor_map_data_type_flag_t enum +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintExpTensorMapDataTypeFlags(enum ur_exp_tensor_map_data_type_flag_t value, char *buffer, const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_tensor_map_interleave_flag_t enum +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintExpTensorMapInterleaveFlags(enum ur_exp_tensor_map_interleave_flag_t value, char *buffer, const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_tensor_map_l2_promotion_flag_t enum +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintExpTensorMapL2PromotionFlags(enum ur_exp_tensor_map_l2_promotion_flag_t value, char *buffer, const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief 
Print ur_exp_tensor_map_swizzle_flag_t enum +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintExpTensorMapSwizzleFlags(enum ur_exp_tensor_map_swizzle_flag_t value, char *buffer, const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_tensor_map_oob_fill_flag_t enum +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintExpTensorMapOobFillFlags(enum ur_exp_tensor_map_oob_fill_flag_t value, char *buffer, const size_t buff_size, size_t *out_size); + /////////////////////////////////////////////////////////////////////////////// /// @brief Print ur_loader_config_create_params_t struct /// @returns @@ -2522,6 +2562,22 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintCommandBufferGetInfoExpParams(const s /// - `buff_size < out_size` UR_APIEXPORT ur_result_t UR_APICALL urPrintCommandBufferCommandGetInfoExpParams(const struct ur_command_buffer_command_get_info_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_tensor_map_encode_im_2_col_exp_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintTensorMapEncodeIm_2ColExpParams(const struct ur_tensor_map_encode_im_2_col_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_tensor_map_encode_tiled_exp_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL 
urPrintTensorMapEncodeTiledExpParams(const struct ur_tensor_map_encode_tiled_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); + /////////////////////////////////////////////////////////////////////////////// /// @brief Print ur_usm_p2p_enable_peer_access_exp_params_t struct /// @returns diff --git a/include/ur_print.hpp b/include/ur_print.hpp index 8888a74f91..1acde66f4f 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -56,6 +56,8 @@ template <> struct is_handle : std::true_type {}; template <> struct is_handle : std::true_type {}; +template <> +struct is_handle : std::true_type {}; template inline constexpr bool is_handle_v = is_handle::value; template @@ -222,6 +224,21 @@ inline ur_result_t printFlag(std::ostream &os, uint32 template <> inline ur_result_t printFlag(std::ostream &os, uint32_t flag); +template <> +inline ur_result_t printFlag(std::ostream &os, uint32_t flag); + +template <> +inline ur_result_t printFlag(std::ostream &os, uint32_t flag); + +template <> +inline ur_result_t printFlag(std::ostream &os, uint32_t flag); + +template <> +inline ur_result_t printFlag(std::ostream &os, uint32_t flag); + +template <> +inline ur_result_t printFlag(std::ostream &os, uint32_t flag); + } // namespace ur::details inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value); @@ -359,6 +376,11 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_exp_enqueue_ext_flag_t inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_enqueue_ext_properties_t params); inline std::ostream &operator<<(std::ostream &os, enum ur_exp_enqueue_native_command_flag_t value); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_enqueue_native_command_properties_t params); +inline std::ostream &operator<<(std::ostream &os, enum ur_exp_tensor_map_data_type_flag_t value); +inline std::ostream &operator<<(std::ostream &os, enum 
ur_exp_tensor_map_interleave_flag_t value); +inline std::ostream &operator<<(std::ostream &os, enum ur_exp_tensor_map_l2_promotion_flag_t value); +inline std::ostream &operator<<(std::ostream &os, enum ur_exp_tensor_map_swizzle_flag_t value); +inline std::ostream &operator<<(std::ostream &os, enum ur_exp_tensor_map_oob_fill_flag_t value); /////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_function_t type @@ -966,6 +988,12 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value) { case UR_FUNCTION_ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT: os << "UR_FUNCTION_ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT"; break; + case UR_FUNCTION_TENSOR_MAP_ENCODE_IM_2_COL_EXP: + os << "UR_FUNCTION_TENSOR_MAP_ENCODE_IM_2_COL_EXP"; + break; + case UR_FUNCTION_TENSOR_MAP_ENCODE_TILED_EXP: + os << "UR_FUNCTION_TENSOR_MAP_ENCODE_TILED_EXP"; + break; default: os << "unknown enumerator"; break; @@ -10662,6 +10690,504 @@ inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_enqueue_na os << "}"; return os; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_tensor_map_data_type_flag_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, enum ur_exp_tensor_map_data_type_flag_t value) { + switch (value) { + case UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT8: + os << "UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT8"; + break; + case UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT16: + os << "UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT16"; + break; + case UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT32: + os << "UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT32"; + break; + case UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT32: + os << "UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT32"; + break; + case UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT64: + os << "UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT64"; + break; + case UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT64: + os << 
"UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT64"; + break; + case UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT16: + os << "UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT16"; + break; + case UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32: + os << "UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32"; + break; + case UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT64: + os << "UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT64"; + break; + case UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_BFLOAT16: + os << "UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_BFLOAT16"; + break; + case UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32_FTZ: + os << "UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32_FTZ"; + break; + case UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32: + os << "UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32"; + break; + case UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32_FTZ: + os << "UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32_FTZ"; + break; + default: + os << "unknown enumerator"; + break; + } + return os; +} + +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_tensor_map_data_type_flag_t flag +template <> +inline ur_result_t printFlag(std::ostream &os, uint32_t flag) { + uint32_t val = flag; + bool first = true; + + if ((val & UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT8) == (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT8) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT8; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT8; + } + + if ((val & UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT16) == (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT16) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT16; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT16; + } + + if ((val & UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT32) == (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT32) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT32; + if (!first) { + os << " | "; + } else { + first = 
false; + } + os << UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT32; + } + + if ((val & UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT32) == (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT32) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT32; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT32; + } + + if ((val & UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT64) == (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT64) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT64; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT64; + } + + if ((val & UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT64) == (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT64) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT64; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT64; + } + + if ((val & UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT16) == (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT16) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT16; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT16; + } + + if ((val & UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32) == (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32; + } + + if ((val & UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT64) == (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT64) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT64; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT64; + } + + if ((val & UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_BFLOAT16) == (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_BFLOAT16) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_BFLOAT16; + if (!first) { + os << " 
| "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_BFLOAT16; + } + + if ((val & UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32_FTZ) == (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32_FTZ) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32_FTZ; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32_FTZ; + } + + if ((val & UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32) == (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32; + } + + if ((val & UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32_FTZ) == (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32_FTZ) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32_FTZ; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32_FTZ; + } + if (val != 0) { + std::bitset<32> bits(val); + if (!first) { + os << " | "; + } + os << "unknown bit flags " << bits; + } else if (first) { + os << "0"; + } + return UR_RESULT_SUCCESS; +} +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_tensor_map_interleave_flag_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, enum ur_exp_tensor_map_interleave_flag_t value) { + switch (value) { + case UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_NONE: + os << "UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_NONE"; + break; + case UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_16B: + os << "UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_16B"; + break; + case UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_32B: + os << "UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_32B"; + break; + default: + os << "unknown enumerator"; + break; + } + return os; +} + +namespace ur::details { 
+/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_tensor_map_interleave_flag_t flag +template <> +inline ur_result_t printFlag(std::ostream &os, uint32_t flag) { + uint32_t val = flag; + bool first = true; + + if ((val & UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_NONE) == (uint32_t)UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_NONE) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_NONE; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_NONE; + } + + if ((val & UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_16B) == (uint32_t)UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_16B) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_16B; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_16B; + } + + if ((val & UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_32B) == (uint32_t)UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_32B) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_32B; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_32B; + } + if (val != 0) { + std::bitset<32> bits(val); + if (!first) { + os << " | "; + } + os << "unknown bit flags " << bits; + } else if (first) { + os << "0"; + } + return UR_RESULT_SUCCESS; +} +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_tensor_map_l2_promotion_flag_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, enum ur_exp_tensor_map_l2_promotion_flag_t value) { + switch (value) { + case UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_NONE: + os << "UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_NONE"; + break; + case UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_64B: + os << "UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_64B"; + break; + case UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_128B: + os << "UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_128B"; + break; + case 
UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_256B: + os << "UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_256B"; + break; + default: + os << "unknown enumerator"; + break; + } + return os; +} + +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_tensor_map_l2_promotion_flag_t flag +template <> +inline ur_result_t printFlag(std::ostream &os, uint32_t flag) { + uint32_t val = flag; + bool first = true; + + if ((val & UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_NONE) == (uint32_t)UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_NONE) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_NONE; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_NONE; + } + + if ((val & UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_64B) == (uint32_t)UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_64B) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_64B; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_64B; + } + + if ((val & UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_128B) == (uint32_t)UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_128B) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_128B; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_128B; + } + + if ((val & UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_256B) == (uint32_t)UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_256B) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_256B; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_256B; + } + if (val != 0) { + std::bitset<32> bits(val); + if (!first) { + os << " | "; + } + os << "unknown bit flags " << bits; + } else if (first) { + os << "0"; + } + return UR_RESULT_SUCCESS; +} +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_tensor_map_swizzle_flag_t type 
+/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, enum ur_exp_tensor_map_swizzle_flag_t value) { + switch (value) { + case UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_NONE: + os << "UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_NONE"; + break; + case UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_32B: + os << "UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_32B"; + break; + case UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_64B: + os << "UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_64B"; + break; + case UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_128B: + os << "UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_128B"; + break; + default: + os << "unknown enumerator"; + break; + } + return os; +} + +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_tensor_map_swizzle_flag_t flag +template <> +inline ur_result_t printFlag(std::ostream &os, uint32_t flag) { + uint32_t val = flag; + bool first = true; + + if ((val & UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_NONE) == (uint32_t)UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_NONE) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_NONE; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_NONE; + } + + if ((val & UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_32B) == (uint32_t)UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_32B) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_32B; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_32B; + } + + if ((val & UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_64B) == (uint32_t)UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_64B) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_64B; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_64B; + } + + if ((val & UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_128B) == (uint32_t)UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_128B) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_128B; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_128B; + } + if (val != 0) 
{ + std::bitset<32> bits(val); + if (!first) { + os << " | "; + } + os << "unknown bit flags " << bits; + } else if (first) { + os << "0"; + } + return UR_RESULT_SUCCESS; +} +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_tensor_map_oob_fill_flag_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, enum ur_exp_tensor_map_oob_fill_flag_t value) { + switch (value) { + case UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_NONE: + os << "UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_NONE"; + break; + case UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_REQUEST_ZERO_FMA: + os << "UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_REQUEST_ZERO_FMA"; + break; + default: + os << "unknown enumerator"; + break; + } + return os; +} + +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_tensor_map_oob_fill_flag_t flag +template <> +inline ur_result_t printFlag(std::ostream &os, uint32_t flag) { + uint32_t val = flag; + bool first = true; + + if ((val & UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_NONE) == (uint32_t)UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_NONE) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_NONE; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_NONE; + } + + if ((val & UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_REQUEST_ZERO_FMA) == (uint32_t)UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_REQUEST_ZERO_FMA) { + val ^= (uint32_t)UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_REQUEST_ZERO_FMA; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_REQUEST_ZERO_FMA; + } + if (val != 0) { + std::bitset<32> bits(val); + if (!first) { + os << " | "; + } + os << "unknown bit flags " << bits; + } else if (first) { + os << "0"; + } + return UR_RESULT_SUCCESS; +} +} // namespace ur::details /////////////////////////////////////////////////////////////////////////////// /// 
@brief Print operator for the ur_loader_config_create_params_t type @@ -17932,6 +18458,192 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct return os; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_tensor_map_encode_im_2_col_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_tensor_map_encode_im_2_col_exp_params_t *params) { + + os << ".hDevice = "; + + ur::details::printPtr(os, + *(params->phDevice)); + + os << ", "; + os << ".TensorMapType = "; + + ur::details::printFlag(os, + *(params->pTensorMapType)); + + os << ", "; + os << ".TensorRank = "; + + os << *(params->pTensorRank); + + os << ", "; + os << ".GlobalAddress = "; + + ur::details::printPtr(os, + *(params->pGlobalAddress)); + + os << ", "; + os << ".GlobalDim = "; + + ur::details::printPtr(os, + *(params->pGlobalDim)); + + os << ", "; + os << ".GlobalStrides = "; + + ur::details::printPtr(os, + *(params->pGlobalStrides)); + + os << ", "; + os << ".PixelBoxLowerCorner = "; + + ur::details::printPtr(os, + *(params->pPixelBoxLowerCorner)); + + os << ", "; + os << ".PixelBoxUpperCorner = "; + + ur::details::printPtr(os, + *(params->pPixelBoxUpperCorner)); + + os << ", "; + os << ".ChannelsPerPixel = "; + + os << *(params->pChannelsPerPixel); + + os << ", "; + os << ".PixelsPerColumn = "; + + os << *(params->pPixelsPerColumn); + + os << ", "; + os << ".ElementStrides = "; + + ur::details::printPtr(os, + *(params->pElementStrides)); + + os << ", "; + os << ".Interleave = "; + + ur::details::printFlag(os, + *(params->pInterleave)); + + os << ", "; + os << ".Swizzle = "; + + ur::details::printFlag(os, + *(params->pSwizzle)); + + os << ", "; + os << ".L2Promotion = "; + + ur::details::printFlag(os, + *(params->pL2Promotion)); + + os << ", "; + os << ".OobFill = "; + + ur::details::printFlag(os, + *(params->pOobFill)); + + os 
<< ", "; + os << ".hTensorMap = "; + + ur::details::printPtr(os, + *(params->phTensorMap)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_tensor_map_encode_tiled_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_tensor_map_encode_tiled_exp_params_t *params) { + + os << ".hDevice = "; + + ur::details::printPtr(os, + *(params->phDevice)); + + os << ", "; + os << ".TensorMapType = "; + + ur::details::printFlag(os, + *(params->pTensorMapType)); + + os << ", "; + os << ".TensorRank = "; + + os << *(params->pTensorRank); + + os << ", "; + os << ".GlobalAddress = "; + + ur::details::printPtr(os, + *(params->pGlobalAddress)); + + os << ", "; + os << ".GlobalDim = "; + + ur::details::printPtr(os, + *(params->pGlobalDim)); + + os << ", "; + os << ".GlobalStrides = "; + + ur::details::printPtr(os, + *(params->pGlobalStrides)); + + os << ", "; + os << ".BoxDim = "; + + ur::details::printPtr(os, + *(params->pBoxDim)); + + os << ", "; + os << ".ElementStrides = "; + + ur::details::printPtr(os, + *(params->pElementStrides)); + + os << ", "; + os << ".Interleave = "; + + ur::details::printFlag(os, + *(params->pInterleave)); + + os << ", "; + os << ".Swizzle = "; + + ur::details::printFlag(os, + *(params->pSwizzle)); + + os << ", "; + os << ".L2Promotion = "; + + ur::details::printFlag(os, + *(params->pL2Promotion)); + + os << ", "; + os << ".OobFill = "; + + ur::details::printFlag(os, + *(params->pOobFill)); + + os << ", "; + os << ".hTensorMap = "; + + ur::details::printPtr(os, + *(params->phTensorMap)); + + return os; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_usm_p2p_enable_peer_access_exp_params_t type /// @returns @@ -19170,6 +19882,12 @@ inline ur_result_t UR_APICALL printFunctionParams(std::ostream &os, ur_function_ case 
UR_FUNCTION_COMMAND_BUFFER_COMMAND_GET_INFO_EXP: { os << (const struct ur_command_buffer_command_get_info_exp_params_t *)params; } break; + case UR_FUNCTION_TENSOR_MAP_ENCODE_IM_2_COL_EXP: { + os << (const struct ur_tensor_map_encode_im_2_col_exp_params_t *)params; + } break; + case UR_FUNCTION_TENSOR_MAP_ENCODE_TILED_EXP: { + os << (const struct ur_tensor_map_encode_tiled_exp_params_t *)params; + } break; case UR_FUNCTION_USM_P2P_ENABLE_PEER_ACCESS_EXP: { os << (const struct ur_usm_p2p_enable_peer_access_exp_params_t *)params; } break; diff --git a/scripts/core/EXP-TENSOR-MAP.rst b/scripts/core/EXP-TENSOR-MAP.rst new file mode 100644 index 0000000000..15a6802363 --- /dev/null +++ b/scripts/core/EXP-TENSOR-MAP.rst @@ -0,0 +1,75 @@ +<% + OneApi=tags['$OneApi'] + x=tags['$x'] + X=x.upper() +%> + +.. _experimental-tensor-map: + +================================================================================ +Tensor Mapping APIs +================================================================================ + +.. warning:: + + Experimental features: + + * May be replaced, updated, or removed at any time. + * Do not require maintaining API/ABI stability of their own additions over + time. + * Do not require conformance testing of their own additions. + + +Motivation +-------------------------------------------------------------------------------- + +Used to target the CUDA entry points ``cuTensorMapEncodeIm2col`` and +``cuTensorMapEncodeTiled``. + +For some tensor core operations on ``sm_90+`` Nvidia devices, a tensor +descriptor must be built on the host and passed to the kernel. The interfaces +mentioned above, and mapped to UR in this extension, provide the APIs necessary +to create these tensor descriptor objects, that can then be passed to the +kernels.
+ +API +-------------------------------------------------------------------------------- + +Enums +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +${x}_exp_tensor_map_data_type_flags_t +${x}_exp_tensor_map_interleave_flags_t +${x}_exp_tensor_map_l2_promotion_flags_t +${x}_exp_tensor_map_swizzle_flags_t +${x}_exp_tensor_map_oob_fill_flags_t + +Types +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +${x}_exp_tensor_map_handle_t + +Functions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +* ${x}TensorMapEncodeIm2ColExp +* ${x}TensorMapEncodeTiledExp + +Changelog +-------------------------------------------------------------------------------- + ++-----------+------------------------+ +| Revision | Changes | ++===========+========================+ +| 1.0 | Initial Draft | ++-----------+------------------------+ + + +Support +-------------------------------------------------------------------------------- + +This extension is only supported on the ``UR_PLATFORM_BACKEND_CUDA`` backend. + +Contributors +-------------------------------------------------------------------------------- + +* Hugh Delaney `hugh.delaney@codeplay.com `_ diff --git a/scripts/core/exp-tensor-map.yml b/scripts/core/exp-tensor-map.yml new file mode 100644 index 0000000000..fa1e8c1898 --- /dev/null +++ b/scripts/core/exp-tensor-map.yml @@ -0,0 +1,213 @@ +# +# Copyright (C) 2024 Intel Corporation +# +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+# See LICENSE.TXT +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# See YaML.md for syntax definition +# +--- #-------------------------------------------------------------------------- +type: header +desc: "Intel $OneApi Unified Runtime Experimental API for mapping tensor objects" +ordinal: "100" + +--- #-------------------------------------------------------------------------- +type: handle +desc: "Handle of tensor map object" +class: $xTensorMap +name: "$x_exp_tensor_map_handle_t" + +--- #-------------------------------------------------------------------------- +type: enum +desc: "Tensor map data type" +name: $x_exp_tensor_map_data_type_flags_t +etors: + - name: UINT8 + desc: "1 byte" + - name: UINT16 + desc: "2 bytes" + - name: UINT32 + desc: "4 bytes" + - name: INT32 + desc: "4 bytes" + - name: UINT64 + desc: "8 bytes" + - name: INT64 + desc: "8 bytes" + - name: FLOAT16 + desc: "2 bytes" + - name: FLOAT32 + desc: "4 bytes" + - name: FLOAT64 + desc: "8 bytes" + - name: BFLOAT16 + desc: "2 bytes" + - name: FLOAT32_FTZ + desc: "4 bytes" + - name: TFLOAT32 + desc: "4 bytes" + - name: TFLOAT32_FTZ + desc: "4 bytes" + +--- #-------------------------------------------------------------------------- +type: enum +desc: "Tensor map interleave" +name: $x_exp_tensor_map_interleave_flags_t +etors: + - name: NONE + desc: "No interleave" + - name: 16B + desc: "16B interleave" + - name: 32B + desc: "32B interleave" + +--- #-------------------------------------------------------------------------- +type: enum +desc: "Tensor map l2 promotion" +name: $x_exp_tensor_map_l2_promotion_flags_t +etors: + - name: NONE + desc: "No promotion type" + - name: 64B + desc: "64B promotion type" + - name: 128B + desc: "128B promotion type" + - name: 256B + desc: "256B promotion type" + +--- #-------------------------------------------------------------------------- +type: enum +desc: "Tensor map swizzle" +name: $x_exp_tensor_map_swizzle_flags_t +etors: + - name: NONE + desc: 
"No swizzle" + - name: 32B + desc: "32B swizzle" + - name: 64B + desc: "64B swizzle" + - name: 128B + desc: "128B swizzle" + +--- #-------------------------------------------------------------------------- +type: enum +desc: "Tensor map OOB fill" +name: $x_exp_tensor_map_oob_fill_flags_t +etors: + - name: NONE + desc: "No OOB fill" + - name: REQUEST_ZERO_FMA + desc: "Refer to NVIDIA docs" + +--- #-------------------------------------------------------------------------- +type: function +desc: "Encode tensor map with image data" +class: $xTensorMap +name: EncodeIm2ColExp +details: + - "Map encode using im2col." +params: + - type: $x_device_handle_t + name: hDevice + desc: "[in] Handle of the device object." + - type: $x_exp_tensor_map_data_type_flags_t + name: TensorMapType + desc: "[in] Data type of the tensor object." + - type: uint32_t + name: TensorRank + desc: "[in] Dimensionality of tensor; must be at least 3." + - type: void* + name: GlobalAddress + desc: "[in] Starting address of memory region described by tensor." + - type: const uint64_t* + name: GlobalDim + desc: "[in] Array containing tensor size (number of elements) along each of the TensorRank dimensions." + - type: const uint64_t* + name: GlobalStrides + desc: "[in] Array containing stride size (in bytes) along each of the TensorRank - 1 dimensions." + - type: const int* + name: PixelBoxLowerCorner + desc: "[in] Array containing DHW dimensions of lower box corner." + - type: const int* + name: PixelBoxUpperCorner + desc: "[in] Array containing DHW dimensions of upper box corner." + - type: uint32_t + name: ChannelsPerPixel + desc: "[in] Number of channels per pixel." + - type: uint32_t + name: PixelsPerColumn + desc: "[in] Number of pixels per column." + - type: const uint32_t* + name: ElementStrides + desc: "[in] Array containing traversal stride in each of the TensorRank dimensions." 
+ - type: $x_exp_tensor_map_interleave_flags_t + name: Interleave + desc: "[in] Type of interleaved layout the tensor addresses" + - type: $x_exp_tensor_map_swizzle_flags_t + name: Swizzle + desc: "[in] Bank swizzling pattern inside shared memory" + - type: $x_exp_tensor_map_l2_promotion_flags_t + name: L2Promotion + desc: "[in] L2 promotion size." + - type: $x_exp_tensor_map_oob_fill_flags_t + name: OobFill + desc: "[in] Indicates whether zero or special NaN constant will be used to fill out-of-bounds elements." + - type: $x_exp_tensor_map_handle_t* + name: hTensorMap + desc: "[out] Handle of the tensor map object." +returns: + - $X_RESULT_ERROR_INVALID_ARGUMENT: + - "`TensorRank < 3`" + +--- #-------------------------------------------------------------------------- +type: function +desc: "Encode tensor map with tiled data" +class: $xTensorMap +name: EncodeTiledExp +details: + - "Tiled map encode." +params: + - type: $x_device_handle_t + name: hDevice + desc: "[in] Handle of the device object." + - type: $x_exp_tensor_map_data_type_flags_t + name: TensorMapType + desc: "[in] Data type of the tensor object." + - type: uint32_t + name: TensorRank + desc: "[in] Dimensionality of tensor; must be at least 3." + - type: void* + name: GlobalAddress + desc: "[in] Starting address of memory region described by tensor." + - type: const uint64_t* + name: GlobalDim + desc: "[in] Array containing tensor size (number of elements) along each of the TensorRank dimensions." + - type: const uint64_t* + name: GlobalStrides + desc: "[in] Array containing stride size (in bytes) along each of the TensorRank - 1 dimensions." + - type: const uint32_t* + name: BoxDim + desc: "[in] Array containing traversal box size (number of elements) along each of the TensorRank dimensions. Specifies how many elements to be traversed along each tensor dimension." + - type: const uint32_t* + name: ElementStrides + desc: "[in] Array containing traversal stride in each of the TensorRank dimensions." 
+ - type: $x_exp_tensor_map_interleave_flags_t + name: Interleave + desc: "[in] Type of interleaved layout the tensor addresses" + - type: $x_exp_tensor_map_swizzle_flags_t + name: Swizzle + desc: "[in] Bank swizzling pattern inside shared memory" + - type: $x_exp_tensor_map_l2_promotion_flags_t + name: L2Promotion + desc: "[in] L2 promotion size." + - type: $x_exp_tensor_map_oob_fill_flags_t + name: OobFill + desc: "[in] Indicates whether zero or special NaN constant will be used to fill out-of-bounds elements." + - type: $x_exp_tensor_map_handle_t* + name: hTensorMap + desc: "[out] Handle of the tensor map object." +returns: + - $X_RESULT_ERROR_INVALID_ARGUMENT: + - "`TensorRank < 3`" + diff --git a/scripts/core/registry.yml b/scripts/core/registry.yml index 2133e1c889..f1a5d9199f 100644 --- a/scripts/core/registry.yml +++ b/scripts/core/registry.yml @@ -607,6 +607,12 @@ etors: - name: ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT desc: Enumerator for $xEnqueueEventsWaitWithBarrierExt value: '246' +- name: TENSOR_MAP_ENCODE_IM_2_COL_EXP + desc: Enumerator for $xTensorMapEncodeIm2ColExp + value: '247' +- name: TENSOR_MAP_ENCODE_TILED_EXP + desc: Enumerator for $xTensorMapEncodeTiledExp + value: '248' --- type: enum desc: Defines structure types diff --git a/source/adapters/adapter.def.in b/source/adapters/adapter.def.in index 3c18c78bd1..fd37178966 100644 --- a/source/adapters/adapter.def.in +++ b/source/adapters/adapter.def.in @@ -16,6 +16,7 @@ EXPORTS urGetProgramExpProcAddrTable urGetQueueProcAddrTable urGetSamplerProcAddrTable + urGetTensorMapExpProcAddrTable urGetUSMProcAddrTable urGetUSMExpProcAddrTable urGetUsmP2PExpProcAddrTable diff --git a/source/adapters/adapter.map.in b/source/adapters/adapter.map.in index bb08ae7d88..50db54ef40 100644 --- a/source/adapters/adapter.map.in +++ b/source/adapters/adapter.map.in @@ -16,6 +16,7 @@ urGetProgramExpProcAddrTable; urGetQueueProcAddrTable; urGetSamplerProcAddrTable; + urGetTensorMapExpProcAddrTable; 
urGetUSMProcAddrTable; urGetUSMExpProcAddrTable; urGetUsmP2PExpProcAddrTable; diff --git a/source/adapters/cuda/CMakeLists.txt b/source/adapters/cuda/CMakeLists.txt index b6b153a5d8..3d0418fd07 100644 --- a/source/adapters/cuda/CMakeLists.txt +++ b/source/adapters/cuda/CMakeLists.txt @@ -38,6 +38,7 @@ add_ur_adapter(${TARGET_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/queue.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler.hpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/tensor_map.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tracing.cpp ${CMAKE_CURRENT_SOURCE_DIR}/usm.cpp ${CMAKE_CURRENT_SOURCE_DIR}/usm_p2p.cpp diff --git a/source/adapters/cuda/tensor_map.cpp b/source/adapters/cuda/tensor_map.cpp new file mode 100644 index 0000000000..da8e4f8f8c --- /dev/null +++ b/source/adapters/cuda/tensor_map.cpp @@ -0,0 +1,142 @@ +//===--------- tensor_map.cpp - CUDA Adapter ------------------------------===// +// +// Copyright (C) 2024 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. 
See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +#include "context.hpp" + +struct ur_exp_tensor_map_handle_t_ { + CUtensorMap Map; +}; + +#define CONVERT(URTYPE, CUTYPE) \ + if ((URTYPE)&UrType) \ + return (CUTYPE); + +inline CUtensorMapDataType +convertUrToCuDataType(ur_exp_tensor_map_data_type_flags_t UrType) { + CONVERT(UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT8, + CU_TENSOR_MAP_DATA_TYPE_UINT8); + CONVERT(UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT16, + CU_TENSOR_MAP_DATA_TYPE_UINT16); + CONVERT(UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT32, + CU_TENSOR_MAP_DATA_TYPE_UINT32); + CONVERT(UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT32, + CU_TENSOR_MAP_DATA_TYPE_INT32); + CONVERT(UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT64, + CU_TENSOR_MAP_DATA_TYPE_UINT64); + CONVERT(UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT64, + CU_TENSOR_MAP_DATA_TYPE_INT64); + CONVERT(UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT16, + CU_TENSOR_MAP_DATA_TYPE_FLOAT16); + CONVERT(UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32, + CU_TENSOR_MAP_DATA_TYPE_FLOAT32); + CONVERT(UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT64, + CU_TENSOR_MAP_DATA_TYPE_FLOAT64); + CONVERT(UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_BFLOAT16, + CU_TENSOR_MAP_DATA_TYPE_BFLOAT16); + CONVERT(UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32_FTZ, + CU_TENSOR_MAP_DATA_TYPE_FLOAT32_FTZ); + CONVERT(UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32, + CU_TENSOR_MAP_DATA_TYPE_TFLOAT32); + CONVERT(UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32_FTZ, + CU_TENSOR_MAP_DATA_TYPE_TFLOAT32_FTZ); + throw "convertUrToCuDataType failed!"; +} + +CUtensorMapInterleave +convertUrToCuInterleave(ur_exp_tensor_map_interleave_flags_t UrType) { + CONVERT(UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_NONE, + CU_TENSOR_MAP_INTERLEAVE_NONE); + CONVERT(UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_16B, CU_TENSOR_MAP_INTERLEAVE_16B); + CONVERT(UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_32B, CU_TENSOR_MAP_INTERLEAVE_32B); + throw 
"convertUrToCuInterleave failed!"; +} + +CUtensorMapSwizzle +convertUrToCuSwizzle(ur_exp_tensor_map_swizzle_flags_t UrType) { + CONVERT(UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_NONE, CU_TENSOR_MAP_SWIZZLE_NONE); + CONVERT(UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_32B, CU_TENSOR_MAP_SWIZZLE_32B); + CONVERT(UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_64B, CU_TENSOR_MAP_SWIZZLE_64B); + CONVERT(UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_128B, CU_TENSOR_MAP_SWIZZLE_128B); + throw "convertUrToCuSwizzle failed!"; +} + +CUtensorMapL2promotion +convertUrToCuL2Promotion(ur_exp_tensor_map_l2_promotion_flags_t UrType) { + CONVERT(UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_NONE, + CU_TENSOR_MAP_L2_PROMOTION_NONE); + CONVERT(UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_64B, + CU_TENSOR_MAP_L2_PROMOTION_L2_64B); + CONVERT(UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_128B, + CU_TENSOR_MAP_L2_PROMOTION_L2_128B); + CONVERT(UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_256B, + CU_TENSOR_MAP_L2_PROMOTION_L2_256B); + throw "convertUrToCul2promotion failed!"; +} + +CUtensorMapFloatOOBfill +convertUrToCuOobFill(ur_exp_tensor_map_oob_fill_flags_t UrType) { + CONVERT(UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_NONE, + CU_TENSOR_MAP_FLOAT_OOB_FILL_NONE); + CONVERT(UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_REQUEST_ZERO_FMA, + CU_TENSOR_MAP_FLOAT_OOB_FILL_NAN_REQUEST_ZERO_FMA); + throw "convertUrToCuDataOOBfill failed!"; +} + +UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( + ur_device_handle_t hDevice, + ur_exp_tensor_map_data_type_flags_t TensorMapType, uint32_t TensorRank, + void *GlobalAddress, const uint64_t *GlobalDim, + const uint64_t *GlobalStrides, const int *PixelBoxLowerCorner, + const int *PixelBoxUpperCorner, uint32_t ChannelsPerPixel, + uint32_t PixelsPerColumn, const uint32_t *ElementStrides, + ur_exp_tensor_map_interleave_flags_t Interleave, + ur_exp_tensor_map_swizzle_flags_t Swizzle, + ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, + ur_exp_tensor_map_oob_fill_flags_t OobFill, + ur_exp_tensor_map_handle_t *hTensorMap) { + ScopedContext Active(hDevice); + 
try { + UR_CHECK_ERROR(cuTensorMapEncodeIm2col( + &(*hTensorMap = new ur_exp_tensor_map_handle_t_)->Map, convertUrToCuDataType(TensorMapType), TensorRank, + GlobalAddress, GlobalDim, GlobalStrides, PixelBoxLowerCorner, + PixelBoxUpperCorner, ChannelsPerPixel, PixelsPerColumn, ElementStrides, + convertUrToCuInterleave(Interleave), convertUrToCuSwizzle(Swizzle), + convertUrToCuL2Promotion(L2Promotion), convertUrToCuOobFill(OobFill))); + } catch (ur_result_t Err) { + delete *hTensorMap; *hTensorMap = nullptr; return Err; + } + return UR_RESULT_SUCCESS; +} +UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeTiledExp( + ur_device_handle_t hDevice, + ur_exp_tensor_map_data_type_flags_t TensorMapType, uint32_t TensorRank, + void *GlobalAddress, const uint64_t *GlobalDim, + const uint64_t *GlobalStrides, const uint32_t *BoxDim, + const uint32_t *ElementStrides, + ur_exp_tensor_map_interleave_flags_t Interleave, + ur_exp_tensor_map_swizzle_flags_t Swizzle, + ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, + ur_exp_tensor_map_oob_fill_flags_t OobFill, + ur_exp_tensor_map_handle_t *hTensorMap) { + ScopedContext Active(hDevice); + try { + UR_CHECK_ERROR(cuTensorMapEncodeTiled( + &(*hTensorMap = new ur_exp_tensor_map_handle_t_)->Map, convertUrToCuDataType(TensorMapType), TensorRank, + GlobalAddress, GlobalDim, GlobalStrides, BoxDim, ElementStrides, + convertUrToCuInterleave(Interleave), convertUrToCuSwizzle(Swizzle), + convertUrToCuL2Promotion(L2Promotion), convertUrToCuOobFill(OobFill))); + } catch (ur_result_t Err) { + delete *hTensorMap; *hTensorMap = nullptr; return Err; + } + return UR_RESULT_SUCCESS; +} diff --git a/source/adapters/cuda/ur_interface_loader.cpp b/source/adapters/cuda/ur_interface_loader.cpp index 4b13e6669c..cea4707a05 100644 --- a/source/adapters/cuda/ur_interface_loader.cpp +++ b/source/adapters/cuda/ur_interface_loader.cpp @@ -434,6 +434,19 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable( return UR_RESULT_SUCCESS; } +UR_APIEXPORT ur_result_t UR_APICALL urGetTensorMapExpProcAddrTable( + ur_api_version_t version, ur_tensor_map_exp_dditable_t *pDdiTable) { + auto result = 
validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; + } + + pDdiTable->pfnEncodeIm2ColExp = urTensorMapEncodeIm2ColExp; + pDdiTable->pfnEncodeTiledExp = urTensorMapEncodeTiledExp; + + return result; +} + UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( ur_api_version_t version, ur_program_exp_dditable_t *pDdiTable) { auto result = validateProcInputs(version, pDdiTable); diff --git a/source/adapters/hip/CMakeLists.txt b/source/adapters/hip/CMakeLists.txt index 9113d7b1ca..36222907c6 100644 --- a/source/adapters/hip/CMakeLists.txt +++ b/source/adapters/hip/CMakeLists.txt @@ -86,6 +86,7 @@ add_ur_adapter(${TARGET_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/queue.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler.hpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/tensor_map.cpp ${CMAKE_CURRENT_SOURCE_DIR}/usm.cpp ${CMAKE_CURRENT_SOURCE_DIR}/usm_p2p.cpp ${CMAKE_CURRENT_SOURCE_DIR}/virtual_mem.cpp diff --git a/source/adapters/hip/tensor_map.cpp b/source/adapters/hip/tensor_map.cpp new file mode 100644 index 0000000000..348c4c9d05 --- /dev/null +++ b/source/adapters/hip/tensor_map.cpp @@ -0,0 +1,28 @@ +//===--------- tensor_map.cpp - HIP Adapter -------------------------------===// +// +// Copyright (C) 2024 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. 
See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( + ur_device_handle_t, ur_exp_tensor_map_data_type_flags_t, uint32_t, void *, + const uint64_t *, const uint64_t *, const int *, const int *, uint32_t, + uint32_t, const uint32_t *, ur_exp_tensor_map_interleave_flags_t, + ur_exp_tensor_map_swizzle_flags_t, ur_exp_tensor_map_l2_promotion_flags_t, + ur_exp_tensor_map_oob_fill_flags_t, ur_exp_tensor_map_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} +UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeTiledExp( + ur_device_handle_t, ur_exp_tensor_map_data_type_flags_t, uint32_t, void *, + const uint64_t *, const uint64_t *, const uint32_t *, const uint32_t *, + ur_exp_tensor_map_interleave_flags_t, ur_exp_tensor_map_swizzle_flags_t, + ur_exp_tensor_map_l2_promotion_flags_t, ur_exp_tensor_map_oob_fill_flags_t, + ur_exp_tensor_map_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/hip/ur_interface_loader.cpp b/source/adapters/hip/ur_interface_loader.cpp index f7ec09188f..2c9df55bb6 100644 --- a/source/adapters/hip/ur_interface_loader.cpp +++ b/source/adapters/hip/ur_interface_loader.cpp @@ -400,6 +400,19 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable( return UR_RESULT_SUCCESS; } +UR_APIEXPORT ur_result_t UR_APICALL urGetTensorMapExpProcAddrTable( + ur_api_version_t version, ur_tensor_map_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; + } + + pDdiTable->pfnEncodeIm2ColExp = urTensorMapEncodeIm2ColExp; + pDdiTable->pfnEncodeTiledExp = urTensorMapEncodeTiledExp; + + return result; +} + UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( ur_api_version_t version, ur_program_exp_dditable_t *pDdiTable) { auto 
result = validateProcInputs(version, pDdiTable); diff --git a/source/adapters/level_zero/CMakeLists.txt b/source/adapters/level_zero/CMakeLists.txt index 05a33c1224..cb7e0281af 100644 --- a/source/adapters/level_zero/CMakeLists.txt +++ b/source/adapters/level_zero/CMakeLists.txt @@ -45,6 +45,7 @@ if(UR_BUILD_ADAPTER_L0) ${CMAKE_CURRENT_SOURCE_DIR}/program.cpp ${CMAKE_CURRENT_SOURCE_DIR}/queue.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/tensor_map.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image.cpp ${CMAKE_CURRENT_SOURCE_DIR}/helpers/kernel_helpers.cpp ${CMAKE_CURRENT_SOURCE_DIR}/helpers/memory_helpers.cpp @@ -139,6 +140,7 @@ if(UR_BUILD_ADAPTER_L0_V2) ${CMAKE_CURRENT_SOURCE_DIR}/helpers/memory_helpers.cpp ${CMAKE_CURRENT_SOURCE_DIR}/usm_p2p.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/tensor_map.cpp # v2-only sources ${CMAKE_CURRENT_SOURCE_DIR}/v2/command_list_cache.hpp ${CMAKE_CURRENT_SOURCE_DIR}/v2/context.hpp diff --git a/source/adapters/level_zero/tensor_map.cpp b/source/adapters/level_zero/tensor_map.cpp new file mode 100644 index 0000000000..91d6498540 --- /dev/null +++ b/source/adapters/level_zero/tensor_map.cpp @@ -0,0 +1,32 @@ +//===--------- tensor_map.cpp - L0 Adapter --------------------------------===// +// +// Copyright (C) 2024 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. 
See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +namespace ur::level_zero { + +ur_result_t urTensorMapEncodeIm2ColExp( + ur_device_handle_t, ur_exp_tensor_map_data_type_flags_t, uint32_t, void *, + const uint64_t *, const uint64_t *, const int *, const int *, uint32_t, + uint32_t, const uint32_t *, ur_exp_tensor_map_interleave_flags_t, + ur_exp_tensor_map_swizzle_flags_t, ur_exp_tensor_map_l2_promotion_flags_t, + ur_exp_tensor_map_oob_fill_flags_t, ur_exp_tensor_map_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t urTensorMapEncodeTiledExp( + ur_device_handle_t, ur_exp_tensor_map_data_type_flags_t, uint32_t, void *, + const uint64_t *, const uint64_t *, const uint32_t *, const uint32_t *, + ur_exp_tensor_map_interleave_flags_t, ur_exp_tensor_map_swizzle_flags_t, + ur_exp_tensor_map_l2_promotion_flags_t, ur_exp_tensor_map_oob_fill_flags_t, + ur_exp_tensor_map_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/ur_interface_loader.cpp b/source/adapters/level_zero/ur_interface_loader.cpp index 0a36b3ecad..1d9c8d5c37 100644 --- a/source/adapters/level_zero/ur_interface_loader.cpp +++ b/source/adapters/level_zero/ur_interface_loader.cpp @@ -423,6 +423,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urGetSamplerProcAddrTable( return result; } +UR_APIEXPORT ur_result_t UR_APICALL urGetTensorMapExpProcAddrTable( + ur_api_version_t version, ur_tensor_map_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; + } + + pDdiTable->pfnEncodeIm2ColExp = ur::level_zero::urTensorMapEncodeIm2ColExp; + pDdiTable->pfnEncodeTiledExp = ur::level_zero::urTensorMapEncodeTiledExp; + + return result; +} + UR_APIEXPORT ur_result_t UR_APICALL urGetUSMProcAddrTable(ur_api_version_t 
version, ur_usm_dditable_t *pDdiTable) { auto result = validateProcInputs(version, pDdiTable); @@ -594,6 +607,10 @@ ur_result_t urAdapterGetDdiTables(ur_dditable_t *ddi) { &ddi->Sampler); if (result != UR_RESULT_SUCCESS) return result; + result = ur::level_zero::urGetTensorMapExpProcAddrTable( + UR_API_VERSION_CURRENT, &ddi->TensorMapExp); + if (result != UR_RESULT_SUCCESS) + return result; result = ur::level_zero::urGetUSMProcAddrTable(UR_API_VERSION_CURRENT, &ddi->USM); if (result != UR_RESULT_SUCCESS) diff --git a/source/adapters/level_zero/ur_interface_loader.hpp b/source/adapters/level_zero/ur_interface_loader.hpp index 1215d6449e..bebba18e6d 100644 --- a/source/adapters/level_zero/ur_interface_loader.hpp +++ b/source/adapters/level_zero/ur_interface_loader.hpp @@ -735,6 +735,30 @@ ur_result_t urEnqueueNativeCommandExp( const ur_exp_enqueue_native_command_properties_t *pProperties, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t urTensorMapEncodeIm2ColExp( + ur_device_handle_t hDevice, + ur_exp_tensor_map_data_type_flags_t TensorMapType, uint32_t TensorRank, + void *GlobalAddress, const uint64_t *GlobalDim, + const uint64_t *GlobalStrides, const int *PixelBoxLowerCorner, + const int *PixelBoxUpperCorner, uint32_t ChannelsPerPixel, + uint32_t PixelsPerColumn, const uint32_t *ElementStrides, + ur_exp_tensor_map_interleave_flags_t Interleave, + ur_exp_tensor_map_swizzle_flags_t Swizzle, + ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, + ur_exp_tensor_map_oob_fill_flags_t OobFill, + ur_exp_tensor_map_handle_t *hTensorMap); +ur_result_t +urTensorMapEncodeTiledExp(ur_device_handle_t hDevice, + ur_exp_tensor_map_data_type_flags_t TensorMapType, + uint32_t TensorRank, void *GlobalAddress, + const uint64_t *GlobalDim, + const uint64_t *GlobalStrides, const uint32_t *BoxDim, + const uint32_t *ElementStrides, + ur_exp_tensor_map_interleave_flags_t Interleave, + ur_exp_tensor_map_swizzle_flags_t 
Swizzle, + ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, + ur_exp_tensor_map_oob_fill_flags_t OobFill, + ur_exp_tensor_map_handle_t *hTensorMap); #ifdef UR_STATIC_ADAPTER_LEVEL_ZERO ur_result_t urAdapterGetDdiTables(ur_dditable_t *ddi); #endif diff --git a/source/adapters/mock/ur_mockddi.cpp b/source/adapters/mock/ur_mockddi.cpp index 42c342444d..ec0be3890f 100644 --- a/source/adapters/mock/ur_mockddi.cpp +++ b/source/adapters/mock/ur_mockddi.cpp @@ -10731,6 +10731,172 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueNativeCommandExp( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urTensorMapEncodeIm2ColExp +__urdlllocal ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( + ur_device_handle_t hDevice, ///< [in] Handle of the device object. + ur_exp_tensor_map_data_type_flags_t + TensorMapType, ///< [in] Data type of the tensor object. + uint32_t TensorRank, ///< [in] Dimensionality of tensor; must be at least 3. + void * + GlobalAddress, ///< [in] Starting address of memory region described by tensor. + const uint64_t * + GlobalDim, ///< [in] Array containing tensor size (number of elements) along each of + ///< the TensorRank dimensions. + const uint64_t * + GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the + ///< TensorRank - 1 dimensions. + const int * + PixelBoxLowerCorner, ///< [in] Array containing DHW dimensions of lower box corner. + const int * + PixelBoxUpperCorner, ///< [in] Array containing DHW dimensions of upper box corner. + uint32_t ChannelsPerPixel, ///< [in] Number of channels per pixel. + uint32_t PixelsPerColumn, ///< [in] Number of pixels per column. + const uint32_t * + ElementStrides, ///< [in] Array containing traversal stride in each of the TensorRank + ///< dimensions. 
+ ur_exp_tensor_map_interleave_flags_t + Interleave, ///< [in] Type of interleaved layout the tensor addresses + ur_exp_tensor_map_swizzle_flags_t + Swizzle, ///< [in] Bank swizzling pattern inside shared memory + ur_exp_tensor_map_l2_promotion_flags_t + L2Promotion, ///< [in] L2 promotion size. + ur_exp_tensor_map_oob_fill_flags_t + OobFill, ///< [in] Indicates whether zero or special NaN constant will be used to + ///< fill out-of-bounds elements. + ur_exp_tensor_map_handle_t + *hTensorMap ///< [out] Handle of the tensor map object. + ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + ur_tensor_map_encode_im_2_col_exp_params_t params = {&hDevice, + &TensorMapType, + &TensorRank, + &GlobalAddress, + &GlobalDim, + &GlobalStrides, + &PixelBoxLowerCorner, + &PixelBoxUpperCorner, + &ChannelsPerPixel, + &PixelsPerColumn, + &ElementStrides, + &Interleave, + &Swizzle, + &L2Promotion, + &OobFill, + &hTensorMap}; + + auto beforeCallback = reinterpret_cast( + mock::getCallbacks().get_before_callback("urTensorMapEncodeIm2ColExp")); + if (beforeCallback) { + result = beforeCallback(¶ms); + if (result != UR_RESULT_SUCCESS) { + return result; + } + } + + auto replaceCallback = reinterpret_cast( + mock::getCallbacks().get_replace_callback( + "urTensorMapEncodeIm2ColExp")); + if (replaceCallback) { + result = replaceCallback(¶ms); + } else { + + *hTensorMap = mock::createDummyHandle(); + result = UR_RESULT_SUCCESS; + } + + if (result != UR_RESULT_SUCCESS) { + return result; + } + + auto afterCallback = reinterpret_cast( + mock::getCallbacks().get_after_callback("urTensorMapEncodeIm2ColExp")); + if (afterCallback) { + return afterCallback(¶ms); + } + + return result; +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urTensorMapEncodeTiledExp +__urdlllocal ur_result_t UR_APICALL urTensorMapEncodeTiledExp( + ur_device_handle_t hDevice, ///< [in] Handle of the device object. + ur_exp_tensor_map_data_type_flags_t + TensorMapType, ///< [in] Data type of the tensor object. + uint32_t TensorRank, ///< [in] Dimensionality of tensor; must be at least 3. + void * + GlobalAddress, ///< [in] Starting address of memory region described by tensor. + const uint64_t * + GlobalDim, ///< [in] Array containing tensor size (number of elements) along each of + ///< the TensorRank dimensions. + const uint64_t * + GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the + ///< TensorRank - 1 dimensions. + const uint32_t * + BoxDim, ///< [in] Array containing traversal box size (number of elments) along + ///< each of the TensorRank dimensions. Specifies how many elements to be + ///< traversed along each tensor dimension. + const uint32_t * + ElementStrides, ///< [in] Array containing traversal stride in each of the TensorRank + ///< dimensions. + ur_exp_tensor_map_interleave_flags_t + Interleave, ///< [in] Type of interleaved layout the tensor addresses + ur_exp_tensor_map_swizzle_flags_t + Swizzle, ///< [in] Bank swizzling pattern inside shared memory + ur_exp_tensor_map_l2_promotion_flags_t + L2Promotion, ///< [in] L2 promotion size. + ur_exp_tensor_map_oob_fill_flags_t + OobFill, ///< [in] Indicates whether zero or special NaN constant will be used to + ///< fill out-of-bounds elements. + ur_exp_tensor_map_handle_t + *hTensorMap ///< [out] Handle of the tensor map object. 
+ ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + ur_tensor_map_encode_tiled_exp_params_t params = { + &hDevice, &TensorMapType, &TensorRank, &GlobalAddress, + &GlobalDim, &GlobalStrides, &BoxDim, &ElementStrides, + &Interleave, &Swizzle, &L2Promotion, &OobFill, + &hTensorMap}; + + auto beforeCallback = reinterpret_cast( + mock::getCallbacks().get_before_callback("urTensorMapEncodeTiledExp")); + if (beforeCallback) { + result = beforeCallback(¶ms); + if (result != UR_RESULT_SUCCESS) { + return result; + } + } + + auto replaceCallback = reinterpret_cast( + mock::getCallbacks().get_replace_callback("urTensorMapEncodeTiledExp")); + if (replaceCallback) { + result = replaceCallback(¶ms); + } else { + + *hTensorMap = mock::createDummyHandle(); + result = UR_RESULT_SUCCESS; + } + + if (result != UR_RESULT_SUCCESS) { + return result; + } + + auto afterCallback = reinterpret_cast( + mock::getCallbacks().get_after_callback("urTensorMapEncodeTiledExp")); + if (afterCallback) { + return afterCallback(¶ms); + } + + return result; +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + } // namespace driver #if defined(__cplusplus) @@ -11550,6 +11716,38 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetSamplerProcAddrTable( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's TensorMapExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +UR_DLLEXPORT ur_result_t UR_APICALL urGetTensorMapExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_tensor_map_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers + ) try { + if (nullptr == pDdiTable) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (driver::d_context.version < version) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + + ur_result_t result = UR_RESULT_SUCCESS; + + pDdiTable->pfnEncodeIm2ColExp = driver::urTensorMapEncodeIm2ColExp; + + pDdiTable->pfnEncodeTiledExp = driver::urTensorMapEncodeTiledExp; + + return result; +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's USM table /// with current process' addresses diff --git a/source/adapters/native_cpu/CMakeLists.txt b/source/adapters/native_cpu/CMakeLists.txt index 56cfc577d8..69f7fff6bd 100644 --- a/source/adapters/native_cpu/CMakeLists.txt +++ b/source/adapters/native_cpu/CMakeLists.txt @@ -34,6 +34,7 @@ add_ur_adapter(${TARGET_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/queue.cpp ${CMAKE_CURRENT_SOURCE_DIR}/queue.hpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/tensor_map.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ur_interface_loader.cpp ${CMAKE_CURRENT_SOURCE_DIR}/usm_p2p.cpp ${CMAKE_CURRENT_SOURCE_DIR}/virtual_mem.cpp diff --git a/source/adapters/native_cpu/tensor_map.cpp b/source/adapters/native_cpu/tensor_map.cpp new file mode 100644 index 0000000000..eb9f01b318 --- /dev/null +++ b/source/adapters/native_cpu/tensor_map.cpp @@ -0,0 +1,28 @@ +//===--------- tensor_map.cpp - Native CPU Adapter ------------------------===// +// +// Copyright (C) 2024 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. 
See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( + ur_device_handle_t, ur_exp_tensor_map_data_type_flags_t, uint32_t, void *, + const uint64_t *, const uint64_t *, const int *, const int *, uint32_t, + uint32_t, const uint32_t *, ur_exp_tensor_map_interleave_flags_t, + ur_exp_tensor_map_swizzle_flags_t, ur_exp_tensor_map_l2_promotion_flags_t, + ur_exp_tensor_map_oob_fill_flags_t, ur_exp_tensor_map_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} +UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeTiledExp( + ur_device_handle_t, ur_exp_tensor_map_data_type_flags_t, uint32_t, void *, + const uint64_t *, const uint64_t *, const uint32_t *, const uint32_t *, + ur_exp_tensor_map_interleave_flags_t, ur_exp_tensor_map_swizzle_flags_t, + ur_exp_tensor_map_l2_promotion_flags_t, ur_exp_tensor_map_oob_fill_flags_t, + ur_exp_tensor_map_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/native_cpu/ur_interface_loader.cpp b/source/adapters/native_cpu/ur_interface_loader.cpp index 9717f020c3..55b1e6a568 100644 --- a/source/adapters/native_cpu/ur_interface_loader.cpp +++ b/source/adapters/native_cpu/ur_interface_loader.cpp @@ -418,6 +418,19 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable( return UR_RESULT_SUCCESS; } +UR_APIEXPORT ur_result_t UR_APICALL urGetTensorMapExpProcAddrTable( + ur_api_version_t version, ur_tensor_map_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; + } + + pDdiTable->pfnEncodeIm2ColExp = urTensorMapEncodeIm2ColExp; + pDdiTable->pfnEncodeTiledExp = urTensorMapEncodeTiledExp; + + return result; +} + UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( ur_api_version_t version, 
ur_program_exp_dditable_t *pDdiTable) { auto result = validateProcInputs(version, pDdiTable); diff --git a/source/adapters/opencl/CMakeLists.txt b/source/adapters/opencl/CMakeLists.txt index a7e91f75e5..e091012bab 100644 --- a/source/adapters/opencl/CMakeLists.txt +++ b/source/adapters/opencl/CMakeLists.txt @@ -38,6 +38,7 @@ add_ur_adapter(${TARGET_NAME} SHARED ${CMAKE_CURRENT_SOURCE_DIR}/queue.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/usm.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/tensor_map.cpp ${CMAKE_CURRENT_SOURCE_DIR}/usm.cpp ${CMAKE_CURRENT_SOURCE_DIR}/usm_p2p.cpp ${CMAKE_CURRENT_SOURCE_DIR}/virtual_mem.cpp diff --git a/source/adapters/opencl/tensor_map.cpp b/source/adapters/opencl/tensor_map.cpp new file mode 100644 index 0000000000..ea2a009f88 --- /dev/null +++ b/source/adapters/opencl/tensor_map.cpp @@ -0,0 +1,28 @@ +//===--------- tensor_map.cpp - OpenCL Adapter ----------------------------===// +// +// Copyright (C) 2024 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. 
See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( + ur_device_handle_t, ur_exp_tensor_map_data_type_flags_t, uint32_t, void *, + const uint64_t *, const uint64_t *, const int *, const int *, uint32_t, + uint32_t, const uint32_t *, ur_exp_tensor_map_interleave_flags_t, + ur_exp_tensor_map_swizzle_flags_t, ur_exp_tensor_map_l2_promotion_flags_t, + ur_exp_tensor_map_oob_fill_flags_t, ur_exp_tensor_map_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} +UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeTiledExp( + ur_device_handle_t, ur_exp_tensor_map_data_type_flags_t, uint32_t, void *, + const uint64_t *, const uint64_t *, const uint32_t *, const uint32_t *, + ur_exp_tensor_map_interleave_flags_t, ur_exp_tensor_map_swizzle_flags_t, + ur_exp_tensor_map_l2_promotion_flags_t, ur_exp_tensor_map_oob_fill_flags_t, + ur_exp_tensor_map_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/opencl/ur_interface_loader.cpp b/source/adapters/opencl/ur_interface_loader.cpp index 46d2bf6cdd..d51c27f6cc 100644 --- a/source/adapters/opencl/ur_interface_loader.cpp +++ b/source/adapters/opencl/ur_interface_loader.cpp @@ -426,6 +426,19 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable( return UR_RESULT_SUCCESS; } +UR_APIEXPORT ur_result_t UR_APICALL urGetTensorMapExpProcAddrTable( + ur_api_version_t version, ur_tensor_map_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; + } + + pDdiTable->pfnEncodeIm2ColExp = urTensorMapEncodeIm2ColExp; + pDdiTable->pfnEncodeTiledExp = urTensorMapEncodeTiledExp; + + return result; +} + UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( ur_api_version_t version, ur_program_exp_dditable_t 
*pDdiTable) { auto result = validateProcInputs(version, pDdiTable); diff --git a/source/loader/layers/tracing/ur_trcddi.cpp b/source/loader/layers/tracing/ur_trcddi.cpp index 64489c39ac..5c3d67dbdc 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -9221,6 +9221,166 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueNativeCommandExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urTensorMapEncodeIm2ColExp +__urdlllocal ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( + ur_device_handle_t hDevice, ///< [in] Handle of the device object. + ur_exp_tensor_map_data_type_flags_t + TensorMapType, ///< [in] Data type of the tensor object. + uint32_t TensorRank, ///< [in] Dimensionality of tensor; must be at least 3. + void * + GlobalAddress, ///< [in] Starting address of memory region described by tensor. + const uint64_t * + GlobalDim, ///< [in] Array containing tensor size (number of elements) along each of + ///< the TensorRank dimensions. + const uint64_t * + GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the + ///< TensorRank - 1 dimensions. + const int * + PixelBoxLowerCorner, ///< [in] Array containing DHW dimensions of lower box corner. + const int * + PixelBoxUpperCorner, ///< [in] Array containing DHW dimensions of upper box corner. + uint32_t ChannelsPerPixel, ///< [in] Number of channels per pixel. + uint32_t PixelsPerColumn, ///< [in] Number of pixels per column. + const uint32_t * + ElementStrides, ///< [in] Array containing traversal stride in each of the TensorRank + ///< dimensions. + ur_exp_tensor_map_interleave_flags_t + Interleave, ///< [in] Type of interleaved layout the tensor addresses + ur_exp_tensor_map_swizzle_flags_t + Swizzle, ///< [in] Bank swizzling pattern inside shared memory + ur_exp_tensor_map_l2_promotion_flags_t + L2Promotion, ///< [in] L2 promotion size. 
+ ur_exp_tensor_map_oob_fill_flags_t + OobFill, ///< [in] Indicates whether zero or special NaN constant will be used to + ///< fill out-of-bounds elements. + ur_exp_tensor_map_handle_t + *hTensorMap ///< [out] Handle of the tensor map object. +) { + auto pfnEncodeIm2ColExp = + getContext()->urDdiTable.TensorMapExp.pfnEncodeIm2ColExp; + + if (nullptr == pfnEncodeIm2ColExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_tensor_map_encode_im_2_col_exp_params_t params = {&hDevice, + &TensorMapType, + &TensorRank, + &GlobalAddress, + &GlobalDim, + &GlobalStrides, + &PixelBoxLowerCorner, + &PixelBoxUpperCorner, + &ChannelsPerPixel, + &PixelsPerColumn, + &ElementStrides, + &Interleave, + &Swizzle, + &L2Promotion, + &OobFill, + &hTensorMap}; + uint64_t instance = + getContext()->notify_begin(UR_FUNCTION_TENSOR_MAP_ENCODE_IM_2_COL_EXP, + "urTensorMapEncodeIm2ColExp", ¶ms); + + auto &logger = getContext()->logger; + logger.info(" ---> urTensorMapEncodeIm2ColExp\n"); + + ur_result_t result = pfnEncodeIm2ColExp( + hDevice, TensorMapType, TensorRank, GlobalAddress, GlobalDim, + GlobalStrides, PixelBoxLowerCorner, PixelBoxUpperCorner, + ChannelsPerPixel, PixelsPerColumn, ElementStrides, Interleave, Swizzle, + L2Promotion, OobFill, hTensorMap); + + getContext()->notify_end(UR_FUNCTION_TENSOR_MAP_ENCODE_IM_2_COL_EXP, + "urTensorMapEncodeIm2ColExp", ¶ms, &result, + instance); + + if (logger.getLevel() <= logger::Level::INFO) { + std::ostringstream args_str; + ur::extras::printFunctionParams( + args_str, UR_FUNCTION_TENSOR_MAP_ENCODE_IM_2_COL_EXP, ¶ms); + logger.info(" <--- urTensorMapEncodeIm2ColExp({}) -> {};\n", + args_str.str(), result); + } + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urTensorMapEncodeTiledExp +__urdlllocal ur_result_t UR_APICALL urTensorMapEncodeTiledExp( + ur_device_handle_t hDevice, ///< [in] Handle of the device object. 
+ ur_exp_tensor_map_data_type_flags_t + TensorMapType, ///< [in] Data type of the tensor object. + uint32_t TensorRank, ///< [in] Dimensionality of tensor; must be at least 3. + void * + GlobalAddress, ///< [in] Starting address of memory region described by tensor. + const uint64_t * + GlobalDim, ///< [in] Array containing tensor size (number of elements) along each of + ///< the TensorRank dimensions. + const uint64_t * + GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the + ///< TensorRank - 1 dimensions. + const uint32_t * + BoxDim, ///< [in] Array containing traversal box size (number of elments) along + ///< each of the TensorRank dimensions. Specifies how many elements to be + ///< traversed along each tensor dimension. + const uint32_t * + ElementStrides, ///< [in] Array containing traversal stride in each of the TensorRank + ///< dimensions. + ur_exp_tensor_map_interleave_flags_t + Interleave, ///< [in] Type of interleaved layout the tensor addresses + ur_exp_tensor_map_swizzle_flags_t + Swizzle, ///< [in] Bank swizzling pattern inside shared memory + ur_exp_tensor_map_l2_promotion_flags_t + L2Promotion, ///< [in] L2 promotion size. + ur_exp_tensor_map_oob_fill_flags_t + OobFill, ///< [in] Indicates whether zero or special NaN constant will be used to + ///< fill out-of-bounds elements. + ur_exp_tensor_map_handle_t + *hTensorMap ///< [out] Handle of the tensor map object. 
+) { + auto pfnEncodeTiledExp = + getContext()->urDdiTable.TensorMapExp.pfnEncodeTiledExp; + + if (nullptr == pfnEncodeTiledExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_tensor_map_encode_tiled_exp_params_t params = { + &hDevice, &TensorMapType, &TensorRank, &GlobalAddress, + &GlobalDim, &GlobalStrides, &BoxDim, &ElementStrides, + &Interleave, &Swizzle, &L2Promotion, &OobFill, + &hTensorMap}; + uint64_t instance = + getContext()->notify_begin(UR_FUNCTION_TENSOR_MAP_ENCODE_TILED_EXP, + "urTensorMapEncodeTiledExp", ¶ms); + + auto &logger = getContext()->logger; + logger.info(" ---> urTensorMapEncodeTiledExp\n"); + + ur_result_t result = pfnEncodeTiledExp( + hDevice, TensorMapType, TensorRank, GlobalAddress, GlobalDim, + GlobalStrides, BoxDim, ElementStrides, Interleave, Swizzle, L2Promotion, + OobFill, hTensorMap); + + getContext()->notify_end(UR_FUNCTION_TENSOR_MAP_ENCODE_TILED_EXP, + "urTensorMapEncodeTiledExp", ¶ms, &result, + instance); + + if (logger.getLevel() <= logger::Level::INFO) { + std::ostringstream args_str; + ur::extras::printFunctionParams( + args_str, UR_FUNCTION_TENSOR_MAP_ENCODE_TILED_EXP, ¶ms); + logger.info(" <--- urTensorMapEncodeTiledExp({}) -> {};\n", + args_str.str(), result); + } + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's Global table /// with current process' addresses @@ -10266,6 +10426,43 @@ __urdlllocal ur_result_t UR_APICALL urGetSamplerProcAddrTable( return result; } /////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's TensorMapExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +__urdlllocal ur_result_t UR_APICALL urGetTensorMapExpProcAddrTable( + ur_api_version_t version, ///< [in] API 
version requested + ur_tensor_map_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + auto &dditable = ur_tracing_layer::getContext()->urDdiTable.TensorMapExp; + + if (nullptr == pDdiTable) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (UR_MAJOR_VERSION(ur_tracing_layer::getContext()->version) != + UR_MAJOR_VERSION(version) || + UR_MINOR_VERSION(ur_tracing_layer::getContext()->version) > + UR_MINOR_VERSION(version)) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + + ur_result_t result = UR_RESULT_SUCCESS; + + dditable.pfnEncodeIm2ColExp = pDdiTable->pfnEncodeIm2ColExp; + pDdiTable->pfnEncodeIm2ColExp = + ur_tracing_layer::urTensorMapEncodeIm2ColExp; + + dditable.pfnEncodeTiledExp = pDdiTable->pfnEncodeTiledExp; + pDdiTable->pfnEncodeTiledExp = ur_tracing_layer::urTensorMapEncodeTiledExp; + + return result; +} +/////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's USM table /// with current process' addresses /// @@ -10610,6 +10807,11 @@ ur_result_t context_t::init(ur_dditable_t *dditable, UR_API_VERSION_CURRENT, &dditable->Sampler); } + if (UR_RESULT_SUCCESS == result) { + result = ur_tracing_layer::urGetTensorMapExpProcAddrTable( + UR_API_VERSION_CURRENT, &dditable->TensorMapExp); + } + if (UR_RESULT_SUCCESS == result) { result = ur_tracing_layer::urGetUSMProcAddrTable(UR_API_VERSION_CURRENT, &dditable->USM); diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index b3969de10f..1701ee4725 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -10274,6 +10274,229 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueNativeCommandExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urTensorMapEncodeIm2ColExp +__urdlllocal ur_result_t 
UR_APICALL urTensorMapEncodeIm2ColExp( + ur_device_handle_t hDevice, ///< [in] Handle of the device object. + ur_exp_tensor_map_data_type_flags_t + TensorMapType, ///< [in] Data type of the tensor object. + uint32_t TensorRank, ///< [in] Dimensionality of tensor; must be at least 3. + void * + GlobalAddress, ///< [in] Starting address of memory region described by tensor. + const uint64_t * + GlobalDim, ///< [in] Array containing tensor size (number of elements) along each of + ///< the TensorRank dimensions. + const uint64_t * + GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the + ///< TensorRank - 1 dimensions. + const int * + PixelBoxLowerCorner, ///< [in] Array containing DHW dimensions of lower box corner. + const int * + PixelBoxUpperCorner, ///< [in] Array containing DHW dimensions of upper box corner. + uint32_t ChannelsPerPixel, ///< [in] Number of channels per pixel. + uint32_t PixelsPerColumn, ///< [in] Number of pixels per column. + const uint32_t * + ElementStrides, ///< [in] Array containing traversal stride in each of the TensorRank + ///< dimensions. + ur_exp_tensor_map_interleave_flags_t + Interleave, ///< [in] Type of interleaved layout the tensor addresses + ur_exp_tensor_map_swizzle_flags_t + Swizzle, ///< [in] Bank swizzling pattern inside shared memory + ur_exp_tensor_map_l2_promotion_flags_t + L2Promotion, ///< [in] L2 promotion size. + ur_exp_tensor_map_oob_fill_flags_t + OobFill, ///< [in] Indicates whether zero or special NaN constant will be used to + ///< fill out-of-bounds elements. + ur_exp_tensor_map_handle_t + *hTensorMap ///< [out] Handle of the tensor map object. 
+) { + auto pfnEncodeIm2ColExp = + getContext()->urDdiTable.TensorMapExp.pfnEncodeIm2ColExp; + + if (nullptr == pfnEncodeIm2ColExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (getContext()->enableParameterValidation) { + if (NULL == hDevice) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL == GlobalAddress) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (NULL == GlobalDim) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (NULL == GlobalStrides) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (NULL == PixelBoxLowerCorner) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (NULL == PixelBoxUpperCorner) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (NULL == ElementStrides) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (NULL == hTensorMap) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (UR_EXP_TENSOR_MAP_DATA_TYPE_FLAGS_MASK & TensorMapType) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + if (UR_EXP_TENSOR_MAP_INTERLEAVE_FLAGS_MASK & Interleave) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + if (UR_EXP_TENSOR_MAP_SWIZZLE_FLAGS_MASK & Swizzle) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + if (UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAGS_MASK & L2Promotion) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + if (UR_EXP_TENSOR_MAP_OOB_FILL_FLAGS_MASK & OobFill) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + if (TensorRank < 3) { + return UR_RESULT_ERROR_INVALID_ARGUMENT; + } + } + + if (getContext()->enableLifetimeValidation && + !getContext()->refCountContext->isReferenceValid(hDevice)) { + getContext()->refCountContext->logInvalidReference(hDevice); + } + + ur_result_t result = pfnEncodeIm2ColExp( + hDevice, TensorMapType, TensorRank, GlobalAddress, GlobalDim, + GlobalStrides, PixelBoxLowerCorner, PixelBoxUpperCorner, + ChannelsPerPixel, PixelsPerColumn, ElementStrides, Interleave, Swizzle, + L2Promotion, OobFill, hTensorMap); + 
+ return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urTensorMapEncodeTiledExp +__urdlllocal ur_result_t UR_APICALL urTensorMapEncodeTiledExp( + ur_device_handle_t hDevice, ///< [in] Handle of the device object. + ur_exp_tensor_map_data_type_flags_t + TensorMapType, ///< [in] Data type of the tensor object. + uint32_t TensorRank, ///< [in] Dimensionality of tensor; must be at least 3. + void * + GlobalAddress, ///< [in] Starting address of memory region described by tensor. + const uint64_t * + GlobalDim, ///< [in] Array containing tensor size (number of elements) along each of + ///< the TensorRank dimensions. + const uint64_t * + GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the + ///< TensorRank - 1 dimensions. + const uint32_t * + BoxDim, ///< [in] Array containing traversal box size (number of elments) along + ///< each of the TensorRank dimensions. Specifies how many elements to be + ///< traversed along each tensor dimension. + const uint32_t * + ElementStrides, ///< [in] Array containing traversal stride in each of the TensorRank + ///< dimensions. + ur_exp_tensor_map_interleave_flags_t + Interleave, ///< [in] Type of interleaved layout the tensor addresses + ur_exp_tensor_map_swizzle_flags_t + Swizzle, ///< [in] Bank swizzling pattern inside shared memory + ur_exp_tensor_map_l2_promotion_flags_t + L2Promotion, ///< [in] L2 promotion size. + ur_exp_tensor_map_oob_fill_flags_t + OobFill, ///< [in] Indicates whether zero or special NaN constant will be used to + ///< fill out-of-bounds elements. + ur_exp_tensor_map_handle_t + *hTensorMap ///< [out] Handle of the tensor map object. 
+) { + auto pfnEncodeTiledExp = + getContext()->urDdiTable.TensorMapExp.pfnEncodeTiledExp; + + if (nullptr == pfnEncodeTiledExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (getContext()->enableParameterValidation) { + if (NULL == hDevice) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL == GlobalAddress) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (NULL == GlobalDim) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (NULL == GlobalStrides) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (NULL == BoxDim) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (NULL == ElementStrides) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (NULL == hTensorMap) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (UR_EXP_TENSOR_MAP_DATA_TYPE_FLAGS_MASK & TensorMapType) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + if (UR_EXP_TENSOR_MAP_INTERLEAVE_FLAGS_MASK & Interleave) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + if (UR_EXP_TENSOR_MAP_SWIZZLE_FLAGS_MASK & Swizzle) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + if (UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAGS_MASK & L2Promotion) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + if (UR_EXP_TENSOR_MAP_OOB_FILL_FLAGS_MASK & OobFill) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + if (TensorRank < 3) { + return UR_RESULT_ERROR_INVALID_ARGUMENT; + } + } + + if (getContext()->enableLifetimeValidation && + !getContext()->refCountContext->isReferenceValid(hDevice)) { + getContext()->refCountContext->logInvalidReference(hDevice); + } + + ur_result_t result = pfnEncodeTiledExp( + hDevice, TensorMapType, TensorRank, GlobalAddress, GlobalDim, + GlobalStrides, BoxDim, ElementStrides, Interleave, Swizzle, L2Promotion, + OobFill, hTensorMap); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's Global table /// 
with current process' addresses @@ -11344,6 +11567,45 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetSamplerProcAddrTable( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's TensorMapExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +UR_DLLEXPORT ur_result_t UR_APICALL urGetTensorMapExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_tensor_map_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + auto &dditable = ur_validation_layer::getContext()->urDdiTable.TensorMapExp; + + if (nullptr == pDdiTable) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (UR_MAJOR_VERSION(ur_validation_layer::getContext()->version) != + UR_MAJOR_VERSION(version) || + UR_MINOR_VERSION(ur_validation_layer::getContext()->version) > + UR_MINOR_VERSION(version)) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + + ur_result_t result = UR_RESULT_SUCCESS; + + dditable.pfnEncodeIm2ColExp = pDdiTable->pfnEncodeIm2ColExp; + pDdiTable->pfnEncodeIm2ColExp = + ur_validation_layer::urTensorMapEncodeIm2ColExp; + + dditable.pfnEncodeTiledExp = pDdiTable->pfnEncodeTiledExp; + pDdiTable->pfnEncodeTiledExp = + ur_validation_layer::urTensorMapEncodeTiledExp; + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's USM table /// with current process' addresses @@ -11711,6 +11973,11 @@ ur_result_t context_t::init(ur_dditable_t *dditable, UR_API_VERSION_CURRENT, &dditable->Sampler); } + if (UR_RESULT_SUCCESS == result) { + result = ur_validation_layer::urGetTensorMapExpProcAddrTable( + UR_API_VERSION_CURRENT, &dditable->TensorMapExp); + } + if (UR_RESULT_SUCCESS == result) { result = 
ur_validation_layer::urGetUSMProcAddrTable( UR_API_VERSION_CURRENT, &dditable->USM); diff --git a/source/loader/loader.def.in b/source/loader/loader.def.in index a336da153d..5ca6d99113 100644 --- a/source/loader/loader.def.in +++ b/source/loader/loader.def.in @@ -119,6 +119,7 @@ EXPORTS urGetProgramProcAddrTable urGetQueueProcAddrTable urGetSamplerProcAddrTable + urGetTensorMapExpProcAddrTable urGetUSMExpProcAddrTable urGetUSMProcAddrTable urGetUsmP2PExpProcAddrTable @@ -331,6 +332,11 @@ EXPORTS urPrintExpSamplerCubemapFilterMode urPrintExpSamplerCubemapProperties urPrintExpSamplerMipProperties + urPrintExpTensorMapDataTypeFlags + urPrintExpTensorMapInterleaveFlags + urPrintExpTensorMapL2PromotionFlags + urPrintExpTensorMapOobFillFlags + urPrintExpTensorMapSwizzleFlags urPrintExpWin32Handle urPrintFunction urPrintFunctionParams @@ -465,6 +471,8 @@ EXPORTS urPrintSamplerRetainParams urPrintSpecializationConstantInfo urPrintStructureType + urPrintTensorMapEncodeIm_2ColExpParams + urPrintTensorMapEncodeTiledExpParams urPrintUsmAdviceFlags urPrintUsmAllocInfo urPrintUsmAllocLocationDesc @@ -535,6 +543,8 @@ EXPORTS urSamplerGetNativeHandle urSamplerRelease urSamplerRetain + urTensorMapEncodeIm2ColExp + urTensorMapEncodeTiledExp urUSMDeviceAlloc urUSMFree urUSMGetMemAllocInfo diff --git a/source/loader/loader.map.in b/source/loader/loader.map.in index 59a8a8d107..706d28dd01 100644 --- a/source/loader/loader.map.in +++ b/source/loader/loader.map.in @@ -119,6 +119,7 @@ urGetProgramProcAddrTable; urGetQueueProcAddrTable; urGetSamplerProcAddrTable; + urGetTensorMapExpProcAddrTable; urGetUSMExpProcAddrTable; urGetUSMProcAddrTable; urGetUsmP2PExpProcAddrTable; @@ -331,6 +332,11 @@ urPrintExpSamplerCubemapFilterMode; urPrintExpSamplerCubemapProperties; urPrintExpSamplerMipProperties; + urPrintExpTensorMapDataTypeFlags; + urPrintExpTensorMapInterleaveFlags; + urPrintExpTensorMapL2PromotionFlags; + urPrintExpTensorMapOobFillFlags; + urPrintExpTensorMapSwizzleFlags; 
urPrintExpWin32Handle; urPrintFunction; urPrintFunctionParams; @@ -465,6 +471,8 @@ urPrintSamplerRetainParams; urPrintSpecializationConstantInfo; urPrintStructureType; + urPrintTensorMapEncodeIm_2ColExpParams; + urPrintTensorMapEncodeTiledExpParams; urPrintUsmAdviceFlags; urPrintUsmAllocInfo; urPrintUsmAllocLocationDesc; @@ -535,6 +543,8 @@ urSamplerGetNativeHandle; urSamplerRelease; urSamplerRetain; + urTensorMapEncodeIm2ColExp; + urTensorMapEncodeTiledExp; urUSMDeviceAlloc; urUSMFree; urUSMGetMemAllocInfo; diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index 86a6ad95a0..d152e63dc8 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -9364,6 +9364,149 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueNativeCommandExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urTensorMapEncodeIm2ColExp +__urdlllocal ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( + ur_device_handle_t hDevice, ///< [in] Handle of the device object. + ur_exp_tensor_map_data_type_flags_t + TensorMapType, ///< [in] Data type of the tensor object. + uint32_t TensorRank, ///< [in] Dimensionality of tensor; must be at least 3. + void * + GlobalAddress, ///< [in] Starting address of memory region described by tensor. + const uint64_t * + GlobalDim, ///< [in] Array containing tensor size (number of elements) along each of + ///< the TensorRank dimensions. + const uint64_t * + GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the + ///< TensorRank - 1 dimensions. + const int * + PixelBoxLowerCorner, ///< [in] Array containing DHW dimensions of lower box corner. + const int * + PixelBoxUpperCorner, ///< [in] Array containing DHW dimensions of upper box corner. + uint32_t ChannelsPerPixel, ///< [in] Number of channels per pixel. + uint32_t PixelsPerColumn, ///< [in] Number of pixels per column. 
+ const uint32_t * + ElementStrides, ///< [in] Array containing traversal stride in each of the TensorRank + ///< dimensions. + ur_exp_tensor_map_interleave_flags_t + Interleave, ///< [in] Type of interleaved layout the tensor addresses + ur_exp_tensor_map_swizzle_flags_t + Swizzle, ///< [in] Bank swizzling pattern inside shared memory + ur_exp_tensor_map_l2_promotion_flags_t + L2Promotion, ///< [in] L2 promotion size. + ur_exp_tensor_map_oob_fill_flags_t + OobFill, ///< [in] Indicates whether zero or special NaN constant will be used to + ///< fill out-of-bounds elements. + ur_exp_tensor_map_handle_t + *hTensorMap ///< [out] Handle of the tensor map object. +) { + ur_result_t result = UR_RESULT_SUCCESS; + + [[maybe_unused]] auto context = getContext(); + + // extract platform's function pointer table + auto dditable = reinterpret_cast(hDevice)->dditable; + auto pfnEncodeIm2ColExp = dditable->ur.TensorMapExp.pfnEncodeIm2ColExp; + if (nullptr == pfnEncodeIm2ColExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hDevice = reinterpret_cast(hDevice)->handle; + + // forward to device-platform + result = pfnEncodeIm2ColExp( + hDevice, TensorMapType, TensorRank, GlobalAddress, GlobalDim, + GlobalStrides, PixelBoxLowerCorner, PixelBoxUpperCorner, + ChannelsPerPixel, PixelsPerColumn, ElementStrides, Interleave, Swizzle, + L2Promotion, OobFill, hTensorMap); + + if (UR_RESULT_SUCCESS != result) { + return result; + } + + try { + // convert platform handle to loader handle + *hTensorMap = reinterpret_cast( + context->factories.ur_exp_tensor_map_factory.getInstance( + *hTensorMap, dditable)); + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urTensorMapEncodeTiledExp +__urdlllocal ur_result_t UR_APICALL urTensorMapEncodeTiledExp( + ur_device_handle_t hDevice, 
///< [in] Handle of the device object. + ur_exp_tensor_map_data_type_flags_t + TensorMapType, ///< [in] Data type of the tensor object. + uint32_t TensorRank, ///< [in] Dimensionality of tensor; must be at least 3. + void * + GlobalAddress, ///< [in] Starting address of memory region described by tensor. + const uint64_t * + GlobalDim, ///< [in] Array containing tensor size (number of elements) along each of + ///< the TensorRank dimensions. + const uint64_t * + GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the + ///< TensorRank - 1 dimensions. + const uint32_t * + BoxDim, ///< [in] Array containing traversal box size (number of elements) along + ///< each of the TensorRank dimensions. Specifies how many elements to be + ///< traversed along each tensor dimension. + const uint32_t * + ElementStrides, ///< [in] Array containing traversal stride in each of the TensorRank + ///< dimensions. + ur_exp_tensor_map_interleave_flags_t + Interleave, ///< [in] Type of interleaved layout the tensor addresses + ur_exp_tensor_map_swizzle_flags_t + Swizzle, ///< [in] Bank swizzling pattern inside shared memory + ur_exp_tensor_map_l2_promotion_flags_t + L2Promotion, ///< [in] L2 promotion size. + ur_exp_tensor_map_oob_fill_flags_t + OobFill, ///< [in] Indicates whether zero or special NaN constant will be used to + ///< fill out-of-bounds elements. + ur_exp_tensor_map_handle_t + *hTensorMap ///< [out] Handle of the tensor map object. 
+) { + ur_result_t result = UR_RESULT_SUCCESS; + + [[maybe_unused]] auto context = getContext(); + + // extract platform's function pointer table + auto dditable = reinterpret_cast(hDevice)->dditable; + auto pfnEncodeTiledExp = dditable->ur.TensorMapExp.pfnEncodeTiledExp; + if (nullptr == pfnEncodeTiledExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hDevice = reinterpret_cast(hDevice)->handle; + + // forward to device-platform + result = pfnEncodeTiledExp(hDevice, TensorMapType, TensorRank, + GlobalAddress, GlobalDim, GlobalStrides, BoxDim, + ElementStrides, Interleave, Swizzle, L2Promotion, + OobFill, hTensorMap); + + if (UR_RESULT_SUCCESS != result) { + return result; + } + + try { + // convert platform handle to loader handle + *hTensorMap = reinterpret_cast( + context->factories.ur_exp_tensor_map_factory.getInstance( + *hTensorMap, dditable)); + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + return result; +} + } // namespace ur_loader #if defined(__cplusplus) @@ -10524,6 +10667,68 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetSamplerProcAddrTable( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's TensorMapExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +UR_DLLEXPORT ur_result_t UR_APICALL urGetTensorMapExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_tensor_map_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + if (nullptr == pDdiTable) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (ur_loader::getContext()->version < version) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + + ur_result_t result = 
UR_RESULT_SUCCESS; + + // Load the device-platform DDI tables + for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + + if (platform.initStatus != UR_RESULT_SUCCESS) { + continue; + } + auto getTable = reinterpret_cast( + ur_loader::LibLoader::getFunctionPtr( + platform.handle.get(), "urGetTensorMapExpProcAddrTable")); + if (!getTable) { + continue; + } + platform.initStatus = + getTable(version, &platform.dditable.ur.TensorMapExp); + } + + if (UR_RESULT_SUCCESS == result) { + if (ur_loader::getContext()->platforms.size() != 1 || + ur_loader::getContext()->forceIntercept) { + // return pointers to loader's DDIs + pDdiTable->pfnEncodeIm2ColExp = + ur_loader::urTensorMapEncodeIm2ColExp; + pDdiTable->pfnEncodeTiledExp = ur_loader::urTensorMapEncodeTiledExp; + } else { + // return pointers directly to platform's DDIs + *pDdiTable = ur_loader::getContext() + ->platforms.front() + .dditable.ur.TensorMapExp; + } + } + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's USM table /// with current process' addresses diff --git a/source/loader/ur_ldrddi.hpp b/source/loader/ur_ldrddi.hpp index f748500c73..309fb6cc65 100644 --- a/source/loader/ur_ldrddi.hpp +++ b/source/loader/ur_ldrddi.hpp @@ -87,6 +87,10 @@ using ur_exp_command_buffer_command_factory_t = singleton_factory_t; +using ur_exp_tensor_map_object_t = object_t; +using ur_exp_tensor_map_factory_t = + singleton_factory_t; + struct handle_factories { ur_adapter_factory_t ur_adapter_factory; ur_platform_factory_t ur_platform_factory; @@ -105,6 +109,7 @@ struct handle_factories { ur_exp_command_buffer_factory_t ur_exp_command_buffer_factory; ur_exp_command_buffer_command_factory_t ur_exp_command_buffer_command_factory; + ur_exp_tensor_map_factory_t ur_exp_tensor_map_factory; }; } // namespace ur_loader diff 
--git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index 3340363737..d83ec2e829 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -9551,4 +9551,156 @@ ur_result_t UR_APICALL urEnqueueNativeCommandExp( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Encode tensor map with image data +/// +/// @details +/// - Map encode using im2col. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_EXP_TENSOR_MAP_DATA_TYPE_FLAGS_MASK & TensorMapType` +/// + `::UR_EXP_TENSOR_MAP_INTERLEAVE_FLAGS_MASK & Interleave` +/// + `::UR_EXP_TENSOR_MAP_SWIZZLE_FLAGS_MASK & Swizzle` +/// + `::UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAGS_MASK & L2Promotion` +/// + `::UR_EXP_TENSOR_MAP_OOB_FILL_FLAGS_MASK & OobFill` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == GlobalAddress` +/// + `NULL == GlobalDim` +/// + `NULL == GlobalStrides` +/// + `NULL == PixelBoxLowerCorner` +/// + `NULL == PixelBoxUpperCorner` +/// + `NULL == ElementStrides` +/// + `NULL == hTensorMap` +/// - ::UR_RESULT_ERROR_INVALID_ARGUMENT +/// + `TensorRank < 3` +ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( + ur_device_handle_t hDevice, ///< [in] Handle of the device object. + ur_exp_tensor_map_data_type_flags_t + TensorMapType, ///< [in] Data type of the tensor object. + uint32_t TensorRank, ///< [in] Dimensionality of tensor; must be at least 3. + void * + GlobalAddress, ///< [in] Starting address of memory region described by tensor. + const uint64_t * + GlobalDim, ///< [in] Array containing tensor size (number of elements) along each of + ///< the TensorRank dimensions. 
+ const uint64_t * + GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the + ///< TensorRank - 1 dimensions. + const int * + PixelBoxLowerCorner, ///< [in] Array containing DHW dimensions of lower box corner. + const int * + PixelBoxUpperCorner, ///< [in] Array containing DHW dimensions of upper box corner. + uint32_t ChannelsPerPixel, ///< [in] Number of channels per pixel. + uint32_t PixelsPerColumn, ///< [in] Number of pixels per column. + const uint32_t * + ElementStrides, ///< [in] Array containing traversal stride in each of the TensorRank + ///< dimensions. + ur_exp_tensor_map_interleave_flags_t + Interleave, ///< [in] Type of interleaved layout the tensor addresses + ur_exp_tensor_map_swizzle_flags_t + Swizzle, ///< [in] Bank swizzling pattern inside shared memory + ur_exp_tensor_map_l2_promotion_flags_t + L2Promotion, ///< [in] L2 promotion size. + ur_exp_tensor_map_oob_fill_flags_t + OobFill, ///< [in] Indicates whether zero or special NaN constant will be used to + ///< fill out-of-bounds elements. + ur_exp_tensor_map_handle_t + *hTensorMap ///< [out] Handle of the tensor map object. + ) try { + auto pfnEncodeIm2ColExp = + ur_lib::getContext()->urDdiTable.TensorMapExp.pfnEncodeIm2ColExp; + if (nullptr == pfnEncodeIm2ColExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnEncodeIm2ColExp(hDevice, TensorMapType, TensorRank, GlobalAddress, + GlobalDim, GlobalStrides, PixelBoxLowerCorner, + PixelBoxUpperCorner, ChannelsPerPixel, + PixelsPerColumn, ElementStrides, Interleave, + Swizzle, L2Promotion, OobFill, hTensorMap); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Encode tensor map with tiled data +/// +/// @details +/// - Tiled map encode. 
+/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_EXP_TENSOR_MAP_DATA_TYPE_FLAGS_MASK & TensorMapType` +/// + `::UR_EXP_TENSOR_MAP_INTERLEAVE_FLAGS_MASK & Interleave` +/// + `::UR_EXP_TENSOR_MAP_SWIZZLE_FLAGS_MASK & Swizzle` +/// + `::UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAGS_MASK & L2Promotion` +/// + `::UR_EXP_TENSOR_MAP_OOB_FILL_FLAGS_MASK & OobFill` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == GlobalAddress` +/// + `NULL == GlobalDim` +/// + `NULL == GlobalStrides` +/// + `NULL == BoxDim` +/// + `NULL == ElementStrides` +/// + `NULL == hTensorMap` +/// - ::UR_RESULT_ERROR_INVALID_ARGUMENT +/// + `TensorRank < 3` +ur_result_t UR_APICALL urTensorMapEncodeTiledExp( + ur_device_handle_t hDevice, ///< [in] Handle of the device object. + ur_exp_tensor_map_data_type_flags_t + TensorMapType, ///< [in] Data type of the tensor object. + uint32_t TensorRank, ///< [in] Dimensionality of tensor; must be at least 3. + void * + GlobalAddress, ///< [in] Starting address of memory region described by tensor. + const uint64_t * + GlobalDim, ///< [in] Array containing tensor size (number of elements) along each of + ///< the TensorRank dimensions. + const uint64_t * + GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the + ///< TensorRank - 1 dimensions. + const uint32_t * + BoxDim, ///< [in] Array containing traversal box size (number of elements) along + ///< each of the TensorRank dimensions. Specifies how many elements to be + ///< traversed along each tensor dimension. + const uint32_t * + ElementStrides, ///< [in] Array containing traversal stride in each of the TensorRank + ///< dimensions. 
+ ur_exp_tensor_map_interleave_flags_t + Interleave, ///< [in] Type of interleaved layout the tensor addresses + ur_exp_tensor_map_swizzle_flags_t + Swizzle, ///< [in] Bank swizzling pattern inside shared memory + ur_exp_tensor_map_l2_promotion_flags_t + L2Promotion, ///< [in] L2 promotion size. + ur_exp_tensor_map_oob_fill_flags_t + OobFill, ///< [in] Indicates whether zero or special NaN constant will be used to + ///< fill out-of-bounds elements. + ur_exp_tensor_map_handle_t + *hTensorMap ///< [out] Handle of the tensor map object. + ) try { + auto pfnEncodeTiledExp = + ur_lib::getContext()->urDdiTable.TensorMapExp.pfnEncodeTiledExp; + if (nullptr == pfnEncodeTiledExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnEncodeTiledExp(hDevice, TensorMapType, TensorRank, GlobalAddress, + GlobalDim, GlobalStrides, BoxDim, ElementStrides, + Interleave, Swizzle, L2Promotion, OobFill, + hTensorMap); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + } // extern "C" diff --git a/source/loader/ur_libddi.cpp b/source/loader/ur_libddi.cpp index 4d88bb2044..910cbfe607 100644 --- a/source/loader/ur_libddi.cpp +++ b/source/loader/ur_libddi.cpp @@ -99,6 +99,11 @@ __urdlllocal ur_result_t context_t::ddiInit() { &urDdiTable.Sampler); } + if (UR_RESULT_SUCCESS == result) { + result = urGetTensorMapExpProcAddrTable(UR_API_VERSION_CURRENT, + &urDdiTable.TensorMapExp); + } + if (UR_RESULT_SUCCESS == result) { result = urGetUSMProcAddrTable(UR_API_VERSION_CURRENT, &urDdiTable.USM); } diff --git a/source/loader/ur_print.cpp b/source/loader/ur_print.cpp index 6b1cbfd5ee..690f562af4 100644 --- a/source/loader/ur_print.cpp +++ b/source/loader/ur_print.cpp @@ -1109,6 +1109,49 @@ ur_result_t urPrintExpEnqueueNativeCommandProperties( return str_copy(&ss, buffer, buff_size, out_size); } +ur_result_t +urPrintExpTensorMapDataTypeFlags(enum ur_exp_tensor_map_data_type_flag_t value, + char *buffer, const size_t buff_size, + size_t *out_size) { + 
std::stringstream ss; + ss << value; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t urPrintExpTensorMapInterleaveFlags( + enum ur_exp_tensor_map_interleave_flag_t value, char *buffer, + const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << value; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t urPrintExpTensorMapL2PromotionFlags( + enum ur_exp_tensor_map_l2_promotion_flag_t value, char *buffer, + const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << value; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t +urPrintExpTensorMapSwizzleFlags(enum ur_exp_tensor_map_swizzle_flag_t value, + char *buffer, const size_t buff_size, + size_t *out_size) { + std::stringstream ss; + ss << value; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t +urPrintExpTensorMapOobFillFlags(enum ur_exp_tensor_map_oob_fill_flag_t value, + char *buffer, const size_t buff_size, + size_t *out_size) { + std::stringstream ss; + ss << value; + return str_copy(&ss, buffer, buff_size, out_size); +} + ur_result_t urPrintAdapterGetParams(const struct ur_adapter_get_params_t *params, char *buffer, const size_t buff_size, @@ -2508,6 +2551,22 @@ ur_result_t urPrintSamplerCreateWithNativeHandleParams( return str_copy(&ss, buffer, buff_size, out_size); } +ur_result_t urPrintTensorMapEncodeIm_2ColExpParams( + const struct ur_tensor_map_encode_im_2_col_exp_params_t *params, + char *buffer, const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t urPrintTensorMapEncodeTiledExpParams( + const struct ur_tensor_map_encode_tiled_exp_params_t *params, char *buffer, + const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + ur_result_t urPrintUsmHostAllocParams(const struct ur_usm_host_alloc_params_t 
*params, char *buffer, const size_t buff_size, diff --git a/source/ur_api.cpp b/source/ur_api.cpp index 853d61472e..7be7628651 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -8099,3 +8099,134 @@ ur_result_t UR_APICALL urEnqueueNativeCommandExp( ur_result_t result = UR_RESULT_SUCCESS; return result; } + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Encode tensor map with image data +/// +/// @details +/// - Map encode using im2col. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_EXP_TENSOR_MAP_DATA_TYPE_FLAGS_MASK & TensorMapType` +/// + `::UR_EXP_TENSOR_MAP_INTERLEAVE_FLAGS_MASK & Interleave` +/// + `::UR_EXP_TENSOR_MAP_SWIZZLE_FLAGS_MASK & Swizzle` +/// + `::UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAGS_MASK & L2Promotion` +/// + `::UR_EXP_TENSOR_MAP_OOB_FILL_FLAGS_MASK & OobFill` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == GlobalAddress` +/// + `NULL == GlobalDim` +/// + `NULL == GlobalStrides` +/// + `NULL == PixelBoxLowerCorner` +/// + `NULL == PixelBoxUpperCorner` +/// + `NULL == ElementStrides` +/// + `NULL == hTensorMap` +/// - ::UR_RESULT_ERROR_INVALID_ARGUMENT +/// + `TensorRank < 3` +ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( + ur_device_handle_t hDevice, ///< [in] Handle of the device object. + ur_exp_tensor_map_data_type_flags_t + TensorMapType, ///< [in] Data type of the tensor object. + uint32_t TensorRank, ///< [in] Dimensionality of tensor; must be at least 3. + void * + GlobalAddress, ///< [in] Starting address of memory region described by tensor. + const uint64_t * + GlobalDim, ///< [in] Array containing tensor size (number of elements) along each of + ///< the TensorRank dimensions. 
+ const uint64_t * + GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the + ///< TensorRank - 1 dimensions. + const int * + PixelBoxLowerCorner, ///< [in] Array containing DHW dimensions of lower box corner. + const int * + PixelBoxUpperCorner, ///< [in] Array containing DHW dimensions of upper box corner. + uint32_t ChannelsPerPixel, ///< [in] Number of channels per pixel. + uint32_t PixelsPerColumn, ///< [in] Number of pixels per column. + const uint32_t * + ElementStrides, ///< [in] Array containing traversal stride in each of the TensorRank + ///< dimensions. + ur_exp_tensor_map_interleave_flags_t + Interleave, ///< [in] Type of interleaved layout the tensor addresses + ur_exp_tensor_map_swizzle_flags_t + Swizzle, ///< [in] Bank swizzling pattern inside shared memory + ur_exp_tensor_map_l2_promotion_flags_t + L2Promotion, ///< [in] L2 promotion size. + ur_exp_tensor_map_oob_fill_flags_t + OobFill, ///< [in] Indicates whether zero or special NaN constant will be used to + ///< fill out-of-bounds elements. + ur_exp_tensor_map_handle_t + *hTensorMap ///< [out] Handle of the tensor map object. +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Encode tensor map with tiled data +/// +/// @details +/// - Tiled map encode. 
+/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_EXP_TENSOR_MAP_DATA_TYPE_FLAGS_MASK & TensorMapType` +/// + `::UR_EXP_TENSOR_MAP_INTERLEAVE_FLAGS_MASK & Interleave` +/// + `::UR_EXP_TENSOR_MAP_SWIZZLE_FLAGS_MASK & Swizzle` +/// + `::UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAGS_MASK & L2Promotion` +/// + `::UR_EXP_TENSOR_MAP_OOB_FILL_FLAGS_MASK & OobFill` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == GlobalAddress` +/// + `NULL == GlobalDim` +/// + `NULL == GlobalStrides` +/// + `NULL == BoxDim` +/// + `NULL == ElementStrides` +/// + `NULL == hTensorMap` +/// - ::UR_RESULT_ERROR_INVALID_ARGUMENT +/// + `TensorRank < 3` +ur_result_t UR_APICALL urTensorMapEncodeTiledExp( + ur_device_handle_t hDevice, ///< [in] Handle of the device object. + ur_exp_tensor_map_data_type_flags_t + TensorMapType, ///< [in] Data type of the tensor object. + uint32_t TensorRank, ///< [in] Dimensionality of tensor; must be at least 3. + void * + GlobalAddress, ///< [in] Starting address of memory region described by tensor. + const uint64_t * + GlobalDim, ///< [in] Array containing tensor size (number of elements) along each of + ///< the TensorRank dimensions. + const uint64_t * + GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the + ///< TensorRank - 1 dimensions. + const uint32_t * + BoxDim, ///< [in] Array containing traversal box size (number of elements) along + ///< each of the TensorRank dimensions. Specifies how many elements to be + ///< traversed along each tensor dimension. + const uint32_t * + ElementStrides, ///< [in] Array containing traversal stride in each of the TensorRank + ///< dimensions. 
+ ur_exp_tensor_map_interleave_flags_t + Interleave, ///< [in] Type of interleaved layout the tensor addresses + ur_exp_tensor_map_swizzle_flags_t + Swizzle, ///< [in] Bank swizzling pattern inside shared memory + ur_exp_tensor_map_l2_promotion_flags_t + L2Promotion, ///< [in] L2 promotion size. + ur_exp_tensor_map_oob_fill_flags_t + OobFill, ///< [in] Indicates whether zero or special NaN constant will be used to + ///< fill out-of-bounds elements. + ur_exp_tensor_map_handle_t + *hTensorMap ///< [out] Handle of the tensor map object. +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +}