Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
246 changes: 246 additions & 0 deletions include/ur_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,8 @@ typedef enum ur_function_t {
UR_FUNCTION_COMMAND_BUFFER_UPDATE_WAIT_EVENTS_EXP = 244, ///< Enumerator for ::urCommandBufferUpdateWaitEventsExp
UR_FUNCTION_BINDLESS_IMAGES_MAP_EXTERNAL_LINEAR_MEMORY_EXP = 245, ///< Enumerator for ::urBindlessImagesMapExternalLinearMemoryExp
UR_FUNCTION_ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT = 246, ///< Enumerator for ::urEnqueueEventsWaitWithBarrierExt
UR_FUNCTION_TENSOR_MAP_ENCODE_IM_2_COL_EXP = 247, ///< Enumerator for ::urTensorMapEncodeIm2ColExp
UR_FUNCTION_TENSOR_MAP_ENCODE_TILED_EXP = 248, ///< Enumerator for ::urTensorMapEncodeTiledExp
/// @cond
UR_FUNCTION_FORCE_UINT32 = 0x7fffffff
/// @endcond
Expand Down Expand Up @@ -10161,6 +10163,207 @@ urEnqueueNativeCommandExp(
///< not NULL, phEvent must not refer to an element of the phEventWaitList array.
);

#if !defined(__GNUC__)
#pragma endregion
#endif
// Intel 'oneAPI' Unified Runtime Experimental API for mapping tensor objects
#if !defined(__GNUC__)
#pragma region tensor_map_(experimental)
#endif
///////////////////////////////////////////////////////////////////////////////
/// @brief Opaque handle to a tensor map object
/// @details
///     - The pointed-to structure is only forward-declared in this header, so
///       the handle can be stored and passed around but not dereferenced here.
struct ur_exp_tensor_map_handle_t_;
typedef struct ur_exp_tensor_map_handle_t_ *ur_exp_tensor_map_handle_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Tensor map data type
/// @details
///     - Enumerators are defined with UR_BIT(0) through UR_BIT(12).
///     - The trailing comment on each enumerator gives a size in bytes
///       (presumably the size of one tensor element - confirm).
///     - Used as the `TensorMapType` argument of ::urTensorMapEncodeIm2ColExp
///       and ::urTensorMapEncodeTiledExp.
typedef uint32_t ur_exp_tensor_map_data_type_flags_t;
typedef enum ur_exp_tensor_map_data_type_flag_t {
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT8 = UR_BIT(0), ///< 1 byte
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT16 = UR_BIT(1), ///< 2 bytes
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT32 = UR_BIT(2), ///< 4 bytes
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT32 = UR_BIT(3), ///< 4 bytes
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT64 = UR_BIT(4), ///< 8 bytes
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT64 = UR_BIT(5), ///< 8 bytes
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT16 = UR_BIT(6), ///< 2 bytes
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32 = UR_BIT(7), ///< 4 bytes
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT64 = UR_BIT(8), ///< 8 bytes
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_BFLOAT16 = UR_BIT(9), ///< 2 bytes
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32_FTZ = UR_BIT(10), ///< 4 bytes
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32 = UR_BIT(11), ///< 4 bytes
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32_FTZ = UR_BIT(12), ///< 4 bytes
/// @cond
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FORCE_UINT32 = 0x7fffffff
/// @endcond

} ur_exp_tensor_map_data_type_flag_t;
/// @brief Bit Mask for validating ur_exp_tensor_map_data_type_flags_t
/// @details Has bits 13 through 31 set, i.e. every bit above the last
/// enumerator. The tensor map encode entry points document
/// ::UR_RESULT_ERROR_INVALID_ENUMERATION for
/// `::UR_EXP_TENSOR_MAP_DATA_TYPE_FLAGS_MASK & TensorMapType`.
#define UR_EXP_TENSOR_MAP_DATA_TYPE_FLAGS_MASK 0xffffe000

///////////////////////////////////////////////////////////////////////////////
/// @brief Tensor map interleave
/// @details
///     - Enumerators are defined with UR_BIT(0) through UR_BIT(2).
///     - Used as the `Interleave` argument of ::urTensorMapEncodeIm2ColExp and
///       ::urTensorMapEncodeTiledExp.
typedef uint32_t ur_exp_tensor_map_interleave_flags_t;
typedef enum ur_exp_tensor_map_interleave_flag_t {
UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_NONE = UR_BIT(0), ///< No interleave
UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_16B = UR_BIT(1), ///< 16B interleave
UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_32B = UR_BIT(2), ///< 32B interleave
/// @cond
UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_FORCE_UINT32 = 0x7fffffff
/// @endcond

} ur_exp_tensor_map_interleave_flag_t;
/// @brief Bit Mask for validating ur_exp_tensor_map_interleave_flags_t
/// @details Has bits 3 through 31 set, i.e. every bit above the last
/// enumerator. The tensor map encode entry points document
/// ::UR_RESULT_ERROR_INVALID_ENUMERATION for
/// `::UR_EXP_TENSOR_MAP_INTERLEAVE_FLAGS_MASK & Interleave`.
#define UR_EXP_TENSOR_MAP_INTERLEAVE_FLAGS_MASK 0xfffffff8

///////////////////////////////////////////////////////////////////////////////
/// @brief Tensor map l2 promotion
/// @details
///     - Enumerators are defined with UR_BIT(0) through UR_BIT(3).
///     - Used as the `L2Promotion` argument ("L2 promotion size") of
///       ::urTensorMapEncodeIm2ColExp and ::urTensorMapEncodeTiledExp.
typedef uint32_t ur_exp_tensor_map_l2_promotion_flags_t;
typedef enum ur_exp_tensor_map_l2_promotion_flag_t {
UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_NONE = UR_BIT(0), ///< No promotion type
UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_64B = UR_BIT(1), ///< 64B promotion type
UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_128B = UR_BIT(2), ///< 128B promotion type
UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_256B = UR_BIT(3), ///< 256B promotion type
/// @cond
UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_FORCE_UINT32 = 0x7fffffff
/// @endcond

} ur_exp_tensor_map_l2_promotion_flag_t;
/// @brief Bit Mask for validating ur_exp_tensor_map_l2_promotion_flags_t
/// @details Has bits 4 through 31 set, i.e. every bit above the last
/// enumerator. The tensor map encode entry points document
/// ::UR_RESULT_ERROR_INVALID_ENUMERATION for
/// `::UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAGS_MASK & L2Promotion`.
#define UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAGS_MASK 0xfffffff0

///////////////////////////////////////////////////////////////////////////////
/// @brief Tensor map swizzle
/// @details
///     - Enumerators are defined with UR_BIT(0) through UR_BIT(3).
///     - Used as the `Swizzle` argument ("bank swizzling pattern inside shared
///       memory") of ::urTensorMapEncodeIm2ColExp and
///       ::urTensorMapEncodeTiledExp.
typedef uint32_t ur_exp_tensor_map_swizzle_flags_t;
typedef enum ur_exp_tensor_map_swizzle_flag_t {
UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_NONE = UR_BIT(0), ///< No swizzle
UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_32B = UR_BIT(1), ///< 32B swizzle
UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_64B = UR_BIT(2), ///< 64B swizzle
UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_128B = UR_BIT(3), ///< 128B swizzle
/// @cond
UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_FORCE_UINT32 = 0x7fffffff
/// @endcond

} ur_exp_tensor_map_swizzle_flag_t;
/// @brief Bit Mask for validating ur_exp_tensor_map_swizzle_flags_t
/// @details Has bits 4 through 31 set, i.e. every bit above the last
/// enumerator. The tensor map encode entry points document
/// ::UR_RESULT_ERROR_INVALID_ENUMERATION for
/// `::UR_EXP_TENSOR_MAP_SWIZZLE_FLAGS_MASK & Swizzle`.
#define UR_EXP_TENSOR_MAP_SWIZZLE_FLAGS_MASK 0xfffffff0

///////////////////////////////////////////////////////////////////////////////
/// @brief Tensor map OOB fill
/// @details
///     - Enumerators are defined with UR_BIT(0) and UR_BIT(1).
///     - Used as the `OobFill` argument of ::urTensorMapEncodeIm2ColExp and
///       ::urTensorMapEncodeTiledExp, which describe it as selecting whether
///       zero or a special NaN constant fills out-of-bounds elements.
///     - NOTE(review): REQUEST_ZERO_FMA presumably mirrors CUDA's
///       CU_TENSOR_MAP_FLOAT_OOB_FILL_NAN_REQUEST_ZERO_FMA - confirm.
typedef uint32_t ur_exp_tensor_map_oob_fill_flags_t;
typedef enum ur_exp_tensor_map_oob_fill_flag_t {
UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_NONE = UR_BIT(0), ///< No OOB fill
UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_REQUEST_ZERO_FMA = UR_BIT(1), ///< Refer to NVIDIA docs
/// @cond
UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_FORCE_UINT32 = 0x7fffffff
/// @endcond

} ur_exp_tensor_map_oob_fill_flag_t;
/// @brief Bit Mask for validating ur_exp_tensor_map_oob_fill_flags_t
/// @details Has bits 2 through 31 set, i.e. every bit above the last
/// enumerator. The tensor map encode entry points document
/// ::UR_RESULT_ERROR_INVALID_ENUMERATION for
/// `::UR_EXP_TENSOR_MAP_OOB_FILL_FLAGS_MASK & OobFill`.
#define UR_EXP_TENSOR_MAP_OOB_FILL_FLAGS_MASK 0xfffffffc

///////////////////////////////////////////////////////////////////////////////
/// @brief Encode tensor map with image data
///
/// @details
/// - Map encode using im2col.
/// - The resulting tensor map handle is returned through `hTensorMap`.
/// - NOTE(review): the number of entries expected in `PixelBoxLowerCorner`
///   and `PixelBoxUpperCorner` is not stated here; presumably one per
///   spatial (DHW) dimension - confirm against the adapter.
/// - NOTE(review): `Interleave`, `Swizzle`, `L2Promotion` and `OobFill` use
///   `*_flags_t` bitmask types, but each appears to select a single option -
///   confirm whether combined bits are meaningful.
///
/// @returns
/// - ::UR_RESULT_SUCCESS
/// - ::UR_RESULT_ERROR_UNINITIALIZED
/// - ::UR_RESULT_ERROR_DEVICE_LOST
/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
/// + `NULL == hDevice`
/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION
/// + `::UR_EXP_TENSOR_MAP_DATA_TYPE_FLAGS_MASK & TensorMapType`
/// + `::UR_EXP_TENSOR_MAP_INTERLEAVE_FLAGS_MASK & Interleave`
/// + `::UR_EXP_TENSOR_MAP_SWIZZLE_FLAGS_MASK & Swizzle`
/// + `::UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAGS_MASK & L2Promotion`
/// + `::UR_EXP_TENSOR_MAP_OOB_FILL_FLAGS_MASK & OobFill`
/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
/// + `NULL == GlobalAddress`
/// + `NULL == GlobalDim`
/// + `NULL == GlobalStrides`
/// + `NULL == PixelBoxLowerCorner`
/// + `NULL == PixelBoxUpperCorner`
/// + `NULL == ElementStrides`
/// + `NULL == hTensorMap`
/// - ::UR_RESULT_ERROR_INVALID_ARGUMENT
/// + `TensorRank < 3`
UR_APIEXPORT ur_result_t UR_APICALL
urTensorMapEncodeIm2ColExp(
ur_device_handle_t hDevice, ///< [in] Handle of the device object.
ur_exp_tensor_map_data_type_flags_t TensorMapType, ///< [in] Data type of the tensor object.
uint32_t TensorRank, ///< [in] Dimensionality of tensor; must be at least 3.
void *GlobalAddress, ///< [in] Starting address of memory region described by tensor.
const uint64_t *GlobalDim, ///< [in] Array containing tensor size (number of elements) along each of
///< the TensorRank dimensions.
const uint64_t *GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the
///< TensorRank - 1 dimensions.
const int *PixelBoxLowerCorner, ///< [in] Array containing DHW dimensions of lower box corner.
const int *PixelBoxUpperCorner, ///< [in] Array containing DHW dimensions of upper box corner.
uint32_t ChannelsPerPixel, ///< [in] Number of channels per pixel.
uint32_t PixelsPerColumn, ///< [in] Number of pixels per column.
const uint32_t *ElementStrides, ///< [in] Array containing traversal stride in each of the TensorRank
///< dimensions.
ur_exp_tensor_map_interleave_flags_t Interleave, ///< [in] Type of interleaved layout the tensor addresses
ur_exp_tensor_map_swizzle_flags_t Swizzle, ///< [in] Bank swizzling pattern inside shared memory
ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, ///< [in] L2 promotion size.
ur_exp_tensor_map_oob_fill_flags_t OobFill, ///< [in] Indicates whether zero or special NaN constant will be used to
///< fill out-of-bounds elements.
ur_exp_tensor_map_handle_t *hTensorMap ///< [out] Handle of the tensor map object.
);

///////////////////////////////////////////////////////////////////////////////
/// @brief Encode tensor map with tiled data
///
/// @details
/// - Tiled map encode.
/// - The resulting tensor map handle is returned through `hTensorMap`.
/// - NOTE(review): `Interleave`, `Swizzle`, `L2Promotion` and `OobFill` use
///   `*_flags_t` bitmask types, but each appears to select a single option -
///   confirm whether combined bits are meaningful.
///
/// @returns
/// - ::UR_RESULT_SUCCESS
/// - ::UR_RESULT_ERROR_UNINITIALIZED
/// - ::UR_RESULT_ERROR_DEVICE_LOST
/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
/// + `NULL == hDevice`
/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION
/// + `::UR_EXP_TENSOR_MAP_DATA_TYPE_FLAGS_MASK & TensorMapType`
/// + `::UR_EXP_TENSOR_MAP_INTERLEAVE_FLAGS_MASK & Interleave`
/// + `::UR_EXP_TENSOR_MAP_SWIZZLE_FLAGS_MASK & Swizzle`
/// + `::UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAGS_MASK & L2Promotion`
/// + `::UR_EXP_TENSOR_MAP_OOB_FILL_FLAGS_MASK & OobFill`
/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
/// + `NULL == GlobalAddress`
/// + `NULL == GlobalDim`
/// + `NULL == GlobalStrides`
/// + `NULL == BoxDim`
/// + `NULL == ElementStrides`
/// + `NULL == hTensorMap`
/// - ::UR_RESULT_ERROR_INVALID_ARGUMENT
/// + `TensorRank < 3`
UR_APIEXPORT ur_result_t UR_APICALL
urTensorMapEncodeTiledExp(
ur_device_handle_t hDevice, ///< [in] Handle of the device object.
ur_exp_tensor_map_data_type_flags_t TensorMapType, ///< [in] Data type of the tensor object.
uint32_t TensorRank, ///< [in] Dimensionality of tensor; must be at least 3.
void *GlobalAddress, ///< [in] Starting address of memory region described by tensor.
const uint64_t *GlobalDim, ///< [in] Array containing tensor size (number of elements) along each of
///< the TensorRank dimensions.
const uint64_t *GlobalStrides, ///< [in] Array containing stride size (in bytes) along each of the
///< TensorRank - 1 dimensions.
const uint32_t *BoxDim, ///< [in] Array containing traversal box size (number of elements) along
///< each of the TensorRank dimensions. Specifies how many elements are
///< traversed along each tensor dimension.
const uint32_t *ElementStrides, ///< [in] Array containing traversal stride in each of the TensorRank
///< dimensions.
ur_exp_tensor_map_interleave_flags_t Interleave, ///< [in] Type of interleaved layout the tensor addresses
ur_exp_tensor_map_swizzle_flags_t Swizzle, ///< [in] Bank swizzling pattern inside shared memory
ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, ///< [in] L2 promotion size.
ur_exp_tensor_map_oob_fill_flags_t OobFill, ///< [in] Indicates whether zero or special NaN constant will be used to
///< fill out-of-bounds elements.
ur_exp_tensor_map_handle_t *hTensorMap ///< [out] Handle of the tensor map object.
);

#if !defined(__GNUC__)
#pragma endregion
#endif
Expand Down Expand Up @@ -12333,6 +12536,49 @@ typedef struct ur_command_buffer_command_get_info_exp_params_t {
size_t **ppPropSizeRet;
} ur_command_buffer_command_get_info_exp_params_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Function parameters for urTensorMapEncodeIm2ColExp
/// @details Each entry is a pointer to the parameter passed to the function;
/// allowing the callback the ability to modify the parameter's value.
/// Fields appear in the same order as the parameters of
/// ::urTensorMapEncodeIm2ColExp, each with one extra level of
/// indirection and a `p` prefix on the parameter name.
typedef struct ur_tensor_map_encode_im_2_col_exp_params_t {
ur_device_handle_t *phDevice;
ur_exp_tensor_map_data_type_flags_t *pTensorMapType;
uint32_t *pTensorRank;
void **pGlobalAddress;
const uint64_t **pGlobalDim;
const uint64_t **pGlobalStrides;
const int **pPixelBoxLowerCorner;
const int **pPixelBoxUpperCorner;
uint32_t *pChannelsPerPixel;
uint32_t *pPixelsPerColumn;
const uint32_t **pElementStrides;
ur_exp_tensor_map_interleave_flags_t *pInterleave;
ur_exp_tensor_map_swizzle_flags_t *pSwizzle;
ur_exp_tensor_map_l2_promotion_flags_t *pL2Promotion;
ur_exp_tensor_map_oob_fill_flags_t *pOobFill;
ur_exp_tensor_map_handle_t **phTensorMap;
} ur_tensor_map_encode_im_2_col_exp_params_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Function parameters for urTensorMapEncodeTiledExp
/// @details Each entry is a pointer to the parameter passed to the function;
/// allowing the callback the ability to modify the parameter's value.
/// Fields appear in the same order as the parameters of
/// ::urTensorMapEncodeTiledExp, each with one extra level of
/// indirection and a `p` prefix on the parameter name.
typedef struct ur_tensor_map_encode_tiled_exp_params_t {
ur_device_handle_t *phDevice;
ur_exp_tensor_map_data_type_flags_t *pTensorMapType;
uint32_t *pTensorRank;
void **pGlobalAddress;
const uint64_t **pGlobalDim;
const uint64_t **pGlobalStrides;
const uint32_t **pBoxDim;
const uint32_t **pElementStrides;
ur_exp_tensor_map_interleave_flags_t *pInterleave;
ur_exp_tensor_map_swizzle_flags_t *pSwizzle;
ur_exp_tensor_map_l2_promotion_flags_t *pL2Promotion;
ur_exp_tensor_map_oob_fill_flags_t *pOobFill;
ur_exp_tensor_map_handle_t **phTensorMap;
} ur_tensor_map_encode_tiled_exp_params_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Function parameters for urUsmP2PEnablePeerAccessExp
/// @details Each entry is a pointer to the parameter passed to the function;
Expand Down
2 changes: 2 additions & 0 deletions include/ur_api_funcs.def
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,8 @@ _UR_API(urCommandBufferUpdateSignalEventExp)
_UR_API(urCommandBufferUpdateWaitEventsExp)
_UR_API(urCommandBufferGetInfoExp)
_UR_API(urCommandBufferCommandGetInfoExp)
_UR_API(urTensorMapEncodeIm2ColExp)
_UR_API(urTensorMapEncodeTiledExp)
_UR_API(urUsmP2PEnablePeerAccessExp)
_UR_API(urUsmP2PDisablePeerAccessExp)
_UR_API(urUsmP2PPeerAccessGetInfoExp)
Expand Down
66 changes: 66 additions & 0 deletions include/ur_ddi.h
Original file line number Diff line number Diff line change
Expand Up @@ -2248,6 +2248,71 @@ typedef ur_result_t(UR_APICALL *ur_pfnGetCommandBufferExpProcAddrTable_t)(
ur_api_version_t,
ur_command_buffer_exp_dditable_t *);

///////////////////////////////////////////////////////////////////////////////
/// @brief Function-pointer for urTensorMapEncodeIm2ColExp
/// @details
///     - Parameter names are informational only; they follow the
///       ::urTensorMapEncodeIm2ColExp prototype and do not affect the type.
typedef ur_result_t(UR_APICALL *ur_pfnTensorMapEncodeIm2ColExp_t)(
    ur_device_handle_t hDevice,
    ur_exp_tensor_map_data_type_flags_t TensorMapType,
    uint32_t TensorRank,
    void *GlobalAddress,
    const uint64_t *GlobalDim,
    const uint64_t *GlobalStrides,
    const int *PixelBoxLowerCorner,
    const int *PixelBoxUpperCorner,
    uint32_t ChannelsPerPixel,
    uint32_t PixelsPerColumn,
    const uint32_t *ElementStrides,
    ur_exp_tensor_map_interleave_flags_t Interleave,
    ur_exp_tensor_map_swizzle_flags_t Swizzle,
    ur_exp_tensor_map_l2_promotion_flags_t L2Promotion,
    ur_exp_tensor_map_oob_fill_flags_t OobFill,
    ur_exp_tensor_map_handle_t *hTensorMap);

///////////////////////////////////////////////////////////////////////////////
/// @brief Function-pointer for urTensorMapEncodeTiledExp
/// @details
///     - Parameter names are informational only; they follow the
///       ::urTensorMapEncodeTiledExp prototype and do not affect the type.
typedef ur_result_t(UR_APICALL *ur_pfnTensorMapEncodeTiledExp_t)(
    ur_device_handle_t hDevice,
    ur_exp_tensor_map_data_type_flags_t TensorMapType,
    uint32_t TensorRank,
    void *GlobalAddress,
    const uint64_t *GlobalDim,
    const uint64_t *GlobalStrides,
    const uint32_t *BoxDim,
    const uint32_t *ElementStrides,
    ur_exp_tensor_map_interleave_flags_t Interleave,
    ur_exp_tensor_map_swizzle_flags_t Swizzle,
    ur_exp_tensor_map_l2_promotion_flags_t L2Promotion,
    ur_exp_tensor_map_oob_fill_flags_t OobFill,
    ur_exp_tensor_map_handle_t *hTensorMap);

///////////////////////////////////////////////////////////////////////////////
/// @brief Table of TensorMapExp function pointers
typedef struct ur_tensor_map_exp_dditable_t {
ur_pfnTensorMapEncodeIm2ColExp_t pfnEncodeIm2ColExp; ///< Entry for urTensorMapEncodeIm2ColExp
ur_pfnTensorMapEncodeTiledExp_t pfnEncodeTiledExp; ///< Entry for urTensorMapEncodeTiledExp
} ur_tensor_map_exp_dditable_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Exported function for filling application's TensorMapExp table
/// with current process' addresses
///
/// @details
///     - The table type, ::ur_tensor_map_exp_dditable_t, holds the
///       `pfnEncodeIm2ColExp` and `pfnEncodeTiledExp` entries.
///     - NOTE(review): the conditions for each error code are not stated
///       here; presumably a NULL `pDdiTable` yields
///       ::UR_RESULT_ERROR_INVALID_NULL_POINTER and an unsupported `version`
///       yields ::UR_RESULT_ERROR_UNSUPPORTED_VERSION - confirm against the
///       implementation.
///
/// @returns
/// - ::UR_RESULT_SUCCESS
/// - ::UR_RESULT_ERROR_UNINITIALIZED
/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION
UR_DLLEXPORT ur_result_t UR_APICALL
urGetTensorMapExpProcAddrTable(
ur_api_version_t version, ///< [in] API version requested
ur_tensor_map_exp_dditable_t *pDdiTable ///< [in,out] pointer to table of DDI function pointers
);

///////////////////////////////////////////////////////////////////////////////
/// @brief Function-pointer for urGetTensorMapExpProcAddrTable
/// @details
///     - Parameter names are informational only; they follow the
///       ::urGetTensorMapExpProcAddrTable prototype and do not affect the type.
typedef ur_result_t(UR_APICALL *ur_pfnGetTensorMapExpProcAddrTable_t)(
    ur_api_version_t version,
    ur_tensor_map_exp_dditable_t *pDdiTable);

///////////////////////////////////////////////////////////////////////////////
/// @brief Function-pointer for urUsmP2PEnablePeerAccessExp
typedef ur_result_t(UR_APICALL *ur_pfnUsmP2PEnablePeerAccessExp_t)(
Expand Down Expand Up @@ -2515,6 +2580,7 @@ typedef struct ur_dditable_t {
ur_usm_dditable_t USM;
ur_usm_exp_dditable_t USMExp;
ur_command_buffer_exp_dditable_t CommandBufferExp;
ur_tensor_map_exp_dditable_t TensorMapExp;
ur_usm_p2p_exp_dditable_t UsmP2PExp;
ur_virtual_mem_dditable_t VirtualMem;
ur_device_dditable_t Device;
Expand Down
Loading
Loading