Skip to content

Commit c767268

Browse files
author
Hugh Delaney
committed
Merge branch 'main' into tensormap-exp-api
2 parents 8a81c2a + 262bea8 commit c767268

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+1528
-724
lines changed

.github/workflows/multi_device.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,4 +63,4 @@ jobs:
6363

6464
- name: Test adapters
6565
working-directory: ${{github.workspace}}/build
66-
run: env UR_CTS_ADAPTER_PLATFORM="${{matrix.adapter.platform}}" ctest -C ${{matrix.build_type}} --output-on-failure -L "conformance" -E "enqueue|kernel|program|integration|exp_command_buffer|exp_enqueue_native|exp_launch_properties|exp_usm_p2p" --timeout 180
66+
run: env UR_CTS_ADAPTER_PLATFORM="${{matrix.adapter.platform}}" ctest -C ${{matrix.build_type}} --output-on-failure -L "conformance" -E "enqueue|kernel|integration|exp_command_buffer|exp_enqueue_native|exp_launch_properties|exp_usm_p2p" --timeout 180

include/ur_api.h

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4204,17 +4204,19 @@ urProgramCreateWithIL(
42044204
);
42054205

42064206
///////////////////////////////////////////////////////////////////////////////
4207-
/// @brief Create a program object from device native binary.
4207+
/// @brief Create a program object from native binaries for the specified
4208+
/// devices.
42084209
///
42094210
/// @details
42104211
/// - The application may call this function from simultaneous threads.
42114212
/// - Following a successful call to this entry point, `phProgram` will
4212-
/// contain a binary of type ::UR_PROGRAM_BINARY_TYPE_COMPILED_OBJECT or
4213-
/// ::UR_PROGRAM_BINARY_TYPE_LIBRARY for `hDevice`.
4214-
/// - The device specified by `hDevice` must be device associated with
4213+
/// contain binaries of type ::UR_PROGRAM_BINARY_TYPE_COMPILED_OBJECT or
4214+
/// ::UR_PROGRAM_BINARY_TYPE_LIBRARY for the specified devices in
4215+
/// `phDevices`.
4216+
/// - The devices specified by `phDevices` must be associated with the
42154217
/// context.
42164218
/// - The adapter may (but is not required to) perform validation of the
4217-
/// provided module during this call.
4219+
/// provided modules during this call.
42184220
///
42194221
/// @remarks
42204222
/// _Analogues_
@@ -4227,21 +4229,27 @@ urProgramCreateWithIL(
42274229
/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
42284230
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
42294231
/// + `NULL == hContext`
4230-
/// + `NULL == hDevice`
42314232
/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
4232-
/// + `NULL == pBinary`
4233+
/// + `NULL == phDevices`
4234+
/// + `NULL == pLengths`
4235+
/// + `NULL == ppBinaries`
42334236
/// + `NULL == phProgram`
42344237
/// + `NULL != pProperties && pProperties->count > 0 && NULL == pProperties->pMetadatas`
42354238
/// - ::UR_RESULT_ERROR_INVALID_SIZE
42364239
/// + `NULL != pProperties && NULL != pProperties->pMetadatas && pProperties->count == 0`
4240+
/// + `numDevices == 0`
42374241
/// - ::UR_RESULT_ERROR_INVALID_NATIVE_BINARY
4238-
/// + If `pBinary` isn't a valid binary for `hDevice.`
4242+
/// + If any binary in `ppBinaries` isn't a valid binary for the corresponding device in `phDevices`.
42394243
UR_APIEXPORT ur_result_t UR_APICALL
42404244
urProgramCreateWithBinary(
42414245
ur_context_handle_t hContext, ///< [in] handle of the context instance
4242-
ur_device_handle_t hDevice, ///< [in] handle to device associated with binary.
4243-
size_t size, ///< [in] size in bytes.
4244-
const uint8_t *pBinary, ///< [in] pointer to binary.
4246+
uint32_t numDevices, ///< [in] number of devices
4247+
ur_device_handle_t *phDevices, ///< [in][range(0, numDevices)] a pointer to a list of device handles. The
4248+
///< binaries are loaded for devices specified in this list.
4249+
size_t *pLengths, ///< [in][range(0, numDevices)] array of sizes of program binaries
4250+
///< specified by `ppBinaries` (in bytes).
4251+
const uint8_t **ppBinaries, ///< [in][range(0, numDevices)] pointer to program binaries to be loaded
4252+
///< for devices specified by `phDevices`.
42454253
const ur_program_properties_t *pProperties, ///< [in][optional] pointer to program creation properties.
42464254
ur_program_handle_t *phProgram ///< [out] pointer to handle of Program object created.
42474255
);
@@ -10528,9 +10536,10 @@ typedef struct ur_program_create_with_il_params_t {
1052810536
/// allowing the callback the ability to modify the parameter's value
1052910537
typedef struct ur_program_create_with_binary_params_t {
1053010538
ur_context_handle_t *phContext;
10531-
ur_device_handle_t *phDevice;
10532-
size_t *psize;
10533-
const uint8_t **ppBinary;
10539+
uint32_t *pnumDevices;
10540+
ur_device_handle_t **pphDevices;
10541+
size_t **ppLengths;
10542+
const uint8_t ***pppBinaries;
1053410543
const ur_program_properties_t **ppProperties;
1053510544
ur_program_handle_t **pphProgram;
1053610545
} ur_program_create_with_binary_params_t;

include/ur_ddi.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -284,9 +284,10 @@ typedef ur_result_t(UR_APICALL *ur_pfnProgramCreateWithIL_t)(
284284
/// @brief Function-pointer for urProgramCreateWithBinary
285285
typedef ur_result_t(UR_APICALL *ur_pfnProgramCreateWithBinary_t)(
286286
ur_context_handle_t,
287-
ur_device_handle_t,
288-
size_t,
289-
const uint8_t *,
287+
uint32_t,
288+
ur_device_handle_t *,
289+
size_t *,
290+
const uint8_t **,
290291
const ur_program_properties_t *,
291292
ur_program_handle_t *);
292293

include/ur_print.hpp

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11705,21 +11705,44 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct
1170511705
*(params->phContext));
1170611706

1170711707
os << ", ";
11708-
os << ".hDevice = ";
11708+
os << ".numDevices = ";
1170911709

11710-
ur::details::printPtr(os,
11711-
*(params->phDevice));
11710+
os << *(params->pnumDevices);
1171211711

1171311712
os << ", ";
11714-
os << ".size = ";
11713+
os << ".phDevices = {";
11714+
for (size_t i = 0; *(params->pphDevices) != NULL && i < *params->pnumDevices; ++i) {
11715+
if (i != 0) {
11716+
os << ", ";
11717+
}
1171511718

11716-
os << *(params->psize);
11719+
ur::details::printPtr(os,
11720+
(*(params->pphDevices))[i]);
11721+
}
11722+
os << "}";
1171711723

1171811724
os << ", ";
11719-
os << ".pBinary = ";
11725+
os << ".pLengths = {";
11726+
for (size_t i = 0; *(params->ppLengths) != NULL && i < *params->pnumDevices; ++i) {
11727+
if (i != 0) {
11728+
os << ", ";
11729+
}
1172011730

11721-
ur::details::printPtr(os,
11722-
*(params->ppBinary));
11731+
os << (*(params->ppLengths))[i];
11732+
}
11733+
os << "}";
11734+
11735+
os << ", ";
11736+
os << ".ppBinaries = {";
11737+
for (size_t i = 0; *(params->pppBinaries) != NULL && i < *params->pnumDevices; ++i) {
11738+
if (i != 0) {
11739+
os << ", ";
11740+
}
11741+
11742+
ur::details::printPtr(os,
11743+
(*(params->pppBinaries))[i]);
11744+
}
11745+
os << "}";
1172311746

1172411747
os << ", ";
1172511748
os << ".pProperties = ";

scripts/core/program.yml

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ returns:
119119
- "`length == 0`"
120120
--- #--------------------------------------------------------------------------
121121
type: function
122-
desc: "Create a program object from device native binary."
122+
desc: "Create a program object from native binaries for the specified devices."
123123
class: $xProgram
124124
name: CreateWithBinary
125125
decl: static
@@ -128,22 +128,25 @@ analogue:
128128
- "**clCreateProgramWithBinary**"
129129
details:
130130
- "The application may call this function from simultaneous threads."
131-
- "Following a successful call to this entry point, `phProgram` will contain a binary of type $X_PROGRAM_BINARY_TYPE_COMPILED_OBJECT or $X_PROGRAM_BINARY_TYPE_LIBRARY for `hDevice`."
132-
- "The device specified by `hDevice` must be device associated with context."
133-
- "The adapter may (but is not required to) perform validation of the provided module during this call."
131+
- "Following a successful call to this entry point, `phProgram` will contain binaries of type $X_PROGRAM_BINARY_TYPE_COMPILED_OBJECT or $X_PROGRAM_BINARY_TYPE_LIBRARY for the specified devices in `phDevices`."
132+
- "The devices specified by `phDevices` must be associated with the context."
133+
- "The adapter may (but is not required to) perform validation of the provided modules during this call."
134134
params:
135135
- type: $x_context_handle_t
136136
name: hContext
137137
desc: "[in] handle of the context instance"
138-
- type: $x_device_handle_t
139-
name: hDevice
140-
desc: "[in] handle to device associated with binary."
141-
- type: size_t
142-
name: size
143-
desc: "[in] size in bytes."
144-
- type: const uint8_t*
145-
name: pBinary
146-
desc: "[in] pointer to binary."
138+
- type: uint32_t
139+
name: numDevices
140+
desc: "[in] number of devices"
141+
- type: $x_device_handle_t*
142+
name: phDevices
143+
desc: "[in][range(0, numDevices)] a pointer to a list of device handles. The binaries are loaded for devices specified in this list."
144+
- type: size_t*
145+
name: pLengths
146+
desc: "[in][range(0, numDevices)] array of sizes of program binaries specified by `ppBinaries` (in bytes)."
147+
- type: const uint8_t**
148+
name: ppBinaries
149+
desc: "[in][range(0, numDevices)] pointer to program binaries to be loaded for devices specified by `phDevices`."
147150
- type: const $x_program_properties_t*
148151
name: pProperties
149152
desc: "[in][optional] pointer to program creation properties."
@@ -155,8 +158,9 @@ returns:
155158
- "`NULL != pProperties && pProperties->count > 0 && NULL == pProperties->pMetadatas`"
156159
- $X_RESULT_ERROR_INVALID_SIZE:
157160
- "`NULL != pProperties && NULL != pProperties->pMetadatas && pProperties->count == 0`"
161+
- "`numDevices == 0`"
158162
- $X_RESULT_ERROR_INVALID_NATIVE_BINARY:
159-
- "If `pBinary` isn't a valid binary for `hDevice.`"
163+
- "If any binary in `ppBinaries` isn't a valid binary for the corresponding device in `phDevices`."
160164
--- #--------------------------------------------------------------------------
161165
type: function
162166
desc: "Produces an executable program from one program, negates need for the linking step."

source/adapters/cuda/program.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -493,12 +493,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetNativeHandle(
493493
}
494494

495495
UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary(
496-
ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size,
497-
const uint8_t *pBinary, const ur_program_properties_t *pProperties,
496+
ur_context_handle_t hContext, uint32_t numDevices,
497+
ur_device_handle_t *phDevices, size_t *pLengths, const uint8_t **ppBinaries,
498+
const ur_program_properties_t *pProperties,
498499
ur_program_handle_t *phProgram) {
500+
if (numDevices > 1)
501+
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
499502

500-
UR_CHECK_ERROR(
501-
createProgram(hContext, hDevice, size, pBinary, pProperties, phProgram));
503+
UR_CHECK_ERROR(createProgram(hContext, phDevices[0], pLengths[0],
504+
ppBinaries[0], pProperties, phProgram));
502505
(*phProgram)->BinaryType = UR_PROGRAM_BINARY_TYPE_COMPILED_OBJECT;
503506

504507
return UR_RESULT_SUCCESS;

source/adapters/hip/program.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -480,9 +480,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetNativeHandle(
480480
///
481481
/// Note: Only supports one device
482482
UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary(
483-
ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size,
484-
const uint8_t *pBinary, const ur_program_properties_t *pProperties,
483+
ur_context_handle_t hContext, uint32_t numDevices,
484+
ur_device_handle_t *phDevices, size_t *pLengths, const uint8_t **ppBinaries,
485+
const ur_program_properties_t *pProperties,
485486
ur_program_handle_t *phProgram) {
487+
if (numDevices > 1)
488+
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
489+
490+
auto hDevice = phDevices[0];
491+
auto pBinary = ppBinaries[0];
492+
auto size = pLengths[0];
486493
UR_ASSERT(std::find(hContext->getDevices().begin(),
487494
hContext->getDevices().end(),
488495
hDevice) != hContext->getDevices().end(),

source/adapters/level_zero/command_buffer.cpp

Lines changed: 2 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -627,32 +627,6 @@ urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t CommandBuffer) {
627627
return UR_RESULT_SUCCESS;
628628
}
629629

630-
/**
631-
* Sets the global offset for a kernel command that will be appended to the
632-
* command buffer.
633-
* @param[in] CommandBuffer The CommandBuffer where the command will be
634-
* appended.
635-
* @param[in] Kernel The handle to the kernel that will be appended.
636-
* @param[in] GlobalWorkOffset The global offset value.
637-
* @return UR_RESULT_SUCCESS or an error code on failure
638-
*/
639-
ur_result_t setKernelGlobalOffset(ur_exp_command_buffer_handle_t CommandBuffer,
640-
ur_kernel_handle_t Kernel,
641-
const size_t *GlobalWorkOffset) {
642-
643-
if (!CommandBuffer->Context->getPlatform()
644-
->ZeDriverGlobalOffsetExtensionFound) {
645-
logger::debug("No global offset extension found on this driver");
646-
return UR_RESULT_ERROR_INVALID_VALUE;
647-
}
648-
649-
ZE2UR_CALL(zeKernelSetGlobalOffsetExp,
650-
(Kernel->ZeKernel, GlobalWorkOffset[0], GlobalWorkOffset[1],
651-
GlobalWorkOffset[2]));
652-
653-
return UR_RESULT_SUCCESS;
654-
}
655-
656630
/**
657631
* Sets the kernel arguments for a kernel command that will be appended to the
658632
* command buffer.
@@ -754,7 +728,8 @@ ur_result_t urCommandBufferAppendKernelLaunchExp(
754728
Kernel->Mutex, Kernel->Program->Mutex, CommandBuffer->Mutex);
755729

756730
if (GlobalWorkOffset != NULL) {
757-
UR_CALL(setKernelGlobalOffset(CommandBuffer, Kernel, GlobalWorkOffset));
731+
UR_CALL(setKernelGlobalOffset(CommandBuffer->Context, Kernel->ZeKernel,
732+
WorkDim, GlobalWorkOffset));
758733
}
759734

760735
// If there are any pending arguments set them now.

source/adapters/level_zero/helpers/kernel_helpers.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -67,16 +67,17 @@ ur_result_t getSuggestedLocalWorkSize(ur_device_handle_t hDevice,
6767
}
6868

6969
ur_result_t setKernelGlobalOffset(ur_context_handle_t Context,
70-
ze_kernel_handle_t Kernel,
70+
ze_kernel_handle_t Kernel, uint32_t WorkDim,
7171
const size_t *GlobalWorkOffset) {
7272
if (!Context->getPlatform()->ZeDriverGlobalOffsetExtensionFound) {
7373
logger::debug("No global offset extension found on this driver");
7474
return UR_RESULT_ERROR_INVALID_VALUE;
7575
}
7676

77-
ZE2UR_CALL(
78-
zeKernelSetGlobalOffsetExp,
79-
(Kernel, GlobalWorkOffset[0], GlobalWorkOffset[1], GlobalWorkOffset[2]));
77+
auto OffsetX = GlobalWorkOffset[0];
78+
auto OffsetY = WorkDim > 1 ? GlobalWorkOffset[1] : 0;
79+
auto OffsetZ = WorkDim > 2 ? GlobalWorkOffset[2] : 0;
80+
ZE2UR_CALL(zeKernelSetGlobalOffsetExp, (Kernel, OffsetX, OffsetY, OffsetZ));
8081

8182
return UR_RESULT_SUCCESS;
8283
}

source/adapters/level_zero/helpers/kernel_helpers.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,12 @@ ur_result_t calculateKernelWorkDimensions(
3636
* command buffer.
3737
* @param[in] Context Context associated with the queue.
3838
* @param[in] Kernel The handle to the kernel that will be appended.
39-
* @param[in] GlobalWorkOffset The global offset value.
39+
* @param[in] WorkDim The number of work dimensions.
40+
* @param[in] GlobalWorkOffset Array of size WorkDim.
4041
* @return UR_RESULT_SUCCESS or an error code on failure
4142
*/
4243
ur_result_t setKernelGlobalOffset(ur_context_handle_t Context,
43-
ze_kernel_handle_t Kernel,
44+
ze_kernel_handle_t Kernel, uint32_t WorkDim,
4445
const size_t *GlobalWorkOffset);
4546

4647
/**

0 commit comments

Comments
 (0)