-
Notifications
You must be signed in to change notification settings - Fork 791
[UR][L0 v2] Set pointer kernel arguments only for queue's associated device #20179
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
fc827a1
4f4e6b8
3b716f1
aa04221
c8f3096
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -33,6 +33,17 @@ ur_single_device_kernel_t::ur_single_device_kernel_t(ur_device_handle_t hDevice, | |
| }; | ||
| } | ||
|
|
||
| ur_result_t ur_single_device_kernel_t::setArgValue(uint32_t argIndex, | ||
| size_t argSize, | ||
| const void *pArgValue) { | ||
| return setArgValueOnZeKernel(hKernel.get(), argIndex, argSize, pArgValue); | ||
| } | ||
|
|
||
| ur_result_t ur_single_device_kernel_t::setArgPointer(uint32_t argIndex, | ||
| const void *pArgValue) { | ||
| return setArgValue(argIndex, sizeof(void *), &pArgValue); | ||
| } | ||
|
|
||
| ur_result_t ur_single_device_kernel_t::release() { | ||
| hKernel.reset(); | ||
| return UR_RESULT_SUCCESS; | ||
|
|
@@ -184,19 +195,6 @@ ur_result_t ur_kernel_handle_t_::setArgValue( | |
| uint32_t argIndex, size_t argSize, | ||
| const ur_kernel_arg_value_properties_t * /*pProperties*/, | ||
| const void *pArgValue) { | ||
|
|
||
| // OpenCL: "the arg_value pointer can be NULL or point to a NULL value | ||
| // in which case a NULL value will be used as the value for the argument | ||
| // declared as a pointer to global or constant memory in the kernel" | ||
| // | ||
| // We don't know the type of the argument but it seems that the only time | ||
| // SYCL RT would send a pointer to NULL in 'arg_value' is when the argument | ||
| // is a NULL pointer. Treat a pointer to NULL in 'arg_value' as a NULL. | ||
| if (argSize == sizeof(void *) && pArgValue && | ||
| *(void **)(const_cast<void *>(pArgValue)) == nullptr) { | ||
| pArgValue = nullptr; | ||
| } | ||
|
|
||
| if (argIndex > zeCommonProperties->numKernelArgs - 1) { | ||
| return UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX; | ||
| } | ||
|
|
@@ -206,15 +204,8 @@ ur_result_t ur_kernel_handle_t_::setArgValue( | |
| continue; | ||
| } | ||
|
|
||
| auto zeResult = ZE_CALL_NOCHECK(zeKernelSetArgumentValue, | ||
| (singleDeviceKernel.value().hKernel.get(), | ||
| argIndex, argSize, pArgValue)); | ||
|
|
||
| if (zeResult == ZE_RESULT_ERROR_INVALID_ARGUMENT) { | ||
| return UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE; | ||
| } else if (zeResult != ZE_RESULT_SUCCESS) { | ||
| return ze2urResult(zeResult); | ||
| } | ||
| UR_CALL(setArgValueOnZeKernel(singleDeviceKernel.value().hKernel.get(), | ||
| argIndex, argSize, pArgValue)); | ||
| } | ||
| return UR_RESULT_SUCCESS; | ||
| } | ||
|
|
@@ -278,7 +269,11 @@ ur_result_t ur_kernel_handle_t_::prepareForSubmission( | |
| const size_t *pGlobalWorkOffset, uint32_t workDim, uint32_t groupSizeX, | ||
| uint32_t groupSizeY, uint32_t groupSizeZ, | ||
| ze_command_list_handle_t commandList, wait_list_view &waitListView) { | ||
| auto hZeKernel = getZeHandle(hDevice); | ||
| auto &deviceKernelOpt = deviceKernels[deviceIndex(hDevice)]; | ||
| if (!deviceKernelOpt.has_value()) | ||
| return UR_RESULT_ERROR_INVALID_KERNEL; | ||
| auto &deviceKernel = deviceKernelOpt.value(); | ||
| auto hZeKernel = deviceKernel.hKernel.get(); | ||
|
|
||
| if (pGlobalWorkOffset != NULL) { | ||
| UR_CALL( | ||
|
|
@@ -301,10 +296,17 @@ ur_result_t ur_kernel_handle_t_::prepareForSubmission( | |
| zePtr = reinterpret_cast<void *>(hImage->getZeImage()); | ||
| } | ||
| } | ||
| UR_CALL(setArgPointer(pending.argIndex, nullptr, zePtr)); | ||
| // Set the argument only on this device's kernel. | ||
| UR_CALL(deviceKernel.setArgPointer(pending.argIndex, zePtr)); | ||
| } | ||
| pending_allocations.clear(); | ||
|
|
||
| // Apply any pending raw pointer arguments (USM pointers) for this device. | ||
| for (auto &pending : pending_pointer_args) { | ||
| UR_CALL(deviceKernel.setArgPointer(pending.argIndex, pending.ptrArgValue)); | ||
| } | ||
| pending_pointer_args.clear(); | ||
|
|
||
| return UR_RESULT_SUCCESS; | ||
| } | ||
|
|
||
|
|
@@ -319,6 +321,18 @@ ur_result_t ur_kernel_handle_t_::addPendingMemoryAllocation( | |
| return UR_RESULT_SUCCESS; | ||
| } | ||
|
|
||
| ur_result_t | ||
| ur_kernel_handle_t_::addPendingPointerArgument(uint32_t argIndex, | ||
| const void *pArgValue) { | ||
| if (argIndex > zeCommonProperties->numKernelArgs - 1) { | ||
| return UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX; | ||
| } | ||
|
|
||
| pending_pointer_args.push_back({argIndex, pArgValue}); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a potential allocation in the hot path. Might be worth testing performance. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure, here is the successful perf testing for this PR: https://github.com/intel/llvm/actions/runs/18106004233 |
||
|
|
||
| return UR_RESULT_SUCCESS; | ||
| } | ||
|
|
||
| std::vector<char> ur_kernel_handle_t_::getSourceAttributes() const { | ||
| uint32_t size; | ||
| ZE2UR_CALL_THROWS(zeKernelGetSourceAttributes, | ||
|
|
@@ -412,6 +426,13 @@ ur_result_t urKernelSetArgPointer( | |
| TRACK_SCOPE_LATENCY("urKernelSetArgPointer"); | ||
|
|
||
| std::scoped_lock<ur_shared_mutex> guard(hKernel->Mutex); | ||
| // In multi-device context store the raw pointer value and defer setting the | ||
| // argument until we know the device where kernel is being submitted. | ||
| if (hKernel->getContext()->getDevices().size() > 1) { | ||
| hKernel->addPendingPointerArgument(argIndex, pArgValue); | ||
| return UR_RESULT_SUCCESS; | ||
| } | ||
|
||
|
|
||
| return hKernel->setArgPointer(argIndex, pProperties, pArgValue); | ||
| } catch (...) { | ||
| return exceptionToResult(std::current_exception()); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: I'm wondering whether
getZeHandleshould also perform this.has_value()check.