@@ -31,8 +31,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSuggestedLocalWorkSize(
3131 ze_kernel_handle_t ZeKernel{};
3232 UR_CALL (getZeKernel (Legacy (hQueue)->Device ->ZeDevice , hKernel, &ZeKernel));
3333
34- UR_CALL (getSuggestedLocalWorkSize (Legacy (hQueue), ZeKernel, GlobalWorkSize3D ,
35- LocalWorkSize));
34+ UR_CALL (getSuggestedLocalWorkSize (Legacy (hQueue)-> Device , ZeKernel,
35+ GlobalWorkSize3D, LocalWorkSize));
3636
3737 std::copy (LocalWorkSize, LocalWorkSize + workDim, pSuggestedLocalWorkSize);
3838 return UR_RESULT_SUCCESS;
@@ -54,52 +54,6 @@ ur_result_t getZeKernel(ze_device_handle_t hDevice, ur_kernel_handle_t hKernel,
5454 return UR_RESULT_SUCCESS;
5555}
5656
57- ur_result_t getSuggestedLocalWorkSize (ur_queue_handle_legacy_t hQueue, // Computes a suggested 3D work-group size for hZeKernel.
58- ze_kernel_handle_t hZeKernel,
59- size_t GlobalWorkSize3D[3 ],
60- uint32_t SuggestedLocalWorkSize3D[3 ]) { // Output: receives the suggested size per dimension.
61- uint32_t *WG = SuggestedLocalWorkSize3D; // WG aliases the output array; writes below go to the caller.
62-
63- // zeKernelSuggestGroupSize cannot be called when a 64-bit GlobalWorkSize
64- // value does not fit in the 32 bits that the API currently supports.
65- bool SuggestGroupSize = true ;
66- for (int I : {0 , 1 , 2 }) {
67- if (GlobalWorkSize3D[I] > UINT32_MAX) {
68- SuggestGroupSize = false ;
69- }
70- }
71- if (SuggestGroupSize) {
72- ZE2UR_CALL (zeKernelSuggestGroupSize,
73- (hZeKernel, GlobalWorkSize3D[0 ], GlobalWorkSize3D[1 ],
74- GlobalWorkSize3D[2 ], &WG[0 ], &WG[1 ], &WG[2 ]));
75- } else {
76- for (int I : {0 , 1 , 2 }) {
77- // Try to find an I-dimension WG size that GlobalWorkSize3D[I] is
78- // evenly divisible by. Start with the max possible size in
79- // each dimension, clamped to the global size, and count down.
80- uint32_t GroupSize[] = {
81- hQueue->Device ->ZeDeviceComputeProperties ->maxGroupSizeX ,
82- hQueue->Device ->ZeDeviceComputeProperties ->maxGroupSizeY ,
83- hQueue->Device ->ZeDeviceComputeProperties ->maxGroupSizeZ };
84- GroupSize[I] = (std::min)(size_t (GroupSize[I]), GlobalWorkSize3D[I]);
85- while (GlobalWorkSize3D[I] % GroupSize[I]) { // NOTE(review): GroupSize[I] is 0 here if GlobalWorkSize3D[I] is 0 (modulo by zero) - confirm callers never pass 0.
86- --GroupSize[I];
87- }
88- if (GlobalWorkSize3D[I] / GroupSize[I] > UINT32_MAX) { // The resulting group count in this dimension must also fit in 32 bits.
89- logger::error (" getSuggestedLocalWorkSize: can't find a WG size "
90- " suitable for global work size > UINT32_MAX" );
91- return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;
92- }
93- WG[I] = GroupSize[I];
94- }
95- logger::debug (
96- " getSuggestedLocalWorkSize: using computed WG size = {{{}, {}, {}}}" ,
97- WG[0 ], WG[1 ], WG[2 ]);
98- }
99-
100- return UR_RESULT_SUCCESS;
101- }
102-
10357ur_result_t ur_queue_handle_legacy_t_::enqueueKernelLaunch (
10458 ur_kernel_handle_t Kernel, // /< [in] handle of the kernel object
10559 uint32_t WorkDim, // /< [in] number of dimensions, from 1 to 3, to specify
0 commit comments