@@ -255,9 +255,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill(
255255 return UR_RESULT_SUCCESS;
256256 }
257257
258- // OpenCL only supports pattern sizes as large as the largest CL type
259- // (double16/long16 - 128 bytes), anything larger we need to do on the host
260- // side and copy it into the target allocation.
258+ // OpenCL only supports pattern sizes that are powers of 2 and no larger
259+ // than the largest CL type (double16/long16 - 128 bytes). Patterns that are
260+ // larger, or whose size is not a power of 2, are expanded into a host-side
261+ // buffer that is then copied into the target allocation.
261262 clHostMemAllocINTEL_fn HostMemAlloc = nullptr ;
262263 UR_RETURN_ON_FAILURE (cl_ext::getExtFuncFromContext<clHostMemAllocINTEL_fn>(
263264 CLContext, cl_ext::ExtFuncPtrCache->clHostMemAllocINTELCache ,
@@ -274,14 +275,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill(
274275 cl_ext::MemBlockingFreeName, &USMFree));
275276
276277 cl_int ClErr = CL_SUCCESS;
277- auto HostBuffer = static_cast < uint64_t *>(
278- HostMemAlloc (CLContext, nullptr , size, 0 , &ClErr));
278+ auto HostBuffer =
279+ static_cast < uint8_t *>( HostMemAlloc (CLContext, nullptr , size, 0 , &ClErr));
279280 CL_RETURN_ON_FAILURE (ClErr);
280281
281- auto NumValues = size / sizeof (uint64_t );
282- auto NumChunks = patternSize / sizeof (uint64_t );
283- for (size_t i = 0 ; i < NumValues; i++) {
284- HostBuffer[i] = static_cast <const uint64_t *>(pPattern)[i % NumChunks];
282+ auto *End = HostBuffer + size;
283+ for (auto *Iter = HostBuffer; Iter < End; Iter += patternSize) {
284+ std::memcpy (Iter, pPattern, patternSize);
285285 }
286286
287287 cl_event CopyEvent = nullptr ;
0 commit comments