Skip to content

Commit 633ec40

Browse files
authored
Merge pull request #1412 from konradkusiak97/memsetLargePatternL0
[L0][OpenCL] Emulate Fill with copy when patternSize is not a power of 2
2 parents e822514 + 2727e8a commit 633ec40

File tree

2 files changed

+38
-14
lines changed

2 files changed

+38
-14
lines changed

source/adapters/level_zero/memory.cpp

Lines changed: 35 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
#include <algorithm>
1212
#include <climits>
1313
#include <string.h>
14+
#include <ur/ur.hpp>
1415

1516
#include "context.hpp"
1617
#include "event.hpp"
@@ -183,9 +184,6 @@ static ur_result_t enqueueMemFillHelper(ur_command_t CommandType,
183184
uint32_t NumEventsInWaitList,
184185
const ur_event_handle_t *EventWaitList,
185186
ur_event_handle_t *OutEvent) {
186-
// Pattern size must be a power of two.
187-
UR_ASSERT((PatternSize > 0) && ((PatternSize & (PatternSize - 1)) == 0),
188-
UR_RESULT_ERROR_INVALID_VALUE);
189187
auto &Device = Queue->Device;
190188

191189
// Make sure that pattern size matches the capability of the copy queues.
@@ -237,18 +235,42 @@ static ur_result_t enqueueMemFillHelper(ur_command_t CommandType,
237235
const auto &ZeCommandList = CommandList->first;
238236
const auto &WaitList = (*Event)->WaitList;
239237

240-
ZE2UR_CALL(zeCommandListAppendMemoryFill,
241-
(ZeCommandList, Ptr, Pattern, PatternSize, Size, ZeEvent,
242-
WaitList.Length, WaitList.ZeEventList));
238+
// PatternSize must be a power of two for zeCommandListAppendMemoryFill.
239+
// When it's not, the fill is emulated with zeCommandListAppendMemoryCopy.
240+
if (isPowerOf2(PatternSize)) {
241+
ZE2UR_CALL(zeCommandListAppendMemoryFill,
242+
(ZeCommandList, Ptr, Pattern, PatternSize, Size, ZeEvent,
243+
WaitList.Length, WaitList.ZeEventList));
243244

244-
logger::debug("calling zeCommandListAppendMemoryFill() with"
245-
" ZeEvent {}",
246-
ur_cast<uint64_t>(ZeEvent));
247-
printZeEventList(WaitList);
245+
logger::debug("calling zeCommandListAppendMemoryFill() with"
246+
" ZeEvent {}",
247+
ur_cast<uint64_t>(ZeEvent));
248+
printZeEventList(WaitList);
248249

249-
// Execute command list asynchronously, as the event will be used
250-
// to track down its completion.
251-
UR_CALL(Queue->executeCommandList(CommandList, false, OkToBatch));
250+
// Execute command list asynchronously, as the event will be used
251+
// to track down its completion.
252+
UR_CALL(Queue->executeCommandList(CommandList, false, OkToBatch));
253+
} else {
254+
// Copy pattern into every entry in memory array pointed by Ptr.
255+
uint32_t NumOfCopySteps = Size / PatternSize;
256+
const void *Src = Pattern;
257+
258+
for (uint32_t step = 0; step < NumOfCopySteps; ++step) {
259+
void *Dst = reinterpret_cast<void *>(reinterpret_cast<uint8_t *>(Ptr) +
260+
step * PatternSize);
261+
ZE2UR_CALL(zeCommandListAppendMemoryCopy,
262+
(ZeCommandList, Dst, Src, PatternSize, ZeEvent,
263+
WaitList.Length, WaitList.ZeEventList));
264+
}
265+
266+
logger::debug("calling zeCommandListAppendMemoryCopy() with"
267+
" ZeEvent {}",
268+
ur_cast<uint64_t>(ZeEvent));
269+
printZeEventList(WaitList);
270+
271+
// Execute command list synchronously.
272+
UR_CALL(Queue->executeCommandList(CommandList, true, OkToBatch));
273+
}
252274

253275
return UR_RESULT_SUCCESS;
254276
}

source/adapters/opencl/usm.cpp

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,8 @@
88
//
99
//===----------------------------------------------------------------------===//
1010

11+
#include <ur/ur.hpp>
12+
1113
#include "common.hpp"
1214

1315
inline cl_mem_alloc_flags_intel
@@ -239,7 +241,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill(
239241
return mapCLErrorToUR(CLErr);
240242
}
241243

242-
if (patternSize <= 128) {
244+
if (patternSize <= 128 && isPowerOf2(patternSize)) {
243245
clEnqueueMemFillINTEL_fn EnqueueMemFill = nullptr;
244246
UR_RETURN_ON_FAILURE(
245247
cl_ext::getExtFuncFromContext<clEnqueueMemFillINTEL_fn>(

0 commit comments

Comments
 (0)