@@ -650,14 +650,28 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueGenericFillUnlocked(
650650 waitListView.clear ();
651651 }));
652652
653- // TODO: support non-power-of-two pattern sizes
654-
655653 // PatternSize must be a power of two for zeCommandListAppendMemoryFill.
656654 // When it's not, the fill is emulated with zeCommandListAppendMemoryCopy.
657- ZE2UR_CALL (zeCommandListAppendMemoryFill,
658- (commandListManager.getZeCommandList (), pDst, pPattern,
659- patternSize, size, zeSignalEvent, waitListView.num ,
660- waitListView.handles ));
655+ if (isPowerOf2 (patternSize)) {
656+ ZE2UR_CALL (zeCommandListAppendMemoryFill,
657+ (commandListManager.getZeCommandList (), pDst, pPattern,
658+ patternSize, size, zeSignalEvent, waitListView.num ,
659+ waitListView.handles ));
660+ } else {
661+ // Copy the pattern into each patternSize-byte slot of the buffer pointed to by pDst.
662+ uint32_t numOfCopySteps = size / patternSize;
663+ const void *src = pPattern;
664+
665+ for (uint32_t step = 0 ; step < numOfCopySteps; ++step) {
666+ void *dst = reinterpret_cast <void *>(reinterpret_cast <uint8_t *>(pDst) +
667+ step * patternSize);
668+ ZE2UR_CALL (zeCommandListAppendMemoryCopy,
669+ (commandListManager.getZeCommandList (), dst, src, patternSize,
670+ step == numOfCopySteps - 1 ? zeSignalEvent : nullptr ,
671+ waitListView.num , waitListView.handles ));
672+ waitListView.clear ();
673+ }
674+ }
661675
662676 return UR_RESULT_SUCCESS;
663677}
0 commit comments