@@ -224,17 +224,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchCooperativeKernel(
224224 return ret;
225225 }
226226
227- ret = appendLaunchKernelWithParams (hKernel, pLaunchFuncArgs,
228- hSignalEvent, false , false , true );
229- if (ret) {
230- return ret;
231- }
232-
233- if (hSignalEvent) {
234- programEventL3Flush (hSignalEvent, this ->device , this ->partitionCount , commandContainer);
235- }
236-
237- return ret;
227+ return appendLaunchKernelWithParams (hKernel, pLaunchFuncArgs,
228+ hSignalEvent, false , false , true );
238229}
239230
240231template <GFXCORE_FAMILY gfxCoreFamily>
@@ -251,12 +242,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_
251242 appendEventForProfiling (hEvent, true , false );
252243 ret = appendLaunchKernelWithParams (hKernel, pDispatchArgumentsBuffer,
253244 nullptr , true , false , false );
254- if (ret) {
255- return ret;
256- }
257- if (hEvent) {
258- programEventL3Flush (hEvent, this ->device , this ->partitionCount , commandContainer);
259- }
260245 appendSignalEventPostWalker (hEvent, false );
261246
262247 return ret;
@@ -291,9 +276,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsInd
291276 return ret;
292277 }
293278 }
294- if (hEvent) {
295- programEventL3Flush (hEvent, this ->device , this ->partitionCount , commandContainer);
296- }
279+
297280 appendSignalEventPostWalker (hEvent, false );
298281
299282 return ret;
@@ -817,6 +800,22 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemAdvise(ze_device_hand
817800 return ZE_RESULT_ERROR_INVALID_ARGUMENT;
818801}
819802
// Launches one kernel of a split (multi-kernel) operation by forwarding
// hKernel and pThreadGroupDimensions to appendLaunchKernelWithParams.
// hEvent is accepted but not used in this body: nullptr is passed in the
// event position, so this call does not attach the event to the walker.
// NOTE(review): the three trailing booleans are presumably
// isIndirect / isPredicate / isCooperative -- confirm against the declaration.
803+ template <GFXCORE_FAMILY gfxCoreFamily>
804+ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(ze_kernel_handle_t hKernel,
805+ const ze_group_count_t *pThreadGroupDimensions,
806+ ze_event_handle_t hEvent) {
807+ return appendLaunchKernelWithParams (hKernel, pThreadGroupDimensions, nullptr , false , false , false );
808+ }
809+
// Event hook wrapped around a group of kernel launches.
// beforeWalker == true  : calls appendEventForProfiling(hEvent, true, false).
// beforeWalker == false : calls appendSignalEventPostWalker(hEvent, false).
// In the visible callers (appendMemoryCopy / appendMemoryFill) it is invoked
// with true ahead of the split kernel launches and with false after them.
810+ template <GFXCORE_FAMILY gfxCoreFamily>
811+ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(ze_event_handle_t hEvent, bool beforeWalker) {
812+ if (beforeWalker) {
813+ appendEventForProfiling (hEvent, true , false );
814+ } else {
815+ appendSignalEventPostWalker (hEvent, false );
816+ }
817+ }
818+
820819template <GFXCORE_FAMILY gfxCoreFamily>
821820ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernelWithGA(void *dstPtr,
822821 NEO::GraphicsAllocation *dstPtrAlloc,
@@ -1070,7 +1069,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
10701069 }
10711070
10721071 appendEventForProfilingAllWalkers (hSignalEvent, true );
1073- adjustEventKernelCount (hSignalEvent);
10741072
10751073 if (ret == ZE_RESULT_SUCCESS && leftSize) {
10761074 Builtin func = Builtin::CopyBufferToBufferSide;
@@ -1130,22 +1128,16 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
11301128 isStateless);
11311129 }
11321130
1133- if (hSignalEvent) {
1134- programEventL3Flush (hSignalEvent, this ->device , this ->partitionCount , commandContainer);
1135- }
11361131 appendEventForProfilingAllWalkers (hSignalEvent, false );
11371132
11381133 const auto &hwInfo = this ->device ->getHwInfo ();
11391134 if (NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable (true , hwInfo)) {
11401135 auto event = Event::fromHandle (hSignalEvent);
11411136 if (event) {
11421137 dstAllocationStruct.needsFlush &= !event->signalScope ;
1143- dstAllocationStruct.needsFlush &= !event->l3FlushWaApplied ;
11441138 }
11451139
1146- dstAllocationStruct.needsFlush &= !isCopyOnly ();
1147-
1148- if (dstAllocationStruct.needsFlush ) {
1140+ if (dstAllocationStruct.needsFlush && !isCopyOnly ()) {
11491141 NEO::PipeControlArgs args;
11501142 args.dcFlushEnable = true ;
11511143 NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl (*commandContainer.getCommandStream (), args);
@@ -1460,7 +1452,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
14601452 builtinFunction->setArgumentValue (2 , sizeof (value), &value);
14611453
14621454 appendEventForProfilingAllWalkers (hSignalEvent, true );
1463- adjustEventKernelCount (hSignalEvent);
14641455
14651456 uint32_t groups = static_cast <uint32_t >(size) / groupSizeX;
14661457 ze_group_count_t dispatchFuncArgs{groups, 1u , 1u };
@@ -1535,7 +1526,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
15351526 builtinFunction->setArgumentValue (3 , sizeof (patternSizeInEls), &patternSizeInEls);
15361527
15371528 appendEventForProfilingAllWalkers (hSignalEvent, true );
1538- adjustEventKernelCount (hSignalEvent);
15391529
15401530 ze_group_count_t dispatchFuncArgs{groups, 1u , 1u };
15411531 res = appendLaunchKernelSplit (builtinFunction->toHandle (), &dispatchFuncArgs, hSignalEvent);
@@ -1574,21 +1564,15 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
15741564 }
15751565 }
15761566
1577- if (hSignalEvent) {
1578- programEventL3Flush (hSignalEvent, this ->device , this ->partitionCount , commandContainer);
1579- }
15801567 appendEventForProfilingAllWalkers (hSignalEvent, false );
15811568
15821569 const auto &hwInfo = this ->device ->getHwInfo ();
15831570 if (NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable (true , hwInfo)) {
15841571 auto event = Event::fromHandle (hSignalEvent);
15851572 if (event) {
15861573 hostPointerNeedsFlush &= !event->signalScope ;
1587- hostPointerNeedsFlush &= !event->l3FlushWaApplied ;
15881574 }
15891575
1590- hostPointerNeedsFlush &= !isCopyOnly ();
1591-
15921576 if (hostPointerNeedsFlush) {
15931577 NEO::PipeControlArgs args;
15941578 args.dcFlushEnable = true ;
0 commit comments