Skip to content

Commit 4faf1ee

Browse files
Flush tag update while enqueue mem fill
Signed-off-by: Lukasz Jobczyk <[email protected]>
1 parent f9fab3f commit 4faf1ee

File tree

2 files changed

+57
-29
lines changed

2 files changed

+57
-29
lines changed

opencl/source/command_queue/enqueue_common.h

Lines changed: 29 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -760,35 +760,35 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
760760
auto memoryCompressionState = getGpgpuCommandStreamReceiver().getMemoryCompressionState(auxTranslationRequired, device->getHardwareInfo());
761761

762762
DispatchFlags dispatchFlags(
763-
{}, // csrDependencies
764-
&timestampPacketDependencies.barrierNodes, // barrierTimestampPacketNodes
765-
{}, // pipelineSelectArgs
766-
this->flushStamp->getStampReference(), // flushStampReference
767-
getThrottle(), // throttle
768-
ClPreemptionHelper::taskPreemptionMode(getDevice(), multiDispatchInfo), // preemptionMode
769-
numGrfRequired, // numGrfRequired
770-
L3CachingSettings::l3CacheOn, // l3CacheSettings
771-
kernel->getDescriptor().kernelAttributes.threadArbitrationPolicy, // threadArbitrationPolicy
772-
kernel->getAdditionalKernelExecInfo(), // additionalKernelExecInfo
773-
kernel->getExecutionType(), // kernelExecutionType
774-
memoryCompressionState, // memoryCompressionState
775-
getSliceCount(), // sliceCount
776-
blocking, // blocking
777-
shouldFlushDC(commandType, printfHandler) || allocNeedsFlushDC, // dcFlush
778-
multiDispatchInfo.usesSlm(), // useSLM
779-
!getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled(), // guardCommandBufferWithPipeControl
780-
commandType == CL_COMMAND_NDRANGE_KERNEL, // GSBA32BitRequired
781-
requiresCoherency, // requiresCoherency
782-
(QueuePriority::LOW == priority), // lowPriority
783-
implicitFlush, // implicitFlush
784-
!eventBuilder.getEvent() || getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), // outOfOrderExecutionAllowed
785-
false, // epilogueRequired
786-
false, // usePerDssBackedBuffer
787-
kernel->isSingleSubdevicePreferred(), // useSingleSubdevice
788-
useGlobalAtomics, // useGlobalAtomics
789-
kernel->areMultipleSubDevicesInContext(), // areMultipleSubDevicesInContext
790-
kernel->requiresMemoryMigration(), // memoryMigrationRequired
791-
isTextureCacheFlushNeeded(commandType)); // textureCacheFlush
763+
{}, // csrDependencies
764+
&timestampPacketDependencies.barrierNodes, // barrierTimestampPacketNodes
765+
{}, // pipelineSelectArgs
766+
this->flushStamp->getStampReference(), // flushStampReference
767+
getThrottle(), // throttle
768+
ClPreemptionHelper::taskPreemptionMode(getDevice(), multiDispatchInfo), // preemptionMode
769+
numGrfRequired, // numGrfRequired
770+
L3CachingSettings::l3CacheOn, // l3CacheSettings
771+
kernel->getDescriptor().kernelAttributes.threadArbitrationPolicy, // threadArbitrationPolicy
772+
kernel->getAdditionalKernelExecInfo(), // additionalKernelExecInfo
773+
kernel->getExecutionType(), // kernelExecutionType
774+
memoryCompressionState, // memoryCompressionState
775+
getSliceCount(), // sliceCount
776+
blocking, // blocking
777+
shouldFlushDC(commandType, printfHandler) || allocNeedsFlushDC, // dcFlush
778+
multiDispatchInfo.usesSlm(), // useSLM
779+
!getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled() || commandType == CL_COMMAND_FILL_BUFFER, // guardCommandBufferWithPipeControl
780+
commandType == CL_COMMAND_NDRANGE_KERNEL, // GSBA32BitRequired
781+
requiresCoherency, // requiresCoherency
782+
(QueuePriority::LOW == priority), // lowPriority
783+
implicitFlush, // implicitFlush
784+
!eventBuilder.getEvent() || getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), // outOfOrderExecutionAllowed
785+
false, // epilogueRequired
786+
false, // usePerDssBackedBuffer
787+
kernel->isSingleSubdevicePreferred(), // useSingleSubdevice
788+
useGlobalAtomics, // useGlobalAtomics
789+
kernel->areMultipleSubDevicesInContext(), // areMultipleSubDevicesInContext
790+
kernel->requiresMemoryMigration(), // memoryMigrationRequired
791+
isTextureCacheFlushNeeded(commandType)); // textureCacheFlush
792792

793793
dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = mediaSamplerRequired;
794794
dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode = systolicPipelineSelectMode;

opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,34 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEnableUpdateTaskFromWaitWhenN
108108
buffer->release();
109109
}
110110

111+
// Checks that, with the UpdateTaskCountFromWait debug flag set to 3, an
// enqueueFillBuffer call leaves at least one PIPE_CONTROL command in the
// command stream most recently flushed by the command stream receiver.
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEnableUpdateTaskFromWaitWhenEnqueueFillIsMadeThenPipeControlInserted) {
112+
// NOTE(review): presumably restores the debug flags when it goes out of scope - confirm against DebugManagerStateRestore.
DebugManagerStateRestore restorer;
113+
DebugManager.flags.UpdateTaskCountFromWait.set(3u);
114+
typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
115+
MockContext ctx(pClDevice);
116+
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
117+
// Turn timestamp packet writes off on the receiver before anything is enqueued.
commandStreamReceiver.timestampPacketWriteEnabled = false;
118+
CommandQueueHw<FamilyType> commandQueue(&ctx, pClDevice, 0, false);
119+
// Host memory handed to Buffer::create below together with CL_MEM_USE_HOST_PTR.
size_t tempBuffer[] = {0, 1, 2};
120+
// NOTE(review): despite its name, this array is passed as the second argument of
// enqueueFillBuffer, which is presumably the fill pattern (OpenCL argument order) - confirm.
size_t dstBuffer[] = {0};
121+
cl_int retVal = 0;
122+
123+
// NOTE(review): neither retVal nor the returned pointer is checked before buffer is used.
auto buffer = Buffer::create(&ctx, CL_MEM_USE_HOST_PTR, sizeof(tempBuffer), tempBuffer, retVal);
124+
125+
// Operation under test. No wait list and no output event are supplied (0, nullptr, nullptr).
commandQueue.enqueueFillBuffer(buffer, dstBuffer, 1, 0, sizeof(dstBuffer), 0, nullptr, nullptr);
126+
127+
// Inspect the command stream the receiver recorded as last flushed.
auto &commandStreamTask = *commandStreamReceiver.lastFlushedCommandStream;
128+
129+
// Drop any previously parsed commands so the search below only sees this stream.
cmdList.clear();
130+
// Parse command list
131+
parseCommands<FamilyType>(commandStreamTask, 0);
132+
133+
// Look for a PIPE_CONTROL anywhere in the parsed list; the test passes when one is found.
auto itorPC = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
134+
EXPECT_NE(cmdList.end(), itorPC);
135+
136+
buffer->release();
137+
}
138+
111139
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenTaskCsPassedAsCommandStreamParamWhenFlushingTaskThenCompletionStampIsCorrect) {
112140
CommandQueueHw<FamilyType> commandQueue(nullptr, pClDevice, 0, false);
113141
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();

0 commit comments

Comments
 (0)