Skip to content

Commit 8263d48

Browse files
Submit Semaphore dependency for enqueue read/write without Kernel
Change-Id: I22e1743b4cbd6e8285527fdfe25424a6cb3ff462
Signed-off-by: Dunajski, Bartosz <[email protected]>
Related-To: NEO-3020
1 parent 165d1e4 commit 8263d48

File tree

11 files changed

+154
-61
lines changed

11 files changed

+154
-61
lines changed

runtime/command_queue/command_queue.cpp

Lines changed: 0 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -584,26 +584,6 @@ bool CommandQueue::bufferCpuCopyAllowed(Buffer *buffer, cl_command_type commandT
584584
buffer->isReadWriteOnCpuAllowed(blocking, numEventsInWaitList, ptr, size);
585585
}
586586

587-
cl_int CommandQueue::enqueueReadWriteBufferWithBlitTransfer(cl_command_type commandType, Buffer *buffer, bool blocking,
588-
size_t offset, size_t size, void *ptr, cl_uint numEventsInWaitList,
589-
const cl_event *eventWaitList, cl_event *event) {
590-
auto blitCommandStreamReceiver = context->getCommandStreamReceiverForBlitOperation(*buffer);
591-
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
592-
TimestampPacketContainer previousTimestampPacketNodes;
593-
CsrDependencies csrDependencies;
594-
595-
csrDependencies.fillFromEventsRequestAndMakeResident(eventsRequest, *blitCommandStreamReceiver,
596-
CsrDependencies::DependenciesType::All);
597-
598-
obtainNewTimestampPacketNodes(1, previousTimestampPacketNodes, queueDependenciesClearRequired());
599-
csrDependencies.push_back(&previousTimestampPacketNodes);
600-
601-
auto copyDirection = (CL_COMMAND_WRITE_BUFFER == commandType) ? BlitterConstants::BlitWithHostPtrDirection::FromHostPtr
602-
: BlitterConstants::BlitWithHostPtrDirection::ToHostPtr;
603-
blitCommandStreamReceiver->blitWithHostPtr(*buffer, ptr, blocking, offset, size, copyDirection, csrDependencies, *timestampPacketContainer);
604-
return CL_SUCCESS;
605-
}
606-
607587
// Returns true when isOOQEnabled() reports true or when the
// OmitTimestampPacketDependencies debug flag is set; false otherwise.
bool CommandQueue::queueDependenciesClearRequired() const {
608588
return isOOQEnabled() || DebugManager.flags.OmitTimestampPacketDependencies.get();
609589
}

runtime/command_queue/command_queue.h

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -439,10 +439,6 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
439439
void *enqueueMapMemObject(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet);
440440
cl_int enqueueUnmapMemObject(TransferProperties &transferProperties, EventsRequest &eventsRequest);
441441

442-
cl_int enqueueReadWriteBufferWithBlitTransfer(cl_command_type commandType, Buffer *buffer, bool blocking,
443-
size_t offset, size_t size, void *ptr, cl_uint numEventsInWaitList,
444-
const cl_event *eventWaitList, cl_event *event);
445-
446442
virtual void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType, bool updateQueueTaskLevel){};
447443

448444
MOCKABLE_VIRTUAL void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, MemObjsForAuxTranslation &memObjsForAuxTranslation,

runtime/command_queue/command_queue_hw.h

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -289,6 +289,7 @@ class CommandQueueHw : public CommandQueue {
289289
void enqueueHandler(Surface **surfacesForResidency,
290290
size_t numSurfaceForResidency,
291291
bool blocking,
292+
bool blitEnqueue,
292293
const MultiDispatchInfo &dispatchInfo,
293294
cl_uint numEventsInWaitList,
294295
const cl_event *eventWaitList,
@@ -301,7 +302,7 @@ class CommandQueueHw : public CommandQueue {
301302
cl_uint numEventsInWaitList,
302303
const cl_event *eventWaitList,
303304
cl_event *event) {
304-
enqueueHandler<enqueueType>(surfacesForResidency, size, blocking, dispatchInfo, numEventsInWaitList, eventWaitList, event);
305+
enqueueHandler<enqueueType>(surfacesForResidency, size, blocking, false, dispatchInfo, numEventsInWaitList, eventWaitList, event);
305306
}
306307

307308
template <uint32_t enqueueType, size_t size>
@@ -376,6 +377,10 @@ class CommandQueueHw : public CommandQueue {
376377
cl_int enqueueMarkerForReadWriteOperation(MemObj *memObj, void *ptr, cl_command_type commandType, cl_bool blocking, cl_uint numEventsInWaitList,
377378
const cl_event *eventWaitList, cl_event *event);
378379

380+
cl_int enqueueReadWriteBufferWithBlitTransfer(cl_command_type commandType, Buffer *buffer, bool blocking,
381+
size_t offset, size_t size, void *ptr, cl_uint numEventsInWaitList,
382+
const cl_event *eventWaitList, cl_event *event);
383+
379384
private:
380385
bool isTaskLevelUpdateRequired(const uint32_t &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType);
381386
void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType, bool updateQueueTaskLevel) override;

runtime/command_queue/command_queue_hw_base.inl

Lines changed: 30 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -99,4 +99,34 @@ cl_int CommandQueueHw<Family>::enqueueMarkerForReadWriteOperation(MemObj *memObj
9999
return CL_SUCCESS;
100100
}
101101

102+
// Services a buffer read or write through the blit command stream receiver
// obtained from the context, then calls enqueueHandler with blitEnqueue = true
// and an empty MultiDispatchInfo.
//
// commandType selects the copy direction: CL_COMMAND_WRITE_BUFFER copies
// FromHostPtr, any other value copies ToHostPtr (and is routed to the
// CL_COMMAND_READ_BUFFER handler instantiation).
// buffer/offset/size/ptr/blocking are forwarded unchanged to blitWithHostPtr.
// numEventsInWaitList/eventWaitList/event are used both to build the blit
// dependencies and as arguments to enqueueHandler.
//
// Always returns CL_SUCCESS; no result of the blit or of enqueueHandler is inspected.
template <typename Family>
103+
cl_int CommandQueueHw<Family>::enqueueReadWriteBufferWithBlitTransfer(cl_command_type commandType, Buffer *buffer, bool blocking,
104+
size_t offset, size_t size, void *ptr, cl_uint numEventsInWaitList,
105+
const cl_event *eventWaitList, cl_event *event) {
106+
auto blitCommandStreamReceiver = context->getCommandStreamReceiverForBlitOperation(*buffer);
107+
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
108+
TimestampPacketContainer previousTimestampPacketNodes;
109+
CsrDependencies csrDependencies;
110+
111+
// Gather dependencies from the caller's event wait list (DependenciesType::All).
csrDependencies.fillFromEventsRequestAndMakeResident(eventsRequest, *blitCommandStreamReceiver,
112+
CsrDependencies::DependenciesType::All);
113+
114+
// NOTE(review): presumably this requests one fresh timestamp packet node for the
// queue and hands the prior nodes to previousTimestampPacketNodes - confirm
// against obtainNewTimestampPacketNodes. Those prior nodes are then added as a
// dependency of the blit.
obtainNewTimestampPacketNodes(1, previousTimestampPacketNodes, queueDependenciesClearRequired());
115+
csrDependencies.push_back(&previousTimestampPacketNodes);
116+
117+
auto copyDirection = (CL_COMMAND_WRITE_BUFFER == commandType) ? BlitterConstants::BlitWithHostPtrDirection::FromHostPtr
118+
: BlitterConstants::BlitWithHostPtrDirection::ToHostPtr;
119+
blitCommandStreamReceiver->blitWithHostPtr(*buffer, ptr, blocking, offset, size, copyDirection, csrDependencies, *timestampPacketContainer);
120+
121+
// Intentionally empty: no kernel is dispatched for this operation.
MultiDispatchInfo multiDispatchInfo;
122+
123+
// enqueueHandler is templated on the command type, so the runtime value has to
// be mapped to a compile-time instantiation; the fourth argument (true) is blitEnqueue.
if (CL_COMMAND_WRITE_BUFFER == commandType) {
124+
enqueueHandler<CL_COMMAND_WRITE_BUFFER>(nullptr, 0, blocking, true, multiDispatchInfo, numEventsInWaitList, eventWaitList, event);
125+
} else {
126+
enqueueHandler<CL_COMMAND_READ_BUFFER>(nullptr, 0, blocking, true, multiDispatchInfo, numEventsInWaitList, eventWaitList, event);
127+
}
128+
129+
return CL_SUCCESS;
130+
}
131+
102132
} // namespace NEO

runtime/command_queue/enqueue_common.h

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -123,12 +123,13 @@ template <uint32_t commandType>
123123
void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
124124
size_t numSurfaceForResidency,
125125
bool blocking,
126+
bool blitEnqueue,
126127
const MultiDispatchInfo &multiDispatchInfo,
127128
cl_uint numEventsInWaitList,
128129
const cl_event *eventWaitList,
129130
cl_event *event) {
130-
if (multiDispatchInfo.empty() && !isCommandWithoutKernel(commandType)) {
131-
enqueueHandler<CL_COMMAND_MARKER>(surfacesForResidency, numSurfaceForResidency, blocking, multiDispatchInfo,
131+
if (multiDispatchInfo.empty() && !isCommandWithoutKernel(commandType) && !blitEnqueue) {
132+
enqueueHandler<CL_COMMAND_MARKER>(surfacesForResidency, numSurfaceForResidency, blocking, false, multiDispatchInfo,
132133
numEventsInWaitList, eventWaitList, event);
133134
if (event) {
134135
castToObjectOrAbort<Event>(*event)->setCmdType(commandType);
@@ -231,6 +232,9 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
231232
processDispatchForKernels<commandType>(multiDispatchInfo, printfHandler, eventBuilder.getEvent(),
232233
hwTimeStamps, parentKernel, blockQueue, devQueueHw, csrDeps, blockedCommandsData,
233234
previousTimestampPacketNodes, preemption);
235+
} else if (blitEnqueue) {
236+
auto currentTimestampPacketNode = timestampPacketContainer->peekNodes().at(0);
237+
TimestampPacketHelper::programSemaphoreWithImplicitDependency<GfxFamily>(commandStream, *currentTimestampPacketNode);
234238
} else if (isCacheFlushCommand(commandType)) {
235239
processDispatchForCacheFlush(surfacesForResidency, numSurfaceForResidency, &commandStream, csrDeps);
236240
} else if (getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
@@ -253,9 +257,9 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
253257
processDeviceEnqueue(parentKernel, devQueueHw, multiDispatchInfo, hwTimeStamps, preemption, blocking);
254258
}
255259

256-
auto submissionRequired = !isCommandWithoutKernel(commandType);
260+
auto kernelSubmissionRequired = !isCommandWithoutKernel(commandType) && !blitEnqueue;
257261

258-
if (submissionRequired) {
262+
if (kernelSubmissionRequired) {
259263
completionStamp = enqueueNonBlocked<commandType>(
260264
surfacesForResidency,
261265
numSurfaceForResidency,
@@ -292,7 +296,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
292296
devQueueHw->getDebugQueue());
293297
}
294298
}
295-
} else if (isCacheFlushCommand(commandType)) {
299+
} else if (isCacheFlushCommand(commandType) || blitEnqueue) {
296300
completionStamp = enqueueCommandWithoutKernel(
297301
surfacesForResidency,
298302
numSurfaceForResidency,

runtime/command_queue/gpgpu_walker_base.inl

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -320,8 +320,14 @@ size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, c
320320
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredCS(eventType, reserveProfilingCmdsSpace, reservePerfCounters, commandQueue, &scheduler);
321321
}
322322
if (commandQueue.getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
323-
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredForTimestampPacketWrite();
323+
bool isReadWriteBufferOperationWithoutKernel = (CL_COMMAND_READ_BUFFER == eventType || CL_COMMAND_WRITE_BUFFER == eventType) &&
324+
multiDispatchInfo.empty();
324325
expectedSizeCS += TimestampPacketHelper::getRequiredCmdStreamSize<GfxFamily>(csrDeps);
326+
if (isReadWriteBufferOperationWithoutKernel) {
327+
expectedSizeCS += TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependency<GfxFamily>();
328+
} else {
329+
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredForTimestampPacketWrite();
330+
}
325331
}
326332
return expectedSizeCS;
327333
}

runtime/helpers/timestamp_packet.h

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -110,14 +110,19 @@ struct TimestampPacketHelper {
110110
}
111111
}
112112

113+
// Returns the number of bytes taken by one MI_SEMAPHORE_WAIT command plus one
// MI_ATOMIC command of the given GfxFamily, i.e. the per-node size that
// getRequiredCmdStreamSize multiplies by the node count.
template <typename GfxFamily>
114+
static size_t getRequiredCmdStreamSizeForNodeDependency() {
115+
return sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT) + sizeof(typename GfxFamily::MI_ATOMIC);
116+
}
117+
113118
// Returns the total byte size for all dependencies in csrDependencies: the
// number of nodes summed over every container, multiplied by the size of one
// MI_SEMAPHORE_WAIT plus one MI_ATOMIC. The removed and the added return
// statements below compute the same value.
template <typename GfxFamily>
114119
static size_t getRequiredCmdStreamSize(const CsrDependencies &csrDependencies) {
115120
size_t totalNodesCount = 0;
116121
// Count the nodes held by each container in the dependency list.
for (auto timestampPacketContainer : csrDependencies) {
117122
totalNodesCount += timestampPacketContainer->peekNodes().size();
118123
}
119124

120-
return totalNodesCount * (sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT) + sizeof(typename GfxFamily::MI_ATOMIC));
125+
return totalNodesCount * getRequiredCmdStreamSizeForNodeDependency<GfxFamily>();
121126
}
122127
};
123128
} // namespace NEO

unit_tests/command_queue/enqueue_command_without_kernel_tests.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,7 @@ HWTEST_F(EnqueueHandlerTest, GivenCommandStreamWithoutKernelAndZeroSurfacesWhenE
4949
std::unique_ptr<MockCommandQueueWithCacheFlush<FamilyType>> mockCmdQ(new MockCommandQueueWithCacheFlush<FamilyType>(context, pDevice, 0));
5050

5151
mockCmdQ->commandRequireCacheFlush = true;
52-
mockCmdQ->template enqueueHandler<CL_COMMAND_MARKER>(nullptr, 0, false, nullptr, 0, nullptr, nullptr);
52+
mockCmdQ->template enqueueHandler<CL_COMMAND_MARKER>(nullptr, 0, false, false, nullptr, 0, nullptr, nullptr);
5353
EXPECT_EQ(mockCmdQ->getCS(0).getUsed(), 0u);
5454
}
5555
HWTEST_F(EnqueueHandlerTest, givenTimestampPacketWriteEnabledAndCommandWithCacheFlushWhenEnqueueingHandlerThenObtainNewStamp) {
@@ -63,7 +63,7 @@ HWTEST_F(EnqueueHandlerTest, givenTimestampPacketWriteEnabledAndCommandWithCache
6363

6464
cl_event event;
6565

66-
mockCmdQ->template enqueueHandler<CL_COMMAND_MARKER>(nullptr, 0, false, nullptr, 0, nullptr, &event);
66+
mockCmdQ->template enqueueHandler<CL_COMMAND_MARKER>(nullptr, 0, false, false, nullptr, 0, nullptr, &event);
6767
auto node1 = mockCmdQ->timestampPacketContainer->peekNodes().at(0);
6868
EXPECT_NE(nullptr, node1);
6969
clReleaseEvent(event);
@@ -79,7 +79,7 @@ HWTEST_F(EnqueueHandlerTest, givenTimestampPacketWriteDisabledAndCommandWithCach
7979

8080
cl_event event;
8181

82-
mockCmdQ->template enqueueHandler<CL_COMMAND_MARKER>(nullptr, 0, false, nullptr, 0, nullptr, &event);
82+
mockCmdQ->template enqueueHandler<CL_COMMAND_MARKER>(nullptr, 0, false, false, nullptr, 0, nullptr, &event);
8383
auto container = mockCmdQ->timestampPacketContainer.get();
8484
EXPECT_EQ(nullptr, container);
8585
clReleaseEvent(event);

unit_tests/command_queue/enqueue_handler_tests.cpp

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -67,7 +67,7 @@ HWTEST_F(EnqueueHandlerTest, givenEnqueueHandlerWithKernelSplitWhenAubCsrIsActiv
6767
auto mockCmdQ = std::unique_ptr<MockCommandQueueHw<FamilyType>>(new MockCommandQueueHw<FamilyType>(context, pDevice, 0));
6868
MockMultiDispatchInfo multiDispatchInfo(std::vector<Kernel *>({kernel1.mockKernel, kernel2.mockKernel}));
6969

70-
mockCmdQ->template enqueueHandler<CL_COMMAND_WRITE_BUFFER>(nullptr, 0, true, multiDispatchInfo, 0, nullptr, nullptr);
70+
mockCmdQ->template enqueueHandler<CL_COMMAND_WRITE_BUFFER>(nullptr, 0, true, false, multiDispatchInfo, 0, nullptr, nullptr);
7171

7272
EXPECT_TRUE(aubCsr->addAubCommentCalled);
7373

@@ -301,6 +301,7 @@ HWTEST_F(EnqueueHandlerTest, enqueueBlockedWithoutReturnEventCreatesVirtualEvent
301301
mockCmdQ->template enqueueHandler<CL_COMMAND_NDRANGE_KERNEL>(nullptr,
302302
0,
303303
blocking,
304+
false,
304305
multiDispatchInfo,
305306
0,
306307
nullptr,
@@ -333,6 +334,7 @@ HWTEST_F(EnqueueHandlerTest, enqueueBlockedSetsVirtualEventAsCurrentCmdQVirtualE
333334
mockCmdQ->template enqueueHandler<CL_COMMAND_NDRANGE_KERNEL>(nullptr,
334335
0,
335336
blocking,
337+
false,
336338
multiDispatchInfo,
337339
0,
338340
nullptr,
@@ -357,6 +359,7 @@ HWTEST_F(EnqueueHandlerTest, enqueueWithOutputEventRegistersEvent) {
357359
mockCmdQ->template enqueueHandler<CL_COMMAND_NDRANGE_KERNEL>(nullptr,
358360
0,
359361
blocking,
362+
false,
360363
multiDispatchInfo,
361364
0,
362365
nullptr,
@@ -433,6 +436,7 @@ HWTEST_F(EnqueueHandlerTest, givenExternallySynchronizedParentEventWhenRequestin
433436
mockCmdQ->template enqueueHandler<CL_COMMAND_MARKER>(nullptr,
434437
0,
435438
blocking,
439+
false,
436440
emptyDispatchInfo,
437441
1U,
438442
&inEv,
@@ -458,6 +462,7 @@ HWTEST_F(EnqueueHandlerTest, givenEnqueueHandlerWhenSubCaptureIsOffThenActivateS
458462
mockCmdQ->template enqueueHandler<CL_COMMAND_NDRANGE_KERNEL>(nullptr,
459463
0,
460464
false,
465+
false,
461466
multiDispatchInfo,
462467
0,
463468
nullptr,
@@ -480,6 +485,7 @@ HWTEST_F(EnqueueHandlerTest, givenEnqueueHandlerWhenSubCaptureIsOnThenActivateSu
480485
mockCmdQ->template enqueueHandler<CL_COMMAND_NDRANGE_KERNEL>(nullptr,
481486
0,
482487
false,
488+
false,
483489
multiDispatchInfo,
484490
0,
485491
nullptr,
@@ -505,6 +511,7 @@ HWTEST_F(EnqueueHandlerTestBasic, givenEnqueueHandlerWhenCommandIsBlokingThenCom
505511
mockCmdQ->template enqueueHandler<CL_COMMAND_WRITE_BUFFER>(nullptr,
506512
0,
507513
true,
514+
false,
508515
multiDispatchInfo,
509516
0,
510517
nullptr,

unit_tests/helpers/timestamp_packet_tests.cpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -351,6 +351,11 @@ HWTEST_F(TimestampPacketTests, givenEventsRequestWithEventsWithoutTimestampsWhen
351351
EXPECT_EQ(expectedSize, TimestampPacketHelper::getRequiredCmdStreamSize<FamilyType>(csrDepsSize3));
352352
}
353353

354+
// Checks that getRequiredCmdStreamSizeForNodeDependency<FamilyType>() equals
// sizeof(MI_SEMAPHORE_WAIT) + sizeof(MI_ATOMIC) for the family under test.
HWTEST_F(TimestampPacketTests, whenEstimatingSizeForNodeDependencyThenReturnCorrectValue) {
355+
size_t expectedSize = sizeof(typename FamilyType::MI_SEMAPHORE_WAIT) + sizeof(typename FamilyType::MI_ATOMIC);
356+
EXPECT_EQ(expectedSize, TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependency<FamilyType>());
357+
}
358+
354359
HWCMDTEST_F(IGFX_GEN8_CORE, TimestampPacketTests, givenTimestampPacketWhenDispatchingGpuWalkerThenAddTwoPcForLastWalker) {
355360
using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER;
356361
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

0 commit comments

Comments
 (0)