Skip to content

Commit b498fea

Browse files
Removed unused atomic latestTaskCountWaited
Related-To: NEO-3869 Change-Id: If7e9db2645c9c9c38d65a0b200960e387441c9ca Signed-off-by: Konstanty Misiak <[email protected]>
1 parent bd9cd46 commit b498fea

File tree

11 files changed

+148
-105
lines changed

11 files changed

+148
-105
lines changed

runtime/command_queue/command_queue.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -148,9 +148,7 @@ void CommandQueue::waitUntilComplete(uint32_t taskCountToWait, FlushStamp flushS
148148

149149
getGpgpuCommandStreamReceiver().waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait,
150150
useQuickKmdSleep, forcePowerSavingMode);
151-
152151
DEBUG_BREAK_IF(getHwTag() < taskCountToWait);
153-
latestTaskCountWaited = taskCountToWait;
154152

155153
if (auto bcsCsr = getBcsCommandStreamReceiver()) {
156154
bcsCsr->waitForTaskCountWithKmdNotifyFallback(bcsTaskCount, 0, false, false);

runtime/command_queue/command_queue.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -415,8 +415,6 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
415415

416416
std::unique_ptr<FlushStampTracker> flushStamp;
417417

418-
std::atomic<uint32_t> latestTaskCountWaited{std::numeric_limits<uint32_t>::max()};
419-
420418
// virtual event that holds last Enqueue information
421419
Event *virtualEvent = nullptr;
422420

unit_tests/command_queue/command_queue_hw_tests.cpp

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -181,44 +181,44 @@ HWTEST_F(CommandQueueHwTest, WhenAddMapUnmapToWaitlistEventsThenDependenciesAreN
181181

182182
HWTEST_F(CommandQueueHwTest, givenMapCommandWhenZeroStateCommandIsSubmittedThenTaskCountIsNotBeingWaited) {
183183
auto buffer = new MockBuffer;
184-
CommandQueueHw<FamilyType> *pHwQ = reinterpret_cast<CommandQueueHw<FamilyType> *>(pCmdQ);
184+
MockCommandQueueHw<FamilyType> mockCmdQueueHw(context, pClDevice, nullptr);
185185

186186
MockEventBuilder eventBuilder;
187187
MemObjSizeArray size = {{1, 1, 1}};
188188
MemObjOffsetArray offset = {{0, 0, 0}};
189-
pHwQ->enqueueBlockedMapUnmapOperation(nullptr,
190-
0,
191-
MAP,
192-
buffer,
193-
size, offset, false,
194-
eventBuilder);
189+
mockCmdQueueHw.enqueueBlockedMapUnmapOperation(nullptr,
190+
0,
191+
MAP,
192+
buffer,
193+
size, offset, false,
194+
eventBuilder);
195195

196-
EXPECT_NE(nullptr, pHwQ->virtualEvent);
197-
pHwQ->virtualEvent->setStatus(CL_COMPLETE);
196+
EXPECT_NE(nullptr, mockCmdQueueHw.virtualEvent);
197+
mockCmdQueueHw.virtualEvent->setStatus(CL_COMPLETE);
198+
EXPECT_EQ(std::numeric_limits<uint32_t>::max(), mockCmdQueueHw.latestTaskCountWaited);
198199

199-
EXPECT_EQ(std::numeric_limits<uint32_t>::max(), pHwQ->latestTaskCountWaited);
200200
buffer->decRefInternal();
201201
}
202202

203203
HWTEST_F(CommandQueueHwTest, givenMapCommandWhenZeroStateCommandIsSubmittedOnNonZeroCopyBufferThenTaskCountIsBeingWaited) {
204204
auto buffer = new MockBuffer;
205205
buffer->isZeroCopy = false;
206-
CommandQueueHw<FamilyType> *pHwQ = reinterpret_cast<CommandQueueHw<FamilyType> *>(pCmdQ);
206+
MockCommandQueueHw<FamilyType> mockCmdQueueHw(context, pClDevice, nullptr);
207207

208208
MockEventBuilder eventBuilder;
209209
MemObjSizeArray size = {{1, 1, 1}};
210210
MemObjOffsetArray offset = {{0, 0, 0}};
211-
pHwQ->enqueueBlockedMapUnmapOperation(nullptr,
212-
0,
213-
MAP,
214-
buffer,
215-
size, offset, false,
216-
eventBuilder);
211+
mockCmdQueueHw.enqueueBlockedMapUnmapOperation(nullptr,
212+
0,
213+
MAP,
214+
buffer,
215+
size, offset, false,
216+
eventBuilder);
217217

218-
EXPECT_NE(nullptr, pHwQ->virtualEvent);
219-
pHwQ->virtualEvent->setStatus(CL_COMPLETE);
218+
EXPECT_NE(nullptr, mockCmdQueueHw.virtualEvent);
219+
mockCmdQueueHw.virtualEvent->setStatus(CL_COMPLETE);
220+
EXPECT_EQ(1u, mockCmdQueueHw.latestTaskCountWaited);
220221

221-
EXPECT_EQ(1u, pHwQ->latestTaskCountWaited);
222222
buffer->decRefInternal();
223223
}
224224

unit_tests/command_queue/enqueue_barrier_tests.cpp

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,19 @@
11
/*
2-
* Copyright (C) 2017-2019 Intel Corporation
2+
* Copyright (C) 2017-2020 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
66
*/
77

8+
#include "runtime/command_queue/command_queue_hw.h"
89
#include "runtime/command_queue/gpgpu_walker.h"
910
#include "runtime/command_stream/command_stream_receiver.h"
1011
#include "runtime/event/user_event.h"
1112
#include "runtime/helpers/hardware_commands_helper.h"
1213
#include "test.h"
1314
#include "unit_tests/command_queue/command_enqueue_fixture.h"
1415
#include "unit_tests/gen_common/gen_cmd_parse.h"
16+
#include "unit_tests/mocks/mock_command_queue.h"
1517

1618
using namespace NEO;
1719

@@ -187,10 +189,12 @@ HWTEST_F(BarrierTest, WhenEnqueingBarrierWithWaitListThenDependenciesShouldSync)
187189
delete pEvent;
188190
}
189191
HWTEST_F(BarrierTest, givenNotBlockedCommandQueueAndEnqueueBarrierWithWaitlistReturningEventWhenCallIsMadeThenDontWaitUntilEventIsSignaled) {
192+
MockCommandQueueHw<FamilyType> mockCmdQueue(context, pClDevice, nullptr);
193+
190194
// In N:1, event.level <= pCmdQ.level
191-
Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 5, 15);
192-
Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 6, 16);
193-
Event event3(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 1, 17);
195+
Event event1(&mockCmdQueue, CL_COMMAND_NDRANGE_KERNEL, 5, 15);
196+
Event event2(&mockCmdQueue, CL_COMMAND_NDRANGE_KERNEL, 6, 16);
197+
Event event3(&mockCmdQueue, CL_COMMAND_NDRANGE_KERNEL, 1, 17);
194198
cl_event eventWaitList[] =
195199
{
196200
&event1,
@@ -199,17 +203,18 @@ HWTEST_F(BarrierTest, givenNotBlockedCommandQueueAndEnqueueBarrierWithWaitlistRe
199203
cl_uint numEventsInWaitList = sizeof(eventWaitList) / sizeof(eventWaitList[0]);
200204
cl_event event = nullptr;
201205

202-
auto latestTaskCountWaitedBeforeEnqueue = this->pCmdQ->latestTaskCountWaited.load();
203-
auto retVal = pCmdQ->enqueueBarrierWithWaitList(
206+
auto latestTaskCountWaitedBeforeEnqueue = mockCmdQueue.latestTaskCountWaited.load();
207+
auto retVal = mockCmdQueue.enqueueBarrierWithWaitList(
204208
numEventsInWaitList,
205209
eventWaitList,
206210
&event);
207211

208-
auto &csr = pCmdQ->getGpgpuCommandStreamReceiver();
212+
auto &csr = mockCmdQueue.getGpgpuCommandStreamReceiver();
209213

210214
EXPECT_EQ(CL_SUCCESS, retVal);
211-
EXPECT_EQ(latestTaskCountWaitedBeforeEnqueue, this->pCmdQ->latestTaskCountWaited);
215+
EXPECT_EQ(latestTaskCountWaitedBeforeEnqueue, mockCmdQueue.latestTaskCountWaited);
212216
auto pEvent = castToObject<Event>(event);
217+
EXPECT_NE(nullptr, pEvent);
213218

214219
if (csr.peekTimestampPacketWriteEnabled()) {
215220
EXPECT_EQ(csr.peekTaskCount(), pEvent->peekTaskCount());

unit_tests/command_queue/enqueue_kernel_2_tests.cpp

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -555,15 +555,42 @@ HWTEST_P(EnqueueKernelPrintfTest, GivenKernelWithPrintfWhenBeingDispatchedThenL3
555555
patchData.Size = 256;
556556
patchData.DataParamOffset = 64;
557557

558+
MockCommandQueueHw<FamilyType> mockCmdQueue(context, pClDevice, nullptr);
558559
MockKernelWithInternals mockKernel(*pClDevice);
560+
559561
mockKernel.crossThreadData[64] = 0;
560562
mockKernel.kernelInfo.patchInfo.pAllocateStatelessPrintfSurface = &patchData;
561-
auto &csr = pCmdQ->getGpgpuCommandStreamReceiver();
563+
auto &csr = mockCmdQueue.getGpgpuCommandStreamReceiver();
562564
auto latestSentTaskCount = csr.peekTaskCount();
563-
enqueueKernel<FamilyType, false>(mockKernel);
565+
566+
cl_uint workDim = 1;
567+
size_t globalWorkOffset[3] = {0, 0, 0};
568+
569+
cl_uint numEventsInWaitList = 0;
570+
cl_event *eventWaitList = nullptr;
571+
cl_event *event = nullptr;
572+
573+
FillValues();
574+
// Compute # of expected work items
575+
expectedWorkItems = 1;
576+
for (auto i = 0u; i < workDim; i++) {
577+
expectedWorkItems *= globalWorkSize[i];
578+
}
579+
580+
auto retVal = mockCmdQueue.enqueueKernel(
581+
mockKernel,
582+
workDim,
583+
globalWorkOffset,
584+
globalWorkSize,
585+
localWorkSize,
586+
numEventsInWaitList,
587+
eventWaitList,
588+
event);
589+
ASSERT_EQ(CL_SUCCESS, retVal);
590+
564591
auto newLatestSentTaskCount = csr.peekTaskCount();
565592
EXPECT_GT(newLatestSentTaskCount, latestSentTaskCount);
566-
EXPECT_EQ(pCmdQ->latestTaskCountWaited, newLatestSentTaskCount);
593+
EXPECT_EQ(mockCmdQueue.latestTaskCountWaited, newLatestSentTaskCount);
567594
}
568595

569596
HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueKernelPrintfTest, GivenKernelWithPrintfBlockedByEventWhenEventUnblockedThenL3CacheIsFlushed) {
@@ -575,10 +602,11 @@ HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueKernelPrintfTest, GivenKernelWithPrintfBlocke
575602
patchData.Size = 256;
576603
patchData.DataParamOffset = 64;
577604

605+
MockCommandQueueHw<FamilyType> mockCommandQueue(context, pClDevice, nullptr);
578606
MockKernelWithInternals mockKernel(*pClDevice);
579607
mockKernel.crossThreadData[64] = 0;
580608
mockKernel.kernelInfo.patchInfo.pAllocateStatelessPrintfSurface = &patchData;
581-
auto &csr = pCmdQ->getGpgpuCommandStreamReceiver();
609+
auto &csr = mockCommandQueue.getGpgpuCommandStreamReceiver();
582610
auto latestSentDcFlushTaskCount = csr.peekTaskCount();
583611

584612
cl_uint workDim = 1;
@@ -587,7 +615,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueKernelPrintfTest, GivenKernelWithPrintfBlocke
587615
FillValues();
588616

589617
cl_event blockedEvent = &userEvent;
590-
auto retVal = pCmdQ->enqueueKernel(
618+
auto retVal = mockCommandQueue.enqueueKernel(
591619
mockKernel,
592620
workDim,
593621
globalWorkOffset,
@@ -600,11 +628,11 @@ HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueKernelPrintfTest, GivenKernelWithPrintfBlocke
600628

601629
userEvent.setStatus(CL_COMPLETE);
602630

603-
parseCommands<FamilyType>(*pCmdQ);
631+
parseCommands<FamilyType>(mockCommandQueue);
604632

605633
auto newLatestSentDCFlushTaskCount = csr.peekTaskCount();
606634
EXPECT_GT(newLatestSentDCFlushTaskCount, latestSentDcFlushTaskCount);
607-
EXPECT_EQ(pCmdQ->latestTaskCountWaited, newLatestSentDCFlushTaskCount);
635+
EXPECT_EQ(mockCommandQueue.latestTaskCountWaited, newLatestSentDCFlushTaskCount);
608636
}
609637

610638
HWTEST_P(EnqueueKernelPrintfTest, GivenKernelWithPrintfBlockedByEventWhenEventUnblockedThenOutputPrinted) {

unit_tests/command_queue/enqueue_map_buffer_tests.cpp

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "unit_tests/fixtures/buffer_fixture.h"
1515
#include "unit_tests/fixtures/device_fixture.h"
1616
#include "unit_tests/mocks/mock_buffer.h"
17+
#include "unit_tests/mocks/mock_command_queue.h"
1718
#include "unit_tests/mocks/mock_context.h"
1819
#include "unit_tests/mocks/mock_kernel.h"
1920

@@ -260,7 +261,7 @@ TEST_F(EnqueueMapBufferTest, GivenValidArgsWhenMappingBufferThenSuccessIsReturne
260261
EXPECT_EQ(CL_SUCCESS, retVal);
261262
}
262263

263-
TEST_F(EnqueueMapBufferTest, givenNonBlockingReadOnlyMapBufferOnZeroCopyBufferWhenItIsCalledThenSynchronizationIsNotMadeUntilWaitForEvents) {
264+
HWTEST_F(EnqueueMapBufferTest, givenNonBlockingReadOnlyMapBufferOnZeroCopyBufferWhenItIsCalledThenSynchronizationIsNotMadeUntilWaitForEvents) {
264265
DebugManagerStateRestore dbgRestore;
265266
DebugManager.flags.EnableAsyncEventsHandler.set(false);
266267
cl_event mapEventReturned = nullptr;
@@ -285,18 +286,20 @@ TEST_F(EnqueueMapBufferTest, givenNonBlockingReadOnlyMapBufferOnZeroCopyBufferWh
285286
EXPECT_EQ(CL_SUCCESS, retVal);
286287
EXPECT_NE(nullptr, buffer);
287288

288-
auto &commandStreamReceiver = pCmdQ->getGpgpuCommandStreamReceiver();
289+
MockCommandQueueHw<FamilyType> mockCmdQueue(context, pClDevice, nullptr);
290+
291+
auto &commandStreamReceiver = mockCmdQueue.getGpgpuCommandStreamReceiver();
289292
uint32_t taskCount = commandStreamReceiver.peekTaskCount();
290293
EXPECT_EQ(0u, taskCount);
291294

292295
// enqueue something that can be finished...
293-
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, 0, &GWS, nullptr, 0, nullptr, nullptr);
296+
retVal = clEnqueueNDRangeKernel(&mockCmdQueue, kernel, 1, 0, &GWS, nullptr, 0, nullptr, nullptr);
294297
EXPECT_EQ(retVal, CL_SUCCESS);
295298

296299
EXPECT_EQ(1u, commandStreamReceiver.peekTaskCount());
297300

298301
auto ptrResult = clEnqueueMapBuffer(
299-
pCmdQ,
302+
&mockCmdQueue,
300303
buffer,
301304
CL_FALSE,
302305
CL_MAP_READ,
@@ -334,14 +337,14 @@ TEST_F(EnqueueMapBufferTest, givenNonBlockingReadOnlyMapBufferOnZeroCopyBufferWh
334337

335338
//wait for event do not sent flushTask
336339
EXPECT_EQ(1u, commandStreamReceiver.peekTaskCount());
337-
EXPECT_EQ(1u, pCmdQ->latestTaskCountWaited);
340+
EXPECT_EQ(1u, mockCmdQueue.latestTaskCountWaited);
338341

339342
EXPECT_TRUE(neoEvent->updateStatusAndCheckCompletion());
340343

341344
EXPECT_EQ(1u, callbackCalled);
342345

343346
retVal = clEnqueueUnmapMemObject(
344-
pCmdQ,
347+
&mockCmdQueue,
345348
buffer,
346349
ptrResult,
347350
0,
@@ -514,7 +517,7 @@ TEST_F(EnqueueMapBufferTest, givenNonBlockingMapBufferAfterL3IsAlreadyFlushedThe
514517
clReleaseEvent(eventReturned);
515518
}
516519

517-
TEST_F(EnqueueMapBufferTest, GivenBufferThatIsNotZeroCopyWhenNonBlockingMapIsCalledThenFinishIsCalledAndDataTransferred) {
520+
HWTEST_F(EnqueueMapBufferTest, GivenBufferThatIsNotZeroCopyWhenNonBlockingMapIsCalledThenFinishIsCalledAndDataTransferred) {
518521
const auto bufferSize = 100;
519522
auto localSize = bufferSize;
520523
char misaligned[bufferSize] = {1};
@@ -540,16 +543,18 @@ TEST_F(EnqueueMapBufferTest, GivenBufferThatIsNotZeroCopyWhenNonBlockingMapIsCal
540543
auto pBuffer = castToObject<Buffer>(buffer);
541544
ASSERT_FALSE(pBuffer->isMemObjZeroCopy());
542545

546+
MockCommandQueueHw<FamilyType> mockCmdQueue(context, pClDevice, nullptr);
547+
543548
// enqueue something that can be finished
544-
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, 0, &GWS, nullptr, 0, nullptr, nullptr);
549+
retVal = clEnqueueNDRangeKernel(&mockCmdQueue, kernel, 1, 0, &GWS, nullptr, 0, nullptr, nullptr);
545550
EXPECT_EQ(retVal, CL_SUCCESS);
546551

547-
auto &commandStreamReceiver = pCmdQ->getGpgpuCommandStreamReceiver();
552+
auto &commandStreamReceiver = mockCmdQueue.getGpgpuCommandStreamReceiver();
548553
uint32_t taskCount = commandStreamReceiver.peekTaskCount();
549554
EXPECT_EQ(1u, taskCount);
550555

551556
auto ptrResult = clEnqueueMapBuffer(
552-
pCmdQ,
557+
&mockCmdQueue,
553558
buffer,
554559
CL_FALSE,
555560
CL_MAP_READ,
@@ -566,7 +571,7 @@ TEST_F(EnqueueMapBufferTest, GivenBufferThatIsNotZeroCopyWhenNonBlockingMapIsCal
566571
commandStreamReceiver.peekTaskCount();
567572

568573
EXPECT_EQ(1u, commandStreamReceiver.peekLatestSentTaskCount());
569-
EXPECT_EQ(1u, pCmdQ->latestTaskCountWaited);
574+
EXPECT_EQ(1u, mockCmdQueue.latestTaskCountWaited);
570575

571576
retVal = clReleaseMemObject(buffer);
572577
EXPECT_EQ(CL_SUCCESS, retVal);

0 commit comments

Comments
 (0)