Skip to content

Commit 5fdcb14

Browse files
WSL fallback to staging buffer on blocking reads
Signed-off-by: Kamil Diedrich <[email protected]>
1 parent 32592d9 commit 5fdcb14

File tree

14 files changed

+220
-17
lines changed

14 files changed

+220
-17
lines changed

opencl/source/built_ins/builtins_dispatch_builder.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -52,6 +52,7 @@ struct BuiltinOpParams {
5252
size_t dstSlicePitch = 0;
5353
uint32_t srcMipLevel = 0;
5454
uint32_t dstMipLevel = 0;
55+
void *userPtrForPostOperationCpuCopy = nullptr;
5556
};
5657

5758
class BuiltinDispatchInfoBuilder {

opencl/source/command_queue/command_queue.cpp

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -194,7 +194,7 @@ bool CommandQueue::isCompleted(uint32_t gpgpuTaskCount, CopyEngineState bcsState
194194
return false;
195195
}
196196

197-
void CommandQueue::waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) {
197+
void CommandQueue::waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList) {
198198
WAIT_ENTER()
199199

200200
DBG_LOG(LogTaskCounts, __FUNCTION__, "Waiting for taskCount:", gpgpuTaskCountToWait);
@@ -218,7 +218,11 @@ void CommandQueue::waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEn
218218
bcsCsr->waitForTaskCountAndCleanTemporaryAllocationList(copyEngine.taskCount);
219219
}
220220

221-
getGpgpuCommandStreamReceiver().waitForTaskCountAndCleanTemporaryAllocationList(gpgpuTaskCountToWait);
221+
if (cleanTemporaryAllocationList) {
222+
getGpgpuCommandStreamReceiver().waitForTaskCountAndCleanTemporaryAllocationList(gpgpuTaskCountToWait);
223+
} else {
224+
getGpgpuCommandStreamReceiver().waitForTaskCount(gpgpuTaskCountToWait);
225+
}
222226

223227
WAIT_LEAVE()
224228
}
@@ -930,7 +934,7 @@ void CommandQueue::aubCaptureHook(bool &blocking, bool &clearAllDependencies, co
930934
}
931935
}
932936

933-
void CommandQueue::waitForAllEngines(bool blockedQueue, PrintfHandler *printfHandler) {
937+
void CommandQueue::waitForAllEngines(bool blockedQueue, PrintfHandler *printfHandler, bool cleanTemporaryAllocationsList) {
934938
if (blockedQueue) {
935939
while (isQueueBlocked()) {
936940
}
@@ -947,7 +951,7 @@ void CommandQueue::waitForAllEngines(bool blockedQueue, PrintfHandler *printfHan
947951
activeBcsStates.push_back(state);
948952
}
949953
}
950-
waitUntilComplete(taskCount, activeBcsStates, flushStamp->peekStamp(), false);
954+
waitUntilComplete(taskCount, activeBcsStates, flushStamp->peekStamp(), false, cleanTemporaryAllocationsList);
951955

952956
if (printfHandler) {
953957
printfHandler->printEnqueueOutput();

opencl/source/command_queue/command_queue.h

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -217,8 +217,14 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
217217

218218
MOCKABLE_VIRTUAL bool isQueueBlocked();
219219

220-
MOCKABLE_VIRTUAL void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep);
221-
MOCKABLE_VIRTUAL void waitForAllEngines(bool blockedQueue, PrintfHandler *printfHandler);
220+
MOCKABLE_VIRTUAL void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList);
221+
// Four-argument convenience overload: forwards to the five-argument
// waitUntilComplete with cleanTemporaryAllocationList fixed to true.
MOCKABLE_VIRTUAL void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) {
    constexpr bool cleanTemporaryAllocationList = true;
    waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, cleanTemporaryAllocationList);
}
224+
MOCKABLE_VIRTUAL void waitForAllEngines(bool blockedQueue, PrintfHandler *printfHandler, bool cleanTemporaryAllocationsList);
225+
// Two-argument convenience overload: forwards to the three-argument
// waitForAllEngines with cleanTemporaryAllocationsList fixed to true.
MOCKABLE_VIRTUAL void waitForAllEngines(bool blockedQueue, PrintfHandler *printfHandler) {
    constexpr bool cleanTemporaryAllocationsList = true;
    waitForAllEngines(blockedQueue, printfHandler, cleanTemporaryAllocationsList);
}
222228

223229
static uint32_t getTaskLevelFromWaitList(uint32_t taskLevel,
224230
cl_uint numEventsInWaitList,

opencl/source/command_queue/enqueue_common.h

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -409,7 +409,18 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
409409
commandStreamReceiverOwnership.unlock();
410410

411411
if (blocking) {
412-
waitForAllEngines(blockQueue, (blockQueue ? nullptr : printfHandler.get()));
412+
auto &builtinOpParams = multiDispatchInfo.peekBuiltinOpParams();
413+
if (builtinOpParams.userPtrForPostOperationCpuCopy) {
414+
waitForAllEngines(blockQueue, (blockQueue ? nullptr : printfHandler.get()), false);
415+
auto hostPtrAlloc = builtinOpParams.transferAllocation;
416+
UNRECOVERABLE_IF(nullptr == hostPtrAlloc);
417+
auto size = hostPtrAlloc->getUnderlyingBufferSize();
418+
[[maybe_unused]] int cpuCopyStatus = memcpy_s(builtinOpParams.userPtrForPostOperationCpuCopy, size, hostPtrAlloc->getUnderlyingBuffer(), size);
419+
DEBUG_BREAK_IF(cpuCopyStatus != 0);
420+
waitForAllEngines(blockQueue, (blockQueue ? nullptr : printfHandler.get()), true);
421+
} else {
422+
waitForAllEngines(blockQueue, (blockQueue ? nullptr : printfHandler.get()), true);
423+
}
413424
}
414425
if (migratedMemory) {
415426
getGpgpuCommandStreamReceiver().flushBatchedSubmissions();

opencl/source/command_queue/enqueue_read_image.h

Lines changed: 14 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -68,6 +68,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadImage(
6868
GeneralSurface mapSurface;
6969
Surface *surfaces[] = {&srcImgSurf, nullptr};
7070

71+
bool tempAllocFallback = false;
7172
if (mapAllocation) {
7273
surfaces[1] = &mapSurface;
7374
mapSurface.setGraphicsAllocation(mapAllocation);
@@ -81,7 +82,16 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadImage(
8182
region[2] != 0) {
8283
bool status = csr.createAllocationForHostSurface(hostPtrSurf, true);
8384
if (!status) {
84-
return CL_OUT_OF_RESOURCES;
85+
if (CL_TRUE == blockingRead) {
86+
hostPtrSurf.setIsPtrCopyAllowed(true);
87+
status = csr.createAllocationForHostSurface(hostPtrSurf, true);
88+
if (!status) {
89+
return CL_OUT_OF_RESOURCES;
90+
}
91+
tempAllocFallback = true;
92+
} else {
93+
return CL_OUT_OF_RESOURCES;
94+
}
8595
}
8696
dstPtr = reinterpret_cast<void *>(hostPtrSurf.getAllocation()->getGpuAddress());
8797
}
@@ -102,6 +112,9 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadImage(
102112
dc.srcMipLevel = findMipLevel(srcImage->getImageDesc().image_type, origin);
103113
}
104114
dc.transferAllocation = mapAllocation ? mapAllocation : hostPtrSurf.getAllocation();
115+
if (tempAllocFallback) {
116+
dc.userPtrForPostOperationCpuCopy = ptr;
117+
}
105118

106119
auto eBuiltInOps = EBuiltInOps::CopyImage3dToBuffer;
107120
MultiDispatchInfo dispatchInfo(dc);

opencl/test/unit_test/command_queue/command_queue_tests.cpp

Lines changed: 56 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -808,7 +808,7 @@ struct WaitForQueueCompletionTests : public ::testing::Test {
808808
template <typename Family>
809809
struct MyCmdQueue : public CommandQueueHw<Family> {
810810
MyCmdQueue(Context *context, ClDevice *device) : CommandQueueHw<Family>(context, device, nullptr, false){};
811-
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override {
811+
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList) override {
812812
requestedUseQuickKmdSleep = useQuickKmdSleep;
813813
waitUntilCompleteCounter++;
814814
}
@@ -855,6 +855,61 @@ HWTEST_F(WaitForQueueCompletionTests, whenFinishIsCalledThenCallWaitWithoutQuick
855855
EXPECT_FALSE(cmdQ->requestedUseQuickKmdSleep);
856856
}
857857

858+
template <class GfxFamily>
859+
class CommandStreamReceiverHwMock : public CommandStreamReceiverHw<GfxFamily> {
860+
public:
861+
CommandStreamReceiverHwMock(ExecutionEnvironment &executionEnvironment,
862+
uint32_t rootDeviceIndex,
863+
const DeviceBitfield deviceBitfield)
864+
: CommandStreamReceiverHw<GfxFamily>(executionEnvironment, rootDeviceIndex, deviceBitfield) {}
865+
bool wiatForTaskCountCalled = false;
866+
867+
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) override {
868+
return;
869+
}
870+
871+
void waitForTaskCount(uint32_t requiredTaskCount) override {
872+
wiatForTaskCountCalled = true;
873+
return;
874+
}
875+
};
876+
877+
struct WaitUntilCompletionTests : public ::testing::Test {
878+
template <typename Family>
879+
struct MyCmdQueue : public CommandQueueHw<Family> {
880+
public:
881+
using CommandQueue::gpgpuEngine;
882+
883+
MyCmdQueue(Context *context, ClDevice *device) : CommandQueueHw<Family>(context, device, nullptr, false){};
884+
};
885+
886+
void SetUp() override {
887+
device = std::make_unique<MockClDevice>(MockClDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
888+
context.reset(new MockContext(device.get()));
889+
}
890+
891+
std::unique_ptr<MockClDevice> device;
892+
std::unique_ptr<MockContext> context;
893+
};
894+
895+
HWTEST_F(WaitUntilCompletionTests, givenCommandQueueAndCleanTemporaryAllocationListWhenWaitUntilCompleteThenWaitForTaskCountIsCalled) {
896+
std::unique_ptr<CommandStreamReceiverHwMock<FamilyType>> cmdStream(new CommandStreamReceiverHwMock<FamilyType>(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield()));
897+
cmdStream->initializeTagAllocation();
898+
std::unique_ptr<MyCmdQueue<FamilyType>> cmdQ(new MyCmdQueue<FamilyType>(context.get(), device.get()));
899+
CommandStreamReceiver *oldCommandStreamReceiver = cmdQ->gpgpuEngine->commandStreamReceiver;
900+
901+
cmdQ->gpgpuEngine->commandStreamReceiver = cmdStream.get();
902+
uint32_t taskCount = 0u;
903+
StackVec<CopyEngineState, bcsInfoMaskSize> activeBcsStates{};
904+
cmdQ->waitUntilComplete(taskCount, activeBcsStates, cmdQ->flushStamp->peekStamp(), false, false);
905+
906+
auto cmdStreamPtr = &device->getGpgpuCommandStreamReceiver();
907+
908+
EXPECT_TRUE(static_cast<CommandStreamReceiverHwMock<FamilyType> *>(cmdStreamPtr)->wiatForTaskCountCalled);
909+
910+
cmdQ->gpgpuEngine->commandStreamReceiver = oldCommandStreamReceiver;
911+
}
912+
858913
TEST(CommandQueue, givenEnqueueAcquireSharedObjectsWhenNoObjectsThenReturnSuccess) {
859914
MockContext context;
860915
MockCommandQueue cmdQ(&context, nullptr, 0, false);

opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -111,9 +111,9 @@ struct EnqueueHandlerWithAubSubCaptureTests : public EnqueueHandlerTest {
111111
public:
112112
MockCmdQWithAubSubCapture(Context *context, ClDevice *device) : CommandQueueHw<FamilyType>(context, device, nullptr, false) {}
113113

114-
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override {
114+
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList) override {
115115
waitUntilCompleteCalled = true;
116-
CommandQueueHw<FamilyType>::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep);
116+
CommandQueueHw<FamilyType>::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, cleanTemporaryAllocationList);
117117
}
118118

119119
void obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer &previousNodes, bool clearAllDependencies, CommandStreamReceiver &csr) override {

opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -801,9 +801,9 @@ struct EnqueueAuxKernelTests : public EnqueueKernelTest {
801801
auxTranslationDirection);
802802
}
803803

804-
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override {
804+
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList) override {
805805
waitCalled++;
806-
MockCommandQueueHw<FamilyType>::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep);
806+
MockCommandQueueHw<FamilyType>::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, cleanTemporaryAllocationList);
807807
}
808808

809809
std::vector<AuxTranslationDirection> auxTranslationDirections;

opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp

Lines changed: 99 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -93,6 +93,105 @@ HWTEST_F(EnqueueReadImageTest, whenEnqueueReadImageThenBuiltinKernelIsResolved)
9393
pCmdQ->finish();
9494
}
9595

96+
template <typename GfxFamily>
97+
struct CreateAllocationForHostSurfaceFailCsr : public CommandStreamReceiverHw<GfxFamily> {
98+
using CommandStreamReceiverHw<GfxFamily>::CommandStreamReceiverHw;
99+
100+
bool createAllocationForHostSurface(HostPtrSurface &surface, bool requiresL3Flush) override {
101+
return CL_FALSE;
102+
}
103+
};
104+
105+
// Non-blocking read with a receiver that cannot create the host surface
// allocation: the enqueue is expected to return CL_OUT_OF_RESOURCES.
HWTEST_F(EnqueueReadImageTest, givenCommandQueueAndFailingAllocationForHostSurfaceWhenEnqueueReadImageThenOutOfResourceIsReturned) {
    MockCommandQueueHw<FamilyType> cmdQ(context, pClDevice, nullptr);
    auto failingCsr = std::make_unique<CreateAllocationForHostSurfaceFailCsr<FamilyType>>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    failingCsr->setupContext(*pDevice->getDefaultEngine().osContext);

    // Swap the failing receiver in; the original is put back before the test ends.
    CommandStreamReceiver *&engineCsr = cmdQ.gpgpuEngine->commandStreamReceiver;
    CommandStreamReceiver *originalCsr = engineCsr;
    engineCsr = failingCsr.get();

    auto image = Image2dHelper<>::create(context);
    auto status = cmdQ.enqueueReadImage(image, CL_FALSE,
                                        EnqueueReadImageTraits::origin,
                                        EnqueueReadImageTraits::region,
                                        EnqueueReadImageTraits::rowPitch,
                                        EnqueueReadImageTraits::slicePitch,
                                        EnqueueReadImageTraits::hostPtr,
                                        EnqueueReadImageTraits::mapAllocation,
                                        0u,
                                        nullptr,
                                        nullptr);
    EXPECT_EQ(CL_OUT_OF_RESOURCES, status);

    engineCsr = originalCsr;
    image->release();
}
128+
129+
// Blocking read with a receiver that cannot create the host surface
// allocation at all: the enqueue is expected to return CL_OUT_OF_RESOURCES.
HWTEST_F(EnqueueReadImageTest, givenCommandQueueAndFailingAllocationForHostSurfaceWhenBlockingEnqueueReadImageThenOutOfResourceIsReturned) {
    MockCommandQueueHw<FamilyType> cmdQ(context, pClDevice, nullptr);
    auto failingCsr = std::make_unique<CreateAllocationForHostSurfaceFailCsr<FamilyType>>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    failingCsr->setupContext(*pDevice->getDefaultEngine().osContext);

    // Swap the failing receiver in; the original is put back before the test ends.
    CommandStreamReceiver *&engineCsr = cmdQ.gpgpuEngine->commandStreamReceiver;
    CommandStreamReceiver *originalCsr = engineCsr;
    engineCsr = failingCsr.get();

    auto image = Image2dHelper<>::create(context);
    auto status = cmdQ.enqueueReadImage(image, CL_TRUE,
                                        EnqueueReadImageTraits::origin,
                                        EnqueueReadImageTraits::region,
                                        EnqueueReadImageTraits::rowPitch,
                                        EnqueueReadImageTraits::slicePitch,
                                        EnqueueReadImageTraits::hostPtr,
                                        EnqueueReadImageTraits::mapAllocation,
                                        0u,
                                        nullptr,
                                        nullptr);
    EXPECT_EQ(CL_OUT_OF_RESOURCES, status);

    engineCsr = originalCsr;
    image->release();
}
152+
153+
template <typename GfxFamily>
154+
struct CreateAllocationForHostSurfaceCsr : public CommandStreamReceiverHw<GfxFamily> {
155+
using CommandStreamReceiverHw<GfxFamily>::CommandStreamReceiverHw;
156+
157+
bool createAllocationForHostSurface(HostPtrSurface &surface, bool requiresL3Flush) override {
158+
if (surface.peekIsPtrCopyAllowed()) {
159+
return CommandStreamReceiverHw<GfxFamily>::createAllocationForHostSurface(surface, requiresL3Flush);
160+
} else {
161+
return CL_FALSE;
162+
}
163+
}
164+
165+
CompletionStamp flushTask(LinearStream &commandStream, size_t commandStreamStart,
166+
const IndirectHeap &dsh, const IndirectHeap &ioh, const IndirectHeap &ssh,
167+
uint32_t taskLevel, DispatchFlags &dispatchFlags, Device &device) override {
168+
return CompletionStamp{0u, 0u, static_cast<FlushStamp>(0u)};
169+
}
170+
};
171+
172+
// Blocking read with a receiver that only creates the host surface allocation
// when pointer copy is allowed: the enqueue is expected to succeed.
HWTEST_F(EnqueueReadImageTest, givenCommandQueueAndPtrCopyAllowedForHostSurfaceWhenBlockingEnqueueReadImageThenSuccessIsReturned) {
    // The receiver is declared before the queue, so it is destroyed after it.
    auto ptrCopyCsr = std::make_unique<CreateAllocationForHostSurfaceCsr<FamilyType>>(*pDevice->getExecutionEnvironment(), pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
    auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context, pClDevice, nullptr);
    ptrCopyCsr->setupContext(*pDevice->getDefaultEngine().osContext);

    // Swap the receiver in; the original is put back before the test ends.
    CommandStreamReceiver *&engineCsr = cmdQ->gpgpuEngine->commandStreamReceiver;
    CommandStreamReceiver *originalCsr = engineCsr;
    engineCsr = ptrCopyCsr.get();
    ptrCopyCsr->initializeTagAllocation();

    auto status = cmdQ->enqueueReadImage(srcImage, CL_TRUE,
                                         EnqueueReadImageTraits::origin,
                                         EnqueueReadImageTraits::region,
                                         EnqueueReadImageTraits::rowPitch,
                                         EnqueueReadImageTraits::slicePitch,
                                         EnqueueReadImageTraits::hostPtr,
                                         EnqueueReadImageTraits::mapAllocation,
                                         0u,
                                         nullptr,
                                         nullptr);
    EXPECT_EQ(CL_SUCCESS, status);

    engineCsr = originalCsr;
}
194+
96195
HWTEST_F(EnqueueReadImageTest, givenMultiRootDeviceImageWhenEnqueueReadImageThenKernelRequiresMigration) {
97196

98197
MockDefaultContext context;

opencl/test/unit_test/mocks/mock_command_queue.h

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -86,6 +86,11 @@ class MockCommandQueue : public CommandQueue {
8686
return writeBufferRetValue;
8787
}
8888

89+
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList) override {
90+
latestTaskCountWaited = gpgpuTaskCountToWait;
91+
return CommandQueue::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, cleanTemporaryAllocationList);
92+
}
93+
8994
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override {
9095
latestTaskCountWaited = gpgpuTaskCountToWait;
9196
return CommandQueue::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep);
@@ -325,9 +330,9 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
325330
useBcsCsrOnNotifyEnabled = notifyBcsCsr;
326331
}
327332

328-
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override {
333+
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList) override {
329334
latestTaskCountWaited = gpgpuTaskCountToWait;
330-
return BaseClass::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep);
335+
return BaseClass::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, cleanTemporaryAllocationList);
331336
}
332337

333338
bool isCacheFlushForBcsRequired() const override {

0 commit comments

Comments
 (0)