Skip to content

Commit 37deaf1

Browse files
lyu authored and Compute-Runtime-Automation committed
fix: serialize printf kernel accesses using device-wise locks
Related-To: LOCI-4114
Signed-off-by: Lu, Wenbin <[email protected]>
1 parent 83ac95d commit 37deaf1

22 files changed: +716 / -128 lines changed

level_zero/core/source/cmdlist/cmdlist.cpp

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222
#include "level_zero/core/source/driver/driver_handle_imp.h"
2323
#include "level_zero/core/source/event/event.h"
2424
#include "level_zero/core/source/kernel/kernel.h"
25+
#include "level_zero/core/source/kernel/kernel_imp.h"
2526

2627
namespace L0 {
2728

@@ -38,11 +39,11 @@ CommandList::~CommandList() {
3839
}
3940

4041
void CommandList::storePrintfKernel(Kernel *kernel) {
41-
auto it = std::find(this->printfKernelContainer.begin(), this->printfKernelContainer.end(),
42-
kernel);
42+
auto it = std::find_if(this->printfKernelContainer.begin(), this->printfKernelContainer.end(), [&kernel](const auto &kernelWeakPtr) { return kernelWeakPtr.lock().get() == kernel; });
4343

4444
if (it == this->printfKernelContainer.end()) {
45-
this->printfKernelContainer.push_back(kernel);
45+
auto module = static_cast<const ModuleImp *>(&static_cast<KernelImp *>(kernel)->getParentModule());
46+
this->printfKernelContainer.push_back(module->getPrintfKernelWeakPtr(kernel->toHandle()));
4647
}
4748
}
4849

level_zero/core/source/cmdlist/cmdlist.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -231,7 +231,7 @@ struct CommandList : _ze_command_list_handle_t {
231231
return indirectAllocationsAllowed;
232232
}
233233

234-
std::vector<Kernel *> &getPrintfKernelContainer() {
234+
std::vector<std::weak_ptr<Kernel>> &getPrintfKernelContainer() {
235235
return this->printfKernelContainer;
236236
}
237237

@@ -360,7 +360,7 @@ struct CommandList : _ze_command_list_handle_t {
360360
std::map<const void *, NEO::GraphicsAllocation *> hostPtrMap;
361361
NEO::PrivateAllocsToReuseContainer ownedPrivateAllocations;
362362
std::vector<NEO::GraphicsAllocation *> patternAllocations;
363-
std::vector<Kernel *> printfKernelContainer;
363+
std::vector<std::weak_ptr<Kernel>> printfKernelContainer;
364364

365365
NEO::CommandContainer commandContainer;
366366

level_zero/core/source/cmdlist/cmdlist_hw.inl

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -434,7 +434,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_
434434
if (hEvent) {
435435
event = Event::fromHandle(hEvent);
436436
if (Kernel::fromHandle(kernelHandle)->getPrintfBufferAllocation() != nullptr) {
437-
event->setKernelForPrintf(Kernel::fromHandle(kernelHandle));
437+
Kernel *kernel = Kernel::fromHandle(kernelHandle);
438+
auto module = static_cast<const ModuleImp *>(&static_cast<KernelImp *>(kernel)->getParentModule());
439+
event->setKernelForPrintf(module->getPrintfKernelWeakPtr(kernelHandle));
440+
event->setKernelWithPrintfDeviceMutex(kernel->getDevicePrintfKernelMutex());
438441
}
439442
launchParams.isHostSignalScopeEvent = event->isSignalScope(ZE_EVENT_SCOPE_FLAG_HOST);
440443
}

level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -387,7 +387,7 @@ inline ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommand
387387

388388
this->cmdQImmediate->setTaskCount(completionStamp.taskCount);
389389

390-
if (this->isSyncModeQueue || this->printfKernelContainer.size() > 0u) {
390+
if (this->isSyncModeQueue) {
391391
status = hostSynchronize(std::numeric_limits<uint64_t>::max(), completionStamp.taskCount, true);
392392
}
393393

@@ -1238,9 +1238,11 @@ bool CommandListCoreFamilyImmediate<gfxCoreFamily>::isBarrierRequired() {
12381238

12391239
template <GFXCORE_FAMILY gfxCoreFamily>
12401240
void CommandListCoreFamilyImmediate<gfxCoreFamily>::printKernelsPrintfOutput(bool hangDetected) {
1241-
size_t size = this->printfKernelContainer.size();
1242-
for (size_t i = 0; i < size; i++) {
1243-
this->printfKernelContainer[i]->printPrintfOutput(hangDetected);
1241+
for (auto &kernelWeakPtr : this->printfKernelContainer) {
1242+
std::lock_guard<std::mutex> lock(static_cast<DeviceImp *>(this->device)->printfKernelMutex);
1243+
if (!kernelWeakPtr.expired()) {
1244+
kernelWeakPtr.lock()->printPrintfOutput(hangDetected);
1245+
}
12441246
}
12451247
this->printfKernelContainer.clear();
12461248
}

level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -271,7 +271,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
271271
}
272272

273273
if (event != nullptr && kernel->getPrintfBufferAllocation() != nullptr) {
274-
event->setKernelForPrintf(kernel);
274+
auto module = static_cast<const ModuleImp *>(&static_cast<KernelImp *>(kernel)->getParentModule());
275+
event->setKernelForPrintf(module->getPrintfKernelWeakPtr(kernel->toHandle()));
276+
event->setKernelWithPrintfDeviceMutex(kernel->getDevicePrintfKernelMutex());
275277
}
276278

277279
if (this->isInOrderExecutionEnabled() && !launchParams.isKernelSplitOperation) {

level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,9 @@
2323
#include "shared/source/utilities/software_tags_manager.h"
2424

2525
#include "level_zero/core/source/cmdlist/cmdlist_hw.h"
26+
#include "level_zero/core/source/device/device.h"
2627
#include "level_zero/core/source/driver/driver_handle_imp.h"
28+
#include "level_zero/core/source/event/event.h"
2729
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h"
2830
#include "level_zero/core/source/kernel/kernel_imp.h"
2931
#include "level_zero/core/source/module/module.h"
@@ -180,7 +182,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
180182
Event *eventForInOrderExec = event;
181183
if (event) {
182184
if (kernel->getPrintfBufferAllocation() != nullptr) {
183-
event->setKernelForPrintf(kernel);
185+
auto module = static_cast<const ModuleImp *>(&static_cast<KernelImp *>(kernel)->getParentModule());
186+
event->setKernelForPrintf(module->getPrintfKernelWeakPtr(kernel->toHandle()));
187+
event->setKernelWithPrintfDeviceMutex(kernel->getDevicePrintfKernelMutex());
184188
}
185189
isHostSignalScopeEvent = event->isSignalScope(ZE_EVENT_SCOPE_FLAG_HOST);
186190
if (compactL3FlushEvent(getDcFlushRequired(event->isSignalScope()))) {

level_zero/core/source/cmdqueue/cmdqueue.cpp

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -190,9 +190,11 @@ ze_result_t CommandQueueImp::synchronizeByPollingForTaskCount(uint64_t timeout)
190190
}
191191

192192
void CommandQueueImp::printKernelsPrintfOutput(bool hangDetected) {
193-
size_t size = this->printfKernelContainer.size();
194-
for (size_t i = 0; i < size; i++) {
195-
this->printfKernelContainer[i]->printPrintfOutput(hangDetected);
193+
for (auto &kernelWeakPtr : this->printfKernelContainer) {
194+
std::lock_guard<std::mutex> lock(static_cast<DeviceImp *>(this->getDevice())->printfKernelMutex);
195+
if (!kernelWeakPtr.expired()) {
196+
kernelWeakPtr.lock()->printPrintfOutput(hangDetected);
197+
}
196198
}
197199
this->printfKernelContainer.clear();
198200
}

level_zero/core/source/cmdqueue/cmdqueue_imp.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -146,7 +146,7 @@ struct CommandQueueImp : public CommandQueue {
146146
NEO::LinearStream firstCmdListStream{};
147147
NEO::HeapContainer heapContainer;
148148
ze_command_queue_desc_t desc;
149-
std::vector<Kernel *> printfKernelContainer;
149+
std::vector<std::weak_ptr<Kernel>> printfKernelContainer;
150150

151151
Device *device = nullptr;
152152
NEO::CommandStreamReceiver *csr = nullptr;

level_zero/core/source/device/device_imp.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -132,6 +132,8 @@ struct DeviceImp : public Device, NEO::NonCopyableOrMovableClass {
132132
ze_pci_speed_ext_t pciMaxSpeed = {-1, -1, -1};
133133
Device *rootDevice = nullptr;
134134

135+
std::mutex printfKernelMutex;
136+
135137
BcsSplit bcsSplit;
136138

137139
bool resourcesReleased = false;

level_zero/core/source/event/event.h

Lines changed: 18 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
#include <chrono>
1919
#include <limits>
2020
#include <memory>
21+
#include <mutex>
2122
#include <vector>
2223

2324
struct _ze_event_handle_t {};
@@ -206,12 +207,24 @@ struct Event : _ze_event_handle_t {
206207
uint32_t getMaxKernelCount() const {
207208
return maxKernelCount;
208209
}
209-
void setKernelForPrintf(Kernel *inputKernelPtr) {
210-
kernelWithPrintf = inputKernelPtr;
210+
void setKernelForPrintf(std::weak_ptr<Kernel> inputKernelWeakPtr) {
211+
kernelWithPrintf = inputKernelWeakPtr;
211212
}
212-
Kernel *getKernelForPrintf() {
213+
std::weak_ptr<Kernel> getKernelForPrintf() {
213214
return kernelWithPrintf;
214215
}
216+
void resetKernelForPrintf() {
217+
kernelWithPrintf.reset();
218+
}
219+
void setKernelWithPrintfDeviceMutex(std::mutex *mutexPtr) {
220+
kernelWithPrintfDeviceMutex = mutexPtr;
221+
}
222+
std::mutex *getKernelWithPrintfDeviceMutex() {
223+
return kernelWithPrintfDeviceMutex;
224+
}
225+
void resetKernelWithPrintfDeviceMutex() {
226+
kernelWithPrintfDeviceMutex = nullptr;
227+
}
215228

216229
bool isSignalScope() const {
217230
return !!signalScope;
@@ -280,7 +293,8 @@ struct Event : _ze_event_handle_t {
280293
void *hostAddress = nullptr;
281294
Device *device = nullptr;
282295
EventPool *eventPool = nullptr;
283-
Kernel *kernelWithPrintf = nullptr;
296+
std::weak_ptr<Kernel> kernelWithPrintf = std::weak_ptr<Kernel>{};
297+
std::mutex *kernelWithPrintfDeviceMutex = nullptr;
284298
std::shared_ptr<InOrderExecInfo> inOrderExecInfo;
285299
CommandQueue *latestUsedCmdQueue = nullptr;
286300

0 commit comments

Comments (0)