Skip to content

Commit 110ac2c

Browse files
committed
[DeviceSanitizer] Support detecting out-of-bounds errors on sycl::buffer
As we discussed before, we intercepted buffer with usm for short-term solution. For long-term, we will draft a new ocl extension to allow user to create buffer with fixed address.
1 parent 717791b commit 110ac2c

File tree

6 files changed

+1238
-1
lines changed

6 files changed

+1238
-1
lines changed

source/loader/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,8 @@ if(UR_ENABLE_SANITIZER)
108108
${CMAKE_CURRENT_SOURCE_DIR}/../ur/ur.cpp
109109
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_allocator.cpp
110110
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_allocator.hpp
111+
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_buffer.cpp
112+
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_buffer.hpp
111113
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_interceptor.cpp
112114
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_interceptor.hpp
113115
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_libdevice.hpp
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
/*
2+
*
3+
* Copyright (C) 2024 Intel Corporation
4+
*
5+
* Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
6+
* See LICENSE.TXT
7+
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8+
*
9+
* @file asan_buffer.cpp
10+
*
11+
*/
12+
13+
#include "asan_buffer.hpp"
14+
#include "asan_interceptor.hpp"
15+
#include "ur_sanitizer_layer.hpp"
16+
#include "ur_sanitizer_utils.hpp"
17+
18+
namespace ur_sanitizer_layer {
19+
20+
ur_result_t EnqueueMemCopyRectHelper(
21+
ur_queue_handle_t Queue, char *pSrc, char *pDst, ur_rect_offset_t SrcOffset,
22+
ur_rect_offset_t DstOffset, ur_rect_region_t Region, size_t SrcRowPitch,
23+
size_t SrcSlicePitch, size_t DstRowPitch, size_t DstSlicePitch,
24+
bool Blocking, uint32_t NumEventsInWaitList,
25+
const ur_event_handle_t *EventWaitList, ur_event_handle_t *Event) {
26+
// If user doesn't determine src/dst row pitch and slice pitch, just use
27+
// region for it.
28+
if (SrcRowPitch == 0) {
29+
SrcRowPitch = Region.width;
30+
}
31+
32+
if (SrcSlicePitch == 0) {
33+
SrcSlicePitch = SrcRowPitch * Region.height;
34+
}
35+
36+
if (DstRowPitch == 0) {
37+
DstRowPitch = Region.width;
38+
}
39+
40+
if (DstSlicePitch == 0) {
41+
DstSlicePitch = DstRowPitch * Region.height;
42+
}
43+
44+
// Calculate the src and dst addresses that actually will be copied.
45+
char *SrcOrigin = pSrc + SrcOffset.x + SrcRowPitch * SrcOffset.y +
46+
SrcSlicePitch * SrcOffset.z;
47+
char *DstOrigin = pDst + DstOffset.x + DstRowPitch * DstOffset.y +
48+
DstSlicePitch * DstOffset.z;
49+
50+
std::vector<ur_event_handle_t> Events;
51+
Events.reserve(Region.depth);
52+
// For now, USM doesn't support 3D memory copy operation, so we can only
53+
// loop call 2D memory copy function to implement it.
54+
for (size_t i = 0; i < Region.depth; i++) {
55+
ur_event_handle_t NewEvent{};
56+
UR_CALL(context.urDdiTable.Enqueue.pfnUSMMemcpy2D(
57+
Queue, Blocking, DstOrigin + (i * DstSlicePitch), DstRowPitch,
58+
SrcOrigin + (i * SrcSlicePitch), SrcRowPitch, Region.width,
59+
Region.height, NumEventsInWaitList, EventWaitList, &NewEvent));
60+
61+
Events.push_back(NewEvent);
62+
}
63+
64+
UR_CALL(context.urDdiTable.Enqueue.pfnEventsWait(Queue, Events.size(),
65+
Events.data(), Event));
66+
67+
return UR_RESULT_SUCCESS;
68+
}
69+
70+
ur_result_t MemBuffer::getHandle(ur_device_handle_t Device, char *&Handle) {
71+
// Sub-buffers don't maintain own allocations but rely on parent buffer.
72+
if (SubBuffer) {
73+
UR_CALL(SubBuffer->Parent->getHandle(Device, Handle));
74+
Handle += SubBuffer->Origin;
75+
return UR_RESULT_SUCCESS;
76+
}
77+
78+
auto &Allocation = Allocations[Device];
79+
if (!Allocation) {
80+
ur_usm_desc_t USMDesc{};
81+
USMDesc.align = getAlignment();
82+
ur_usm_pool_handle_t Pool{};
83+
ur_result_t URes = context.interceptor->allocateMemory(
84+
Context, Device, &USMDesc, Pool, Size, AllocType::MEM_BUFFER,
85+
ur_cast<void **>(&Allocation));
86+
if (URes != UR_RESULT_SUCCESS) {
87+
context.logger.error(
88+
"Failed to allocate {} bytes memory for buffer {}", Size, this);
89+
return URes;
90+
}
91+
92+
if (HostPtr) {
93+
ManagedQueue Queue(Context, Device);
94+
URes = context.urDdiTable.Enqueue.pfnUSMMemcpy(
95+
Queue, true, Allocation, HostPtr, Size, 0, nullptr, nullptr);
96+
if (URes != UR_RESULT_SUCCESS) {
97+
context.logger.error("Failed to copy {} bytes data from host "
98+
"pointer {} to buffer {}",
99+
Size, HostPtr, this);
100+
return URes;
101+
}
102+
}
103+
}
104+
105+
Handle = Allocation;
106+
107+
return UR_RESULT_SUCCESS;
108+
}
109+
110+
ur_result_t MemBuffer::free() {
111+
for (const auto &[_, Ptr] : Allocations) {
112+
ur_result_t URes = context.interceptor->releaseMemory(Context, Ptr);
113+
if (URes != UR_RESULT_SUCCESS) {
114+
context.logger.error("Failed to free buffer handle {}", Ptr);
115+
return URes;
116+
}
117+
}
118+
Allocations.clear();
119+
return UR_RESULT_SUCCESS;
120+
}
121+
122+
size_t MemBuffer::getAlignment() {
123+
// Choose an alignment that is at most 64 and is the next power of 2
124+
// for sizes less than 64.
125+
// TODO: If we don't set the alignment size explicitly, the device will
126+
// usually choose a very large size (more than 1k). Then sanitizer will
127+
// allocate extra unnessary memory. Not sure if this will impact
128+
// performance.
129+
size_t MsbIdx = 63 - __builtin_clz(Size);
130+
size_t Alignment = (1 << (MsbIdx + 1));
131+
if (Alignment > 128) {
132+
Alignment = 128;
133+
}
134+
return Alignment;
135+
}
136+
137+
} // namespace ur_sanitizer_layer
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
/*
2+
*
3+
* Copyright (C) 2024 Intel Corporation
4+
*
5+
* Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
6+
* See LICENSE.TXT
7+
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8+
*
9+
* @file asan_buffer.hpp
10+
*
11+
*/
12+
13+
#pragma once
14+
15+
#include <atomic>
16+
#include <memory>
17+
#include <optional>
18+
19+
#include "common.hpp"
20+
21+
namespace ur_sanitizer_layer {
22+
23+
struct MemBuffer {
24+
// Buffer constructor
25+
MemBuffer(ur_context_handle_t Context, size_t Size, char *HostPtr)
26+
: Context(Context), Size(Size), HostPtr(HostPtr) {}
27+
28+
// Sub-buffer constructor
29+
MemBuffer(std::shared_ptr<MemBuffer> Parent, size_t Origin, size_t Size)
30+
: Context(Parent->Context), Size(Size), SubBuffer{{Parent, Origin}} {}
31+
32+
ur_result_t getHandle(ur_device_handle_t Device, char *&Handle);
33+
34+
ur_result_t free();
35+
36+
size_t getAlignment();
37+
38+
std::unordered_map<ur_device_handle_t, char *> Allocations;
39+
40+
enum AccessMode { UNKNOWN, READ_WRITE, READ_ONLY, WRITE_ONLY };
41+
42+
struct Mapping {
43+
size_t Offset;
44+
size_t Size;
45+
};
46+
47+
std::unordered_map<void *, Mapping> Mappings;
48+
49+
ur_context_handle_t Context;
50+
51+
size_t Size;
52+
53+
char *HostPtr{};
54+
55+
struct SubBuffer_t {
56+
std::shared_ptr<MemBuffer> Parent;
57+
size_t Origin;
58+
};
59+
60+
std::optional<SubBuffer_t> SubBuffer;
61+
62+
std::atomic<int32_t> RefCount;
63+
64+
ur_shared_mutex Mutex;
65+
};
66+
67+
ur_result_t EnqueueMemCopyRectHelper(
68+
ur_queue_handle_t Queue, char *pSrc, char *pDst, ur_rect_offset_t SrcOffset,
69+
ur_rect_offset_t DstOffset, ur_rect_region_t Region, size_t SrcRowPitch,
70+
size_t SrcSlicePitch, size_t DstRowPitch, size_t DstSlicePitch,
71+
bool Blocking, uint32_t NumEventsInWaitList,
72+
const ur_event_handle_t *EventWaitList, ur_event_handle_t *Event);
73+
74+
} // namespace ur_sanitizer_layer

source/loader/layers/sanitizer/asan_interceptor.cpp

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,9 @@ ur_result_t SanitizerInterceptor::allocateMemory(
232232
} else if (Type == AllocType::SHARED_USM) {
233233
UR_CALL(context.urDdiTable.USM.pfnSharedAlloc(
234234
Context, Device, Properties, Pool, NeededSize, &Allocated));
235+
} else if (Type == AllocType::MEM_BUFFER) {
236+
UR_CALL(context.urDdiTable.USM.pfnDeviceAlloc(
237+
Context, Device, Properties, Pool, NeededSize, &Allocated));
235238
} else {
236239
context.logger.error("Unsupport memory type");
237240
return UR_RESULT_ERROR_INVALID_ARGUMENT;
@@ -627,13 +630,71 @@ ur_result_t SanitizerInterceptor::eraseDevice(ur_device_handle_t Device) {
627630
return UR_RESULT_SUCCESS;
628631
}
629632

633+
ur_result_t SanitizerInterceptor::insertKernel(ur_kernel_handle_t Kernel) {
634+
std::scoped_lock<ur_shared_mutex> Guard(m_KernelMapMutex);
635+
if (m_KernelMap.find(Kernel) != m_KernelMap.end()) {
636+
return UR_RESULT_SUCCESS;
637+
}
638+
m_KernelMap.emplace(Kernel, std::make_shared<KernelInfo>(Kernel));
639+
return UR_RESULT_SUCCESS;
640+
}
641+
642+
ur_result_t SanitizerInterceptor::eraseKernel(ur_kernel_handle_t Kernel) {
643+
std::scoped_lock<ur_shared_mutex> Guard(m_KernelMapMutex);
644+
assert(m_KernelMap.find(Kernel) != m_KernelMap.end());
645+
m_KernelMap.erase(Kernel);
646+
return UR_RESULT_SUCCESS;
647+
}
648+
649+
ur_result_t
650+
SanitizerInterceptor::insertMemBuffer(std::shared_ptr<MemBuffer> MemBuffer) {
651+
std::scoped_lock<ur_shared_mutex> Guard(m_MemBufferMapMutex);
652+
assert(m_MemBufferMap.find(ur_cast<ur_mem_handle_t>(MemBuffer.get())) ==
653+
m_MemBufferMap.end());
654+
m_MemBufferMap.emplace(reinterpret_cast<ur_mem_handle_t>(MemBuffer.get()),
655+
MemBuffer);
656+
return UR_RESULT_SUCCESS;
657+
}
658+
659+
ur_result_t SanitizerInterceptor::eraseMemBuffer(ur_mem_handle_t MemHandle) {
660+
std::scoped_lock<ur_shared_mutex> Guard(m_MemBufferMapMutex);
661+
assert(m_MemBufferMap.find(MemHandle) != m_MemBufferMap.end());
662+
m_MemBufferMap.erase(MemHandle);
663+
return UR_RESULT_SUCCESS;
664+
}
665+
666+
std::shared_ptr<MemBuffer>
667+
SanitizerInterceptor::getMemBuffer(ur_mem_handle_t MemHandle) {
668+
std::shared_lock<ur_shared_mutex> Guard(m_MemBufferMapMutex);
669+
if (m_MemBufferMap.find(MemHandle) != m_MemBufferMap.end()) {
670+
return m_MemBufferMap[MemHandle];
671+
}
672+
return nullptr;
673+
}
674+
630675
ur_result_t SanitizerInterceptor::prepareLaunch(
631676
ur_context_handle_t Context, std::shared_ptr<DeviceInfo> &DeviceInfo,
632677
ur_queue_handle_t Queue, ur_kernel_handle_t Kernel,
633678
LaunchInfo &LaunchInfo) {
634679
auto Program = GetProgram(Kernel);
635680

636681
do {
682+
// Set membuffer arguments
683+
auto KernelInfo = getKernelInfo(Kernel);
684+
for (const auto &[ArgIndex, MemBuffer] : KernelInfo->BufferArgs) {
685+
char *ArgPointer = nullptr;
686+
UR_CALL(MemBuffer->getHandle(DeviceInfo->Handle, ArgPointer));
687+
ur_result_t URes = context.urDdiTable.Kernel.pfnSetArgPointer(
688+
Kernel, ArgIndex, nullptr, &ArgPointer);
689+
if (URes != UR_RESULT_SUCCESS) {
690+
context.logger.error(
691+
"Failed to set buffer {} as the {} arg to kernel {}: {}",
692+
ur_cast<ur_mem_handle_t>(MemBuffer.get()), ArgIndex, Kernel,
693+
URes);
694+
return URes;
695+
}
696+
}
697+
637698
// Write global variable to program
638699
auto EnqueueWriteGlobal = [Queue, Program](const char *Name,
639700
const void *Value,

source/loader/layers/sanitizer/asan_interceptor.hpp

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#pragma once
1414

1515
#include "asan_allocator.hpp"
16+
#include "asan_buffer.hpp"
1617
#include "asan_libdevice.hpp"
1718
#include "common.hpp"
1819
#include "ur_sanitizer_layer.hpp"
@@ -79,6 +80,24 @@ struct QueueInfo {
7980
}
8081
};
8182

83+
struct KernelInfo {
84+
ur_kernel_handle_t Handle;
85+
ur_shared_mutex Mutex;
86+
std::unordered_map<uint32_t, std::shared_ptr<MemBuffer>> BufferArgs;
87+
88+
explicit KernelInfo(ur_kernel_handle_t Kernel) : Handle(Kernel) {
89+
[[maybe_unused]] auto Result =
90+
context.urDdiTable.Kernel.pfnRetain(Kernel);
91+
assert(Result == UR_RESULT_SUCCESS);
92+
}
93+
94+
~KernelInfo() {
95+
[[maybe_unused]] auto Result =
96+
context.urDdiTable.Kernel.pfnRelease(Handle);
97+
assert(Result == UR_RESULT_SUCCESS);
98+
}
99+
};
100+
82101
struct ContextInfo {
83102
ur_context_handle_t Handle;
84103

@@ -173,6 +192,13 @@ class SanitizerInterceptor {
173192
std::shared_ptr<DeviceInfo> &CI);
174193
ur_result_t eraseDevice(ur_device_handle_t Device);
175194

195+
ur_result_t insertKernel(ur_kernel_handle_t Kernel);
196+
ur_result_t eraseKernel(ur_kernel_handle_t Kernel);
197+
198+
ur_result_t insertMemBuffer(std::shared_ptr<MemBuffer> MemBuffer);
199+
ur_result_t eraseMemBuffer(ur_mem_handle_t MemHandle);
200+
std::shared_ptr<MemBuffer> getMemBuffer(ur_mem_handle_t MemHandle);
201+
176202
std::optional<AllocationIterator> findAllocInfoByAddress(uptr Address);
177203

178204
std::shared_ptr<ContextInfo> getContextInfo(ur_context_handle_t Context) {
@@ -181,6 +207,12 @@ class SanitizerInterceptor {
181207
return m_ContextMap[Context];
182208
}
183209

210+
std::shared_ptr<KernelInfo> getKernelInfo(ur_kernel_handle_t Kernel) {
211+
std::shared_lock<ur_shared_mutex> Guard(m_KernelMapMutex);
212+
assert(m_KernelMap.find(Kernel) != m_KernelMap.end());
213+
return m_KernelMap[Kernel];
214+
}
215+
184216
private:
185217
ur_result_t updateShadowMemory(std::shared_ptr<ContextInfo> &ContextInfo,
186218
std::shared_ptr<DeviceInfo> &DeviceInfo,
@@ -210,11 +242,18 @@ class SanitizerInterceptor {
210242
std::unordered_map<ur_context_handle_t, std::shared_ptr<ContextInfo>>
211243
m_ContextMap;
212244
ur_shared_mutex m_ContextMapMutex;
213-
214245
std::unordered_map<ur_device_handle_t, std::shared_ptr<DeviceInfo>>
215246
m_DeviceMap;
216247
ur_shared_mutex m_DeviceMapMutex;
217248

249+
std::unordered_map<ur_kernel_handle_t, std::shared_ptr<KernelInfo>>
250+
m_KernelMap;
251+
ur_shared_mutex m_KernelMapMutex;
252+
253+
std::unordered_map<ur_mem_handle_t, std::shared_ptr<MemBuffer>>
254+
m_MemBufferMap;
255+
ur_shared_mutex m_MemBufferMapMutex;
256+
218257
/// Assumption: all USM chunks are allocated in one VA
219258
AllocationMap m_AllocationMap;
220259
ur_shared_mutex m_AllocationMapMutex;

0 commit comments

Comments
 (0)