Skip to content

Commit fa7007d

Browse files
committed
[ASan][Offload] Add support to utilize device malloc/free.
Dynamic memory allocation/deallocation performed through the malloc/new or free/delete APIs must invoke the ASan-friendly __asan* APIs when ASan is enabled. Change-Id: I5732428319194b458f3fb36daa05709317990e63
1 parent a8cc66c commit fa7007d

File tree

6 files changed

+83
-9
lines changed

6 files changed

+83
-9
lines changed

offload/DeviceRTL/CMakeLists.txt

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,8 +149,9 @@ function(compileDeviceRTLLibrary target_name target_triple)
149149
VERBATIM
150150
)
151151

152-
if(${outfile} MATCHES "State.cpp")
152+
if(${outfile} MATCHES "State.cpp" OR SANITIZER_AMDGPU)
153153
# Run the prep tool on the library to replace internal attribute with linkonce_odr for dm_alloc only.
154+
# If SANITIZER_AMDGPU is enabled, run the prep tool for all device functions.
154155
set(outfile_prep "${outfile}-${target_name}-prep.bc")
155156
add_custom_target(${outfile_prep}
156157
COMMAND ${PREP_TOOL} -dm ${outfile}
@@ -346,6 +347,13 @@ set(amd_options -Xclang -mcode-object-version=none
346347
-Xclang -mcode-object-version=none
347348
)
348349

350+
if(SANITIZER_AMDGPU)
351+
get_target_property(asanrtl_path asanrtl IMPORTED_LOCATION)
352+
list(APPEND amd_options -DSANITIZER_AMDGPU=1)
353+
list(APPEND amd_options -Xclang -mlink-bitcode-file -Xclang ${asanrtl_path})
354+
endif()
355+
356+
349357
compileDeviceRTLLibrary(amdgpu amdgcn-amd-amdhsa ${amd_options})
350358
compileDeviceRTLLibrary(nvptx nvptx64-nvidia-cuda --cuda-feature=+ptx63)
351359

offload/DeviceRTL/src/State.cpp

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@
2424
using namespace ompx;
2525

2626
#pragma omp begin declare target device_type(host)
27-
void *internal_malloc(uint64_t Size);
28-
void internal_free(void *Ptr);
27+
__attribute__((noinline)) void *internal_malloc(uint64_t Size);
28+
__attribute__((noinline)) void internal_free(void *Ptr);
2929
#pragma omp end declare target
3030

3131
#pragma omp begin declare target device_type(nohost)
@@ -59,18 +59,35 @@ namespace {
5959
///
6060
///{
6161

62-
// global_allocate uses ockl_dm_alloc to manage a global memory heap
62+
// global_allocate uses ockl_dm_alloc/asan_malloc_impl to manage a global memory
63+
// heap
6364
__attribute__((noinline)) extern "C" uint64_t __ockl_dm_alloc(uint64_t bufsz);
6465
__attribute__((noinline)) extern "C" void __ockl_dm_dealloc(uint64_t ptr);
65-
66+
#if SANITIZER_AMDGPU
67+
__attribute__((noinline)) extern "C" uint64_t __asan_malloc_impl(uint64_t bufsz,
68+
uint64_t pc);
69+
__attribute__((noinline)) extern "C" void __asan_free_impl(uint64_t ptr,
70+
uint64_t pc);
71+
#endif
6672
#pragma omp begin declare variant match(device = {arch(amdgcn)})
6773
extern "C" {
68-
void *internal_malloc(uint64_t Size) {
74+
__attribute__((noinline)) void *internal_malloc(uint64_t Size) {
75+
#if SANITIZER_AMDGPU
76+
uint64_t ptr =
77+
__asan_malloc_impl(Size, (uint64_t)__builtin_return_address(0));
78+
#else
6979
uint64_t ptr = __ockl_dm_alloc(Size);
80+
#endif
7081
return (void *)ptr;
7182
}
7283

73-
void internal_free(void *Ptr) { __ockl_dm_dealloc((uint64_t)Ptr); }
84+
__attribute__((noinline)) void internal_free(void *Ptr) {
85+
#if SANITIZER_AMDGPU
86+
__asan_free_impl((uint64_t)Ptr, (uint64_t)__builtin_return_address(0));
87+
#else
88+
__ockl_dm_dealloc((uint64_t)Ptr);
89+
#endif
90+
}
7491
}
7592
#pragma omp end declare variant
7693

offload/DeviceRTL/src/exports

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ f90print*
2424
fprintf_*
2525

2626
hostexec*
27+
__ockl*
28+
__asan*
2729
__strlen_max
2830
__tgt_fort_ptr_assn_i8
2931
global_allocate
@@ -36,4 +38,4 @@ sin
3638
cos
3739
atan2f
3840

39-
abs
41+
abs

offload/hostexec/CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,12 @@ foreach(archname ${HOSTRPC_ARCHS})
179179
-Xclang -mlink-builtin-bitcode -Xclang ${ockl_path}
180180
-Wno-linker-warnings # Silence the empty host compilation.
181181
-c -emit-llvm --offload-device-only -nogpulib)
182+
183+
if(SANITIZER_AMDGPU)
184+
get_target_property(asanrtl_path asanrtl IMPORTED_LOCATION)
185+
list(APPEND openmp_device_args -DSANITIZER_AMDGPU=1)
186+
list(APPEND openmp_device_args -Xclang -mlink-bitcode-file -Xclang ${asanrtl_path})
187+
endif()
182188
elseif(${archname} STREQUAL "nvptx")
183189
set(openmp_device_args
184190
-I../../runtime/src # to pickup omp.h, we may need a dependency

offload/hostexec/src/hostexec_stubs.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,13 +224,28 @@ long hostexec_long_execute(char *print_buffer, uint32_t bufsz) {
224224
// See https://github.com/llvm/llvm-project/issues/63597
225225
__attribute__((noinline)) extern "C" uint64_t __ockl_dm_alloc(uint64_t bufsz);
226226
__attribute__((noinline)) extern "C" void __ockl_dm_dealloc(uint64_t ptr);
227+
#if SANITIZER_AMDGPU
228+
__attribute__((noinline)) extern "C" uint64_t __asan_malloc_impl(uint64_t bufsz,
229+
uint64_t pc);
230+
__attribute__((noinline)) extern "C" void __asan_free_impl(uint64_t ptr,
231+
uint64_t pc);
232+
#endif
227233

228234
// FIXME: Deprecate upstream, change test cases to use malloc & free directly
229235
__attribute__((flatten, always_inline)) char *global_allocate(uint32_t bufsz) {
236+
#if SANITIZER_AMDGPU
237+
return (char *)__asan_malloc_impl(bufsz,
238+
(uint64_t)__builtin_return_address(0));
239+
#else
230240
return (char *)__ockl_dm_alloc(bufsz);
241+
#endif
231242
}
232243
__attribute__((flatten, always_inline)) int global_free(void *ptr) {
244+
#if SANITIZER_AMDGPU
245+
__asan_free_impl((uint64_t)ptr, (uint64_t)__builtin_return_address(0));
246+
#else
233247
__ockl_dm_dealloc((uint64_t)ptr);
248+
#endif
234249
return 0;
235250
}
236251

offload/tools/prep-libomptarget-bc/prep-libomptarget-bc.cpp

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,25 @@ static bool convertExternsToLinkOnce(Module *MOUT, LLVMContext &Ctx) {
152152
if (!strncmp(F->getName().str().c_str(), "hostexec_invoke",
153153
strlen("hostexec_invoke")))
154154
continue;
155+
#if SANITIZER_AMDGPU
156+
if (!strncmp(F->getName().str().c_str(), "internal_malloc",
157+
strlen("internal_malloc")) ||
158+
!strncmp(F->getName().str().c_str(), "internal_free",
159+
strlen("internal_free")))
160+
continue;
161+
if (!strncmp(F->getName().str().c_str(), "global_allocate",
162+
strlen("global_allocate")) ||
163+
!strncmp(F->getName().str().c_str(), "global_free",
164+
strlen("global_free"))) {
165+
F->removeFnAttr(llvm::Attribute::AlwaysInline);
166+
F->addFnAttr(llvm::Attribute::NoInline);
167+
continue;
168+
}
169+
// Tag every '__kmpc*' function with 'sanitize_address' attribute by
170+
// default when sanitizer is enabled for amdgpu. This is mainly required
171+
// for LDS overflow detection as it will instrument all LDS variables.
172+
F->addFnAttr(llvm::Attribute::SanitizeAddress);
173+
#endif
155174
// all other functions
156175
F->removeFnAttr(llvm::Attribute::OptimizeNone);
157176
F->removeFnAttr(llvm::Attribute::NoInline);
@@ -185,7 +204,14 @@ static bool convertDmAllocToLinkOnce(Module *MOUT, LLVMContext &Ctx) {
185204
F->setLinkage(GlobalValue::LinkOnceODRLinkage);
186205
F->setVisibility(GlobalValue::ProtectedVisibility);
187206
}
188-
continue;
207+
#if SANITIZER_AMDGPU
208+
// Tag every '__kmpc*' function with 'sanitize_address' attribute by
209+
// default when sanitizer is enabled for amdgpu. This is mainly required
210+
// for LDS overflow detection as it will instrument all LDS variables.
211+
// if (regex.match(F->getName().str()))
212+
F->addFnAttr(llvm::Attribute::SanitizeAddress);
213+
#endif
214+
continue;
189215
}
190216
}
191217
}

0 commit comments

Comments
 (0)