Skip to content

Commit 9fe7898

Browse files
authored
Fix very low frequency false ASAN use after free report with device m… (llvm#1386)
2 parents 573e23c + 494250b commit 9fe7898

File tree

1 file changed

+48
-40
lines changed
  • amd/device-libs/asanrtl/src

1 file changed

+48
-40
lines changed

amd/device-libs/asanrtl/src/dm.cl

Lines changed: 48 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
#include "asan_util.h"
1010
#include "shadow_mapping.h"
1111

12+
// Shorthand for the optnone function attribute; placed ahead of a function
// definition to ask the compiler not to optimize that function.
#define OPTNONE __attribute__((optnone))
13+
1214
static const __constant uchar kAsanHeapLeftRedzoneMagic = (uchar)0xfa;
1315
static const __constant uint kAsanHeapLeftRedzoneMagicx4 = 0xfafafafaU;
1416
static const __constant ulong kAsanHeapLeftRedzoneMagicx8 = 0xfafafafafafafafaUL;
@@ -30,8 +32,8 @@ extern ulong __ockl_devmem_request(ulong addr, ulong size);
3032
// Atomic read-modify-write shorthands on *P with operand V: add (AA), and (AN), or (AO).
// All use relaxed memory ordering at device scope, so they impose no ordering
// on surrounding memory accesses by themselves.
#define AA(P,V) __opencl_atomic_fetch_add(P, V, memory_order_relaxed, memory_scope_device)
3133
#define AN(P,V) __opencl_atomic_fetch_and(P, V, memory_order_relaxed, memory_scope_device)
3234
#define AO(P,V) __opencl_atomic_fetch_or(P, V, memory_order_relaxed, memory_scope_device)
33-
#define AX(P,V) __opencl_atomic_fetch_xor(P, V, memory_order_relaxed, memory_scope_device)
3435
// Strong atomic compare-exchange on *P: E points to the expected value, V is the
// desired value. Relaxed ordering on both success and failure, device scope.
#define ACE(P,E,V) __opencl_atomic_compare_exchange_strong(P, E, V, memory_order_relaxed, memory_order_relaxed, memory_scope_device)
36+
// Release fence at "agent" synchronization scope.
// NOTE(review): the trailing "global" argument presumably limits the fence to the
// global address space -- confirm against the __builtin_amdgcn_fence documentation.
#define RF() __builtin_amdgcn_fence(__ATOMIC_RELEASE, "agent", "global")
3537
// Acquire-release fence at "agent" synchronization scope.
// NOTE(review): the trailing "global" argument presumably limits the fence to the
// global address space -- confirm against the __builtin_amdgcn_fence documentation.
#define ARF() __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "agent", "global")
3638

3739
// An allocation
@@ -110,11 +112,6 @@ typedef struct heap_s {
110112
lifo_t la[NLA]; // Storage for available slabs
111113
} heap_t;
112114

113-
// Inhibit control flow optimizations
114-
__attribute__((overloadable)) static int o0(int x) { int y; __asm__ volatile("" : "=v"(y) : "0"(x)); return y; }
115-
__attribute__((overloadable)) static uint o0(uint x) { uint y; __asm__ volatile("" : "=v"(y) : "0"(x)); return y; }
116-
__attribute__((overloadable)) static ulong o0(ulong x) { ulong y; __asm__ volatile("" : "=v"(y) : "0"(x)); return y; }
117-
118115
// Overloads to broadcast the value held by the first active lane
119116
// The result is known to be wave-uniform
120117
static __attribute__((overloadable)) uint
@@ -198,13 +195,14 @@ slab_pause(void)
198195

199196

200197
// Intended to be called from only one lane of a wave
198+
OPTNONE
201199
NO_SANITIZE_ADDR
202200
static void
203201
put_free_slab(__global heap_t *hp, __global slab_t *sp)
204202
{
205203
__global lifo_t *lp = LP(hp, AA(&hp->wid, 1UL));
206204

207-
for (ulong i=1;;++i) {
205+
for (;;) {
208206
ulong top = AL(&lp->top);
209207
AS(&sp->next, (ulong)slabptr(top));
210208
if (ACE(&lp->top, &top, addcnt((ulong)sp, top))) {
@@ -224,7 +222,7 @@ get_free_slab(__global heap_t *hp)
224222

225223
__global lifo_t *lp = LP(hp, AA(&hp->rid, 1UL));
226224

227-
for (ulong i=1;;++i) {
225+
for (;;) {
228226
ulong top = AL(&lp->top);
229227
__global slab_t *sp = slabptr(top);
230228
if (sp) {
@@ -236,6 +234,7 @@ get_free_slab(__global heap_t *hp)
236234
}
237235
slab_pause();
238236
}
237+
239238
}
240239

241240
NO_SANITIZE_ADDR
@@ -268,6 +267,7 @@ slab_free(__global alloc_t *ap, ulong pc)
268267
unpublish_allocation(ap, pc);
269268
__global heap_t *hp = get_heap_ptr();
270269
__global slab_t *sp = (__global slab_t *)ap->sp;
270+
271271
int go = 1;
272272
do {
273273
if (go) {
@@ -277,9 +277,6 @@ slab_free(__global alloc_t *ap, ulong pc)
277277
if (aid == 0) {
278278
uint rb = AA(&sp->rb, sz) + sz;
279279
if (rb == SLAB_BYTES - SLAB_HEADER_BYTES) {
280-
AO(&sp->ap, (ulong)F_UNREADY);
281-
ulong cs = (ulong)sp;
282-
ACE(&hp->cs, &cs, 0UL);
283280
put_free_slab(hp, sp);
284281
}
285282
}
@@ -406,7 +403,8 @@ try_new_slab(__global heap_t *hp)
406403
__global slab_t *sp = obtain_new_slab(hp);
407404
if (sp) {
408405
AS(&sp->next, 0UL);
409-
AS(&sp->ap, (ulong)sp | (ulong)(F_POISON_PENDING | F_POISON_NEEDED | F_UNREADY));
406+
AS(&sp->rb, 0U);
407+
AS(&sp->ap, (ulong)sp + (ulong)SLAB_HEADER_BYTES + (ulong)(F_UNREADY | F_POISON_PENDING | F_POISON_NEEDED));
410408
#if defined SLAB_IDENTITY
411409
AS(&sp->sid, AA(&hp->num_slab_allocations, 1UL));
412410
#else
@@ -429,11 +427,12 @@ new_slab_wait(__global heap_t *hp)
429427
}
430428

431429
// Called by a single workitem
430+
OPTNONE
432431
NO_SANITIZE_ADDR
433432
static __global slab_t *
434433
get_current_slab(__global heap_t *hp)
435434
{
436-
for (ulong i=1;;++i) {
435+
for (;;) {
437436
ulong cs = AL(&hp->cs);
438437
if (cs)
439438
return (__global slab_t *)cs;
@@ -485,6 +484,7 @@ poison_slab(__global slab_t *sp, int aid, int na)
485484

486485
for (int i=aid; i < SLAB_BYTES / SHADOW_GRANULARITY / sizeof(ulong); i += na)
487486
ssp[i] = kAsanHeapLeftRedzoneMagicx8;
487+
RF();
488488

489489
if (!aid)
490490
AN(&sp->ap, ~(ulong)F_POISON_PENDING);
@@ -541,39 +541,47 @@ slab_malloc(ulong lsz, ulong pc)
541541

542542
ulong o = (ulong)__ockl_alisa_u32(asz);
543543

544-
ulong p = 0;
544+
ulong ap = 0;
545545
if (!aid)
546-
p = AA(&cs->ap, o);
547-
p = first(p);
548-
549-
if (!(p & (ulong)F_MASK)) {
550-
if (p + o <= (ulong)cs + SLAB_BYTES) {
551-
ret = publish_allocation(p + o - asz, (ulong)cs, pc, asz, arz, usz);
552-
go = 0;
553-
} else {
554-
if (!__ockl_activelane_u32()) {
555-
ulong e = (ulong)cs;
556-
ACE(&hp->cs, &e, 0UL);
557-
}
558-
if (p + o - asz < (ulong)cs + SLAB_BYTES) {
559-
uint unused = (uint)((ulong)cs + SLAB_BYTES - (p + o - asz));
560-
uint rb = AA(&cs->rb, unused) + unused;
561-
if (rb == SLAB_BYTES - SLAB_HEADER_BYTES) {
562-
AO(&cs->ap, (ulong)F_UNREADY);
563-
put_free_slab(hp, cs);
564-
}
565-
}
566-
}
567-
} else {
568-
ulong newp = 0;
546+
ap = AL(&cs->ap);
547+
ap = first(ap);
548+
549+
if (ap & (ulong)F_MASK) {
550+
ulong p = 0;
569551
if (!aid)
570-
newp = AN(&cs->ap, ~(ulong)F_POISON_NEEDED);
571-
newp = first(newp);
552+
p = AN(&cs->ap, ~(ulong)F_POISON_NEEDED);
553+
p = first(p);
572554

573-
if (newp & (ulong)F_POISON_NEEDED)
555+
if (p & (ulong)F_POISON_NEEDED)
574556
poison_slab(cs, aid, active_lane_count());
575557
else
576558
slab_pause();
559+
} else {
560+
ulong p = 0;
561+
if (!aid)
562+
p = AA(&cs->ap, o);
563+
p = first(p);
564+
565+
if (!(p & (ulong)F_MASK)) {
566+
if (p + o <= (ulong)cs + SLAB_BYTES) {
567+
ret = publish_allocation(p + o - asz, (ulong)cs, pc, asz, arz, usz);
568+
go = 0;
569+
} else {
570+
if (!__ockl_activelane_u32()) {
571+
ulong e = (ulong)cs;
572+
ACE(&hp->cs, &e, 0UL);
573+
AO(&cs->ap, (ulong)F_UNREADY);
574+
}
575+
if (p + o - asz < (ulong)cs + SLAB_BYTES) {
576+
uint unused = (uint)((ulong)cs + SLAB_BYTES - (p + o - asz));
577+
uint rb = AA(&cs->rb, unused) + unused;
578+
if (rb == SLAB_BYTES - SLAB_HEADER_BYTES) {
579+
put_free_slab(hp, cs);
580+
}
581+
}
582+
}
583+
} else
584+
slab_pause();
577585
}
578586
}
579587
} while (__ockl_wfany_i32(go));

0 commit comments

Comments
 (0)