Skip to content

Commit f5b97a8

Browse files
authored
Add ThreadSanitizer hooks for jl_mutex_t (#59034)
Lets ThreadSanitizer detect lock-order inversions on `jl_mutex_t`s and races on mutex initialization, and show the list of currently locked mutexes whenever another ThreadSanitizer warning is reported.
1 parent 8ba3b11 commit f5b97a8

File tree

5 files changed

+72
-9
lines changed

5 files changed

+72
-9
lines changed

src/gc-mmtk.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33
#include "mmtkMutator.h"
44
#include "threading.h"
55

6+
#ifdef _COMPILER_TSAN_ENABLED_
7+
#include <sanitizer/tsan_interface.h>
8+
#endif
9+
610
// File exists in the binding
711
#include "mmtk.h"
812

src/julia_internal.h

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -99,12 +99,6 @@ static inline void msan_unpoison(const volatile void *a, size_t size) JL_NOTSAFE
9999
static inline void msan_allocated_memory(const volatile void *a, size_t size) JL_NOTSAFEPOINT {}
100100
static inline void msan_unpoison_string(const volatile char *a) JL_NOTSAFEPOINT {}
101101
#endif
102-
#ifdef _COMPILER_TSAN_ENABLED_
103-
JL_DLLIMPORT void *__tsan_create_fiber(unsigned flags);
104-
JL_DLLIMPORT void *__tsan_get_current_fiber(void);
105-
JL_DLLIMPORT void __tsan_destroy_fiber(void *fiber);
106-
JL_DLLIMPORT void __tsan_switch_to_fiber(void *fiber, unsigned flags);
107-
#endif
108102

109103
#ifndef _OS_WINDOWS_
110104
#if defined(_CPU_ARM_) || defined(_CPU_PPC_) || defined(_CPU_WASM_)

src/julia_locks.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33
#ifndef JL_LOCKS_H
44
#define JL_LOCKS_H
55

6+
#ifdef _COMPILER_TSAN_ENABLED_
7+
#include <sanitizer/tsan_interface.h>
8+
#endif
9+
610
#ifdef __cplusplus
711
extern "C" {
812
#endif
@@ -34,7 +38,13 @@ static inline void jl_mutex_lock_nogc(jl_mutex_t *lock) JL_NOTSAFEPOINT JL_NOTSA
3438
// Hide this body from the analyzer, otherwise it complains that we're calling
3539
// a non-safepoint from this function. The 0 argument guarantees that we do
3640
// not reach the safepoint, but the analyzer can't figure that out
41+
#ifdef _COMPILER_TSAN_ENABLED_
42+
__tsan_mutex_pre_lock(lock, __tsan_mutex_write_reentrant);
43+
#endif
3744
jl_mutex_wait(lock, 0);
45+
#ifdef _COMPILER_TSAN_ENABLED_
46+
__tsan_mutex_post_lock(lock, __tsan_mutex_write_reentrant, 1);
47+
#endif
3848
#endif
3949
}
4050

src/task.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@
3737
#include "threading.h"
3838
#include "julia_assert.h"
3939

40+
#ifdef _COMPILER_TSAN_ENABLED_
41+
#include <sanitizer/tsan_interface.h>
42+
#endif
43+
4044
#ifdef __cplusplus
4145
extern "C" {
4246
#endif

src/threading.c

Lines changed: 54 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@
88
#include "julia_internal.h"
99
#include "julia_assert.h"
1010

11+
#ifdef _COMPILER_TSAN_ENABLED_
12+
#include <sanitizer/tsan_interface.h>
13+
#endif
14+
1115
#ifdef USE_ITTAPI
1216
#include "ittapi/ittnotify.h"
1317
#endif
@@ -931,7 +935,16 @@ void _jl_mutex_init(jl_mutex_t *lock, const char *name) JL_NOTSAFEPOINT
931935
{
932936
jl_atomic_store_relaxed(&lock->owner, (jl_task_t*)NULL);
933937
lock->count = 0;
938+
#if defined(_COMPILER_TSAN_ENABLED_) && defined(ENABLE_TIMINGS)
939+
__tsan_mutex_pre_divert(lock, 0);
940+
#endif
934941
jl_profile_lock_init(lock, name);
942+
#ifdef _COMPILER_TSAN_ENABLED_
943+
#ifdef ENABLE_TIMINGS
944+
__tsan_mutex_post_divert(lock, 0);
945+
#endif
946+
__tsan_mutex_create(lock, __tsan_mutex_write_reentrant);
947+
#endif
935948
}
936949

937950
void _jl_mutex_wait(jl_task_t *self, jl_mutex_t *lock, int safepoint)
@@ -941,18 +954,27 @@ void _jl_mutex_wait(jl_task_t *self, jl_mutex_t *lock, int safepoint)
941954
lock->count++;
942955
return;
943956
}
957+
#ifdef _COMPILER_TSAN_ENABLED_
958+
__tsan_mutex_pre_divert(lock, 0);
959+
#endif
944960
// Don't use JL_TIMING for instant acquires, results in large blowup of events
945961
jl_profile_lock_start_wait(lock);
946962
if (owner == NULL && jl_atomic_cmpswap(&lock->owner, &owner, self)) {
947963
lock->count = 1;
948964
jl_profile_lock_acquired(lock);
965+
#ifdef _COMPILER_TSAN_ENABLED_
966+
__tsan_mutex_post_divert(lock, 0);
967+
#endif
949968
return;
950969
}
951970
JL_TIMING(LOCK_SPIN, LOCK_SPIN);
952971
while (1) {
953972
if (owner == NULL && jl_atomic_cmpswap(&lock->owner, &owner, self)) {
954973
lock->count = 1;
955974
jl_profile_lock_acquired(lock);
975+
#ifdef _COMPILER_TSAN_ENABLED_
976+
__tsan_mutex_post_divert(lock, 0);
977+
#endif
956978
return;
957979
}
958980
if (jl_running_under_rr(0)) {
@@ -973,6 +995,9 @@ void _jl_mutex_wait(jl_task_t *self, jl_mutex_t *lock, int safepoint)
973995
jl_cpu_suspend();
974996
owner = jl_atomic_load_relaxed(&lock->owner);
975997
}
998+
#ifdef _COMPILER_TSAN_ENABLED_
999+
__tsan_mutex_post_divert(lock, 0);
1000+
#endif
9761001
}
9771002

9781003
static void jl_lock_frame_push(jl_task_t *self, jl_mutex_t *lock)
@@ -998,23 +1023,43 @@ static void jl_lock_frame_pop(jl_task_t *self)
9981023

9991024
void _jl_mutex_lock(jl_task_t *self, jl_mutex_t *lock)
10001025
{
1026+
#ifdef _COMPILER_TSAN_ENABLED_
1027+
__tsan_mutex_pre_lock(lock, __tsan_mutex_write_reentrant);
1028+
#endif
10011029
JL_SIGATOMIC_BEGIN_self();
10021030
_jl_mutex_wait(self, lock, 1);
10031031
jl_lock_frame_push(self, lock);
1032+
#ifdef _COMPILER_TSAN_ENABLED_
1033+
__tsan_mutex_post_lock(lock, __tsan_mutex_write_reentrant, 1);
1034+
#endif
10041035
}
10051036

10061037
int _jl_mutex_trylock_nogc(jl_task_t *self, jl_mutex_t *lock)
10071038
{
1039+
#ifdef _COMPILER_TSAN_ENABLED_
1040+
__tsan_mutex_pre_lock(lock, __tsan_mutex_try_lock | __tsan_mutex_write_reentrant);
1041+
#endif
10081042
jl_task_t *owner = jl_atomic_load_acquire(&lock->owner);
1043+
int ret = 0;
10091044
if (owner == self) {
10101045
lock->count++;
1011-
return 1;
1046+
ret = 1;
1047+
goto done;
10121048
}
10131049
if (owner == NULL && jl_atomic_cmpswap(&lock->owner, &owner, self)) {
10141050
lock->count = 1;
1015-
return 1;
1051+
ret = 1;
1052+
goto done;
10161053
}
1017-
return 0;
1054+
done:
1055+
#ifdef _COMPILER_TSAN_ENABLED_
1056+
__tsan_mutex_post_lock(lock,
1057+
__tsan_mutex_try_lock |
1058+
(ret ? 0 : __tsan_mutex_try_lock_failed) |
1059+
__tsan_mutex_write_reentrant,
1060+
1);
1061+
#endif
1062+
return ret;
10181063
}
10191064

10201065
int _jl_mutex_trylock(jl_task_t *self, jl_mutex_t *lock)
@@ -1030,6 +1075,9 @@ int _jl_mutex_trylock(jl_task_t *self, jl_mutex_t *lock)
10301075
void _jl_mutex_unlock_nogc(jl_mutex_t *lock)
10311076
{
10321077
#ifndef __clang_gcanalyzer__
1078+
#ifdef _COMPILER_TSAN_ENABLED_
1079+
__tsan_mutex_pre_unlock(lock, 0);
1080+
#endif
10331081
assert(jl_atomic_load_relaxed(&lock->owner) == jl_current_task &&
10341082
"Unlocking a lock in a different thread.");
10351083
if (--lock->count == 0) {
@@ -1044,6 +1092,9 @@ void _jl_mutex_unlock_nogc(jl_mutex_t *lock)
10441092
}
10451093
jl_profile_lock_release_end(lock);
10461094
}
1095+
#ifdef _COMPILER_TSAN_ENABLED_
1096+
__tsan_mutex_post_unlock(lock, 0);
1097+
#endif
10471098
#endif
10481099
}
10491100

0 commit comments

Comments
 (0)