Skip to content

Commit 956bbc6

Browse files
kpamnany authored and RAI CI (GitHub Action Automation) committed
RAI: Add heartbeat capability
1 parent b0d9b66 commit 956bbc6

File tree

9 files changed

+329
-1
lines changed

9 files changed

+329
-1
lines changed

src/gc-stock.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3392,6 +3392,9 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
33923392
return recollect;
33933393
}
33943394

3395+
extern int jl_heartbeat_pause(void);
3396+
extern int jl_heartbeat_resume(void);
3397+
33953398
JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
33963399
{
33973400
JL_PROBE_GC_BEGIN(collection);
@@ -3434,6 +3437,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
34343437
// existence of the thread in the jl_n_threads count.
34353438
//
34363439
// TODO: concurrently queue objects
3440+
jl_heartbeat_pause();
34373441
jl_fence();
34383442
gc_n_threads = jl_atomic_load_acquire(&jl_n_threads);
34393443
gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states);
@@ -3465,6 +3469,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
34653469

34663470
gc_n_threads = 0;
34673471
gc_all_tls_states = NULL;
3472+
jl_heartbeat_resume();
34683473
jl_safepoint_end_gc();
34693474
jl_gc_state_set(ptls, old_state, JL_GC_STATE_WAITING);
34703475
JL_PROBE_GC_END();

src/gc-stock.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -499,6 +499,9 @@ extern uv_sem_t gc_sweep_assists_needed;
499499
extern _Atomic(int) gc_n_threads_marking;
500500
extern _Atomic(int) gc_n_threads_sweeping_pools;
501501
extern _Atomic(int) n_threads_running;
502+
extern _Atomic(int) gc_n_threads_sweeping_stacks;
503+
extern _Atomic(int) gc_ptls_sweep_idx;
504+
extern _Atomic(int) gc_stack_free_idx;
502505
extern uv_barrier_t thread_init_done;
503506
void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq);
504507
void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t *fl_parent, jl_value_t **fl_begin, jl_value_t **fl_end) JL_NOTSAFEPOINT;

src/init.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -864,6 +864,8 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel)
864864
_finish_julia_init(rel, ptls, ct);
865865
}
866866

867+
void jl_init_heartbeat(void);
868+
867869
static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_task_t *ct)
868870
{
869871
JL_TIMING(JULIA_INIT, JULIA_INIT);
@@ -917,6 +919,11 @@ static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_
917919
jl_start_gc_threads();
918920
uv_barrier_wait(&thread_init_done);
919921

922+
if (jl_base_module != NULL) {
923+
// requires code in Base
924+
jl_init_heartbeat();
925+
}
926+
920927
jl_gc_enable(1);
921928

922929
if (jl_options.image_file && (!jl_generating_output() || jl_options.incremental) && jl_module_init_order) {

src/julia_internal.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,7 @@ extern volatile size_t profile_bt_size_max;
224224
extern volatile size_t profile_bt_size_cur;
225225
extern volatile int profile_running;
226226
extern volatile int profile_all_tasks;
227+
extern int heartbeat_tid; // Mostly used to ensure we skip this thread in the CPU profiler. XXX: not implemented on Windows
227228
// Ensures that we can safely read the `live_tasks` field of every TLS when profiling.
228229
// We want to avoid the case that a GC gets interleaved with `jl_profile_task` and shrinks
229230
// the `live_tasks` array while we are reading it or frees tasks that are being profiled.
@@ -239,6 +240,7 @@ extern uv_mutex_t bt_data_prof_lock;
239240
#define PROFILE_STATE_THREAD_NOT_SLEEPING (1)
240241
#define PROFILE_STATE_THREAD_SLEEPING (2)
241242
#define PROFILE_STATE_WALL_TIME_PROFILING (3)
243+
extern _Atomic(int) n_threads_running;
242244
void jl_profile_task(void);
243245

244246
// number of cycles since power-on

src/options.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,9 @@
144144
#define MACHINE_EXCLUSIVE_NAME "JULIA_EXCLUSIVE"
145145
#define DEFAULT_MACHINE_EXCLUSIVE 0
146146

147+
// heartbeats
148+
#define JL_HEARTBEAT_THREAD
149+
147150
// sanitizer defaults ---------------------------------------------------------
148151

149152
// Automatically enable MEMDEBUG and KEEP_BODIES for the sanitizers

src/signals-mach.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -826,6 +826,10 @@ void *mach_profile_listener(void *arg)
826826
for (int idx = nthreads; idx-- > 0; ) {
827827
// Stop the threads in random order.
828828
int i = randperm[idx];
829+
// skip heartbeat thread
830+
if (i == heartbeat_tid) {
831+
continue;
832+
}
829833
jl_profile_thread_mach(i);
830834
}
831835
}

src/signals-unix.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -872,6 +872,10 @@ static void do_profile(void *ctx)
872872
for (int idx = nthreads; idx-- > 0; ) {
873873
// Stop the threads in random order.
874874
int tid = randperm[idx];
875+
// skip the heartbeat thread; `continue` (not `return`) so the remaining threads are still sampled
876+
if (tid == heartbeat_tid) {
877+
continue;
878+
}
875879
// do backtrace for profiler
876880
if (!profile_running)
877881
return;

src/stackwalk.c

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1398,9 +1398,22 @@ JL_DLLEXPORT void jl_print_backtrace(void) JL_NOTSAFEPOINT
13981398
jlbacktrace();
13991399
}
14001400

1401-
// Print backtraces for all live tasks, for all threads, to jl_safe_printf stderr
1401+
extern int jl_inside_heartbeat_thread(void);
1402+
extern int jl_heartbeat_pause(void);
1403+
extern int jl_heartbeat_resume(void);
1404+
1405+
// Print backtraces for all live tasks, for all threads, to stderr via
1406+
// jl_safe_printf. This can take a _long_ time!
14021407
JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT
14031408
{
1409+
// disable heartbeats to prevent heartbeat loss while running this,
1410+
// unless this is called from the heartbeat thread itself; in that
1411+
// situation, the thread is busy running this and it will not be
1412+
// updating the missed heartbeats counter
1413+
if (!jl_inside_heartbeat_thread()) {
1414+
jl_heartbeat_pause();
1415+
}
1416+
14041417
size_t nthreads = jl_atomic_load_acquire(&jl_n_threads);
14051418
jl_ptls_t *allstates = jl_atomic_load_relaxed(&jl_all_tls_states);
14061419
int ctid = jl_threadid() + 1;
@@ -1459,6 +1472,10 @@ JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT
14591472
jl_safe_printf("thread (%d) ==== End thread %d\n", ctid, ptls2->tid + 1);
14601473
}
14611474
jl_safe_printf("thread (%d) ++++ Done\n", ctid);
1475+
1476+
if (!jl_inside_heartbeat_thread()) {
1477+
jl_heartbeat_resume();
1478+
}
14621479
}
14631480

14641481
#ifdef __cplusplus

0 commit comments

Comments
 (0)