Skip to content

Commit bb6be2e

Browse files
kpamnany authored and RAI CI (GitHub Action Automation) committed
RAI: Add heartbeat capability
1 parent 575092d commit bb6be2e

File tree

9 files changed

+329
-1
lines changed

9 files changed

+329
-1
lines changed

src/gc-stock.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3395,6 +3395,9 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
33953395
return recollect;
33963396
}
33973397

3398+
extern int jl_heartbeat_pause(void);
3399+
extern int jl_heartbeat_resume(void);
3400+
33983401
JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
33993402
{
34003403
JL_PROBE_GC_BEGIN(collection);
@@ -3437,6 +3440,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
34373440
// existence of the thread in the jl_n_threads count.
34383441
//
34393442
// TODO: concurrently queue objects
3443+
jl_heartbeat_pause();
34403444
jl_fence();
34413445
gc_n_threads = jl_atomic_load_acquire(&jl_n_threads);
34423446
gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states);
@@ -3468,6 +3472,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
34683472

34693473
gc_n_threads = 0;
34703474
gc_all_tls_states = NULL;
3475+
jl_heartbeat_resume();
34713476
jl_safepoint_end_gc();
34723477
jl_gc_state_set(ptls, old_state, JL_GC_STATE_WAITING);
34733478
JL_PROBE_GC_END();

src/gc-stock.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -499,6 +499,9 @@ extern uv_sem_t gc_sweep_assists_needed;
499499
extern _Atomic(int) gc_n_threads_marking;
500500
extern _Atomic(int) gc_n_threads_sweeping_pools;
501501
extern _Atomic(int) n_threads_running;
502+
extern _Atomic(int) gc_n_threads_sweeping_stacks;
503+
extern _Atomic(int) gc_ptls_sweep_idx;
504+
extern _Atomic(int) gc_stack_free_idx;
502505
extern uv_barrier_t thread_init_done;
503506
void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq);
504507
void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t *fl_parent, jl_value_t **fl_begin, jl_value_t **fl_end) JL_NOTSAFEPOINT;

src/init.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -863,6 +863,8 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel)
863863
_finish_julia_init(rel, ptls, ct);
864864
}
865865

866+
void jl_init_heartbeat(void);
867+
866868
static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_task_t *ct)
867869
{
868870
JL_TIMING(JULIA_INIT, JULIA_INIT);
@@ -916,6 +918,11 @@ static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_
916918
jl_start_gc_threads();
917919
uv_barrier_wait(&thread_init_done);
918920

921+
if (jl_base_module != NULL) {
922+
// requires code in Base
923+
jl_init_heartbeat();
924+
}
925+
919926
jl_gc_enable(1);
920927

921928
if (jl_options.image_file && (!jl_generating_output() || jl_options.incremental) && jl_module_init_order) {

src/julia_internal.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,7 @@ extern volatile size_t profile_bt_size_max;
224224
extern volatile size_t profile_bt_size_cur;
225225
extern volatile int profile_running;
226226
extern volatile int profile_all_tasks;
227+
extern int heartbeat_tid; // Mostly used to ensure we skip this thread in the CPU profiler. XXX: not implemented on Windows
227228
// Ensures that we can safely read the `live_tasks`field of every TLS when profiling.
228229
// We want to avoid the case that a GC gets interleaved with `jl_profile_task` and shrinks
229230
// the `live_tasks` array while we are reading it or frees tasks that are being profiled.
@@ -239,6 +240,7 @@ extern uv_mutex_t bt_data_prof_lock;
239240
#define PROFILE_STATE_THREAD_NOT_SLEEPING (1)
240241
#define PROFILE_STATE_THREAD_SLEEPING (2)
241242
#define PROFILE_STATE_WALL_TIME_PROFILING (3)
243+
extern _Atomic(int) n_threads_running;
242244
void jl_profile_task(void);
243245

244246
// number of cycles since power-on

src/options.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,9 @@
144144
#define MACHINE_EXCLUSIVE_NAME "JULIA_EXCLUSIVE"
145145
#define DEFAULT_MACHINE_EXCLUSIVE 0
146146

147+
// heartbeats
148+
#define JL_HEARTBEAT_THREAD
149+
147150
// sanitizer defaults ---------------------------------------------------------
148151

149152
// Automatically enable MEMDEBUG and KEEP_BODIES for the sanitizers

src/signals-mach.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -826,6 +826,10 @@ void *mach_profile_listener(void *arg)
826826
for (int idx = nthreads; idx-- > 0; ) {
827827
// Stop the threads in random order.
828828
int i = randperm[idx];
829+
// skip heartbeat thread
830+
if (i == heartbeat_tid) {
831+
continue;
832+
}
829833
jl_profile_thread_mach(i);
830834
}
831835
}

src/signals-unix.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -872,6 +872,10 @@ static void do_profile(void *ctx)
872872
for (int idx = nthreads; idx-- > 0; ) {
873873
// Stop the threads in random order.
874874
int tid = randperm[idx];
875+
// skip the heartbeat thread, but keep sampling the remaining threads
876+
if (tid == heartbeat_tid) {
877+
continue;
878+
}
875879
// do backtrace for profiler
876880
if (!profile_running)
877881
return;

src/stackwalk.c

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1398,9 +1398,22 @@ JL_DLLEXPORT void jl_print_backtrace(void) JL_NOTSAFEPOINT
13981398
jlbacktrace();
13991399
}
14001400

1401-
// Print backtraces for all live tasks, for all threads, to jl_safe_printf stderr
1401+
extern int jl_inside_heartbeat_thread(void);
1402+
extern int jl_heartbeat_pause(void);
1403+
extern int jl_heartbeat_resume(void);
1404+
1405+
// Print backtraces for all live tasks, for all threads, to jl_safe_printf
1406+
// stderr. This can take a _long_ time!
14021407
JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT
14031408
{
1409+
// disable heartbeats to prevent heartbeat loss while running this,
1410+
// unless this is called from the heartbeat thread itself; in that
1411+
// situation, the thread is busy running this and it will not be
1412+
// updating the missed heartbeats counter
1413+
if (!jl_inside_heartbeat_thread()) {
1414+
jl_heartbeat_pause();
1415+
}
1416+
14041417
size_t nthreads = jl_atomic_load_acquire(&jl_n_threads);
14051418
jl_ptls_t *allstates = jl_atomic_load_relaxed(&jl_all_tls_states);
14061419
int ctid = jl_threadid() + 1;
@@ -1459,6 +1472,10 @@ JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT
14591472
jl_safe_printf("thread (%d) ==== End thread %d\n", ctid, ptls2->tid + 1);
14601473
}
14611474
jl_safe_printf("thread (%d) ++++ Done\n", ctid);
1475+
1476+
if (!jl_inside_heartbeat_thread()) {
1477+
jl_heartbeat_resume();
1478+
}
14621479
}
14631480

14641481
#ifdef __cplusplus

0 commit comments

Comments
 (0)