Skip to content

Commit 38c5cb3

Browse files
kpamnany authored and
RAI CI (GitHub Action Automation) committed
RAI: Add heartbeat capability
1 parent 430f89e commit 38c5cb3

File tree

9 files changed

+329
-1
lines changed

9 files changed

+329
-1
lines changed

src/gc-stock.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3399,6 +3399,9 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
33993399
return recollect;
34003400
}
34013401

3402+
extern int jl_heartbeat_pause(void);
3403+
extern int jl_heartbeat_resume(void);
3404+
34023405
JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
34033406
{
34043407
JL_PROBE_GC_BEGIN(collection);
@@ -3441,6 +3444,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
34413444
// existence of the thread in the jl_n_threads count.
34423445
//
34433446
// TODO: concurrently queue objects
3447+
jl_heartbeat_pause();
34443448
jl_fence();
34453449
gc_n_threads = jl_atomic_load_acquire(&jl_n_threads);
34463450
gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states);
@@ -3472,6 +3476,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
34723476

34733477
gc_n_threads = 0;
34743478
gc_all_tls_states = NULL;
3479+
jl_heartbeat_resume();
34753480
jl_safepoint_end_gc();
34763481
jl_gc_state_set(ptls, old_state, JL_GC_STATE_WAITING);
34773482
JL_PROBE_GC_END();

src/gc-stock.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -499,6 +499,9 @@ extern uv_sem_t gc_sweep_assists_needed;
499499
extern _Atomic(int) gc_n_threads_marking;
500500
extern _Atomic(int) gc_n_threads_sweeping_pools;
501501
extern _Atomic(int) n_threads_running;
502+
extern _Atomic(int) gc_n_threads_sweeping_stacks;
503+
extern _Atomic(int) gc_ptls_sweep_idx;
504+
extern _Atomic(int) gc_stack_free_idx;
502505
extern uv_barrier_t thread_init_done;
503506
void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq);
504507
void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t *fl_parent, jl_value_t **fl_begin, jl_value_t **fl_end) JL_NOTSAFEPOINT;

src/init.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -864,6 +864,8 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel)
864864
_finish_julia_init(rel, ptls, ct);
865865
}
866866

867+
void jl_init_heartbeat(void);
868+
867869
static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_task_t *ct)
868870
{
869871
JL_TIMING(JULIA_INIT, JULIA_INIT);
@@ -917,6 +919,11 @@ static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_
917919
jl_start_gc_threads();
918920
uv_barrier_wait(&thread_init_done);
919921

922+
if (jl_base_module != NULL) {
923+
// requires code in Base
924+
jl_init_heartbeat();
925+
}
926+
920927
jl_gc_enable(1);
921928

922929
if (jl_options.image_file && (!jl_generating_output() || jl_options.incremental) && jl_module_init_order) {

src/julia_internal.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,7 @@ extern volatile size_t profile_bt_size_max;
224224
extern volatile size_t profile_bt_size_cur;
225225
extern volatile int profile_running;
226226
extern volatile int profile_all_tasks;
227+
extern int heartbeat_tid; // Mostly used to ensure we skip this thread in the CPU profiler. XXX: not implemented on Windows
227228
// Ensures that we can safely read the `live_tasks` field of every TLS when profiling.
228229
// We want to avoid the case that a GC gets interleaved with `jl_profile_task` and shrinks
229230
// the `live_tasks` array while we are reading it or frees tasks that are being profiled.
@@ -239,6 +240,7 @@ extern uv_mutex_t bt_data_prof_lock;
239240
#define PROFILE_STATE_THREAD_NOT_SLEEPING (1)
240241
#define PROFILE_STATE_THREAD_SLEEPING (2)
241242
#define PROFILE_STATE_WALL_TIME_PROFILING (3)
243+
extern _Atomic(int) n_threads_running;
242244
void jl_profile_task(void);
243245

244246
// number of cycles since power-on

src/options.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,9 @@
144144
#define MACHINE_EXCLUSIVE_NAME "JULIA_EXCLUSIVE"
145145
#define DEFAULT_MACHINE_EXCLUSIVE 0
146146

147+
// heartbeats
148+
#define JL_HEARTBEAT_THREAD
149+
147150
// sanitizer defaults ---------------------------------------------------------
148151

149152
// Automatically enable MEMDEBUG and KEEP_BODIES for the sanitizers

src/signals-mach.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -826,6 +826,10 @@ void *mach_profile_listener(void *arg)
826826
for (int idx = nthreads; idx-- > 0; ) {
827827
// Stop the threads in random order.
828828
int i = randperm[idx];
829+
// skip heartbeat thread
830+
if (i == heartbeat_tid) {
831+
continue;
832+
}
829833
jl_profile_thread_mach(i);
830834
}
831835
}

src/signals-unix.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -852,6 +852,10 @@ static void do_profile(void *ctx)
852852
for (int idx = nthreads; idx-- > 0; ) {
853853
// Stop the threads in random order.
854854
int tid = randperm[idx];
855+
// skip the heartbeat thread but keep sampling the remaining threads
856+
if (tid == heartbeat_tid) {
857+
continue;
858+
}
855859
// do backtrace for profiler
856860
if (!profile_running)
857861
return;

src/stackwalk.c

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1398,9 +1398,22 @@ JL_DLLEXPORT void jl_print_backtrace(void) JL_NOTSAFEPOINT
13981398
jlbacktrace();
13991399
}
14001400

1401-
// Print backtraces for all live tasks, for all threads, to jl_safe_printf stderr
1401+
extern int jl_inside_heartbeat_thread(void);
1402+
extern int jl_heartbeat_pause(void);
1403+
extern int jl_heartbeat_resume(void);
1404+
1405+
// Print backtraces for all live tasks, for all threads, to stderr via
1406+
// jl_safe_printf. This can take a _long_ time!
14021407
JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT
14031408
{
1409+
// disable heartbeats to prevent heartbeat loss while running this,
1410+
// unless this is called from the heartbeat thread itself; in that
1411+
// situation, the thread is busy running this and it will not be
1412+
// updating the missed heartbeats counter
1413+
if (!jl_inside_heartbeat_thread()) {
1414+
jl_heartbeat_pause();
1415+
}
1416+
14041417
size_t nthreads = jl_atomic_load_acquire(&jl_n_threads);
14051418
jl_ptls_t *allstates = jl_atomic_load_relaxed(&jl_all_tls_states);
14061419
int ctid = jl_threadid() + 1;
@@ -1459,6 +1472,10 @@ JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT
14591472
jl_safe_printf("thread (%d) ==== End thread %d\n", ctid, ptls2->tid + 1);
14601473
}
14611474
jl_safe_printf("thread (%d) ++++ Done\n", ctid);
1475+
1476+
if (!jl_inside_heartbeat_thread()) {
1477+
jl_heartbeat_resume();
1478+
}
14621479
}
14631480

14641481
#ifdef __cplusplus

0 commit comments

Comments
 (0)