Skip to content

Commit f65e6ce

Browse files
kpamnany authored and RAI CI (GitHub Action Automation) committed
RAI: Add heartbeat capability
1 parent da68c78 commit f65e6ce

File tree

9 files changed

+329
-1
lines changed

9 files changed

+329
-1
lines changed

src/gc-stock.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3397,6 +3397,9 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
33973397
return recollect;
33983398
}
33993399

3400+
extern int jl_heartbeat_pause(void);
3401+
extern int jl_heartbeat_resume(void);
3402+
34003403
JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
34013404
{
34023405
JL_PROBE_GC_BEGIN(collection);
@@ -3439,6 +3442,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
34393442
// existence of the thread in the jl_n_threads count.
34403443
//
34413444
// TODO: concurrently queue objects
3445+
jl_heartbeat_pause();
34423446
jl_fence();
34433447
gc_n_threads = jl_atomic_load_acquire(&jl_n_threads);
34443448
gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states);
@@ -3470,6 +3474,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
34703474

34713475
gc_n_threads = 0;
34723476
gc_all_tls_states = NULL;
3477+
jl_heartbeat_resume();
34733478
jl_safepoint_end_gc();
34743479
jl_gc_state_set(ptls, old_state, JL_GC_STATE_WAITING);
34753480
JL_PROBE_GC_END();

src/gc-stock.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -499,6 +499,9 @@ extern uv_sem_t gc_sweep_assists_needed;
499499
extern _Atomic(int) gc_n_threads_marking;
500500
extern _Atomic(int) gc_n_threads_sweeping_pools;
501501
extern _Atomic(int) n_threads_running;
502+
extern _Atomic(int) gc_n_threads_sweeping_stacks;
503+
extern _Atomic(int) gc_ptls_sweep_idx;
504+
extern _Atomic(int) gc_stack_free_idx;
502505
extern uv_barrier_t thread_init_done;
503506
void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq);
504507
void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t *fl_parent, jl_value_t **fl_begin, jl_value_t **fl_end) JL_NOTSAFEPOINT;

src/init.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -553,6 +553,8 @@ extern jl_mutex_t newly_inferred_mutex;
553553
extern jl_mutex_t global_roots_lock;
554554
extern jl_mutex_t profile_show_peek_cond_lock;
555555

556+
extern void jl_init_heartbeat(void);
557+
556558
static void restore_fp_env(void)
557559
{
558560
if (jl_set_zero_subnormals(0) || jl_set_default_nans(0)) {
@@ -612,6 +614,11 @@ static NOINLINE void _finish_jl_init_(jl_image_buf_t sysimage, jl_ptls_t ptls, j
612614
jl_start_gc_threads();
613615
uv_barrier_wait(&thread_init_done);
614616

617+
if (jl_base_module != NULL) {
618+
// requires code in Base
619+
jl_init_heartbeat();
620+
}
621+
615622
jl_gc_enable(1);
616623

617624
if ((sysimage.kind != JL_IMAGE_KIND_NONE) &&

src/julia_internal.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,7 @@ extern volatile size_t profile_bt_size_max;
230230
extern volatile size_t profile_bt_size_cur;
231231
extern volatile int profile_running;
232232
extern volatile int profile_all_tasks;
233+
extern int heartbeat_tid; // Mostly used to ensure we skip this thread in the CPU profiler. XXX: not implemented on Windows
233234
// Ensures that we can safely read the `live_tasks` field of every TLS when profiling.
234235
// We want to avoid the case that a GC gets interleaved with `jl_profile_task` and shrinks
235236
// the `live_tasks` array while we are reading it or frees tasks that are being profiled.
@@ -245,6 +246,7 @@ extern uv_mutex_t bt_data_prof_lock;
245246
#define PROFILE_STATE_THREAD_NOT_SLEEPING (1)
246247
#define PROFILE_STATE_THREAD_SLEEPING (2)
247248
#define PROFILE_STATE_WALL_TIME_PROFILING (3)
249+
extern _Atomic(int) n_threads_running;
248250
void jl_profile_task(void);
249251

250252
// number of cycles since power-on

src/options.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,9 @@
144144
#define MACHINE_EXCLUSIVE_NAME "JULIA_EXCLUSIVE"
145145
#define DEFAULT_MACHINE_EXCLUSIVE 0
146146

147+
// heartbeats
148+
#define JL_HEARTBEAT_THREAD
149+
147150
// sanitizer defaults ---------------------------------------------------------
148151

149152
// Automatically enable MEMDEBUG and KEEP_BODIES for the sanitizers

src/signals-mach.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -824,6 +824,10 @@ void *mach_profile_listener(void *arg)
824824
for (int idx = nthreads; idx-- > 0; ) {
825825
// Stop the threads in random order.
826826
int i = randperm[idx];
827+
// skip heartbeat thread
828+
if (i == heartbeat_tid) {
829+
continue;
830+
}
827831
jl_profile_thread_mach(i);
828832
}
829833
}

src/signals-unix.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -873,6 +873,10 @@ static void do_profile(void *ctx)
873873
for (int idx = nthreads; idx-- > 0; ) {
874874
// Stop the threads in the random order.
875875
int tid = randperm[idx];
876+
// skip heartbeat thread
877+
if (tid == heartbeat_tid) {
878+
return;
879+
}
876880
// do backtrace for profiler
877881
if (!profile_running)
878882
return;

src/stackwalk.c

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1410,9 +1410,22 @@ JL_DLLEXPORT void jl_print_backtrace(void) JL_NOTSAFEPOINT
14101410
jlbacktrace();
14111411
}
14121412

1413-
// Print backtraces for all live tasks, for all threads, to jl_safe_printf stderr
1413+
extern int jl_inside_heartbeat_thread(void);
1414+
extern int jl_heartbeat_pause(void);
1415+
extern int jl_heartbeat_resume(void);
1416+
1417+
// Print backtraces for all live tasks, for all threads, to jl_safe_printf
1418+
// stderr. This can take a _long_ time!
14141419
JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT
14151420
{
1421+
// disable heartbeats to prevent heartbeat loss while running this,
1422+
// unless this is called from the heartbeat thread itself; in that
1423+
// situation, the thread is busy running this and it will not be
1424+
// updating the missed heartbeats counter
1425+
if (!jl_inside_heartbeat_thread()) {
1426+
jl_heartbeat_pause();
1427+
}
1428+
14161429
size_t nthreads = jl_atomic_load_acquire(&jl_n_threads);
14171430
jl_ptls_t *allstates = jl_atomic_load_relaxed(&jl_all_tls_states);
14181431
int ctid = jl_threadid() + 1;
@@ -1471,6 +1484,10 @@ JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT
14711484
jl_safe_printf("thread (%d) ==== End thread %d\n", ctid, ptls2->tid + 1);
14721485
}
14731486
jl_safe_printf("thread (%d) ++++ Done\n", ctid);
1487+
1488+
if (!jl_inside_heartbeat_thread()) {
1489+
jl_heartbeat_resume();
1490+
}
14741491
}
14751492

14761493
#ifdef __cplusplus

0 commit comments

Comments
 (0)