
Commit faf17eb

create GC TLS (#55086)
Encapsulates all relevant GC thread-local state into a separate structure. The motivation is that MMTk will have its own version of GC thread-local state and therefore does not need all of the stock Julia GC TLS. In the future, builds that use MMTk would set a preprocessor flag so that either the stock Julia GC TLS or MMTk's GC TLS is included in `julia_threads.h`, i.e., we would have something like:

```C
#ifdef MMTK_GC
    jl_gc_mmtk_tls_states mmtk_gc_tls;
#else
    jl_gc_tls_states gc_tls;
#endif
```
1 parent 3ab8fef commit faf17eb
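
To make the build-time selection concrete, here is a minimal, self-contained sketch of that pattern. It is illustration only: the `demo_*` names and their fields are hypothetical stand-ins, not the real Julia definitions (the stock layout corresponds to `jl_gc_tls_states_t` in `src/gc-tls.h`; the MMTk one does not exist yet).

```c
#include <stdio.h>

/* Hypothetical stand-ins for the two GC TLS layouts. */
typedef struct { int pool_count; } demo_gc_tls_states_t;
typedef struct { int mmtk_mutator_id; } demo_gc_mmtk_tls_states_t;

/* Per-thread state embeds exactly one of the two, chosen at build time,
 * mirroring the #ifdef MMTK_GC snippet in the commit description. */
typedef struct {
    int tid;
#ifdef MMTK_GC
    demo_gc_mmtk_tls_states_t mmtk_gc_tls;
#else
    demo_gc_tls_states_t gc_tls;
#endif
} demo_tls_states_t;

int main(void)
{
    demo_tls_states_t tls = {0};
#ifdef MMTK_GC
    tls.mmtk_gc_tls.mmtk_mutator_id = 1;
    printf("built with -DMMTK_GC: MMTk GC TLS embedded\n");
#else
    tls.gc_tls.pool_count = 51;
    printf("default build: stock Julia GC TLS embedded\n");
#endif
    return 0;
}
```

Compiling the sketch with `-DMMTK_GC` embeds the MMTk layout; the default build embeds the stock one.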

10 files changed (+301, -277 lines)


src/Makefile

Lines changed: 1 addition & 1 deletion
```diff
@@ -103,7 +103,7 @@ ifeq ($(USE_SYSTEM_LIBUV),0)
 UV_HEADERS += uv.h
 UV_HEADERS += uv/*.h
 endif
-PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,work-stealing-queue.h julia.h julia_assert.h julia_threads.h julia_fasttls.h julia_locks.h julia_atomics.h jloptions.h)
+PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,work-stealing-queue.h gc-tls.h julia.h julia_assert.h julia_threads.h julia_fasttls.h julia_locks.h julia_atomics.h jloptions.h)
 ifeq ($(OS),WINNT)
 PUBLIC_HEADERS += $(addprefix $(SRCDIR)/,win32_ucontext.h)
 endif
```

src/array.c

Lines changed: 1 addition & 1 deletion
```diff
@@ -307,7 +307,7 @@ JL_DLLEXPORT jl_value_t *jl_alloc_string(size_t len)
     const size_t allocsz = sz + sizeof(jl_taggedvalue_t);
     if (sz <= GC_MAX_SZCLASS) {
         int pool_id = jl_gc_szclass_align8(allocsz);
-        jl_gc_pool_t *p = &ptls->heap.norm_pools[pool_id];
+        jl_gc_pool_t *p = &ptls->gc_tls.heap.norm_pools[pool_id];
         int osize = jl_gc_sizeclasses[pool_id];
         // We call `jl_gc_pool_alloc_noinline` instead of `jl_gc_pool_alloc` to avoid double-counting in
         // the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.)
```

src/gc-debug.c

Lines changed: 16 additions & 16 deletions
```diff
@@ -99,7 +99,7 @@ static arraylist_t bits_save[4];
 static void gc_clear_mark_page(jl_gc_pagemeta_t *pg, int bits)
 {
     jl_ptls_t ptls2 = gc_all_tls_states[pg->thread_n];
-    jl_gc_pool_t *pool = &ptls2->heap.norm_pools[pg->pool_n];
+    jl_gc_pool_t *pool = &ptls2->gc_tls.heap.norm_pools[pg->pool_n];
     jl_taggedvalue_t *pv = (jl_taggedvalue_t*)(pg->data + GC_PAGE_OFFSET);
     char *lim = (char*)pv + GC_PAGE_SZ - GC_PAGE_OFFSET - pool->osize;
     while ((char*)pv <= lim) {
@@ -114,7 +114,7 @@ static void gc_clear_mark_outer(int bits)
 {
     for (int i = 0; i < gc_n_threads; i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[i];
-        jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->page_metadata_allocd.bottom);
+        jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->gc_tls.page_metadata_allocd.bottom);
         while (pg != NULL) {
             gc_clear_mark_page(pg, bits);
             pg = pg->next;
@@ -134,7 +134,7 @@ static void clear_mark(int bits)
     }
     bigval_t *v;
     for (int i = 0; i < gc_n_threads; i++) {
-        v = gc_all_tls_states[i]->heap.big_objects;
+        v = gc_all_tls_states[i]->gc_tls.heap.big_objects;
         while (v != NULL) {
             void *gcv = &v->header;
             if (!gc_verifying)
@@ -172,7 +172,7 @@ static void gc_verify_track(jl_ptls_t ptls)
         return;
     do {
         jl_gc_markqueue_t mq;
-        jl_gc_markqueue_t *mq2 = &ptls->mark_queue;
+        jl_gc_markqueue_t *mq2 = &ptls->gc_tls.mark_queue;
         ws_queue_t *cq = &mq.chunk_queue;
         ws_queue_t *q = &mq.ptr_queue;
         jl_atomic_store_relaxed(&cq->top, 0);
@@ -232,7 +232,7 @@ void gc_verify(jl_ptls_t ptls)
         return;
     }
     jl_gc_markqueue_t mq;
-    jl_gc_markqueue_t *mq2 = &ptls->mark_queue;
+    jl_gc_markqueue_t *mq2 = &ptls->gc_tls.mark_queue;
     ws_queue_t *cq = &mq.chunk_queue;
     ws_queue_t *q = &mq.ptr_queue;
     jl_atomic_store_relaxed(&cq->top, 0);
@@ -291,7 +291,7 @@ static void gc_verify_tags_page(jl_gc_pagemeta_t *pg)
     int p_n = pg->pool_n;
     int t_n = pg->thread_n;
     jl_ptls_t ptls2 = gc_all_tls_states[t_n];
-    jl_gc_pool_t *p = &ptls2->heap.norm_pools[p_n];
+    jl_gc_pool_t *p = &ptls2->gc_tls.heap.norm_pools[p_n];
     int osize = pg->osize;
     char *data = pg->data;
     char *page_begin = data + GC_PAGE_OFFSET;
@@ -353,7 +353,7 @@ static void gc_verify_tags_pagestack(void)
 {
     for (int i = 0; i < gc_n_threads; i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[i];
-        jl_gc_page_stack_t *pgstk = &ptls2->page_metadata_allocd;
+        jl_gc_page_stack_t *pgstk = &ptls2->gc_tls.page_metadata_allocd;
         jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&pgstk->bottom);
         while (pg != NULL) {
             gc_verify_tags_page(pg);
@@ -369,7 +369,7 @@ void gc_verify_tags(void)
         jl_ptls_t ptls2 = gc_all_tls_states[t_i];
         for (int i = 0; i < JL_GC_N_POOLS; i++) {
             // for all pools, iterate its freelist
-            jl_gc_pool_t *p = &ptls2->heap.norm_pools[i];
+            jl_gc_pool_t *p = &ptls2->gc_tls.heap.norm_pools[i];
             jl_taggedvalue_t *next = p->freelist;
             jl_taggedvalue_t *last = NULL;
             char *allocating = gc_page_data(next);
@@ -811,8 +811,8 @@ void gc_time_mark_pause(int64_t t0, int64_t scanned_bytes,
     int64_t remset_nptr = 0;
     for (int t_i = 0; t_i < gc_n_threads; t_i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[t_i];
-        last_remset_len += ptls2->heap.last_remset->len;
-        remset_nptr = ptls2->heap.remset_nptr;
+        last_remset_len += ptls2->gc_tls.heap.last_remset->len;
+        remset_nptr = ptls2->gc_tls.heap.remset_nptr;
     }
     jl_safe_printf("GC mark pause %.2f ms | "
                    "scanned %" PRId64 " kB = %" PRId64 " + %" PRId64 " | "
@@ -967,13 +967,13 @@ void gc_stats_all_pool(void)
     for (int i = 0; i < JL_GC_N_POOLS; i++) {
         for (int t_i = 0; t_i < gc_n_threads; t_i++) {
             jl_ptls_t ptls2 = gc_all_tls_states[t_i];
-            size_t b = pool_stats(&ptls2->heap.norm_pools[i], &w, &np, &nol);
+            size_t b = pool_stats(&ptls2->gc_tls.heap.norm_pools[i], &w, &np, &nol);
             nb += b;
-            no += (b / ptls2->heap.norm_pools[i].osize);
+            no += (b / ptls2->gc_tls.heap.norm_pools[i].osize);
             tw += w;
             tp += np;
             nold += nol;
-            noldbytes += nol * ptls2->heap.norm_pools[i].osize;
+            noldbytes += nol * ptls2->gc_tls.heap.norm_pools[i].osize;
         }
     }
     jl_safe_printf("%lld objects (%lld%% old), %lld kB (%lld%% old) total allocated, "
@@ -992,7 +992,7 @@ void gc_stats_big_obj(void)
     size_t nused=0, nbytes=0, nused_old=0, nbytes_old=0;
     for (int t_i = 0; t_i < gc_n_threads; t_i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[t_i];
-        bigval_t *v = ptls2->heap.big_objects;
+        bigval_t *v = ptls2->gc_tls.heap.big_objects;
         while (v != NULL) {
             if (gc_marked(v->bits.gc)) {
                 nused++;
@@ -1009,7 +1009,7 @@
             v = v->next;
         }

-        mallocarray_t *ma = ptls2->heap.mallocarrays;
+        mallocarray_t *ma = ptls2->gc_tls.heap.mallocarrays;
         while (ma != NULL) {
             if (gc_marked(jl_astaggedvalue(ma->a)->bits.gc)) {
                 nused++;
@@ -1055,7 +1055,7 @@ static void gc_count_pool_pagetable(void)
 {
     for (int i = 0; i < gc_n_threads; i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[i];
-        jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->page_metadata_allocd.bottom);
+        jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->gc_tls.page_metadata_allocd.bottom);
         while (pg != NULL) {
             if (gc_alloc_map_is_set(pg->data)) {
                 gc_count_pool_page(pg);
```

src/gc-stacks.c

Lines changed: 9 additions & 9 deletions
```diff
@@ -167,7 +167,7 @@ static void _jl_free_stack(jl_ptls_t ptls, void *stkbuf, size_t bufsz) JL_NOTSAF
     if (bufsz <= pool_sizes[JL_N_STACK_POOLS - 1]) {
         unsigned pool_id = select_pool(bufsz);
         if (pool_sizes[pool_id] == bufsz) {
-            small_arraylist_push(&ptls->heap.free_stacks[pool_id], stkbuf);
+            small_arraylist_push(&ptls->gc_tls.heap.free_stacks[pool_id], stkbuf);
             return;
         }
     }
@@ -196,7 +196,7 @@ void jl_release_task_stack(jl_ptls_t ptls, jl_task_t *task)
 #ifdef _COMPILER_ASAN_ENABLED_
             __asan_unpoison_stack_memory((uintptr_t)stkbuf, bufsz);
 #endif
-            small_arraylist_push(&ptls->heap.free_stacks[pool_id], stkbuf);
+            small_arraylist_push(&ptls->gc_tls.heap.free_stacks[pool_id], stkbuf);
         }
     }
 }
@@ -211,7 +211,7 @@ JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner) JL_NOTSAFEPO
     if (ssize <= pool_sizes[JL_N_STACK_POOLS - 1]) {
         unsigned pool_id = select_pool(ssize);
         ssize = pool_sizes[pool_id];
-        small_arraylist_t *pool = &ptls->heap.free_stacks[pool_id];
+        small_arraylist_t *pool = &ptls->gc_tls.heap.free_stacks[pool_id];
         if (pool->len > 0) {
             stk = small_arraylist_pop(pool);
         }
@@ -232,7 +232,7 @@ JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner) JL_NOTSAFEPO
     }
     *bufsz = ssize;
     if (owner) {
-        small_arraylist_t *live_tasks = &ptls->heap.live_tasks;
+        small_arraylist_t *live_tasks = &ptls->gc_tls.heap.live_tasks;
         mtarraylist_push(live_tasks, owner);
     }
     return stk;
@@ -259,7 +259,7 @@ void sweep_stack_pools(void) JL_NOTSAFEPOINT

         // free half of stacks that remain unused since last sweep
         for (int p = 0; p < JL_N_STACK_POOLS; p++) {
-            small_arraylist_t *al = &ptls2->heap.free_stacks[p];
+            small_arraylist_t *al = &ptls2->gc_tls.heap.free_stacks[p];
             size_t n_to_free;
             if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) {
                 n_to_free = al->len; // not alive yet or dead, so it does not need these anymore
@@ -281,10 +281,10 @@ void sweep_stack_pools(void) JL_NOTSAFEPOINT
             }
         }
         if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) {
-            small_arraylist_free(ptls2->heap.free_stacks);
+            small_arraylist_free(ptls2->gc_tls.heap.free_stacks);
         }

-        small_arraylist_t *live_tasks = &ptls2->heap.live_tasks;
+        small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks;
         size_t n = 0;
         size_t ndel = 0;
         size_t l = live_tasks->len;
@@ -339,7 +339,7 @@ JL_DLLEXPORT jl_array_t *jl_live_tasks(void)
         jl_ptls_t ptls2 = allstates[i];
         if (ptls2 == NULL)
             continue;
-        small_arraylist_t *live_tasks = &ptls2->heap.live_tasks;
+        small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks;
         size_t n = mtarraylist_length(live_tasks);
         l += n + (ptls2->root_task->stkbuf != NULL);
     }
@@ -362,7 +362,7 @@ JL_DLLEXPORT jl_array_t *jl_live_tasks(void)
                 goto restart;
             jl_array_data(a,void*)[j++] = t;
         }
-        small_arraylist_t *live_tasks = &ptls2->heap.live_tasks;
+        small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks;
         size_t n = mtarraylist_length(live_tasks);
         for (size_t i = 0; i < n; i++) {
             jl_task_t *t = (jl_task_t*)mtarraylist_get(live_tasks, i);
```

src/gc-tls.h

Lines changed: 103 additions & 0 deletions
New file (103 added lines):

```c
// This file is a part of Julia. License is MIT: https://julialang.org/license

// Meant to be included in "julia_threads.h"
#ifndef JL_GC_TLS_H
#define JL_GC_TLS_H

#include "julia_atomics.h"
#include "work-stealing-queue.h"
// GC threading ------------------------------------------------------------------

#include "arraylist.h"

#ifdef __cplusplus
extern "C" {
#endif

typedef struct {
    struct _jl_taggedvalue_t *freelist;   // root of list of free objects
    struct _jl_taggedvalue_t *newpages;   // root of list of chunks of free objects
    uint16_t osize;                       // size of objects in this pool
} jl_gc_pool_t;

typedef struct {
    // variable for tracking weak references
    small_arraylist_t weak_refs;
    // live tasks started on this thread
    // that are holding onto a stack from the pool
    small_arraylist_t live_tasks;

    // variables for tracking malloc'd arrays
    struct _mallocarray_t *mallocarrays;
    struct _mallocarray_t *mafreelist;

    // variables for tracking big objects
    struct _bigval_t *big_objects;

    // lower bound of the number of pointers inside remembered values
    int remset_nptr;
    // remembered set
    arraylist_t remset;

    // variables for allocating objects from pools
#define JL_GC_N_MAX_POOLS 51 // conservative. must be kept in sync with `src/julia_internal.h`
    jl_gc_pool_t norm_pools[JL_GC_N_MAX_POOLS];

#define JL_N_STACK_POOLS 16
    small_arraylist_t free_stacks[JL_N_STACK_POOLS];
} jl_thread_heap_t;

typedef struct {
    _Atomic(int64_t) allocd;
    _Atomic(int64_t) pool_live_bytes;
    _Atomic(uint64_t) malloc;
    _Atomic(uint64_t) realloc;
    _Atomic(uint64_t) poolalloc;
    _Atomic(uint64_t) bigalloc;
    _Atomic(int64_t) free_acc;
    _Atomic(uint64_t) alloc_acc;
} jl_thread_gc_num_t;

typedef struct {
    ws_queue_t chunk_queue;
    ws_queue_t ptr_queue;
    arraylist_t reclaim_set;
} jl_gc_markqueue_t;

typedef struct {
    // thread local increment of `perm_scanned_bytes`
    size_t perm_scanned_bytes;
    // thread local increment of `scanned_bytes`
    size_t scanned_bytes;
    // Number of queued big objects (<= 1024)
    size_t nbig_obj;
    // Array of queued big objects to be moved between the young list
    // and the old list.
    // A set low bit means that the object should be moved from the old list
    // to the young list (`mark_reset_age`).
    // Objects can only be put into this list when the mark bit is flipped to
    // `1` (atomically). Combining with the sync after marking,
    // this makes sure that a single objects can only appear once in
    // the lists (the mark bit cannot be flipped to `0` without sweeping)
    void *big_obj[1024];
} jl_gc_mark_cache_t;

typedef struct {
    _Atomic(struct _jl_gc_pagemeta_t *) bottom;
} jl_gc_page_stack_t;

typedef struct {
    jl_thread_heap_t heap;
    jl_gc_page_stack_t page_metadata_allocd;
    jl_thread_gc_num_t gc_num;
    jl_gc_markqueue_t mark_queue;
    jl_gc_mark_cache_t gc_cache;
    _Atomic(size_t) gc_sweeps_requested;
    arraylist_t sweep_objs;
} jl_gc_tls_states_t;

#ifdef __cplusplus
}
#endif

#endif // JL_GC_TLS_H
```
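
For context on how this new aggregate is consumed, below is a minimal, self-contained sketch. The `demo_*` types are hypothetical cut-down stand-ins for the structs above; in the actual change, `jl_gc_tls_states_t` is embedded as a `gc_tls` field of the per-thread state in `julia_threads.h`, which is why call sites in the diffs above now read `ptls->gc_tls.heap....` instead of `ptls->heap....`.

```c
#include <stdint.h>
#include <stdio.h>

/* Hypothetical, cut-down stand-ins for jl_gc_pool_t, jl_thread_heap_t,
 * and jl_gc_tls_states_t defined in gc-tls.h above. */
typedef struct { uint16_t osize; } demo_gc_pool_t;
typedef struct { demo_gc_pool_t norm_pools[51]; } demo_thread_heap_t;
typedef struct { demo_thread_heap_t heap; } demo_gc_tls_states_t;

/* Stand-in for the per-thread state: all GC-specific thread-local
 * state now sits behind a single gc_tls field. */
typedef struct {
    int16_t tid;
    demo_gc_tls_states_t gc_tls;
} demo_tls_states_t;

int main(void)
{
    demo_tls_states_t tls = {0};
    demo_tls_states_t *ptls = &tls;
    int pool_id = 0;
    /* Access pattern after this commit; before it, the equivalent was
     * ptls->heap.norm_pools[pool_id].osize (see the src/array.c hunk). */
    ptls->gc_tls.heap.norm_pools[pool_id].osize = 16;
    printf("pool %d osize = %d\n", pool_id,
           (int)ptls->gc_tls.heap.norm_pools[pool_id].osize);
    return 0;
}
```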
