Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 0 additions & 7 deletions src/gc-debug.c
Original file line number Diff line number Diff line change
Expand Up @@ -745,13 +745,6 @@ void gc_time_pool_end(int sweep_full)
sweep_full ? "full" : "quick");
}

// Report the duration of the system-image sweep.
// `t0` is the jl_hrtime() reading captured before the sweep started; the
// difference to the current reading is printed in milliseconds.
void gc_time_sysimg_end(uint64_t t0)
{
    const uint64_t elapsed_ns = jl_hrtime() - t0;
    // Convert to seconds first, then to milliseconds, for the log line.
    const double elapsed_sec = elapsed_ns / 1e9;
    jl_safe_printf("GC sweep sysimg end %.2f ms\n", elapsed_sec * 1000);
}

static int64_t big_total;
static int64_t big_freed;
static int64_t big_reset;
Expand Down
47 changes: 38 additions & 9 deletions src/gc-stock.c
Original file line number Diff line number Diff line change
Expand Up @@ -1483,13 +1483,6 @@ static void gc_sweep_pool(void) JL_NOTSAFEPOINT
gc_time_pool_end(current_sweep_full);
}

// Run gc_sweep_sysimg() and hand the start timestamp to
// gc_time_sysimg_end() so the elapsed time can be reported.
static void gc_sweep_perm_alloc(void) JL_NOTSAFEPOINT
{
    const uint64_t sweep_start = jl_hrtime();
    gc_sweep_sysimg();
    gc_time_sysimg_end(sweep_start);
}

// mark phase

JL_DLLEXPORT void jl_gc_queue_root(const jl_value_t *ptr)
Expand All @@ -1504,6 +1497,18 @@ JL_DLLEXPORT void jl_gc_queue_root(const jl_value_t *ptr)
if (header & GC_OLD) { // write barrier has not been triggered in this object yet
arraylist_push(&ptls->gc_tls.heap.remset, (jl_value_t*)ptr);
ptls->gc_tls.heap.remset_nptr++; // conservative
// Permanently-marked image objects that are mutated need to be
// persistently tracked, since they would otherwise be skipped
// during the mark phase. The image_remset is append-only, so once
// recorded here this object is re-queued by gc_queue_image_remset
// on every collection that calls it (those following a full sweep).
// The lookup before insertion deduplicates entries, which prevents
// unbounded growth from repeated mutations of the same image object.
if (__unlikely(o->bits.in_image)) {
JL_LOCK_NOGC(&image_remset_lock);
if (ptrhash_get(&image_remset, (void*)ptr) == HT_NOTFOUND)
ptrhash_put(&image_remset, (void*)ptr, (void*)ptr);
JL_UNLOCK_NOGC(&image_remset_lock);
}
}
}

Expand Down Expand Up @@ -2827,6 +2832,24 @@ static void gc_queue_remset(jl_gc_markqueue_t *mq, jl_ptls_t ptls2) JL_NOTSAFEPO
ptls2->gc_tls.heap.remset_nptr = 0;
}

// Push every object recorded in image_remset onto the mark queue `mq`.
// Entries are never removed from image_remset, so each call re-queues all
// image objects that were recorded as holding cross-heap references; this
// matters because image objects are permanently marked and would otherwise
// be skipped by the mark phase.
static void gc_queue_image_remset(jl_gc_markqueue_t *mq) JL_NOTSAFEPOINT
{
    void **slots = image_remset.table;
    size_t nslots = image_remset.size;
    // The table is walked with a stride of two, reading the entry at each
    // even index.
    for (size_t k = 0; k < nslots; k += 2) {
        void *obj = slots[k];
        if (obj == HT_NOTFOUND || obj == NULL)
            continue; // nothing stored in this slot
        // Set the GC bits to old+marked before queueing.
        jl_astaggedvalue(obj)->bits.gc = GC_OLD_MARKED;
        // Queue the pointer with the remset tag bit set.
        uintptr_t tagged = (uintptr_t)obj | GC_REMSET_PTR_TAG;
        gc_ptr_queue_push(mq, (jl_value_t *)tagged);
    }
}

static void gc_check_all_remsets_are_empty(void) JL_NOTSAFEPOINT
{
for (int i = 0; i < gc_n_threads; i++) {
Expand Down Expand Up @@ -3086,6 +3109,12 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) JL_NOTS
}
}
gc_check_all_remsets_are_empty();
// 1.4. queue image objects with cross-heap references.
// Only needed after a full sweep (which clears non-image objects'
// mark bits). After quick sweeps, old objects retain their marks,
// so children of image_remset entries survive without re-tracing.
if (prev_sweep_full)
gc_queue_image_remset(mq);

// 2. walk roots
gc_mark_roots(mq);
Expand Down Expand Up @@ -3213,8 +3242,6 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) JL_NOTS
gc_scrub();
gc_verify_tags();
gc_sweep_pool();
if (sweep_full)
gc_sweep_perm_alloc();
}

JL_PROBE_GC_SWEEP_END();
Expand Down Expand Up @@ -3737,6 +3764,8 @@ void jl_gc_init(void)
{
JL_MUTEX_INIT(&heapsnapshot_lock, "heapsnapshot_lock");
JL_MUTEX_INIT(&finalizers_lock, "finalizers_lock");
JL_MUTEX_INIT(&image_remset_lock, "image_remset_lock");
htable_new(&image_remset, 0);
uv_mutex_init(&page_profile_lock);
uv_mutex_init(&gc_perm_lock);
uv_mutex_init(&gc_pages_lock);
Expand Down
3 changes: 0 additions & 3 deletions src/gc-stock.h
Original file line number Diff line number Diff line change
Expand Up @@ -607,8 +607,6 @@ void gc_final_pause_end(int64_t t0, int64_t tend);
void gc_time_pool_start(void) JL_NOTSAFEPOINT;
void gc_time_count_page(int freedall, int pg_skpd) JL_NOTSAFEPOINT;
void gc_time_pool_end(int sweep_full) JL_NOTSAFEPOINT;
void gc_time_sysimg_end(uint64_t t0) JL_NOTSAFEPOINT;

void gc_time_big_start(void) JL_NOTSAFEPOINT;
void gc_time_count_big(int old_bits, int bits) JL_NOTSAFEPOINT;
void gc_time_big_end(void) JL_NOTSAFEPOINT;
Expand Down Expand Up @@ -641,7 +639,6 @@ STATIC_INLINE void gc_time_count_page(int freedall, int pg_skpd) JL_NOTSAFEPOINT
(void)pg_skpd;
}
#define gc_time_pool_end(sweep_full) (void)(sweep_full)
#define gc_time_sysimg_end(t0) (void)(t0)
#define gc_time_big_start()
STATIC_INLINE void gc_time_count_big(int old_bits, int bits) JL_NOTSAFEPOINT
{
Expand Down
21 changes: 12 additions & 9 deletions src/gc-wb-stock.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@ extern "C" {
STATIC_INLINE void jl_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT
{
// parent and ptr isa jl_value_t*
if (__unlikely(jl_astaggedvalue(parent)->bits.gc == 3 /* GC_OLD_MARKED */ && // parent is old and not in remset
(jl_astaggedvalue(ptr)->bits.gc & 1 /* GC_MARKED */) == 0)) // ptr is young
if (__unlikely(jl_astaggedvalue(parent)->bits.gc == 3 /* GC_OLD_MARKED */ &&
(jl_astaggedvalue(parent)->bits.in_image || // image parents are never fully traced
(jl_astaggedvalue(ptr)->bits.gc & 1 /* GC_MARKED */) == 0))) // ptr is young
jl_gc_queue_root((jl_value_t*)parent);
}

Expand All @@ -33,7 +34,7 @@ STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_
// ptr is an immutable object
if (__likely(jl_astaggedvalue(parent)->bits.gc != 3))
return; // parent is young or in remset
if (__likely(jl_astaggedvalue(ptr)->bits.gc == 3))
if (__likely(jl_astaggedvalue(ptr)->bits.gc == 3 && !jl_astaggedvalue(parent)->bits.in_image))
return; // ptr is old and not in remset (thus it does not point to young)
jl_datatype_t *dt = (jl_datatype_t*)jl_typeof(ptr);
const jl_datatype_layout_t *ly = dt->layout;
Expand All @@ -48,13 +49,14 @@ STATIC_INLINE void jl_gc_wb_genericmemory_copy_boxed(const jl_value_t *dest_owne
if (__unlikely(jl_astaggedvalue(dest_owner)->bits.gc == 3 /* GC_OLD_MARKED */ )) {
jl_value_t *src_owner = jl_genericmemory_owner(src);
size_t done = 0;
if (jl_astaggedvalue(src_owner)->bits.gc != 3 /* GC_OLD_MARKED */) {
int in_image = jl_astaggedvalue(dest_owner)->bits.in_image;
if (in_image || jl_astaggedvalue(src_owner)->bits.gc != 3 /* GC_OLD_MARKED */) {
if (dest_p < src_p || dest_p > src_p + (*n)) {
for (; done < (*n); done++) { // copy forwards
void *val = jl_atomic_load_relaxed(src_p + done);
jl_atomic_store_release(dest_p + done, val);
// `val` is young or old-unmarked
if (val && !(jl_astaggedvalue(val)->bits.gc & 1 /* GC_MARKED */)) {
// `val` is young or old-unmarked (or dest is image and val is non-image)
if (val && (in_image || !(jl_astaggedvalue(val)->bits.gc & 1 /* GC_MARKED */))) {
jl_gc_queue_root(dest_owner);
break;
}
Expand All @@ -66,8 +68,8 @@ STATIC_INLINE void jl_gc_wb_genericmemory_copy_boxed(const jl_value_t *dest_owne
for (; done < (*n); done++) { // copy backwards
void *val = jl_atomic_load_relaxed(src_p + (*n) - done - 1);
jl_atomic_store_release(dest_p + (*n) - done - 1, val);
// `val` is young or old-unmarked
if (val && !(jl_astaggedvalue(val)->bits.gc & 1 /* GC_MARKED */)) {
// `val` is young or old-unmarked (or dest is image and val is non-image)
if (val && (in_image || !(jl_astaggedvalue(val)->bits.gc & 1 /* GC_MARKED */))) {
jl_gc_queue_root(dest_owner);
break;
}
Expand All @@ -84,7 +86,8 @@ STATIC_INLINE void jl_gc_wb_genericmemory_copy_ptr(const jl_value_t *owner, jl_g
if (__unlikely(jl_astaggedvalue(owner)->bits.gc == 3 /* GC_OLD_MARKED */)) {
jl_value_t *src_owner = jl_genericmemory_owner(src);
size_t elsz = dt->layout->size;
if (jl_astaggedvalue(src_owner)->bits.gc != 3 /* GC_OLD_MARKED */) {
if (jl_astaggedvalue(owner)->bits.in_image ||
jl_astaggedvalue(src_owner)->bits.gc != 3 /* GC_OLD_MARKED */) {
dt = (jl_datatype_t*)jl_tparam1(dt);
for (size_t done = 0; done < n; done++) { // copy forwards
char* s = (char*)src_p+done*elsz;
Expand Down
5 changes: 3 additions & 2 deletions src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -534,8 +534,9 @@ jl_value_t *jl_gc_small_alloc_noinline(jl_ptls_t ptls, int offset,
int osize);
jl_value_t *jl_gc_big_alloc_noinline(jl_ptls_t ptls, size_t allocsz);
JL_DLLEXPORT int jl_gc_classify_pools(size_t sz, int *osize) JL_NOTSAFEPOINT;
void gc_sweep_sysimg(void) JL_NOTSAFEPOINT;

void gc_scan_sysimg_remset(void) JL_NOTSAFEPOINT;
extern htable_t image_remset;
extern jl_mutex_t image_remset_lock;

// pools are 16376 bytes large (GC_POOL_SZ - GC_PAGE_OFFSET)
static const int jl_gc_sizeclasses[] = {
Expand Down
10 changes: 8 additions & 2 deletions src/llvm-final-gc-lowering-stock.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,16 @@ void FinalLowerGC::lowerWriteBarrier(CallInst *target, Function &F) {
auto parent = target->getArgOperand(0);
IRBuilder<> builder(target);
builder.SetCurrentDebugLocation(target->getDebugLoc());
auto parBits = builder.CreateAnd(EmitLoadTag(builder, T_size, parent, tbaa_tag), GC_OLD_MARKED, "parent_bits");
auto parTag = EmitLoadTag(builder, T_size, parent, tbaa_tag);
auto parBits = builder.CreateAnd(parTag, GC_OLD_MARKED, "parent_bits");
auto parOldMarked = builder.CreateICmpEQ(parBits, ConstantInt::get(T_size, GC_OLD_MARKED), "parent_old_marked");
auto mayTrigTerm = SplitBlockAndInsertIfThen(parOldMarked, target, false);
builder.SetInsertPoint(mayTrigTerm);
mayTrigTerm->getParent()->setName("may_trigger_wb");
// Image parents are never fully traced by the mark phase, so we must
// always trigger the write barrier regardless of the child's mark bits.
auto parInImage = builder.CreateAnd(parTag, ConstantInt::get(T_size, GC_IN_IMAGE), "parent_in_image");
auto parIsImage = builder.CreateICmpNE(parInImage, ConstantInt::get(T_size, 0), "parent_is_image");
Value *anyChldNotMarked = NULL;
for (unsigned i = 1; i < target->arg_size(); i++) {
Value *child = target->getArgOperand(i);
Expand All @@ -65,9 +70,10 @@ void FinalLowerGC::lowerWriteBarrier(CallInst *target, Function &F) {
anyChldNotMarked = anyChldNotMarked ? builder.CreateOr(anyChldNotMarked, chldNotMarked) : chldNotMarked;
}
assert(anyChldNotMarked); // handled by all_of test above
auto shouldTrigger = builder.CreateOr(parIsImage, anyChldNotMarked, "should_trigger_wb");
MDBuilder MDB(parent->getContext());
SmallVector<uint32_t, 2> Weights{1, 9};
auto trigTerm = SplitBlockAndInsertIfThen(anyChldNotMarked, mayTrigTerm, false,
auto trigTerm = SplitBlockAndInsertIfThen(shouldTrigger, mayTrigTerm, false,
MDB.createBranchWeights(Weights));
trigTerm->getParent()->setName("trigger_wb");
builder.SetInsertPoint(trigTerm);
Expand Down
Loading
Loading