Skip to content

Commit b64c1e4

Browse files
committed
Final changes to support the binding
1 parent f9f38df commit b64c1e4

File tree

6 files changed

+41
-45
lines changed

6 files changed

+41
-45
lines changed

src/gc-common.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -477,6 +477,20 @@ void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, i
477477
ptls->gc_tls.heap.mallocarrays = ma;
478478
}
479479

480+
JL_DLLEXPORT int64_t jl_gc_pool_live_bytes(void)
481+
{
482+
int n_threads = jl_atomic_load_acquire(&jl_n_threads);
483+
jl_ptls_t *all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states);
484+
int64_t pool_live_bytes = 0;
485+
for (int i = 0; i < n_threads; i++) {
486+
jl_ptls_t ptls2 = all_tls_states[i];
487+
if (ptls2 != NULL) {
488+
pool_live_bytes += jl_atomic_load_relaxed(&ptls2->gc_tls.gc_num.pool_live_bytes);
489+
}
490+
}
491+
return pool_live_bytes;
492+
}
493+
480494
void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT
481495
{
482496
jl_ptls_t ptls = jl_current_task->ptls;

src/gc.c

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2789,20 +2789,6 @@ static void sweep_finalizer_list(arraylist_t *list)
27892789
list->len = j;
27902790
}
27912791

2792-
JL_DLLEXPORT int64_t jl_gc_pool_live_bytes(void)
2793-
{
2794-
int n_threads = jl_atomic_load_acquire(&jl_n_threads);
2795-
jl_ptls_t *all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states);
2796-
int64_t pool_live_bytes = 0;
2797-
for (int i = 0; i < n_threads; i++) {
2798-
jl_ptls_t ptls2 = all_tls_states[i];
2799-
if (ptls2 != NULL) {
2800-
pool_live_bytes += jl_atomic_load_relaxed(&ptls2->gc_tls.gc_num.pool_live_bytes);
2801-
}
2802-
}
2803-
return pool_live_bytes;
2804-
}
2805-
28062792
uint64_t jl_gc_smooth(uint64_t old_val, uint64_t new_val, double factor)
28072793
{
28082794
double est = factor * old_val + (1 - factor) * new_val;

src/gc.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,6 @@ extern jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset, int o
4747
extern void jl_rng_split(uint64_t to[JL_RNG_SIZE], uint64_t from[JL_RNG_SIZE]);
4848
extern void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t oldsz,
4949
int isaligned, jl_value_t *owner, int8_t can_collect);
50-
extern size_t jl_array_nbytes(jl_array_t *a);
5150
extern void run_finalizers(jl_task_t *ct, int finalizers_thread);
5251

5352
#define malloc_cache_align(sz) jl_malloc_aligned(sz, JL_CACHE_BYTE_ALIGNMENT)

src/genericmemory.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ jl_genericmemory_t *_new_genericmemory_(jl_value_t *mtype, size_t nel, int8_t is
5454
tot = sizeof(jl_genericmemory_t) + sizeof(void*);
5555
}
5656
m = (jl_genericmemory_t*)jl_gc_alloc(ct->ptls, tot, mtype);
57+
5758
if (pooled) {
5859
data = (char*)m + JL_SMALL_BYTE_ALIGNMENT;
5960
}

src/julia.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -646,11 +646,12 @@ typedef struct _jl_binding_t {
646646
_Atomic(struct _jl_binding_t*) owner; // for individual imported bindings (NULL until 'resolved')
647647
_Atomic(jl_value_t*) ty; // binding type
648648
uint8_t constp:1;
649-
uint8_t exportp:1;
649+
uint8_t exportp:1; // `public foo` sets `publicp`, `export foo` sets both `publicp` and `exportp`
650+
uint8_t publicp:1; // exportp without publicp is not allowed.
650651
uint8_t imported:1;
651652
uint8_t usingfailed:1;
652653
uint8_t deprecated:2; // 0=not deprecated, 1=renamed, 2=moved to another package
653-
uint8_t padding:2;
654+
uint8_t padding:1;
654655
} jl_binding_t;
655656

656657
typedef struct {
@@ -809,7 +810,7 @@ static inline jl_value_t *jl_to_typeof(uintptr_t t)
809810
return (jl_value_t*)t;
810811
}
811812
#else
812-
extern JL_HIDDEN jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)];
813+
extern JL_DLLEXPORT jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)];
813814
static inline jl_value_t *jl_to_typeof(uintptr_t t)
814815
{
815816
if (t < (jl_max_tags << 4))

src/llvm-final-gc-lowering.cpp

Lines changed: 22 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,6 @@ void FinalLowerGC::lowerNewGCFrame(CallInst *target, Function &F)
103103
builder.CreateMemSet(gcframe, Constant::getNullValue(Type::getInt8Ty(F.getContext())), ptrsize * (nRoots + 2), Align(16), tbaa_gcframe);
104104

105105
target->replaceAllUsesWith(gcframe);
106-
target->eraseFromParent();
107106
}
108107

109108
void FinalLowerGC::lowerPushGCFrame(CallInst *target, Function &F)
@@ -131,7 +130,6 @@ void FinalLowerGC::lowerPushGCFrame(CallInst *target, Function &F)
131130
gcframe,
132131
pgcstack,
133132
Align(sizeof(void*)));
134-
target->eraseFromParent();
135133
}
136134

137135
void FinalLowerGC::lowerPopGCFrame(CallInst *target, Function &F)
@@ -150,7 +148,6 @@ void FinalLowerGC::lowerPopGCFrame(CallInst *target, Function &F)
150148
pgcstack,
151149
Align(sizeof(void*)));
152150
inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
153-
target->eraseFromParent();
154151
}
155152

156153
void FinalLowerGC::lowerGetGCFrameSlot(CallInst *target, Function &F)
@@ -170,7 +167,6 @@ void FinalLowerGC::lowerGetGCFrameSlot(CallInst *target, Function &F)
170167
auto gep = builder.CreateInBoundsGEP(T_prjlvalue, gcframe, index);
171168
gep->takeName(target);
172169
target->replaceAllUsesWith(gep);
173-
target->eraseFromParent();
174170
}
175171

176172
void FinalLowerGC::lowerQueueGCRoot(CallInst *target, Function &F)
@@ -187,7 +183,6 @@ void FinalLowerGC::lowerSafepoint(CallInst *target, Function &F)
187183
IRBuilder<> builder(target);
188184
Value* signal_page = target->getOperand(0);
189185
builder.CreateLoad(T_size, signal_page, true);
190-
target->eraseFromParent();
191186
}
192187

193188
#ifdef MMTK_GC
@@ -252,7 +247,7 @@ void FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
252247

253248
// Should we generate fastpath allocation sequence here? We should always generate fastpath here for MMTk.
254249
// Setting this to false will increase allocation overhead a lot, and should only be used for debugging.
255-
const bool INLINE_FASTPATH_ALLOCATION = true;
250+
const bool INLINE_FASTPATH_ALLOCATION = false;
256251

257252
if (INLINE_FASTPATH_ALLOCATION) {
258253
// Assuming we use the first immix allocator.
@@ -307,12 +302,12 @@ void FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
307302
builder.CreateStore(new_cursor, cursor_ptr);
308303

309304
// ptls->gc_num.allocd += osize;
310-
auto pool_alloc_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, gc_tls) + offsetof(jl_gc_tls_states_t, gc_num));
311-
auto pool_alloc_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, pool_alloc_pos);
312-
auto pool_alloc_tls = builder.CreateBitCast(pool_alloc_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "pool_alloc");
313-
auto pool_allocd = builder.CreateLoad(Type::getInt64Ty(target->getContext()), pool_alloc_tls);
314-
auto pool_allocd_total = builder.CreateAdd(pool_allocd, pool_osize);
315-
builder.CreateStore(pool_allocd_total, pool_alloc_tls);
305+
// auto pool_alloc_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, gc_tls) + offsetof(jl_gc_tls_states_t, gc_num));
306+
// auto pool_alloc_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, pool_alloc_pos);
307+
// auto pool_alloc_tls = builder.CreateBitCast(pool_alloc_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "pool_alloc");
308+
// auto pool_allocd = builder.CreateLoad(Type::getInt64Ty(target->getContext()), pool_alloc_tls);
309+
// auto pool_allocd_total = builder.CreateAdd(pool_allocd, pool_osize);
310+
// builder.CreateStore(pool_allocd_total, pool_alloc_tls);
316311

317312
auto v_raw = builder.CreateNSWAdd(result, ConstantInt::get(Type::getInt64Ty(target->getContext()), sizeof(jl_taggedvalue_t)));
318313
auto v_as_ptr = builder.CreateIntToPtr(v_raw, poolAllocFunc->getReturnType());
@@ -321,14 +316,14 @@ void FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
321316
phiNode->addIncoming(new_call, slowpath);
322317
phiNode->addIncoming(v_as_ptr, fastpath);
323318
phiNode->takeName(target);
324-
319+
325320
target->replaceAllUsesWith(phiNode);
326-
target->eraseFromParent();
327321
return;
328322
} else {
329323
auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1);
330324
newI = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize_i32, type });
331-
derefBytes = sizeof(void*);
325+
if (sz > 0)
326+
derefBytes = sz;
332327
}
333328
#endif // MMTK_GC
334329
}
@@ -346,7 +341,6 @@ void FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
346341
newI->addDereferenceableRetAttr(derefBytes);
347342
newI->takeName(target);
348343
target->replaceAllUsesWith(newI);
349-
target->eraseFromParent();
350344
}
351345

352346
bool FinalLowerGC::runOnFunction(Function &F)
@@ -372,21 +366,23 @@ bool FinalLowerGC::runOnFunction(Function &F)
372366

373367
// Lower all calls to supported intrinsics.
374368
for (auto &BB : F) {
375-
for (auto &I : make_early_inc_range(BB)) {
376-
auto *CI = dyn_cast<CallInst>(&I);
377-
if (!CI)
369+
for (auto it = BB.begin(); it != BB.end();) {
370+
auto *CI = dyn_cast<CallInst>(&*it);
371+
if (!CI) {
372+
++it;
378373
continue;
374+
}
379375

380376
Value *callee = CI->getCalledOperand();
381377
assert(callee);
382378

383379
#define LOWER_INTRINSIC(INTRINSIC, LOWER_INTRINSIC_FUNC) \
384-
do { \
385-
auto intrinsic = getOrNull(jl_intrinsics::INTRINSIC); \
386-
if (intrinsic == callee) { \
387-
LOWER_INTRINSIC_FUNC(CI, F); \
388-
} \
389-
} while (0)
380+
auto INTRINSIC = getOrNull(jl_intrinsics::INTRINSIC); \
381+
if (INTRINSIC == callee) { \
382+
LOWER_INTRINSIC_FUNC(CI, F); \
383+
it = CI->eraseFromParent(); \
384+
continue; \
385+
} \
390386

391387
LOWER_INTRINSIC(newGCFrame, lowerNewGCFrame);
392388
LOWER_INTRINSIC(pushGCFrame, lowerPushGCFrame);
@@ -396,14 +392,13 @@ bool FinalLowerGC::runOnFunction(Function &F)
396392
LOWER_INTRINSIC(queueGCRoot, lowerQueueGCRoot);
397393
LOWER_INTRINSIC(safepoint, lowerSafepoint);
398394

399-
400395
#ifdef MMTK_GC
401396
LOWER_INTRINSIC(writeBarrier1, lowerWriteBarrier1);
402397
LOWER_INTRINSIC(writeBarrier2, lowerWriteBarrier2);
403398
LOWER_INTRINSIC(writeBarrier1Slow, lowerWriteBarrier1Slow);
404399
LOWER_INTRINSIC(writeBarrier2Slow, lowerWriteBarrier2Slow);
405400
#endif
406-
401+
++it;
407402

408403
#undef LOWER_INTRINSIC
409404
}

0 commit comments

Comments
 (0)