@@ -103,6 +103,7 @@ void FinalLowerGC::lowerNewGCFrame(CallInst *target, Function &F)
103
103
builder.CreateMemSet (gcframe, Constant::getNullValue (Type::getInt8Ty (F.getContext ())), ptrsize * (nRoots + 2 ), Align (16 ), tbaa_gcframe);
104
104
105
105
target->replaceAllUsesWith (gcframe);
106
+ target->eraseFromParent ();
106
107
}
107
108
108
109
void FinalLowerGC::lowerPushGCFrame (CallInst *target, Function &F)
@@ -130,6 +131,7 @@ void FinalLowerGC::lowerPushGCFrame(CallInst *target, Function &F)
130
131
gcframe,
131
132
pgcstack,
132
133
Align (sizeof (void *)));
134
+ target->eraseFromParent ();
133
135
}
134
136
135
137
void FinalLowerGC::lowerPopGCFrame (CallInst *target, Function &F)
@@ -148,6 +150,7 @@ void FinalLowerGC::lowerPopGCFrame(CallInst *target, Function &F)
148
150
pgcstack,
149
151
Align (sizeof (void *)));
150
152
inst->setMetadata (LLVMContext::MD_tbaa, tbaa_gcframe);
153
+ target->eraseFromParent ();
151
154
}
152
155
153
156
void FinalLowerGC::lowerGetGCFrameSlot (CallInst *target, Function &F)
@@ -167,6 +170,7 @@ void FinalLowerGC::lowerGetGCFrameSlot(CallInst *target, Function &F)
167
170
auto gep = builder.CreateInBoundsGEP (T_prjlvalue, gcframe, index);
168
171
gep->takeName (target);
169
172
target->replaceAllUsesWith (gep);
173
+ target->eraseFromParent ();
170
174
}
171
175
172
176
void FinalLowerGC::lowerQueueGCRoot (CallInst *target, Function &F)
@@ -183,6 +187,7 @@ void FinalLowerGC::lowerSafepoint(CallInst *target, Function &F)
183
187
IRBuilder<> builder (target);
184
188
Value* signal_page = target->getOperand (0 );
185
189
builder.CreateLoad (T_size, signal_page, true );
190
+ target->eraseFromParent ();
186
191
}
187
192
188
193
#ifdef MMTK_GC
@@ -209,7 +214,6 @@ void FinalLowerGC::lowerWriteBarrier2Slow(CallInst *target, Function &F)
209
214
assert (target->arg_size () == 2 );
210
215
target->setCalledFunction (writeBarrier2SlowFunc);
211
216
}
212
-
213
217
#endif
214
218
215
219
void FinalLowerGC::lowerGCAllocBytes (CallInst *target, Function &F)
@@ -235,112 +239,26 @@ void FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
235
239
derefBytes = sz;
236
240
}
237
241
else {
238
- #ifndef MMTK_GC
239
242
auto pool_offs = ConstantInt::get (Type::getInt32Ty (F.getContext ()), offset);
240
243
auto pool_osize = ConstantInt::get (Type::getInt32Ty (F.getContext ()), osize);
241
244
newI = builder.CreateCall (poolAllocFunc, { ptls, pool_offs, pool_osize, type });
242
245
if (sz > 0 )
243
246
derefBytes = sz;
244
- #else // MMTK_GC
245
- auto pool_osize_i32 = ConstantInt::get (Type::getInt32Ty (F.getContext ()), osize);
246
- auto pool_osize = ConstantInt::get (Type::getInt64Ty (F.getContext ()), osize);
247
-
248
- // Should we generate fastpath allocation sequence here? We should always generate fastpath here for MMTk.
249
- // Setting this to false will increase allocation overhead a lot, and should only be used for debugging.
250
- const bool INLINE_FASTPATH_ALLOCATION = true ;
251
-
252
- if (INLINE_FASTPATH_ALLOCATION) {
253
- // Assuming we use the first immix allocator.
254
- // FIXME: We should get the allocator index and type from MMTk.
255
- auto allocator_offset = offsetof (jl_tls_states_t , mmtk_mutator) + offsetof (MMTkMutatorContext, allocators) + offsetof (Allocators, immix);
256
-
257
- auto cursor_pos = ConstantInt::get (Type::getInt64Ty (target->getContext ()), allocator_offset + offsetof (ImmixAllocator, cursor));
258
- auto limit_pos = ConstantInt::get (Type::getInt64Ty (target->getContext ()), allocator_offset + offsetof (ImmixAllocator, limit));
259
-
260
- auto cursor_tls_i8 = builder.CreateGEP (Type::getInt8Ty (target->getContext ()), ptls, cursor_pos);
261
- auto cursor_ptr = builder.CreateBitCast (cursor_tls_i8, PointerType::get (Type::getInt64Ty (target->getContext ()), 0 ), " cursor_ptr" );
262
- auto cursor = builder.CreateLoad (Type::getInt64Ty (target->getContext ()), cursor_ptr, " cursor" );
263
-
264
- // offset = 8
265
- auto delta_offset = builder.CreateNSWSub (ConstantInt::get (Type::getInt64Ty (target->getContext ()), 0 ), ConstantInt::get (Type::getInt64Ty (target->getContext ()), 8 ));
266
- auto delta_cursor = builder.CreateNSWSub (ConstantInt::get (Type::getInt64Ty (target->getContext ()), 0 ), cursor);
267
- auto delta_op = builder.CreateNSWAdd (delta_offset, delta_cursor);
268
- // alignment 16 (15 = 16 - 1)
269
- auto delta = builder.CreateAnd (delta_op, ConstantInt::get (Type::getInt64Ty (target->getContext ()), 15 ), " delta" );
270
- auto result = builder.CreateNSWAdd (cursor, delta, " result" );
271
-
272
- auto new_cursor = builder.CreateNSWAdd (result, pool_osize);
273
-
274
- auto limit_tls_i8 = builder.CreateGEP (Type::getInt8Ty (target->getContext ()), ptls, limit_pos);
275
- auto limit_ptr = builder.CreateBitCast (limit_tls_i8, PointerType::get (Type::getInt64Ty (target->getContext ()), 0 ), " limit_ptr" );
276
- auto limit = builder.CreateLoad (Type::getInt64Ty (target->getContext ()), limit_ptr, " limit" );
277
-
278
- auto gt_limit = builder.CreateICmpSGT (new_cursor, limit);
279
-
280
- auto current_block = target->getParent ();
281
- builder.SetInsertPoint (target->getNextNode ());
282
- auto phiNode = builder.CreatePHI (poolAllocFunc->getReturnType (), 2 , " phi_fast_slow" );
283
- auto top_cont = current_block->splitBasicBlock (target->getNextNode (), " top_cont" );
284
-
285
- auto slowpath = BasicBlock::Create (target->getContext (), " slowpath" , target->getFunction ());
286
- auto fastpath = BasicBlock::Create (target->getContext (), " fastpath" , target->getFunction (), top_cont);
287
-
288
- auto next_br = current_block->getTerminator ();
289
- next_br->eraseFromParent ();
290
- builder.SetInsertPoint (current_block);
291
- builder.CreateCondBr (gt_limit, slowpath, fastpath);
292
-
293
- // slowpath
294
- builder.SetInsertPoint (slowpath);
295
- auto pool_offs = ConstantInt::get (Type::getInt32Ty (F.getContext ()), 1 );
296
- auto new_call = builder.CreateCall (poolAllocFunc, { ptls, pool_offs, pool_osize_i32, type });
297
- new_call->setAttributes (new_call->getCalledFunction ()->getAttributes ());
298
- builder.CreateBr (top_cont);
299
-
300
- // // fastpath
301
- builder.SetInsertPoint (fastpath);
302
- builder.CreateStore (new_cursor, cursor_ptr);
303
-
304
- // ptls->gc_num.allocd += osize;
305
- auto pool_alloc_pos = ConstantInt::get (Type::getInt64Ty (target->getContext ()), offsetof (jl_tls_states_t , gc_tls) + offsetof (jl_gc_tls_states_t , gc_num));
306
- auto pool_alloc_i8 = builder.CreateGEP (Type::getInt8Ty (target->getContext ()), ptls, pool_alloc_pos);
307
- auto pool_alloc_tls = builder.CreateBitCast (pool_alloc_i8, PointerType::get (Type::getInt64Ty (target->getContext ()), 0 ), " pool_alloc" );
308
- auto pool_allocd = builder.CreateLoad (Type::getInt64Ty (target->getContext ()), pool_alloc_tls);
309
- auto pool_allocd_total = builder.CreateAdd (pool_allocd, pool_osize);
310
- builder.CreateStore (pool_allocd_total, pool_alloc_tls);
311
-
312
- auto v_raw = builder.CreateNSWAdd (result, ConstantInt::get (Type::getInt64Ty (target->getContext ()), sizeof (jl_taggedvalue_t )));
313
- auto v_as_ptr = builder.CreateIntToPtr (v_raw, poolAllocFunc->getReturnType ());
314
- builder.CreateBr (top_cont);
315
-
316
- phiNode->addIncoming (new_call, slowpath);
317
- phiNode->addIncoming (v_as_ptr, fastpath);
318
- phiNode->takeName (target);
319
-
320
- target->replaceAllUsesWith (phiNode);
321
- return ;
322
- } else {
323
- auto pool_offs = ConstantInt::get (Type::getInt32Ty (F.getContext ()), 1 );
324
- newI = builder.CreateCall (poolAllocFunc, { ptls, pool_offs, pool_osize_i32, type });
325
- if (sz > 0 )
326
- derefBytes = sz;
327
- }
328
- #endif // MMTK_GC
329
247
}
330
248
} else {
331
249
auto size = builder.CreateZExtOrTrunc (target->getArgOperand (1 ), T_size);
332
250
// allocTypedFunc does not include the type tag in the allocation size!
333
251
newI = builder.CreateCall (allocTypedFunc, { ptls, size, type });
334
252
derefBytes = sizeof (void *);
335
253
}
336
-
337
254
newI->setAttributes (newI->getCalledFunction ()->getAttributes ());
338
255
unsigned align = std::max ((unsigned )target->getRetAlign ().valueOrOne ().value (), (unsigned )sizeof (void *));
339
256
newI->addRetAttr (Attribute::getWithAlignment (F.getContext (), Align (align)));
340
257
if (derefBytes > 0 )
341
258
newI->addDereferenceableRetAttr (derefBytes);
342
259
newI->takeName (target);
343
260
target->replaceAllUsesWith (newI);
261
+ target->eraseFromParent ();
344
262
}
345
263
346
264
bool FinalLowerGC::runOnFunction (Function &F)
@@ -362,63 +280,48 @@ bool FinalLowerGC::runOnFunction(Function &F)
362
280
poolAllocFunc = getOrDeclare (jl_well_known::GCPoolAlloc);
363
281
bigAllocFunc = getOrDeclare (jl_well_known::GCBigAlloc);
364
282
allocTypedFunc = getOrDeclare (jl_well_known::GCAllocTyped);
365
- T_size = F.getParent ()->getDataLayout ().getIntPtrType (F.getContext ());
366
-
367
283
#ifdef MMTK_GC
368
284
writeBarrier1Func = getOrDeclare (jl_well_known::GCWriteBarrier1);
369
285
writeBarrier2Func = getOrDeclare (jl_well_known::GCWriteBarrier2);
370
286
writeBarrier1SlowFunc = getOrDeclare (jl_well_known::GCWriteBarrier1Slow);
371
287
writeBarrier2SlowFunc = getOrDeclare (jl_well_known::GCWriteBarrier2Slow);
372
288
#endif
289
+ T_size = F.getParent ()->getDataLayout ().getIntPtrType (F.getContext ());
373
290
374
291
// Lower all calls to supported intrinsics.
375
292
for (auto &BB : F) {
376
- for (auto it = BB.begin (); it != BB.end ();) {
377
- auto *CI = dyn_cast<CallInst>(&*it);
378
- if (!CI) {
379
- ++it;
293
+ for (auto &I : make_early_inc_range (BB)) {
294
+ auto *CI = dyn_cast<CallInst>(&I);
295
+ if (!CI)
380
296
continue ;
381
- }
382
297
383
298
Value *callee = CI->getCalledOperand ();
384
299
assert (callee);
385
300
386
301
#define LOWER_INTRINSIC (INTRINSIC, LOWER_INTRINSIC_FUNC ) \
387
- auto INTRINSIC = getOrNull (jl_intrinsics::INTRINSIC); \
388
- if (INTRINSIC == callee) { \
389
- LOWER_INTRINSIC_FUNC (CI, F); \
390
- it = CI-> eraseFromParent ( ); \
391
- continue ; \
392
- } \
302
+ do { \
303
+ auto intrinsic = getOrNull (jl_intrinsics::INTRINSIC); \
304
+ if (intrinsic == callee) { \
305
+ LOWER_INTRINSIC_FUNC (CI, F ); \
306
+ } \
307
+ } while ( 0 )
393
308
394
309
LOWER_INTRINSIC (newGCFrame, lowerNewGCFrame);
395
310
LOWER_INTRINSIC (pushGCFrame, lowerPushGCFrame);
396
311
LOWER_INTRINSIC (popGCFrame, lowerPopGCFrame);
397
312
LOWER_INTRINSIC (getGCFrameSlot, lowerGetGCFrameSlot);
398
313
LOWER_INTRINSIC (GCAllocBytes, lowerGCAllocBytes);
314
+ LOWER_INTRINSIC (queueGCRoot, lowerQueueGCRoot);
399
315
LOWER_INTRINSIC (safepoint, lowerSafepoint);
400
316
401
- // These lowerings preserve the CI and do not erase them from the parent
402
- #define LOWER_WB_INTRINSIC (INTRINSIC, LOWER_INTRINSIC_FUNC ) \
403
- auto INTRINSIC = getOrNull (jl_intrinsics::INTRINSIC); \
404
- if (INTRINSIC == callee) { \
405
- LOWER_INTRINSIC_FUNC (CI, F); \
406
- ++it; \
407
- continue ; \
408
- } \
409
-
410
- LOWER_WB_INTRINSIC (queueGCRoot, lowerQueueGCRoot);
411
-
412
317
#ifdef MMTK_GC
413
- LOWER_WB_INTRINSIC (writeBarrier1, lowerWriteBarrier1 );
414
- LOWER_WB_INTRINSIC (writeBarrier2, lowerWriteBarrier2 );
415
- LOWER_WB_INTRINSIC (writeBarrier1Slow, lowerWriteBarrier1Slow );
416
- LOWER_WB_INTRINSIC (writeBarrier2Slow, lowerWriteBarrier2Slow );
318
+ // Each write-barrier intrinsic needs its own lowering. lowerNewGCFrame replaces the call with a GC frame and erases it, which would delete the barrier.
+ LOWER_INTRINSIC (writeBarrier1, lowerWriteBarrier1 );
319
+ LOWER_INTRINSIC (writeBarrier2, lowerWriteBarrier2 );
320
+ LOWER_INTRINSIC (writeBarrier1Slow, lowerWriteBarrier1Slow );
321
+ LOWER_INTRINSIC (writeBarrier2Slow, lowerWriteBarrier2Slow );
417
322
#endif
418
- ++it;
419
323
420
324
#undef LOWER_INTRINSIC
421
- #undef LOWER_WB_INTRINSIC
422
325
}
423
326
}
424
327
0 commit comments