@@ -4373,7 +4373,8 @@ void IRGenFunction::emitCoroutineOrAsyncExit(bool isUnwind) {
4373
4373
static void emitReturnInst (IRGenSILFunction &IGF,
4374
4374
SILType resultTy,
4375
4375
Explosion &result,
4376
- CanSILFunctionType fnType) {
4376
+ CanSILFunctionType fnType,
4377
+ bool mayPeepholeLoad) {
4377
4378
SILFunctionConventions conv (IGF.CurSILFn ->getLoweredFunctionType (),
4378
4379
IGF.getSILModule ());
4379
4380
@@ -4448,10 +4449,37 @@ static void emitReturnInst(IRGenSILFunction &IGF,
4448
4449
assert (swiftCCReturn ||
4449
4450
funcLang == SILFunctionLanguage::C && " Need to handle all cases" );
4450
4451
IGF.emitScalarReturn (resultTy, funcResultType, result, swiftCCReturn,
4451
- false );
4452
+ false , mayPeepholeLoad );
4452
4453
}
4453
4454
}
4454
4455
4456
+ static bool canPeepholeLoadToReturn (IRGenModule &IGM, swift::ReturnInst *r) {
4457
+ auto *load = dyn_cast<LoadInst>(r->getOperand ());
4458
+ if (!load)
4459
+ return false ;
4460
+
4461
+ // Later code can't deal with projections.
4462
+ if (!isa<AllocStackInst>(load->getOperand ()))
4463
+ return false ;
4464
+
4465
+ if (load->getParent () != r->getParent ())
4466
+ return false ;
4467
+
4468
+ for (auto it = ++load->getIterator (), e = r->getIterator (); it != e; ++it) {
4469
+ if (it->mayHaveSideEffects ()) {
4470
+ if (auto *dealloc = dyn_cast<DeallocStackInst>(&*it)) {
4471
+ auto &ti = IGM.getTypeInfo (
4472
+ dealloc->getOperand ()->getType ().getObjectType ());
4473
+ if (!ti.isLoadable ())
4474
+ return false ;
4475
+ continue ;
4476
+ }
4477
+ return false ;
4478
+ }
4479
+ }
4480
+ return true ;
4481
+ }
4482
+
4455
4483
void IRGenSILFunction::visitReturnInst (swift::ReturnInst *i) {
4456
4484
Explosion result = getLoweredExplosion (i->getOperand ());
4457
4485
@@ -4466,8 +4494,11 @@ void IRGenSILFunction::visitReturnInst(swift::ReturnInst *i) {
4466
4494
result = std::move (temp);
4467
4495
}
4468
4496
4497
+ bool mayPeepholeLoad = canPeepholeLoadToReturn (IGM, i);
4498
+
4469
4499
emitReturnInst (*this , i->getOperand ()->getType (), result,
4470
- i->getFunction ()->getLoweredFunctionType ());
4500
+ i->getFunction ()->getLoweredFunctionType (),
4501
+ mayPeepholeLoad);
4471
4502
}
4472
4503
4473
4504
void IRGenSILFunction::visitThrowInst (swift::ThrowInst *i) {
@@ -5524,6 +5555,81 @@ void IRGenSILFunction::visitLoadInst(swift::LoadInst *i) {
5524
5555
}
5525
5556
setLoweredExplosion (i, lowered);
5526
5557
}
5558
+ static Address isSafeForMemCpyPeephole (const TypeInfo &TI, SILArgument *arg,
5559
+ Explosion &argSrc, AllocStackInst *dst,
5560
+ Address storeDst,
5561
+ StoreInst *store,
5562
+ llvm::Instruction * &insertPt) {
5563
+ if (!arg || !dst)
5564
+ return Address ();
5565
+
5566
+ // Store of function argument.
5567
+ if (store->getParent () != store->getFunction ()->getEntryBlock ())
5568
+ return Address ();
5569
+
5570
+ auto explosionSize = TI.getSchema ().size ();
5571
+ if (argSrc.size () < 1 || explosionSize < 4 )
5572
+ return Address ();
5573
+
5574
+ auto *load = dyn_cast<llvm::LoadInst>(*argSrc.begin ());
5575
+ if (!load)
5576
+ return Address ();
5577
+
5578
+ auto *gep = dyn_cast<llvm::GetElementPtrInst>(load->getPointerOperand ());
5579
+ if (!gep)
5580
+ return Address ();
5581
+
5582
+ auto *alloca = dyn_cast<llvm::AllocaInst>(getUnderlyingObject (gep));
5583
+ if (!alloca)
5584
+ return Address ();
5585
+
5586
+ // Check all the other loads.
5587
+ for (size_t i = 1 , e = explosionSize; i != e; ++i) {
5588
+ auto *load = dyn_cast<llvm::LoadInst>(*(argSrc.begin () + i));
5589
+ if (!load)
5590
+ return Address ();
5591
+ auto *alloca2 = dyn_cast<llvm::AllocaInst>(
5592
+ getUnderlyingObject (load->getPointerOperand ()));
5593
+ if (!alloca2 || alloca2 != alloca)
5594
+ return Address ();
5595
+ }
5596
+
5597
+ auto *dstAlloca = dyn_cast<llvm::AllocaInst>(storeDst.getAddress ());
5598
+ if (!dstAlloca)
5599
+ return Address ();
5600
+
5601
+ // Move the lifetime.begin above the load instruction (where we eventually
5602
+ // will insert the memcpy.
5603
+ llvm::Instruction *lifetimeBegin = nullptr ;
5604
+ for (const auto &use : dstAlloca->uses ()) {
5605
+ auto *begin = dyn_cast<llvm::LifetimeIntrinsic>(use.getUser ());
5606
+ if (!begin)
5607
+ continue ;
5608
+ if (begin->getParent () != alloca->getParent ())
5609
+ continue ;
5610
+ if (begin->getIntrinsicID () != llvm::Intrinsic::lifetime_start)
5611
+ continue ;
5612
+
5613
+ if (lifetimeBegin) {
5614
+ // Seen a second lifetime.begin in the entry block.
5615
+ lifetimeBegin = nullptr ;
5616
+ break ;
5617
+ }
5618
+ lifetimeBegin = begin;
5619
+ }
5620
+
5621
+ if (!lifetimeBegin) {
5622
+ return Address ();
5623
+ }
5624
+
5625
+ lifetimeBegin->moveBefore (load);
5626
+
5627
+ // Set insertPt to the first load such that we are within the lifetime of the
5628
+ // alloca marked by the lifetime intrinsic.
5629
+ insertPt = load;
5630
+
5631
+ return TI.getAddressForPointer (alloca);
5632
+ }
5527
5633
5528
5634
static Address canForwardIndirectResultAlloca (const TypeInfo &TI,
5529
5635
StoreInst *store,
@@ -5576,7 +5682,6 @@ void IRGenSILFunction::visitStoreInst(swift::StoreInst *i) {
5576
5682
SILType objType = i->getSrc ()->getType ().getObjectType ();
5577
5683
const auto &typeInfo = cast<LoadableTypeInfo>(getTypeInfo (objType));
5578
5684
5579
-
5580
5685
llvm::Instruction *insertPt = nullptr ;
5581
5686
auto forwardAddr = canForwardIndirectResultAlloca (typeInfo, i, source,
5582
5687
insertPt);
@@ -5585,12 +5690,33 @@ void IRGenSILFunction::visitStoreInst(swift::StoreInst *i) {
5585
5690
// Set the insert point to the first load instruction. We need to be within
5586
5691
// the lifetime of the alloca.
5587
5692
IRBuilder::SavedInsertionPointRAII insertRAII (this ->Builder , insertPt);
5693
+ ArtificialLocation Loc (getDebugScope (), IGM.DebugInfo .get (), Builder);
5588
5694
addrTI.initializeWithTake (*this , dest, forwardAddr, i->getDest ()->getType (),
5589
5695
false , /* zeroizeIfSensitive=*/ true );
5590
5696
(void )source.claimAll ();
5591
5697
return ;
5592
5698
}
5593
5699
5700
+ // See if we can forward a load from an alloca we have created for the purpose
5701
+ // of argument coercion.
5702
+ auto argSrc = dyn_cast<SILArgument>(i->getSrc ());
5703
+ auto stackDst = dyn_cast<AllocStackInst>(i->getDest ());
5704
+ const auto &addrTI = getTypeInfo (i->getDest ()->getType ());
5705
+ insertPt = nullptr ;
5706
+
5707
+ auto srcAddr = isSafeForMemCpyPeephole (addrTI, argSrc, source, stackDst, dest,
5708
+ i, insertPt);
5709
+ if (srcAddr.isValid () &&
5710
+ (i->getOwnershipQualifier () == StoreOwnershipQualifier::Trivial ||
5711
+ i->getOwnershipQualifier () == StoreOwnershipQualifier::Unqualified)) {
5712
+ IRBuilder::SavedInsertionPointRAII insertRAII (this ->Builder , insertPt);
5713
+ ArtificialLocation Loc (getDebugScope (), IGM.DebugInfo .get (), Builder);
5714
+ addrTI.initializeWithTake (*this , dest, srcAddr, i->getDest ()->getType (),
5715
+ false , /* zeroizeIfSensitive*/ true );
5716
+ (void )source.claimAll ();
5717
+ return ;
5718
+ }
5719
+
5594
5720
switch (i->getOwnershipQualifier ()) {
5595
5721
case StoreOwnershipQualifier::Unqualified:
5596
5722
case StoreOwnershipQualifier::Init:
0 commit comments