Skip to content

Commit 5f396cd

Browse files
committed
IRGen: Add indirect peepholes for arguments and return value coercion
Large tuples of values (e.g. char[32]) can be passed directly at the ABI boundary but expand to a big explosion of values. Peephole this explosion at argument passing and return value passing points to avoid code size growth associated with the explosion.
1 parent 3ab4484 commit 5f396cd

File tree

6 files changed

+273
-9
lines changed

6 files changed

+273
-9
lines changed

lib/IRGen/GenCall.cpp

Lines changed: 82 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5346,7 +5346,8 @@ Explosion NativeConventionSchema::mapIntoNative(IRGenModule &IGM,
53465346
IRGenFunction &IGF,
53475347
Explosion &fromNonNative,
53485348
SILType type,
5349-
bool isOutlined) const {
5349+
bool isOutlined,
5350+
bool mayPeepholeLoad) const {
53505351
if (fromNonNative.empty()) {
53515352
assert(empty() && "Empty explosion must match the native convention");
53525353
return Explosion();
@@ -5425,6 +5426,82 @@ Explosion NativeConventionSchema::mapIntoNative(IRGenModule &IGM,
54255426
? coercionTy
54265427
: overlappedCoercionTy;
54275428

5429+
if (mayPeepholeLoad) {
5430+
auto succeeded = [&]() -> bool {
5431+
if (!overlappedCoercionTy->isEmptyTy())
5432+
return false;
5433+
auto load = dyn_cast<llvm::LoadInst>(*fromNonNative.begin());
5434+
if (!load)
5435+
return false;
5436+
auto *gep = dyn_cast<llvm::GetElementPtrInst>(load->getPointerOperand());
5437+
if (!gep)
5438+
return false;
5439+
auto *alloca = dyn_cast<llvm::AllocaInst>(getUnderlyingObject(gep));
5440+
if (!alloca)
5441+
return false;
5442+
auto numExplosions = fromNonNative.size();
5443+
if (numExplosions < 2)
5444+
return false;
5445+
for (unsigned i = 0, e = numExplosions; i < e; ++i) {
5446+
auto *otherLoad = dyn_cast<llvm::LoadInst>(*(fromNonNative.begin() + i));
5447+
if (!otherLoad)
5448+
return false;
5449+
auto otherAlloca = dyn_cast<llvm::AllocaInst>(
5450+
getUnderlyingObject(otherLoad->getPointerOperand()));
5451+
if (!otherAlloca || otherAlloca != alloca)
5452+
return false;
5453+
load = otherLoad;
5454+
}
5455+
auto allocaSize =
5456+
DataLayout.getTypeSizeInBits(alloca->getAllocatedType());
5457+
5458+
Address origAlloca(alloca, alloca->getAllocatedType(),
5459+
Alignment(alloca->getAlign().value()));
5460+
5461+
IRBuilder Builder(*IGM.LLVMContext, false);
5462+
Builder.SetInsertPoint(load);
5463+
5464+
if (allocaSize < coercionSize) {
5465+
auto coerced = IGF.createAlloca(coercionTy, Alignment(alloca->getAlign().value()) , "tmp.coerce");
5466+
// Copy the defined bytes.
5467+
Builder.CreateMemCpy(coerced, origAlloca, Size(allocaSize/8));
5468+
origAlloca = coerced;
5469+
}
5470+
5471+
adjustAllocaAlignment(DataLayout, origAlloca, coercionTy);
5472+
5473+
5474+
unsigned expandedMapIdx = 0;
5475+
SmallVector<llvm::Value *, 8> expandedElts(expandedTys.size(), nullptr);
5476+
auto structAddr = Builder.CreateElementBitCast(origAlloca, coercionTy);
5477+
for (auto eltIndex : indices(coercionTy->elements())) {
5478+
auto layout = DataLayout.getStructLayout(coercionTy);
5479+
auto eltTy = coercionTy->getElementType(eltIndex);
5480+
// Skip padding fields.
5481+
if (eltTy->isArrayTy())
5482+
continue;
5483+
Address eltAddr = Builder.CreateStructGEP(structAddr, eltIndex, layout);
5484+
llvm::Value *elt = Builder.CreateLoad(eltAddr);
5485+
auto index = expandedTyIndicesMap[expandedMapIdx];
5486+
assert(expandedElts[index] == nullptr);
5487+
expandedElts[index] = elt;
5488+
++expandedMapIdx;
5489+
}
5490+
5491+
// Add the values to the explosion.
5492+
for (auto *val : expandedElts)
5493+
nativeExplosion.add(val);
5494+
assert(expandedTys.size() == nativeExplosion.size());
5495+
5496+
return true;
5497+
}();
5498+
5499+
if (succeeded) {
5500+
(void)fromNonNative.claimAll();
5501+
return nativeExplosion;
5502+
}
5503+
}
5504+
54285505
// Allocate a temporary for the coercion.
54295506
Address temporary;
54305507
Size tempSize;
@@ -5514,7 +5591,8 @@ Explosion IRGenFunction::coerceValueTo(SILType fromTy, Explosion &from,
55145591

55155592
void IRGenFunction::emitScalarReturn(SILType returnResultType,
55165593
SILType funcResultType, Explosion &result,
5517-
bool isSwiftCCReturn, bool isOutlined) {
5594+
bool isSwiftCCReturn, bool isOutlined,
5595+
bool mayPeepholeLoad) {
55185596
if (result.empty()) {
55195597
assert(IGM.getTypeInfo(returnResultType)
55205598
.nativeReturnValueSchema(IGM)
@@ -5533,7 +5611,8 @@ void IRGenFunction::emitScalarReturn(SILType returnResultType,
55335611
assert(!nativeSchema.requiresIndirect());
55345612

55355613
Explosion native = nativeSchema.mapIntoNative(IGM, *this, result,
5536-
funcResultType, isOutlined);
5614+
funcResultType, isOutlined,
5615+
mayPeepholeLoad);
55375616
if (native.size() == 1) {
55385617
Builder.CreateRet(native.claimNext());
55395618
return;

lib/IRGen/IRGenFunction.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ class IRGenFunction {
102102
Explosion collectParameters();
103103
void emitScalarReturn(SILType returnResultType, SILType funcResultType,
104104
Explosion &scalars, bool isSwiftCCReturn,
105-
bool isOutlined);
105+
bool isOutlined, bool mayPeepholeLoad = false);
106106
void emitScalarReturn(llvm::Type *resultTy, Explosion &scalars);
107107

108108
void emitBBForReturn();

lib/IRGen/IRGenSIL.cpp

Lines changed: 130 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4373,7 +4373,8 @@ void IRGenFunction::emitCoroutineOrAsyncExit(bool isUnwind) {
43734373
static void emitReturnInst(IRGenSILFunction &IGF,
43744374
SILType resultTy,
43754375
Explosion &result,
4376-
CanSILFunctionType fnType) {
4376+
CanSILFunctionType fnType,
4377+
bool mayPeepholeLoad) {
43774378
SILFunctionConventions conv(IGF.CurSILFn->getLoweredFunctionType(),
43784379
IGF.getSILModule());
43794380

@@ -4448,10 +4449,37 @@ static void emitReturnInst(IRGenSILFunction &IGF,
44484449
assert(swiftCCReturn ||
44494450
funcLang == SILFunctionLanguage::C && "Need to handle all cases");
44504451
IGF.emitScalarReturn(resultTy, funcResultType, result, swiftCCReturn,
4451-
false);
4452+
false, mayPeepholeLoad);
44524453
}
44534454
}
44544455

4456+
static bool canPeepholeLoadToReturn(IRGenModule &IGM, swift::ReturnInst *r) {
4457+
auto *load = dyn_cast<LoadInst>(r->getOperand());
4458+
if (!load)
4459+
return false;
4460+
4461+
// Later code can't deal with projections.
4462+
if (!isa<AllocStackInst>(load->getOperand()))
4463+
return false;
4464+
4465+
if (load->getParent() != r->getParent())
4466+
return false;
4467+
4468+
for (auto it = ++load->getIterator(), e = r->getIterator(); it != e; ++it) {
4469+
if (it->mayHaveSideEffects()) {
4470+
if (auto *dealloc = dyn_cast<DeallocStackInst>(&*it)) {
4471+
auto &ti = IGM.getTypeInfo(
4472+
dealloc->getOperand()->getType().getObjectType());
4473+
if (!ti.isLoadable())
4474+
return false;
4475+
continue;
4476+
}
4477+
return false;
4478+
}
4479+
}
4480+
return true;
4481+
}
4482+
44554483
void IRGenSILFunction::visitReturnInst(swift::ReturnInst *i) {
44564484
Explosion result = getLoweredExplosion(i->getOperand());
44574485

@@ -4466,8 +4494,11 @@ void IRGenSILFunction::visitReturnInst(swift::ReturnInst *i) {
44664494
result = std::move(temp);
44674495
}
44684496

4497+
bool mayPeepholeLoad = canPeepholeLoadToReturn(IGM, i);
4498+
44694499
emitReturnInst(*this, i->getOperand()->getType(), result,
4470-
i->getFunction()->getLoweredFunctionType());
4500+
i->getFunction()->getLoweredFunctionType(),
4501+
mayPeepholeLoad);
44714502
}
44724503

44734504
void IRGenSILFunction::visitThrowInst(swift::ThrowInst *i) {
@@ -5524,6 +5555,81 @@ void IRGenSILFunction::visitLoadInst(swift::LoadInst *i) {
55245555
}
55255556
setLoweredExplosion(i, lowered);
55265557
}
5558+
static Address isSafeForMemCpyPeephole(const TypeInfo &TI, SILArgument *arg,
5559+
Explosion &argSrc, AllocStackInst *dst,
5560+
Address storeDst,
5561+
StoreInst *store,
5562+
llvm::Instruction * &insertPt) {
5563+
if (!arg || !dst)
5564+
return Address();
5565+
5566+
// Store of function argument.
5567+
if (store->getParent() != store->getFunction()->getEntryBlock())
5568+
return Address();
5569+
5570+
auto explosionSize = TI.getSchema().size();
5571+
if (argSrc.size() < 1 || explosionSize < 4)
5572+
return Address();
5573+
5574+
auto *load = dyn_cast<llvm::LoadInst>(*argSrc.begin());
5575+
if (!load)
5576+
return Address();
5577+
5578+
auto *gep = dyn_cast<llvm::GetElementPtrInst>(load->getPointerOperand());
5579+
if (!gep)
5580+
return Address();
5581+
5582+
auto *alloca = dyn_cast<llvm::AllocaInst>(getUnderlyingObject(gep));
5583+
if (!alloca)
5584+
return Address();
5585+
5586+
// Check all the other loads.
5587+
for (size_t i = 1, e = explosionSize; i != e; ++i) {
5588+
auto *load = dyn_cast<llvm::LoadInst>(*(argSrc.begin() + i));
5589+
if (!load)
5590+
return Address();
5591+
auto *alloca2 = dyn_cast<llvm::AllocaInst>(
5592+
getUnderlyingObject(load->getPointerOperand()));
5593+
if (!alloca2 || alloca2 != alloca)
5594+
return Address();
5595+
}
5596+
5597+
auto *dstAlloca = dyn_cast<llvm::AllocaInst>(storeDst.getAddress());
5598+
if (!dstAlloca)
5599+
return Address();
5600+
5601+
// Move the lifetime.begin above the load instruction (where we eventually
5602+
// will insert the memcpy).
5603+
llvm::Instruction *lifetimeBegin = nullptr;
5604+
for (const auto &use : dstAlloca->uses()) {
5605+
auto *begin = dyn_cast<llvm::LifetimeIntrinsic>(use.getUser());
5606+
if (!begin)
5607+
continue;
5608+
if (begin->getParent() != alloca->getParent())
5609+
continue;
5610+
if (begin->getIntrinsicID() != llvm::Intrinsic::lifetime_start)
5611+
continue;
5612+
5613+
if (lifetimeBegin) {
5614+
// Seen a second lifetime.begin in the entry block.
5615+
lifetimeBegin = nullptr;
5616+
break;
5617+
}
5618+
lifetimeBegin = begin;
5619+
}
5620+
5621+
if (!lifetimeBegin) {
5622+
return Address();
5623+
}
5624+
5625+
lifetimeBegin->moveBefore(load);
5626+
5627+
// Set insertPt to the first load such that we are within the lifetime of the
5628+
// alloca marked by the lifetime intrinsic.
5629+
insertPt = load;
5630+
5631+
return TI.getAddressForPointer(alloca);
5632+
}
55275633

55285634
static Address canForwardIndirectResultAlloca(const TypeInfo &TI,
55295635
StoreInst *store,
@@ -5576,7 +5682,6 @@ void IRGenSILFunction::visitStoreInst(swift::StoreInst *i) {
55765682
SILType objType = i->getSrc()->getType().getObjectType();
55775683
const auto &typeInfo = cast<LoadableTypeInfo>(getTypeInfo(objType));
55785684

5579-
55805685
llvm::Instruction *insertPt = nullptr;
55815686
auto forwardAddr = canForwardIndirectResultAlloca(typeInfo, i, source,
55825687
insertPt);
@@ -5585,12 +5690,33 @@ void IRGenSILFunction::visitStoreInst(swift::StoreInst *i) {
55855690
// Set the insert point to the first load instruction. We need to be within
55865691
// the lifetime of the alloca.
55875692
IRBuilder::SavedInsertionPointRAII insertRAII(this->Builder, insertPt);
5693+
ArtificialLocation Loc(getDebugScope(), IGM.DebugInfo.get(), Builder);
55885694
addrTI.initializeWithTake(*this, dest, forwardAddr, i->getDest()->getType(),
55895695
false, /*zeroizeIfSensitive=*/ true);
55905696
(void)source.claimAll();
55915697
return;
55925698
}
55935699

5700+
// See if we can forward a load from an alloca we have created for the purpose
5701+
// of argument coercion.
5702+
auto argSrc = dyn_cast<SILArgument>(i->getSrc());
5703+
auto stackDst = dyn_cast<AllocStackInst>(i->getDest());
5704+
const auto &addrTI = getTypeInfo(i->getDest()->getType());
5705+
insertPt = nullptr;
5706+
5707+
auto srcAddr = isSafeForMemCpyPeephole(addrTI, argSrc, source, stackDst, dest,
5708+
i, insertPt);
5709+
if (srcAddr.isValid() &&
5710+
(i->getOwnershipQualifier() == StoreOwnershipQualifier::Trivial ||
5711+
i->getOwnershipQualifier() == StoreOwnershipQualifier::Unqualified)) {
5712+
IRBuilder::SavedInsertionPointRAII insertRAII(this->Builder, insertPt);
5713+
ArtificialLocation Loc(getDebugScope(), IGM.DebugInfo.get(), Builder);
5714+
addrTI.initializeWithTake(*this, dest, srcAddr, i->getDest()->getType(),
5715+
false, /*zeroizeIfSensitive*/true);
5716+
(void)source.claimAll();
5717+
return;
5718+
}
5719+
55945720
switch (i->getOwnershipQualifier()) {
55955721
case StoreOwnershipQualifier::Unqualified:
55965722
case StoreOwnershipQualifier::Init:

lib/IRGen/NativeConventionSchema.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ class NativeConventionSchema {
5757
/// calling convention's schema.
5858
Explosion mapIntoNative(IRGenModule &IGM, IRGenFunction &IGF,
5959
Explosion &fromNonNative, SILType type,
60-
bool isOutlined) const;
60+
bool isOutlined, bool mayPeepholeLoad = false) const;
6161

6262
/// Map from a native explosion that follows the native calling convention's
6363
/// schema to a non-native explosion whose schema is described by
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#pragma once
2+
3+
struct BigStruct {
4+
char a[32];
5+
};
6+
7+
struct BigStruct useBigStruct(struct BigStruct x);
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
// RUN: %target-swift-frontend %s -import-objc-header %S/Inputs/arg_and_result_peepholes.h -emit-ir 2>&1 | %FileCheck %s
2+
3+
// REQUIRES: PTRSIZE=64
4+
5+
// CHECK: define{{.*}} swiftcc { i64, i64, i64, i64 } @"$s24arg_and_result_peepholes05test_D0ySo9BigStructVADF"(i64 %0, i64 %1, i64 %2, i64 %3)
6+
// CHECK: entry:
7+
// CHECK: [[TMP:%.*]] = alloca { i64, i64, i64, i64 }
8+
// CHECK: [[RES_MEM:%.*]] = alloca %TSo9BigStructV
9+
// CHECK: [[ARG_MEM:%.*]] = alloca %TSo9BigStructV
10+
// CHECK: [[TMP2:%.*]] = alloca %TSo9BigStructV
11+
// CHECK: [[CALL:%.*]] = alloca %TSo9BigStructV
12+
// CHECK: call void @llvm.lifetime.start.p0(i64 256, ptr [[TMP]])
13+
// CHECK: [[A1:%.*]] = getelementptr inbounds { i64, i64, i64, i64 }, ptr [[TMP]], i32 0, i32 0
14+
// CHECK: store i64 %0, ptr [[A1]]
15+
// CHECK: [[A2:%.*]] = getelementptr inbounds { i64, i64, i64, i64 }, ptr [[TMP]], i32 0, i32 1
16+
// CHECK: store i64 %1, ptr [[A2]]
17+
// CHECK: [[A3:%.*]] = getelementptr inbounds { i64, i64, i64, i64 }, ptr [[TMP]], i32 0, i32 2
18+
// CHECK: store i64 %2, ptr [[A3]]
19+
// CHECK: [[A4:%.*]] = getelementptr inbounds { i64, i64, i64, i64 }, ptr [[TMP]], i32 0, i32 3
20+
// CHECK: store i64 %3, ptr [[A4]]
21+
22+
// CHECK: call void @llvm.lifetime.start.p0(i64 32, ptr [[ARG_MEM]])
23+
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr{{.*}} [[ARG_MEM]], ptr{{.*}} [[TMP]], i64 32, i1 false)
24+
25+
// CHECK: call void @llvm.lifetime.start.p0(i64 32, ptr [[TMP2]])
26+
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr{{.*}} [[TMP2]], ptr{{.*}} [[ARG_MEM]], i64 32, i1 false)
27+
// CHECK: call void @llvm.lifetime.start.p0(i64 32, ptr [[CALL]])
28+
// CHECK: call void @useBigStruct(ptr{{.*}} [[CALL]], ptr{{.*}} [[TMP2]])
29+
// CHECK: call void @llvm.lifetime.end.p0(i64 32, ptr [[TMP2]])
30+
31+
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr{{.*}} [[RES_MEM]], ptr{{.*}} [[CALL]], i64 32, i1 false)
32+
33+
// CHECK: [[A5:%.*]] = getelementptr inbounds { i64, i64, i64, i64 }, ptr [[RES_MEM]], i32 0, i32 0
34+
// CHECK: [[R1:%.*]] = load i64, ptr [[A5]]
35+
// CHECK: [[A6:%.*]] = getelementptr inbounds { i64, i64, i64, i64 }, ptr [[RES_MEM]], i32 0, i32 1
36+
// CHECK: [[R2:%.*]] = load i64, ptr [[A6]]
37+
// CHECK: [[A7:%.*]] = getelementptr inbounds { i64, i64, i64, i64 }, ptr [[RES_MEM]], i32 0, i32 2
38+
// CHECK: [[R3:%.*]] = load i64, ptr [[A7]]
39+
// CHECK: [[A8:%.*]] = getelementptr inbounds { i64, i64, i64, i64 }, ptr [[RES_MEM]], i32 0, i32 3
40+
// CHECK: [[R4:%.*]] = load i64, ptr [[A8]]
41+
// CHECK: call void @llvm.lifetime.end.p0(i64 32, ptr [[ARG_MEM]])
42+
// CHECK: call void @llvm.lifetime.end.p0(i64 32, ptr [[RES_MEM]])
43+
44+
// CHECK: [[R5:%.*]] = insertvalue { i64, i64, i64, i64 } undef, i64 [[R1]], 0
45+
// CHECK: [[R6:%.*]] = insertvalue { i64, i64, i64, i64 } [[R5]], i64 [[R2]], 1
46+
// CHECK: [[R7:%.*]] = insertvalue { i64, i64, i64, i64 } [[R6]], i64 [[R3]], 2
47+
// CHECK: [[R8:%.*]] = insertvalue { i64, i64, i64, i64 } [[R7]], i64 [[R4]], 3
48+
// CHECK: ret { i64, i64, i64, i64 } [[R8]]
49+
50+
public func test_peepholes(_ v: BigStruct) -> BigStruct {
51+
return useBigStruct(v)
52+
}

0 commit comments

Comments
 (0)