Skip to content

Commit 7c89aba

Browse files
gbaraldi and vtjnash authored
Make late_gc_lowering more robust (#57380)
There are cases where we optimize the SRet more than the pass expected, so try to handle those. I'm trying to get a test for this; this is separated from #52850 to make merging both easier. --------- Co-authored-by: Jameson Nash <[email protected]>
1 parent e331deb commit 7c89aba

File tree

4 files changed

+153
-36
lines changed

4 files changed

+153
-36
lines changed

src/llvm-final-gc-lowering.cpp

Lines changed: 45 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -161,21 +161,26 @@ void FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
161161
target->replaceAllUsesWith(newI);
162162
target->eraseFromParent();
163163
}
164+
// Reports whether final GC lowering has anything to do: true when `getOrNull`
// returns a non-null result for at least one of the GC intrinsics listed
// below, false when it returns null for all of them.
//
// `F` is accepted for interface symmetry with the other pass entry points but
// is not consulted by this check.
bool FinalLowerGC::shouldRunFinalGC(Function &F)
{
    // Each lookup is performed unconditionally and in a fixed order; only the
    // combination of the results is deferred to the return statement.
    const bool has_new_frame   = getOrNull(jl_intrinsics::newGCFrame) != nullptr;
    const bool has_frame_slot  = getOrNull(jl_intrinsics::getGCFrameSlot) != nullptr;
    const bool has_push_frame  = getOrNull(jl_intrinsics::pushGCFrame) != nullptr;
    const bool has_pop_frame   = getOrNull(jl_intrinsics::popGCFrame) != nullptr;
    const bool has_alloc_bytes = getOrNull(jl_intrinsics::GCAllocBytes) != nullptr;
    const bool has_queue_root  = getOrNull(jl_intrinsics::queueGCRoot) != nullptr;
    const bool has_safepoint   = getOrNull(jl_intrinsics::safepoint) != nullptr;

    return has_new_frame || has_frame_slot || has_push_frame || has_pop_frame ||
           has_alloc_bytes || has_queue_root || has_safepoint;
}
164176

165177
bool FinalLowerGC::runOnFunction(Function &F)
166178
{
167179
initAll(*F.getParent());
168-
if (!pgcstack_getter && !adoptthread_func) {
169-
LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Skipping function " << F.getName() << "\n");
170-
return false;
171-
}
172-
173-
// Look for a call to 'julia.get_pgcstack'.
174180
pgcstack = getPGCstack(F);
175-
if (!pgcstack) {
176-
LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Skipping function " << F.getName() << " no pgcstack\n");
177-
return false;
178-
}
181+
if (!shouldRunFinalGC(F))
182+
goto verify_skip;
183+
179184
LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Processing function " << F.getName() << "\n");
180185
queueRootFunc = getOrDeclare(jl_well_known::GCQueueRoot);
181186
smallAllocFunc = getOrDeclare(jl_well_known::GCSmallAlloc);
@@ -212,8 +217,37 @@ bool FinalLowerGC::runOnFunction(Function &F)
212217
#undef LOWER_INTRINSIC
213218
}
214219
}
215-
216220
return true;
221+
// Verify that skipping was in fact correct
222+
verify_skip:
223+
#ifdef JL_VERIFY_PASSES
224+
for (auto &BB : F) {
225+
for (auto &I : make_early_inc_range(BB)) {
226+
auto *CI = dyn_cast<CallInst>(&I);
227+
if (!CI)
228+
continue;
229+
230+
Value *callee = CI->getCalledOperand();
231+
assert(callee);
232+
auto IS_INTRINSIC = [&](auto intrinsic) {
233+
auto intrinsic2 = getOrNull(intrinsic);
234+
if (intrinsic2 == callee) {
235+
errs() << "Final-GC-lowering didn't eliminate all intrinsics'" << F.getName() << "', dumping entire module!\n\n";
236+
errs() << *F.getParent() << "\n";
237+
abort();
238+
}
239+
};
240+
IS_INTRINSIC(jl_intrinsics::newGCFrame);
241+
IS_INTRINSIC(jl_intrinsics::pushGCFrame);
242+
IS_INTRINSIC(jl_intrinsics::popGCFrame);
243+
IS_INTRINSIC(jl_intrinsics::getGCFrameSlot);
244+
IS_INTRINSIC(jl_intrinsics::GCAllocBytes);
245+
IS_INTRINSIC(jl_intrinsics::queueGCRoot);
246+
IS_INTRINSIC(jl_intrinsics::safepoint);
247+
}
248+
}
249+
#endif
250+
return false;
217251
}
218252

219253
PreservedAnalyses FinalLowerGCPass::run(Function &F, FunctionAnalysisManager &AM)

src/llvm-gc-interface-passes.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -411,6 +411,8 @@ struct FinalLowerGC: private JuliaPassContext {
411411

412412
// Lowers a `julia.safepoint` intrinsic.
413413
void lowerSafepoint(CallInst *target, Function &F);
414+
// Check if the pass should be run
415+
bool shouldRunFinalGC(Function &F);
414416
};
415417

416418
#endif // LLVM_GC_PASSES_H

src/llvm-late-gc-lowering.cpp

Lines changed: 57 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1114,6 +1114,47 @@ void LateLowerGCFrame::FixUpRefinements(ArrayRef<int> PHINumbers, State &S)
11141114
}
11151115
}
11161116

1117+
// Look through instructions to find all possible allocas that might become the sret argument
1118+
static SmallSetVector<AllocaInst *, 8> FindSretAllocas(Value* SRetArg) {
1119+
SmallSetVector<AllocaInst *, 8> allocas;
1120+
if (AllocaInst *OneSRet = dyn_cast<AllocaInst>(SRetArg)) {
1121+
allocas.insert(OneSRet); // Found it directly
1122+
} else {
1123+
SmallSetVector<Value *, 8> worklist;
1124+
worklist.insert(SRetArg);
1125+
while (!worklist.empty()) {
1126+
Value *V = worklist.pop_back_val();
1127+
if (AllocaInst *Alloca = dyn_cast<AllocaInst>(V->stripInBoundsOffsets())) {
1128+
allocas.insert(Alloca); // Found a candidate
1129+
} else if (PHINode *Phi = dyn_cast<PHINode>(V)) {
1130+
for (Value *Incoming : Phi->incoming_values()) {
1131+
worklist.insert(Incoming);
1132+
}
1133+
} else if (SelectInst *SI = dyn_cast<SelectInst>(SRetArg)) {
1134+
auto TrueBranch = SI->getTrueValue();
1135+
auto FalseBranch = SI->getFalseValue();
1136+
if (TrueBranch && FalseBranch) {
1137+
worklist.insert(TrueBranch);
1138+
worklist.insert(FalseBranch);
1139+
} else {
1140+
llvm_dump(SI);
1141+
assert(false && "Malformed Select");
1142+
}
1143+
} else {
1144+
llvm_dump(V);
1145+
assert(false && "Unexpected SRet argument");
1146+
}
1147+
}
1148+
}
1149+
assert(allocas.size() > 0);
1150+
assert(std::all_of(allocas.begin(), allocas.end(), [&] (AllocaInst* SRetAlloca) JL_NOTSAFEPOINT {
1151+
return (SRetAlloca->getArraySize() == allocas[0]->getArraySize() &&
1152+
SRetAlloca->getAllocatedType() == allocas[0]->getAllocatedType());
1153+
}
1154+
));
1155+
return allocas;
1156+
}
1157+
11171158
State LateLowerGCFrame::LocalScan(Function &F) {
11181159
State S(F);
11191160
SmallVector<int, 8> PHINumbers;
@@ -1165,46 +1206,35 @@ State LateLowerGCFrame::LocalScan(Function &F) {
11651206
Type *ElT = getAttributeAtIndex(CI->getAttributes(), 1, Attribute::StructRet).getValueAsType();
11661207
auto tracked = CountTrackedPointers(ElT, true);
11671208
if (tracked.count) {
1168-
AllocaInst *SRet = dyn_cast<AllocaInst>((CI->arg_begin()[0])->stripInBoundsOffsets());
1169-
assert(SRet);
1170-
{
1209+
SmallSetVector<AllocaInst *, 8> allocas = FindSretAllocas((CI->arg_begin()[0])->stripInBoundsOffsets());
1210+
// We know that with the right optimizations we can forward a sret directly from an argument
1211+
// This hasn't been seen without adding IPO effects to julia functions but it's possible we need to handle that too
1212+
// If they are tracked.all we can just pass through but if they have a roots bundle it's possible we need to emit some copies ¯\_(ツ)_/¯
1213+
for (AllocaInst *SRet : allocas) {
11711214
if (!(SRet->isStaticAlloca() && isa<PointerType>(ElT) && ElT->getPointerAddressSpace() == AddressSpace::Tracked)) {
11721215
assert(!tracked.derived);
11731216
if (tracked.all) {
11741217
S.ArrayAllocas[SRet] = tracked.count * cast<ConstantInt>(SRet->getArraySize())->getZExtValue();
11751218
}
11761219
else {
11771220
Value *arg1 = (CI->arg_begin()[1])->stripInBoundsOffsets();
1221+
SmallSetVector<AllocaInst *, 8> gc_allocas = FindSretAllocas(arg1);
11781222
AllocaInst *SRet_gc = nullptr;
1179-
if (PHINode *Phi = dyn_cast<PHINode>(arg1)) {
1180-
for (Value *V : Phi->incoming_values()) {
1181-
if (AllocaInst *Alloca = dyn_cast<AllocaInst>(V->stripInBoundsOffsets())) {
1182-
if (SRet_gc == nullptr) {
1183-
SRet_gc = Alloca;
1184-
} else if (SRet_gc == Alloca) {
1185-
continue;
1186-
} else {
1187-
llvm_dump(Alloca);
1188-
llvm_dump(SRet_gc);
1189-
assert(false && "Allocas in Phi node should match");
1190-
}
1191-
} else {
1192-
llvm_dump(V->stripInBoundsOffsets());
1193-
assert(false && "Expected alloca");
1194-
}
1195-
}
1196-
} else {
1197-
SRet_gc = dyn_cast<AllocaInst>(arg1);
1223+
if (gc_allocas.size() == 1) {
1224+
SRet_gc = gc_allocas.pop_back_val();
11981225
}
1199-
if (!SRet_gc) {
1226+
else {
12001227
llvm_dump(CI);
1201-
llvm_dump(arg1);
1202-
assert(false && "Expected alloca");
1228+
for (AllocaInst *Alloca : gc_allocas) {
1229+
llvm_dump(Alloca);
1230+
}
1231+
assert(false && "Expected single alloca");
12031232
}
12041233
Type *ElT = SRet_gc->getAllocatedType();
12051234
if (!(SRet_gc->isStaticAlloca() && isa<PointerType>(ElT) && ElT->getPointerAddressSpace() == AddressSpace::Tracked)) {
12061235
S.ArrayAllocas[SRet_gc] = tracked.count * cast<ConstantInt>(SRet_gc->getArraySize())->getZExtValue();
12071236
}
1237+
break; // Found our gc roots
12081238
}
12091239
}
12101240
}
@@ -1401,6 +1431,8 @@ State LateLowerGCFrame::LocalScan(Function &F) {
14011431
return S;
14021432
}
14031433

1434+
1435+
14041436
static Value *ExtractScalar(Value *V, Type *VTy, bool isptr, ArrayRef<unsigned> Idxs, IRBuilder<> &irbuilder) {
14051437
Type *T_int32 = Type::getInt32Ty(V->getContext());
14061438
if (isptr) {

test/llvmpasses/late-lower-gc-sret.ll

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
; This file is a part of Julia. License is MIT: https://julialang.org/license
2+
3+
; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s
4+
5+
declare ptr @julia.get_pgcstack()
6+
7+
declare swiftcc void @sret_call(ptr noalias nocapture noundef nonnull sret([3 x ptr addrspace(10)]), ptr nonnull swiftself, ptr addrspace(10) nonnull)
8+
9+
; Test case: the sret pointer passed to @sret_call is a `select` between two
; distinct allocas rather than a single alloca. The FileCheck directives below
; expect a 6-slot GC frame, with frame slots requested at index 3 and index 0,
; pushed after the pgcstack lookup and popped before the return.
; NOTE(review): 6 presumably is 3 tracked pointers for each of the two
; candidate allocas -- confirm against the pass.
define hidden swiftcc nonnull ptr addrspace(10) @sret_select(ptr nonnull swiftself %0, ptr addrspace(10) noundef nonnull align 8 dereferenceable(88) %1, i1 %unpredictable) {
10+
; CHECK-LABEL: @sret_select
11+
; CHECK: %gcframe = call ptr @julia.new_gc_frame(i32 6)
12+
; CHECK: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 3)
13+
; CHECK: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 0)
14+
; CHECK: %pgcstack = call ptr @julia.get_pgcstack()
15+
; CHECK: call void @julia.push_gc_frame(ptr %gcframe, i32 6)
16+
%pgcstack = call ptr @julia.get_pgcstack()
17+
%3 = alloca [3 x i64], align 8
18+
%4 = alloca [3 x i64], align 8
19+
; Either alloca may be chosen, depending on the i1 argument.
%5 = select i1 %unpredictable, ptr %3, ptr %4
20+
call swiftcc void @sret_call(ptr noalias nocapture noundef nonnull sret([3 x ptr addrspace(10)]) %5, ptr nonnull swiftself %0, ptr addrspace(10) nonnull %1)
21+
; CHECK: call void @julia.pop_gc_frame(ptr %gcframe)
22+
ret ptr addrspace(10) %1
23+
}
24+
25+
; Test case: the sret pointer passed to @sret_call is a `phi` merging two
; distinct allocas that arrive from the %true and %false blocks. The FileCheck
; directives below expect the same shape as for @sret_select: a 6-slot GC
; frame, frame slots requested at index 3 and index 0, a push after the
; pgcstack lookup and a pop before the return.
define hidden swiftcc nonnull ptr addrspace(10) @sret_phi(ptr nonnull swiftself %0, ptr addrspace(10) noundef nonnull align 8 dereferenceable(88) %1, i1 %unpredictable) {
26+
top:
27+
; CHECK-LABEL: @sret_phi
28+
; CHECK: %gcframe = call ptr @julia.new_gc_frame(i32 6)
29+
; CHECK: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 3)
30+
; CHECK: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 0)
31+
; CHECK: %pgcstack = call ptr @julia.get_pgcstack()
32+
; CHECK: call void @julia.push_gc_frame(ptr %gcframe, i32 6)
33+
%pgcstack = call ptr @julia.get_pgcstack()
34+
%2 = alloca [3 x i64], align 8
35+
%3 = alloca [3 x i64], align 8
36+
br i1 %unpredictable, label %true, label %false
37+
38+
true: ; preds = %top
39+
br label %ret
40+
41+
false: ; preds = %top
42+
br label %ret
43+
44+
ret: ; preds = %false, %true
45+
; The two empty blocks above exist only to give this phi two distinct
; predecessors, one per alloca.
%4 = phi ptr [ %2, %true ], [ %3, %false ]
46+
call swiftcc void @sret_call(ptr noalias nocapture noundef nonnull sret([3 x ptr addrspace(10)]) %4, ptr nonnull swiftself %0, ptr addrspace(10) nonnull %1)
47+
; CHECK: call void @julia.pop_gc_frame(ptr %gcframe)
48+
ret ptr addrspace(10) %1
49+
}

0 commit comments

Comments
 (0)