-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[Inline][WinEH] Fix try scopes leaking to caller on inline #164170
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-llvm-transforms Author: Mirko (MuellerMP) ChangesThis fixes issue #164169 When inlining functions compiled with -EHa, try scope terminators might need to be inserted before inlined returns. Patch is 27.24 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/164170.diff 4 Files Affected:
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index f49fbf8807bac..08a66530b67ad 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -21,6 +21,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/CtxProfAnalysis.h"
@@ -2210,6 +2211,138 @@ inlineRetainOrClaimRVCalls(CallBase &CB, objcarc::ARCInstKind RVCallKind,
}
}
+struct SehTryScope {
+ llvm::BasicBlock *Start;
+ llvm::InvokeInst *Invoke;
+};
+
+// Determine SEH try scopes and their terminators and order them by dominance.
+static SmallVector<SehTryScope, 1> GetOrderedSehTryScopes(Function *Func,
+ DominatorTree &DT) {
+ SmallVector<SehTryScope, 1> Scopes{};
+ bool DTCalculated = false;
+
+ for (auto &BB : *Func) {
+ auto *TI = BB.getTerminator();
+ if (auto *II = dyn_cast<InvokeInst>(TI)) {
+ auto *Call = cast<CallBase>(II);
+ const auto *Fn = Call->getCalledFunction();
+ if (!Fn || !Fn->isIntrinsic())
+ continue;
+
+ if (Fn->getIntrinsicID() != Intrinsic::seh_try_begin)
+ continue;
+
+ if (!DTCalculated) {
+ DT.recalculate(*Func);
+ DTCalculated = true;
+ }
+
+ auto InsertIt = Scopes.end();
+ if (!Scopes.empty())
+ for (auto ScopeIt = Scopes.begin(); ScopeIt != Scopes.end(); ++ScopeIt)
+ if (DT.dominates(ScopeIt->Start, &BB))
+ InsertIt = std::next(ScopeIt);
+
+ Scopes.insert(InsertIt, {&BB, II});
+ }
+ }
+
+ return Scopes;
+}
+
+// Find, if present, the outermost unterminated try scope for the input block.
+static SehTryScope *
+GetOutermostUnterminatedTryScope(llvm::BasicBlock *Block,
+ SmallVector<SehTryScope, 1> &Scopes,
+ DominatorTree &DT) {
+ SehTryScope *UnterminatedScope{nullptr};
+
+ for (auto ScopeIt = Scopes.rbegin(); ScopeIt != Scopes.rend(); ++ScopeIt) {
+ // Return might not be in a try scope.
+ if (!DT.dominates(ScopeIt->Start, Block))
+ continue;
+
+ // If there is a catch which connects to the return, the try scope is
+ // considered to be terminated.
+ if (isPotentiallyReachable(ScopeIt->Invoke->getUnwindDest(), Block, nullptr,
+ &DT))
+ continue;
+
+ UnterminatedScope = &*ScopeIt;
+ }
+
+ return UnterminatedScope;
+}
+
+struct UnterminatedTryScope {
+ llvm::BasicBlock *ReturnBlock;
+ llvm::BasicBlock *TryStart;
+ llvm::BasicBlock *UnwindDestination;
+};
+
+// For Windows -EHa there may be the case of try scopes spanning over a return
+// instruction. If no other return out of the function is given (e.g. by letting
+// all other blocks terminate with unreachable) the try scope is considered
+// unterminated upon inlining. To avoid leaking the try scopes over to a caller
+// we need to add a terminator for the outermost unterminated try scope.
+static SmallVector<UnterminatedTryScope, 1>
+GetUnterminatedTryScopes(Function *CalledFunc) {
+ SmallVector<UnterminatedTryScope, 1> UnterminatedScopes;
+ DominatorTree DT;
+ auto Scopes = GetOrderedSehTryScopes(CalledFunc, DT);
+ if (Scopes.empty())
+ return UnterminatedScopes;
+
+ SmallVector<ReturnInst *, 8> Returns;
+ for (auto &BB : *CalledFunc)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator()))
+ Returns.push_back(RI);
+
+ for (auto *RI : Returns) {
+ auto *Block = RI->getParent();
+ auto *OutermostScope = GetOutermostUnterminatedTryScope(Block, Scopes, DT);
+ if (!OutermostScope)
+ continue;
+
+ UnterminatedScopes.push_back({Block, OutermostScope->Invoke->getParent(),
+ OutermostScope->Invoke->getUnwindDest()});
+ }
+
+ return UnterminatedScopes;
+}
+
+// Insert terminator for unterminated try scopes
+static void HandleUnterminatedTryScopes(
+ Function *Caller,
+ SmallVector<UnterminatedTryScope, 1> &UnterminatedTryScopes,
+ ValueToValueMapTy &VMap) {
+ for (auto &Scope : UnterminatedTryScopes) {
+ auto *ReturnBlock = cast<BasicBlock>(VMap[Scope.ReturnBlock]);
+ auto *TryStart = cast<BasicBlock>(VMap[Scope.TryStart]);
+ auto *UnwindDestination = cast<BasicBlock>(VMap[Scope.UnwindDestination]);
+ // did not survive (partial) inlining - ignore
+ if (!ReturnBlock || !TryStart || !UnwindDestination)
+ continue;
+
+ auto *Mod = (llvm::Module *)Caller->getParent();
+ auto *SehTryEndFn =
+ Intrinsic::getOrInsertDeclaration(Mod, Intrinsic::seh_try_end);
+ auto *BB = ReturnBlock->splitBasicBlockBefore(ReturnBlock->getTerminator(),
+ "try.end");
+ BB->getTerminator()->eraseFromParent();
+ IRBuilder<>(BB).CreateInvoke(SehTryEndFn, ReturnBlock, UnwindDestination);
+
+ for (auto &Insn : *UnwindDestination) {
+ if (auto *Phi = dyn_cast<PHINode>(&Insn)) {
+ auto *ValType = Phi->getIncomingValueForBlock(TryStart)->getType();
+ auto *Val = PoisonValue::get(ValType);
+ Phi->addIncoming(Val, BB);
+ }
+ }
+ }
+}
+
// In contextual profiling, when an inline succeeds, we want to remap the
// indices of the callee into the index space of the caller. We can't just leave
// them as-is because the same callee may appear in other places in this caller
@@ -2625,6 +2758,7 @@ void llvm::InlineFunctionImpl(CallBase &CB, InlineFunctionInfo &IFI,
SmallVector<ReturnInst*, 8> Returns;
ClonedCodeInfo InlinedFunctionInfo;
Function::iterator FirstNewBlock;
+ SmallVector<UnterminatedTryScope, 1> UnterminatedTryScopes;
// GC poses two hazards to inlining, which only occur when the callee has GC:
// 1. If the caller has no GC, then the callee's GC must be propagated to the
@@ -2648,6 +2782,11 @@ void llvm::InlineFunctionImpl(CallBase &CB, InlineFunctionInfo &IFI,
assert(Caller->getPersonalityFn()->stripPointerCasts() ==
CalledPersonality &&
"CanInlineCallSite should have verified compatible personality");
+
+ const llvm::Module* M = CalledFunc->getParent();
+ if (M->getModuleFlag("eh-asynch")) {
+ UnterminatedTryScopes = GetUnterminatedTryScopes(CalledFunc);
+ }
}
{ // Scope to destroy VMap after cloning.
@@ -2837,6 +2976,8 @@ void llvm::InlineFunctionImpl(CallBase &CB, InlineFunctionInfo &IFI,
for (Instruction &I : NewBlock)
if (auto *II = dyn_cast<AssumeInst>(&I))
IFI.GetAssumptionCache(*Caller).registerAssumption(II);
+
+ HandleUnterminatedTryScopes(Caller, UnterminatedTryScopes, VMap);
}
if (IFI.ConvergenceControlToken) {
diff --git a/llvm/test/Transforms/Inline/eha-try-fixup-multiple-scopes.ll b/llvm/test/Transforms/Inline/eha-try-fixup-multiple-scopes.ll
new file mode 100644
index 0000000000000..21e88308a848f
--- /dev/null
+++ b/llvm/test/Transforms/Inline/eha-try-fixup-multiple-scopes.ll
@@ -0,0 +1,207 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes=inline -S | FileCheck %s
+; Check that both unterminated try scopes of ExitOnThrow are terminated upon inlining into main.
+; The try scope of main should not get an additional terminator.
+
+define i32 @ExitOnThrow(i32 %argc) #0 personality ptr @__CxxFrameHandler3 {
+; CHECK-LABEL: define i32 @ExitOnThrow(
+; CHECK-SAME: i32 [[ARGC:%.*]]) #[[ATTR0:[0-9]+]] personality ptr @__CxxFrameHandler3 {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: invoke void @llvm.seh.try.begin()
+; CHECK-NEXT: to label %[[INVOKE_CONT_I:.*]] unwind label %[[CATCH_DISPATCH_I:.*]]
+; CHECK: [[INVOKE_CONT_I]]:
+; CHECK-NEXT: [[CALL_I:%.*]] = invoke i32 @ThrowException()
+; CHECK-NEXT: to label %[[TRY_CONT_I:.*]] unwind label %[[CATCH_DISPATCH_I]]
+; CHECK: [[CATCH_DISPATCH_I]]:
+; CHECK-NEXT: [[TMP0:%.*]] = catchswitch within none [label %catch.i] unwind to caller
+; CHECK: [[CATCH_I:.*:]]
+; CHECK-NEXT: [[TMP1:%.*]] = catchpad within [[TMP0]] [ptr null, i32 0, ptr null]
+; CHECK-NEXT: catchret from [[TMP1]] to label %[[TRY_CONT_I]]
+; CHECK: [[TRY_CONT_I]]:
+; CHECK-NEXT: invoke void @llvm.seh.try.begin()
+; CHECK-NEXT: to label %[[INVOKE_CONT2_I:.*]] unwind label %[[CATCH_DISPATCH3:.*]]
+; CHECK: [[INVOKE_CONT2_I]]:
+; CHECK-NEXT: invoke void @llvm.seh.try.begin()
+; CHECK-NEXT: to label %[[INVOKE_CONT1:.*]] unwind label %[[CATCH_DISPATCH:.*]]
+; CHECK: [[INVOKE_CONT1]]:
+; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[ARGC]], 0
+; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label %[[IF_THEN_I:.*]], label %[[IF_END_I:.*]]
+; CHECK: [[IF_THEN_I]]:
+; CHECK-NEXT: invoke void @ThrowException()
+; CHECK-NEXT: to label %[[UNREACHABLE:.*]] unwind label %[[CATCH_DISPATCH]]
+; CHECK: [[CATCH_DISPATCH]]:
+; CHECK-NEXT: [[TMP2:%.*]] = catchswitch within none [label %catch] unwind label %[[CATCH_DISPATCH3]]
+; CHECK: [[CATCH:.*:]]
+; CHECK-NEXT: [[TMP3:%.*]] = catchpad within [[TMP2]] [ptr null, i32 0, ptr null]
+; CHECK-NEXT: invoke void @Exit() #[[ATTR0]] [ "funclet"(token [[TMP3]]) ]
+; CHECK-NEXT: to label %[[INVOKE_CONT2:.*]] unwind label %[[CATCH_DISPATCH3]]
+; CHECK: [[CATCH_DISPATCH3]]:
+; CHECK-NEXT: [[TMP4:%.*]] = catchswitch within none [label %catch4] unwind to caller
+; CHECK: [[CATCH4:.*:]]
+; CHECK-NEXT: [[TMP5:%.*]] = catchpad within [[TMP4]] [ptr null, i32 0, ptr null]
+; CHECK-NEXT: catchret from [[TMP5]] to label %[[TRY_CONT5:.*]]
+; CHECK: [[TRY_CONT5]]:
+; CHECK-NEXT: call void @Exit() #[[ATTR0]]
+; CHECK-NEXT: unreachable
+; CHECK: [[INVOKE_CONT2]]:
+; CHECK-NEXT: unreachable
+; CHECK: [[IF_END_I]]:
+; CHECK-NEXT: ret i32 [[ARGC]]
+; CHECK: [[UNREACHABLE]]:
+; CHECK-NEXT: unreachable
+;
+entry:
+ invoke void @llvm.seh.try.begin()
+ to label %invoke.cont.i unwind label %catch.dispatch.i
+
+invoke.cont.i: ; preds = %entry
+ %call.i = invoke i32 @ThrowException()
+ to label %try.cont.i unwind label %catch.dispatch.i
+
+catch.dispatch.i: ; preds = %try.cont9.i, %invoke.cont.i, %entry
+ %0 = catchswitch within none [label %catch.i] unwind to caller
+
+catch.i: ; preds = %catch.dispatch.i
+ %1 = catchpad within %0 [ptr null, i32 0, ptr null]
+ catchret from %1 to label %try.cont.i
+
+try.cont.i: ; preds = %catch.i, invoke.cont.i
+ invoke void @llvm.seh.try.begin()
+ to label %invoke.cont2.i unwind label %catch.dispatch3
+
+invoke.cont2.i: ; preds = %try.cont.i
+ invoke void @llvm.seh.try.begin()
+ to label %invoke.cont1 unwind label %catch.dispatch
+
+invoke.cont1: ; preds = %invoke.cont2.i
+ %tobool.not = icmp eq i32 %argc, 0
+ br i1 %tobool.not, label %if.then.i, label %if.end.i
+
+if.then.i: ; preds = %invoke.cont1
+ invoke void @ThrowException()
+ to label %unreachable unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %if.then.i, %invoke.cont2.i
+ %2 = catchswitch within none [label %catch] unwind label %catch.dispatch3
+
+catch: ; preds = %catch.dispatch
+ %3 = catchpad within %2 [ptr null, i32 0, ptr null]
+ invoke void @Exit() #0 [ "funclet"(token %3) ]
+ to label %invoke.cont2 unwind label %catch.dispatch3
+
+catch.dispatch3: ; preds = %catch, %catch.dispatch, %entry
+ %4 = catchswitch within none [label %catch4] unwind to caller
+
+catch4: ; preds = %catch.dispatch3
+ %5 = catchpad within %4 [ptr null, i32 0, ptr null]
+ catchret from %5 to label %try.cont5
+
+try.cont5: ; preds = %catch4
+ call void @Exit() #0
+ unreachable
+
+invoke.cont2: ; preds = %catch
+ unreachable
+
+if.end.i: ; preds = %invoke.cont1
+ ret i32 %argc
+
+unreachable: ; preds = %if.then
+ unreachable
+}
+
+define i32 @main(i32 noundef %argc, ptr noundef %argv) personality ptr @__CxxFrameHandler3 {
+; CHECK-LABEL: define i32 @main(
+; CHECK-SAME: i32 noundef [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) personality ptr @__CxxFrameHandler3 {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: invoke void @llvm.seh.try.begin()
+; CHECK-NEXT: to label %[[INVOKE_CONT:.*]] unwind label %[[CATCH_DISPATCH:.*]]
+; CHECK: [[CATCH_DISPATCH]]:
+; CHECK-NEXT: [[TMP0:%.*]] = catchswitch within none [label %catch] unwind to caller
+; CHECK: [[CATCH:.*]]:
+; CHECK-NEXT: [[TMP1:%.*]] = catchpad within [[TMP0]] [ptr null, i32 0, ptr null]
+; CHECK-NEXT: catchret from [[TMP1]] to label %[[CLEANUP:.*]]
+; CHECK: [[INVOKE_CONT]]:
+; CHECK-NEXT: invoke void @llvm.seh.try.begin()
+; CHECK-NEXT: to label %[[INVOKE_CONT_I_I:.*]] unwind label %[[CATCH_DISPATCH_I_I:.*]]
+; CHECK: [[INVOKE_CONT_I_I]]:
+; CHECK-NEXT: [[CALL_I_I:%.*]] = invoke i32 @ThrowException()
+; CHECK-NEXT: to label %[[TRY_CONT_I_I:.*]] unwind label %[[CATCH_DISPATCH_I_I]]
+; CHECK: [[CATCH_DISPATCH_I_I]]:
+; CHECK-NEXT: [[TMP2:%.*]] = catchswitch within none [label %catch.i.i] unwind to caller
+; CHECK: [[CATCH_I_I:.*:]]
+; CHECK-NEXT: [[TMP3:%.*]] = catchpad within [[TMP2]] [ptr null, i32 0, ptr null]
+; CHECK-NEXT: catchret from [[TMP3]] to label %[[TRY_CONT_I_I]]
+; CHECK: [[TRY_CONT_I_I]]:
+; CHECK-NEXT: invoke void @llvm.seh.try.begin()
+; CHECK-NEXT: to label %[[INVOKE_CONT2_I_I:.*]] unwind label %[[CATCH_DISPATCH3_I:.*]]
+; CHECK: [[INVOKE_CONT2_I_I]]:
+; CHECK-NEXT: invoke void @llvm.seh.try.begin()
+; CHECK-NEXT: to label %[[INVOKE_CONT1_I:.*]] unwind label %[[CATCH_DISPATCH_I:.*]]
+; CHECK: [[INVOKE_CONT1_I]]:
+; CHECK-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[ARGC]], 0
+; CHECK-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[IF_THEN_I_I:.*]], label %[[TRY_END:.*]]
+; CHECK: [[IF_THEN_I_I]]:
+; CHECK-NEXT: invoke void @ThrowException()
+; CHECK-NEXT: to label %[[UNREACHABLE_I:.*]] unwind label %[[CATCH_DISPATCH_I]]
+; CHECK: [[CATCH_DISPATCH_I]]:
+; CHECK-NEXT: [[TMP4:%.*]] = catchswitch within none [label %catch.i] unwind label %[[CATCH_DISPATCH3_I]]
+; CHECK: [[CATCH_I:.*:]]
+; CHECK-NEXT: [[TMP5:%.*]] = catchpad within [[TMP4]] [ptr null, i32 0, ptr null]
+; CHECK-NEXT: invoke void @Exit() #[[ATTR0]] [ "funclet"(token [[TMP5]]) ]
+; CHECK-NEXT: to label %[[INVOKE_CONT2_I:.*]] unwind label %[[CATCH_DISPATCH3_I]]
+; CHECK: [[CATCH_DISPATCH3_I]]:
+; CHECK-NEXT: [[TMP6:%.*]] = catchswitch within none [label %catch4.i] unwind to caller
+; CHECK: [[CATCH4_I:.*:]]
+; CHECK-NEXT: [[TMP7:%.*]] = catchpad within [[TMP6]] [ptr null, i32 0, ptr null]
+; CHECK-NEXT: catchret from [[TMP7]] to label %[[TRY_CONT5_I:.*]]
+; CHECK: [[TRY_CONT5_I]]:
+; CHECK-NEXT: call void @Exit() #[[ATTR0]]
+; CHECK-NEXT: unreachable
+; CHECK: [[INVOKE_CONT2_I]]:
+; CHECK-NEXT: unreachable
+; CHECK: [[TRY_END]]:
+; CHECK-NEXT: invoke void @llvm.seh.try.end()
+; CHECK-NEXT: to label %[[EXITONTHROW_EXIT:.*]] unwind label %[[CATCH_DISPATCH3_I]]
+; CHECK: [[UNREACHABLE_I]]:
+; CHECK-NEXT: unreachable
+; CHECK: [[EXITONTHROW_EXIT]]:
+; CHECK-NEXT: [[CALL1:%.*]] = call noundef i32 @AlwaysThrows(i32 noundef [[ARGC]])
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[ARGC]], [[CALL1]]
+; CHECK-NEXT: br label %[[CLEANUP]]
+; CHECK: [[CLEANUP]]:
+; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[ADD]], %[[EXITONTHROW_EXIT]] ], [ 123, %[[CATCH]] ]
+; CHECK-NEXT: ret i32 [[RETVAL_0]]
+;
+entry:
+ invoke void @llvm.seh.try.begin()
+ to label %invoke.cont unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %entry
+ %0 = catchswitch within none [label %catch] unwind to caller
+
+catch: ; preds = %catch.dispatch
+ %1 = catchpad within %0 [ptr null, i32 0, ptr null]
+ catchret from %1 to label %cleanup
+
+invoke.cont: ; preds = %entry
+ %call = call noundef i32 @ExitOnThrow(i32 noundef %argc)
+ %call1 = call noundef i32 @AlwaysThrows(i32 noundef %call)
+ %add = add nsw i32 %call, %call1
+ br label %cleanup
+
+cleanup: ; preds = %catch, %invoke.cont
+ %retval.0 = phi i32 [ %add, %invoke.cont ], [ 123, %catch ]
+ ret i32 %retval.0
+}
+
+declare void @llvm.seh.try.begin()
+declare i32 @__CxxFrameHandler3(...)
+declare void @ThrowException()
+declare void @Exit() #0
+declare i32 @AlwaysThrows(i32 %id)
+
+attributes #0 = { noreturn }
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 2, !"eh-asynch", i32 1}
diff --git a/llvm/test/Transforms/Inline/eha-try-fixup-phis.ll b/llvm/test/Transforms/Inline/eha-try-fixup-phis.ll
new file mode 100644
index 0000000000000..2fcf787239579
--- /dev/null
+++ b/llvm/test/Transforms/Inline/eha-try-fixup-phis.ll
@@ -0,0 +1,119 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes=inline -S | FileCheck %s
+; Check that the phi in catch.dispatch is adjusted for the try scope end upon inlining.
+
+define i32 @ExitOnThrow(i32 %argc) #3 personality ptr @__CxxFrameHandler3 {
+; CHECK-LABEL: define i32 @ExitOnThrow(
+; CHECK-SAME: i32 [[ARGC:%.*]]) #[[ATTR0:[0-9]+]] personality ptr @__CxxFrameHandler3 {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: invoke void @llvm.seh.try.begin()
+; CHECK-NEXT: to label %[[INVOKE_CONT:.*]] unwind label %[[CATCH_DISPATCH:.*]]
+; CHECK: [[INVOKE_CONT]]:
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[ARGC]], 0
+; CHECK-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: invoke void @ThrowException() #[[ATTR2:[0-9]+]]
+; CHECK-NEXT: to label %[[UNREACHABLE:.*]] unwind label %[[CATCH_DISPATCH]]
+; CHECK: [[CATCH_DISPATCH]]:
+; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 1, %[[IF_THEN]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = catchswitch within none [label %catch] unwind to caller
+; CHECK: [[CATCH:.*:]]
+; CHECK-NEXT: [[TMP1:%.*]] = catchpad within [[TMP0]] [ptr null, i32 0, ptr null]
+; CHECK-NEXT: call void @DoSth(i32 noundef [[PHI]]) #[[ATTR3:[0-9]+]] [ "funclet"(token [[TMP1]]) ]
+; CHECK-NEXT: catchret from [[TMP1]] to label %[[TRY_CONT:.*]]
+; CHECK: [[TRY_CONT]]:
+; CHECK-NEXT: call void @Exit() #[[ATTR4:[0-9]+]]
+; CHECK-NEXT: unreachable
+; CHECK: [[IF_END]]:
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[ARGC]], 5
+; CHECK-NEXT: ret i32 [[ADD]]
+; CHECK: [[UNREACHABLE]]:
+; CHECK-NEXT: unreachable
+;
+entry:
+ invoke void @llvm.seh.try.begin()
+ to label %invoke.cont unwind label %catch.dispatch
+
+invoke.cont: ; preds = %entry
+ %cmp = icmp sgt i32 %argc, 0
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %invoke.cont
+ invoke void @ThrowException() #0
+ to label %unreachable unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %if.then, %entry
+ %phi = phi i32 [ 1, %if.then ], [ 0, %entry ]
+ %0 = catchswitch within none [label %catch] unwind to caller
+
+catch: ; preds = %catch.dispatch
+ %1 = catchpad within %0 [ptr null, i32 0, ptr null]
+ call void @DoSth(i32 noundef %phi) #1 [ "funclet"(token %1) ]
+ catchret from %1 to label %try.cont
+
+try.cont: ; preds = %catch, %invoke.cont
+ call void @Exit() #2
+ unreachable
+
+if.end: ; preds = %invoke.cont
+ %add = add nsw i32...
[truncated]
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
8de5e7c to
5393090
Compare
|
Lin AArch64 CI seems to have failed due to infrastructure issues (cant retrigger sadly). Additional Info: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can rebase/merge main to force a rebuild; arm64 bots should be fixed now, I think.
Please investigate why clang is emitting ret instructions without emitting an appropriate seh.try.end first.
|
|
||
| auto InsertIt = Scopes.end(); | ||
| if (!Scopes.empty()) | ||
| for (auto ScopeIt = Scopes.begin(); ScopeIt != Scopes.end(); ++ScopeIt) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks like it's quadratic in the number of seh.try.begin calls.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
removed this loop
should perform better now
| continue; | ||
|
|
||
| // If there is a catch which connects to the return, the try scope is | ||
| // considered to be terminated. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This seems strange at first glance.
The algorithm I'd expect would be to assign every basic block to a corresponding scope; the normal edge of seh.try.begin pushes a scope, the normal edge of seh.try.end pops a scope, all other edges make the destination inherit the scope of the predecessor. Follow the edges to assign a scope to every basic block. If anything disagrees, your scopes are broken.
Then you just need to check whether the block with the return has a null scope.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You are right that I missed the seh.try.end here.
The seh.try.end is actually only emitted for SEH (__try /__finally / __except) and not Cpp EH (try/catch).
This will also only be emitted for non cxx unwind personalities.
But since I didn't explicitly check for a CXX personality the algorithm is currently wrong.
Since this fix also works for SEH and non cpp personalities (as one can see with this sample https://godbolt.org/z/Eco55qYMv), this should also handle seh.try.end's correctly.
As to when seh.try.end's are emitted: usually for ctors/invokes inside of a __try scope.
Also in case of a __finally. But will need to do look in deeper as to why.
Now regarding the catch connection:
So the calculateCXXStateForAsynchEH function in WinEHPrepare tells us that cleanupreturns and catch returns can also be an explicit scope end. (the ToState represents the parent state - as seen in calculateCXXStateNumbers)
After a catchret or cleanupret there might be a non trivial connection back to the return block, thats why i chose the isPotentiallyReachable here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If you can make the computation line up with the way WinEHPrepare reasons about it, that's fine.
Note that WinEHPrepare is linear in the number of CFG edges, not quadratic.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
seh.try.end invokes are now also integrated if they exist.
I also added a seh specific test which on one hand makes sure that we find the seh.try.end terminator and on the other hand also asserts a correct termination for seh.try.begin invokes for a inlined callee with non cxx personality.
| auto *BB = ReturnBlock->splitBasicBlockBefore(ReturnBlock->getTerminator(), | ||
| "try.end"); | ||
| BB->getTerminator()->eraseFromParent(); | ||
| IRBuilder<>(BB).CreateInvoke(SehTryEndFn, ReturnBlock, UnwindDestination); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The strategy of inserting an seh.try.end is better than nothing, I guess. But I think it's also worth looking at clang to see if we're accidentally skipping the emission of seh.try.end in some cases; it's much easier to reason about the scopes if they're properly balanced when they're first emitted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sure, but I think in the pariticular case of the sample in my issue (https://godbolt.org/z/8Gbqd6Gha) where the try ends on a return, I can at least see why there is no seh.try.end emitted.
Because where could you place it?
The return still has to be inside of the try (because in the case of async eh we are instruction precise i think and a ret could still cause a segfault - which can be handled by the catch) and emitting it after would end up in a bogus control flow / killing the terminator.
This also brings up another issue i see in my sample:
An add instruction is still being hoisted out of the try range - even if you apply my fix:
https://godbolt.org/z/nnTW1vYz6
I think this could be a bug in machine-sink, which probably isn't informed that it cannot move code out of async-eh ranges.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess this is a bit of an exaggerated example but there exists the possibility to catch a segfault on a ret right here:
https://godbolt.org/z/s13aqhfro
Now i believe this example should also hold for mutlithreaded cases, where Thread A changes the memory protection of the ret instruction while Thread B will execute it next - thus causing unwinding (catchable with EHa - as long as the stack is still valid).
Another single threaded example (also exaggerated) to reproduce this is the use of nop instructions till a page boudary is hit at a ret which is in an RW page (possible via VirtualProtect in the same thread).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The "ret" instruction in the assembly doesn't actually need to be inside the try block for compatibility, as far as I can tell. Both MSVC and LLVM are perfectly happy to emit the epilogue outside of the try block. Trying to ensure the LLVM IR ret instruction comes before the seh.try.end is not at all necessary.
For other code motion... we currently don't strictly enforce code motion in a try block with EHa. Trying to reconcile the notion of a trap with the way LLVM usually reasons about code is very hard; usually there's no restrictions on generated code if it triggers undefined behavior. LLVM tries to catch exceptions that aren't explicitly raised using RaiseException etc., but only to the extent that it can without interfering with the way optimization normally works. There's probably room for improvement, but -EHa is a pretty niche feature; nobody has really been interested in investing time to improve it beyond what's already there.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Well i acutally tried now to create a nop sled to just move the epilog and just the ret into an RW page:
The unwinder (with the help of unwindV2 or the legacy epilog handling of the unwinder i assume) will then detect that we are in an epiloge and unwind this frame virtually and pass the exception to be handled in the caller. (I can share the code if wanted - but this is obviously highly compiler version dependent how many NOP's you need to insert - according to the preceeding opcode bytes emitted)
This behavior is also described in Unwind procedure 3a) here.
Since MSVC and LLVM are always emitting the unwind range described in ip2state spanning over the epilog and ret, I assumed that in order to process if we are in an epilog, we first need to identify that we are in an unwind range.
I could be mistaken though - will need to look further into the unwinder for that.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think I'm mistaken and you are actually right -> SEH at least does not incorporate the ret in unwind range metadata.
This fixes issue llvm#164169 When inlining functions compiled with -EHa, try scope terminators might need to be inserted before inlined returns. This prevents leaking try scopes over to the caller.
| DominatorTree DT; | ||
| DT.recalculate(*ReturnBlock->getParent()); | ||
|
|
||
| for (auto &Scope : Scopes) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is still quadratic; it's O(number of ret instructions * number of scopes)
|
Thanks for your help @efriedma-quic - very much appreciated! Will close this and come back if I have a better fix. |
|
I spent a minute looking at this issue and the code, but I didn't get very far before I had to move on. It seems that the core of the issue is that the /EHa state numbering algorithm isn't resilient to unreachable block removal eliminating the scope end/close operation, which is a problem, since Clang/LLVM like to do that whenever they can. I'm not sure fixing this in the frontend is the right move. It will probably require significant careful time and though to revisit the EHa numbering algorithm. |
|
I don't think messing with the numbering algorithm helps. Fundamentally, the issue here is that after inlining, we can't tell where the scope ends: we don't explicitly end the scope, and the ret instruction goes away, so there's no marker for the end of the region. The simplest way to handle this is in the frontend, I think, explicitly closing the region before we "ret". There are other issues with unreachable code, like, we don't enforce nesting like we do catchpad/cleanuppad regions. But I don't think those are relevant for this specific testcase. |
This fixes issue #164169
When inlining functions compiled with -EHa, try scope terminators might need to be inserted before inlined returns.
This prevents leaking try scopes over to the caller.