Skip to content

Commit 8047eb8

Browse files
authored
Merge branch 'main' into strong-siv-overflow-2
2 parents 90f30bb + 15bbdd1 commit 8047eb8

File tree

13 files changed

+998
-91
lines changed

13 files changed

+998
-91
lines changed

lldb/CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,12 @@ if (LLDB_ENABLE_PYTHON)
8787
set(LLDB_PYTHON_EXT_SUFFIX "_d${LLDB_PYTHON_EXT_SUFFIX}")
8888
endif()
8989
endif()
90+
if(TARGET Python3::Python)
91+
get_target_property(_Python3_LIB_PATH Python3::Python IMPORTED_LIBRARY_LOCATION)
92+
if(_Python3_LIB_PATH)
93+
get_filename_component(LLDB_PYTHON_RUNTIME_LIBRARY_FILENAME "${_Python3_LIB_PATH}" NAME)
94+
endif()
95+
endif()
9096
endif ()
9197

9298
if (LLDB_ENABLE_LUA)

lldb/tools/driver/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@ add_dependencies(lldb
3737
if(DEFINED LLDB_PYTHON_DLL_RELATIVE_PATH)
3838
target_compile_definitions(lldb PRIVATE LLDB_PYTHON_DLL_RELATIVE_PATH="${LLDB_PYTHON_DLL_RELATIVE_PATH}")
3939
endif()
40+
if(DEFINED LLDB_PYTHON_RUNTIME_LIBRARY_FILENAME)
41+
target_compile_definitions(lldb PRIVATE LLDB_PYTHON_RUNTIME_LIBRARY_FILENAME="${LLDB_PYTHON_RUNTIME_LIBRARY_FILENAME}")
42+
endif()
4043

4144
if(LLDB_BUILD_FRAMEWORK)
4245
# In the build-tree, we know the exact path to the framework directory.

lldb/tools/driver/Driver.cpp

Lines changed: 56 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -433,7 +433,8 @@ SBError Driver::ProcessArgs(const opt::InputArgList &args, bool &exiting) {
433433
return error;
434434
}
435435

436-
#if defined(_WIN32) && defined(LLDB_PYTHON_DLL_RELATIVE_PATH)
436+
#ifdef _WIN32
437+
#ifdef LLDB_PYTHON_DLL_RELATIVE_PATH
437438
/// Returns the full path to the lldb.exe executable.
438439
inline std::wstring GetPathToExecutableW() {
439440
// Iterate until we reach the Windows API maximum path length (32,767).
@@ -447,30 +448,73 @@ inline std::wstring GetPathToExecutableW() {
447448
return L"";
448449
}
449450

450-
/// Resolve the full path of the directory defined by
451+
/// \brief Resolve the full path of the directory defined by
451452
/// LLDB_PYTHON_DLL_RELATIVE_PATH. If it exists, add it to the list of DLL
452453
/// search directories.
453-
void AddPythonDLLToSearchPath() {
454+
/// \return `true` if the library was added to the search path.
455+
/// `false` otherwise.
456+
bool AddPythonDLLToSearchPath() {
454457
std::wstring modulePath = GetPathToExecutableW();
455-
if (modulePath.empty()) {
456-
llvm::errs() << "error: unable to find python.dll." << '\n';
457-
return;
458-
}
458+
if (modulePath.empty())
459+
return false;
459460

460461
SmallVector<char, MAX_PATH> utf8Path;
461462
if (sys::windows::UTF16ToUTF8(modulePath.c_str(), modulePath.length(),
462463
utf8Path))
463-
return;
464+
return false;
464465
sys::path::remove_filename(utf8Path);
465466
sys::path::append(utf8Path, LLDB_PYTHON_DLL_RELATIVE_PATH);
466467
sys::fs::make_absolute(utf8Path);
467468

468469
SmallVector<wchar_t, 1> widePath;
469470
if (sys::windows::widenPath(utf8Path.data(), widePath))
470-
return;
471+
return false;
471472

472473
if (sys::fs::exists(utf8Path))
473-
SetDllDirectoryW(widePath.data());
474+
return SetDllDirectoryW(widePath.data());
475+
return false;
476+
}
477+
#endif
478+
479+
#ifdef LLDB_PYTHON_RUNTIME_LIBRARY_FILENAME
480+
/// Returns whether `python3x.dll` is in the DLL search path.
481+
bool IsPythonDLLInPath() {
482+
#define WIDEN2(x) L##x
483+
#define WIDEN(x) WIDEN2(x)
484+
WCHAR foundPath[MAX_PATH];
485+
DWORD result =
486+
SearchPathW(nullptr, WIDEN(LLDB_PYTHON_RUNTIME_LIBRARY_FILENAME), nullptr,
487+
MAX_PATH, foundPath, nullptr);
488+
#undef WIDEN2
489+
#undef WIDEN
490+
491+
return result > 0;
492+
}
493+
#endif
494+
495+
/// Try to set up the DLL search path for the Python Runtime Library
496+
/// (python3xx.dll).
497+
///
498+
/// If `LLDB_PYTHON_RUNTIME_LIBRARY_FILENAME` is set, we first check if
499+
/// python3xx.dll is in the search path. If it's not, we try to add it and
500+
/// check for it a second time.
501+
/// If only `LLDB_PYTHON_DLL_RELATIVE_PATH` is set, we try to add python3xx.dll
502+
/// to the search path whether python.dll is already in the search path or not.
503+
void SetupPythonRuntimeLibrary() {
504+
#ifdef LLDB_PYTHON_RUNTIME_LIBRARY_FILENAME
505+
if (IsPythonDLLInPath())
506+
return;
507+
#ifdef LLDB_PYTHON_DLL_RELATIVE_PATH
508+
if (AddPythonDLLToSearchPath() && IsPythonDLLInPath())
509+
return;
510+
#endif
511+
llvm::errs() << "error: unable to find '"
512+
<< LLDB_PYTHON_RUNTIME_LIBRARY_FILENAME << "'.\n";
513+
return;
514+
#elif defined(LLDB_PYTHON_DLL_RELATIVE_PATH)
515+
if (!AddPythonDLLToSearchPath())
516+
llvm::errs() << "error: unable to find the Python runtime library.\n";
517+
#endif
474518
}
475519
#endif
476520

@@ -776,8 +820,8 @@ int main(int argc, char const *argv[]) {
776820
"~/Library/Logs/DiagnosticReports/.\n");
777821
#endif
778822

779-
#if defined(_WIN32) && defined(LLDB_PYTHON_DLL_RELATIVE_PATH)
780-
AddPythonDLLToSearchPath();
823+
#ifdef _WIN32
824+
SetupPythonRuntimeLibrary();
781825
#endif
782826

783827
// Parse arguments.

llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp

Lines changed: 55 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -181,14 +181,52 @@ BasicBlock *AMDGPUUnifyDivergentExitNodesImpl::unifyReturnBlockSet(
181181
return NewRetBlock;
182182
}
183183

184+
static BasicBlock *
185+
createDummyReturnBlock(Function &F,
186+
SmallVector<BasicBlock *, 4> &ReturningBlocks) {
187+
BasicBlock *DummyReturnBB =
188+
BasicBlock::Create(F.getContext(), "DummyReturnBlock", &F);
189+
Type *RetTy = F.getReturnType();
190+
Value *RetVal = RetTy->isVoidTy() ? nullptr : PoisonValue::get(RetTy);
191+
ReturnInst::Create(F.getContext(), RetVal, DummyReturnBB);
192+
ReturningBlocks.push_back(DummyReturnBB);
193+
return DummyReturnBB;
194+
}
195+
196+
/// Handle conditional branch instructions (-> 2 targets) and callbr
197+
/// instructions with N targets.
198+
static void handleNBranch(Function &F, BasicBlock *BB, Instruction *BI,
199+
BasicBlock *DummyReturnBB,
200+
std::vector<DominatorTree::UpdateType> &Updates) {
201+
SmallVector<BasicBlock *, 2> Successors(successors(BB));
202+
203+
// Create a new transition block to hold the conditional branch or callbr.
204+
BasicBlock *TransitionBB = BB->splitBasicBlock(BI, "TransitionBlock");
205+
206+
Updates.reserve(Updates.size() + 2 * Successors.size() + 2);
207+
208+
// 'Successors' become successors of TransitionBB instead of BB,
209+
// and TransitionBB becomes a single successor of BB.
210+
Updates.emplace_back(DominatorTree::Insert, BB, TransitionBB);
211+
for (BasicBlock *Successor : Successors) {
212+
Updates.emplace_back(DominatorTree::Insert, TransitionBB, Successor);
213+
Updates.emplace_back(DominatorTree::Delete, BB, Successor);
214+
}
215+
216+
// Create a branch that will always branch to the transition block and
217+
// references DummyReturnBB.
218+
BB->getTerminator()->eraseFromParent();
219+
BranchInst::Create(TransitionBB, DummyReturnBB,
220+
ConstantInt::getTrue(F.getContext()), BB);
221+
Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB);
222+
}
223+
184224
bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT,
185225
const PostDominatorTree &PDT,
186226
const UniformityInfo &UA) {
187-
assert(hasOnlySimpleTerminator(F) && "Unsupported block terminator.");
188-
189227
if (PDT.root_size() == 0 ||
190228
(PDT.root_size() == 1 &&
191-
!isa<BranchInst>(PDT.getRoot()->getTerminator())))
229+
!isa<BranchInst, CallBrInst>(PDT.getRoot()->getTerminator())))
192230
return false;
193231

194232
// Loop over all of the blocks in a function, tracking all of the blocks that
@@ -222,46 +260,28 @@ bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT,
222260
if (HasDivergentExitBlock)
223261
UnreachableBlocks.push_back(BB);
224262
} else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
225-
226-
ConstantInt *BoolTrue = ConstantInt::getTrue(F.getContext());
227-
if (DummyReturnBB == nullptr) {
228-
DummyReturnBB =
229-
BasicBlock::Create(F.getContext(), "DummyReturnBlock", &F);
230-
Type *RetTy = F.getReturnType();
231-
Value *RetVal = RetTy->isVoidTy() ? nullptr : PoisonValue::get(RetTy);
232-
ReturnInst::Create(F.getContext(), RetVal, DummyReturnBB);
233-
ReturningBlocks.push_back(DummyReturnBB);
234-
}
263+
if (!DummyReturnBB)
264+
DummyReturnBB = createDummyReturnBlock(F, ReturningBlocks);
235265

236266
if (BI->isUnconditional()) {
237267
BasicBlock *LoopHeaderBB = BI->getSuccessor(0);
238268
BI->eraseFromParent(); // Delete the unconditional branch.
239269
// Add a new conditional branch with a dummy edge to the return block.
240-
BranchInst::Create(LoopHeaderBB, DummyReturnBB, BoolTrue, BB);
241-
Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB);
242-
} else { // Conditional branch.
243-
SmallVector<BasicBlock *, 2> Successors(successors(BB));
244-
245-
// Create a new transition block to hold the conditional branch.
246-
BasicBlock *TransitionBB = BB->splitBasicBlock(BI, "TransitionBlock");
247-
248-
Updates.reserve(Updates.size() + 2 * Successors.size() + 2);
249-
250-
// 'Successors' become successors of TransitionBB instead of BB,
251-
// and TransitionBB becomes a single successor of BB.
252-
Updates.emplace_back(DominatorTree::Insert, BB, TransitionBB);
253-
for (BasicBlock *Successor : Successors) {
254-
Updates.emplace_back(DominatorTree::Insert, TransitionBB, Successor);
255-
Updates.emplace_back(DominatorTree::Delete, BB, Successor);
256-
}
257-
258-
// Create a branch that will always branch to the transition block and
259-
// references DummyReturnBB.
260-
BB->getTerminator()->eraseFromParent();
261-
BranchInst::Create(TransitionBB, DummyReturnBB, BoolTrue, BB);
270+
BranchInst::Create(LoopHeaderBB, DummyReturnBB,
271+
ConstantInt::getTrue(F.getContext()), BB);
262272
Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB);
273+
} else {
274+
handleNBranch(F, BB, BI, DummyReturnBB, Updates);
263275
}
264276
Changed = true;
277+
} else if (CallBrInst *CBI = dyn_cast<CallBrInst>(BB->getTerminator())) {
278+
if (!DummyReturnBB)
279+
DummyReturnBB = createDummyReturnBlock(F, ReturningBlocks);
280+
281+
handleNBranch(F, BB, CBI, DummyReturnBB, Updates);
282+
Changed = true;
283+
} else {
284+
llvm_unreachable("unsupported block terminator");
265285
}
266286
}
267287

llvm/lib/Transforms/Scalar/StructurizeCFG.cpp

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -558,11 +558,10 @@ void StructurizeCFG::analyzeLoops(RegionNode *N) {
558558
} else {
559559
// Test for successors as back edge
560560
BasicBlock *BB = N->getNodeAs<BasicBlock>();
561-
BranchInst *Term = cast<BranchInst>(BB->getTerminator());
562-
563-
for (BasicBlock *Succ : Term->successors())
564-
if (Visited.count(Succ))
565-
Loops[Succ] = BB;
561+
if (BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator()))
562+
for (BasicBlock *Succ : Term->successors())
563+
if (Visited.count(Succ))
564+
Loops[Succ] = BB;
566565
}
567566
}
568567

@@ -594,7 +593,7 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {
594593

595594
for (BasicBlock *P : predecessors(BB)) {
596595
// Ignore it if it's a branch from outside into our region entry
597-
if (!ParentRegion->contains(P))
596+
if (!ParentRegion->contains(P) || !dyn_cast<BranchInst>(P->getTerminator()))
598597
continue;
599598

600599
Region *R = RI->getRegionFor(P);
@@ -1402,13 +1401,17 @@ bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) {
14021401
/// Run the transformation for each region found
14031402
bool StructurizeCFG::run(Region *R, DominatorTree *DT,
14041403
const TargetTransformInfo *TTI) {
1405-
if (R->isTopLevelRegion())
1404+
// CallBr and its corresponding direct target blocks are for now ignored by
1405+
// this pass. This is not a limitation for the currently intended use cases
1406+
// of callbr in the AMDGPU backend.
1407+
// Parent and child regions are not affected by this (current) restriction.
1408+
// See `llvm/test/Transforms/StructurizeCFG/callbr.ll` for details.
1409+
if (R->isTopLevelRegion() || isa<CallBrInst>(R->getEntry()->getTerminator()))
14061410
return false;
14071411

14081412
this->DT = DT;
14091413
this->TTI = TTI;
14101414
Func = R->getEntry()->getParent();
1411-
assert(hasOnlySimpleTerminator(*Func) && "Unsupported block terminator.");
14121415

14131416
ParentRegion = R;
14141417

llvm/lib/Transforms/Utils/UnifyLoopExits.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {
158158
SmallVector<BasicBlock *, 8> CallBrTargetBlocksToFix;
159159
// Redirect exiting edges through a control flow hub.
160160
ControlFlowHub CHub;
161+
bool Changed = false;
161162

162163
for (unsigned I = 0; I < ExitingBlocks.size(); ++I) {
163164
BasicBlock *BB = ExitingBlocks[I];
@@ -182,6 +183,10 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {
182183
bool UpdatedLI = false;
183184
BasicBlock *NewSucc =
184185
SplitCallBrEdge(BB, Succ, J, &DTU, nullptr, &LI, &UpdatedLI);
186+
// SplitCallBrEdge modifies the CFG because it creates an intermediate
187+
// block. So we need to set the changed flag no matter what the
188+
// ControlFlowHub is going to do later.
189+
Changed = true;
185190
// Even if CallBr and Succ do not have a common parent loop, we need to
186191
// add the new target block to the parent loop of the current loop.
187192
if (!UpdatedLI)
@@ -207,6 +212,7 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {
207212
bool ChangedCFG;
208213
std::tie(LoopExitBlock, ChangedCFG) = CHub.finalize(
209214
&DTU, GuardBlocks, "loop.exit", MaxBooleansInControlFlowHub.getValue());
215+
ChangedCFG |= Changed;
210216
if (!ChangedCFG)
211217
return false;
212218

llvm/test/CodeGen/AMDGPU/callbr.ll

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck %s
3+
4+
define void @callbr_inline_asm(ptr %src, ptr %dst1, ptr %dst2, i32 %c) {
5+
; CHECK-LABEL: callbr_inline_asm:
6+
; CHECK: ; %bb.0:
7+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8+
; CHECK-NEXT: flat_load_dword v0, v[0:1]
9+
; CHECK-NEXT: ;;#ASMSTART
10+
; CHECK-NEXT: v_cmp_gt_i32 vcc v6, 42; s_cbranch_vccnz .LBB0_2
11+
; CHECK-NEXT: ;;#ASMEND
12+
; CHECK-NEXT: ; %bb.1: ; %fallthrough
13+
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
14+
; CHECK-NEXT: flat_store_dword v[2:3], v0
15+
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
16+
; CHECK-NEXT: s_setpc_b64 s[30:31]
17+
; CHECK-NEXT: .LBB0_2: ; Inline asm indirect target
18+
; CHECK-NEXT: ; %indirect
19+
; CHECK-NEXT: ; Label of block must be emitted
20+
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
21+
; CHECK-NEXT: flat_store_dword v[4:5], v0
22+
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
23+
; CHECK-NEXT: s_setpc_b64 s[30:31]
24+
%a = load i32, ptr %src, align 4
25+
callbr void asm "v_cmp_gt_i32 vcc $0, 42; s_cbranch_vccnz ${1:l}", "r,!i"(i32 %c) to label %fallthrough [label %indirect]
26+
fallthrough:
27+
store i32 %a, ptr %dst1, align 4
28+
br label %ret
29+
indirect:
30+
store i32 %a, ptr %dst2, align 4
31+
br label %ret
32+
ret:
33+
ret void
34+
}
35+
36+
define void @callbr_self_loop(i1 %c) {
37+
; CHECK-LABEL: callbr_self_loop:
38+
; CHECK: ; %bb.0:
39+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
40+
; CHECK-NEXT: .LBB1_1: ; %callbr
41+
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
42+
; CHECK-NEXT: ;;#ASMSTART
43+
; CHECK-NEXT: ;;#ASMEND
44+
; CHECK-NEXT: s_branch .LBB1_1
45+
; CHECK-NEXT: .LBB1_2: ; Inline asm indirect target
46+
; CHECK-NEXT: ; %callbr.target.ret
47+
; CHECK-NEXT: ; Label of block must be emitted
48+
; CHECK-NEXT: s_setpc_b64 s[30:31]
49+
br label %callbr
50+
callbr:
51+
callbr void asm "", "!i"() to label %callbr [label %ret]
52+
ret:
53+
ret void
54+
}

0 commit comments

Comments
 (0)