Skip to content

Commit 8a0904b

Browse files
[Coroutines] Conditionally elide coroutines based on hot/cold information
Unconditionally eliding all `[[clang::coro_await_elidable]]` coroutines is not good. For example,

```
Task bar();
Task foo(bool b) {
  if (b) [[unlikely]] {
    co_await bar();
  }
}
```

Assume Task is marked with `[[clang::coro_await_elidable]]`. We will then always elide the call to bar() into the frame of foo(). But this may be a regression instead of an optimization if b is always false.

This patch tries to mitigate the problem by leveraging hot/cold information. This can be optimized further in the future, but at least this patch makes things better.

This patch was originally written by ChuanqiXu9, but stalled during PR review because the diagnostics were not integrated with the existing optimization remarks. I rebased the original patch, integrated it with the optimization remarks, and made a couple of smaller cosmetic changes (e.g., made the test case expectations more targeted).

Co-Authored-by: Chuanqi Xu <[email protected]>
1 parent bf0a6ae commit 8a0904b

File tree

3 files changed

+157
-1
lines changed

3 files changed

+157
-1
lines changed

llvm/lib/Transforms/Coroutines/CoroAnnotationElide.cpp

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@
2424
#include "llvm/IR/Instruction.h"
2525
#include "llvm/IR/Module.h"
2626
#include "llvm/IR/PassManager.h"
27+
#include "llvm/Support/BranchProbability.h"
28+
#include "llvm/Support/CommandLine.h"
29+
#include "llvm/Support/FileSystem.h"
2730
#include "llvm/Transforms/Utils/CallGraphUpdater.h"
2831
#include "llvm/Transforms/Utils/Cloning.h"
2932

@@ -33,6 +36,49 @@ using namespace llvm;
3336

3437
#define DEBUG_TYPE "coro-annotation-elide"
3538

39+
// Threshold for conditional elision. The pass turns this ratio into a
// BranchProbability and skips eliding a call whose block frequency, relative
// to the caller's entry frequency, falls below it.
static cl::opt<float> CoroElideBranchRatio(
    "coro-elide-branch-ratio", cl::init(0.55), cl::Hidden,
    cl::desc("Minimum BranchProbability to consider eliding a coroutine."));

// Defined in PartialInlining.cpp. Used here as the denominator when
// CoroElideBranchRatio is converted into a BranchProbability.
extern cl::opt<unsigned> MinBlockCounterExecution;

// Enables the per-call-site elision log written by this pass.
static cl::opt<bool>
    PrintElidedCoroutine("print-elided-coroutine-stats", cl::init(false),
                         cl::Hidden,
                         cl::desc("Print stats for elided coroutine"));

// File that receives the elision log. When empty, the log goes to stderr.
static cl::opt<std::string>
    ElideStatOutput("coro-elide-stat-output", cl::init(""), cl::Hidden,
                    cl::desc("Output file for -print-elided-coroutine-stats. "
                             "Defaults to standard error output."));
53+
54+
// The return value is used to indicate the owner of the resources. The users
55+
// should use the output parameter.
56+
static std::unique_ptr<llvm::raw_ostream>
57+
getCoroElidedStatsOStream(llvm::raw_ostream *&OS) {
58+
if (!PrintElidedCoroutine) {
59+
OS = &llvm::nulls();
60+
return nullptr;
61+
}
62+
63+
if (ElideStatOutput.empty()) {
64+
OS = &llvm::errs();
65+
return nullptr;
66+
}
67+
68+
std::error_code EC;
69+
auto ret = std::make_unique<llvm::raw_fd_ostream>(ElideStatOutput, EC,
70+
sys::fs::OF_Append);
71+
72+
if (EC) {
73+
llvm::errs() << "llvm cannot open file: " << EC.message() << "\n";
74+
OS = &llvm::nulls();
75+
return nullptr;
76+
}
77+
78+
OS = ret.get();
79+
return ret;
80+
}
81+
3682
static Instruction *getFirstNonAllocaInTheEntryBlock(Function *F) {
3783
for (Instruction &I : F->getEntryBlock())
3884
if (!isa<AllocaInst>(&I))
@@ -145,6 +191,37 @@ PreservedAnalyses CoroAnnotationElidePass::run(LazyCallGraph::SCC &C,
145191
bool IsCallerPresplitCoroutine = Caller->isPresplitCoroutine();
146192
bool HasAttr = CB->hasFnAttr(llvm::Attribute::CoroElideSafe);
147193
if (IsCallerPresplitCoroutine && HasAttr) {
194+
195+
llvm::raw_ostream *OS = nullptr;
196+
auto _ = getCoroElidedStatsOStream(OS);
197+
assert(OS && "At least we should able to get access to standard error");
198+
199+
auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*Caller);
200+
if (BFI.getBlockFreq(CB->getParent()) <
201+
BFI.getEntryFreq()) {
202+
static BranchProbability MinBranchProbability(
203+
static_cast<int>(CoroElideBranchRatio * MinBlockCounterExecution),
204+
MinBlockCounterExecution);
205+
206+
auto Prob = BranchProbability::getBranchProbability(
207+
BFI.getBlockFreq(CB->getParent()).getFrequency(),
208+
BFI.getEntryFreq().getFrequency());
209+
210+
if (Prob < MinBranchProbability) {
211+
*OS << "Not eliding " << *CB
212+
<< " with estimated probability: " << Prob << "\n";
213+
continue;
214+
}
215+
216+
*OS << "BB Prob: \t" << Prob << "\n";
217+
} else {
218+
*OS << "BB Freq: \t"
219+
<< BFI.getBlockFreq(CB->getParent()).getFrequency() << "\n";
220+
*OS << "Entry Freq: \t" << BFI.getEntryFreq().getFrequency() << "\n";
221+
}
222+
223+
*OS << "eliding " << *CB << "\n";
224+
148225
auto *CallerN = CG.lookup(*Caller);
149226
auto *CallerC = CallerN ? CG.lookupSCC(*CallerN) : nullptr;
150227
// If CallerC is nullptr, it means LazyCallGraph hasn't visited Caller

llvm/lib/Transforms/IPO/PartialInlining.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ static cl::opt<float> MinRegionSizeRatio(
109109
"outline candidate and original function"));
110110
// Used to tune the minimum number of execution counts needed in the predecessor
111111
// block to the cold edge. ie. confidence interval.
112-
static cl::opt<unsigned>
112+
cl::opt<unsigned>
113113
MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden,
114114
cl::desc("Minimum block executions to consider "
115115
"its BranchProbabilityInfo valid"));
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
; Test that a coro_elide_safe call in a conditionally executed block is not
; elided when its estimated probability is below -coro-elide-branch-ratio.
2+
; RUN: opt < %s -S -passes='cgscc(coro-annotation-elide)' -coro-elide-branch-ratio=0.55 | FileCheck %s
3+
4+
%struct.Task = type { ptr }
5+
6+
declare void @print(i32) nounwind
7+
8+
; resume part of the coroutine
9+
define fastcc void @callee.resume(ptr dereferenceable(1)) {
10+
tail call void @print(i32 0)
11+
ret void
12+
}
13+
14+
; destroy part of the coroutine
15+
define fastcc void @callee.destroy(ptr) {
16+
tail call void @print(i32 1)
17+
ret void
18+
}
19+
20+
; cleanup part of the coroutine
21+
define fastcc void @callee.cleanup(ptr) {
22+
tail call void @print(i32 2)
23+
ret void
24+
}
25+
26+
@callee.resumers = internal constant [3 x ptr] [
27+
ptr @callee.resume, ptr @callee.destroy, ptr @callee.cleanup]
28+
29+
declare void @alloc(i1) nounwind
30+
31+
; CHECK-LABEL: define ptr @callee
32+
define ptr @callee(i8 %arg) {
33+
entry:
34+
%task = alloca %struct.Task, align 8
35+
%id = call token @llvm.coro.id(i32 0, ptr null,
36+
ptr @callee,
37+
ptr @callee.resumers)
38+
%alloc = call i1 @llvm.coro.alloc(token %id)
39+
%hdl = call ptr @llvm.coro.begin(token %id, ptr null)
40+
store ptr %hdl, ptr %task
41+
ret ptr %task
42+
}
43+
44+
; CHECK-LABEL: define ptr @callee.noalloc
45+
define ptr @callee.noalloc(i8 %arg, ptr dereferenceable(32) align(8) %frame) {
46+
entry:
47+
%task = alloca %struct.Task, align 8
48+
%id = call token @llvm.coro.id(i32 0, ptr null,
49+
ptr @callee,
50+
ptr @callee.resumers)
51+
%hdl = call ptr @llvm.coro.begin(token %id, ptr null)
52+
store ptr %hdl, ptr %task
53+
ret ptr %task
54+
}
55+
56+
; CHECK-LABEL: define ptr @caller(i1 %cond)
57+
; Function Attrs: presplitcoroutine
58+
define ptr @caller(i1 %cond) #0 {
59+
entry:
60+
br i1 %cond, label %call, label %ret
61+
62+
call:
63+
%task = call ptr @callee(i8 0) #1
64+
br label %ret
65+
66+
ret:
67+
%retval = phi ptr [ %task, %call ], [ null, %entry ]
68+
ret ptr %retval
69+
; CHECK-NOT: alloca
70+
}
71+
72+
declare token @llvm.coro.id(i32, ptr, ptr, ptr)
73+
declare ptr @llvm.coro.begin(token, ptr)
74+
declare ptr @llvm.coro.frame()
75+
declare ptr @llvm.coro.subfn.addr(ptr, i8)
76+
declare i1 @llvm.coro.alloc(token)
77+
78+
attributes #0 = { presplitcoroutine }
79+
attributes #1 = { coro_elide_safe }

0 commit comments

Comments
 (0)