Skip to content

Commit e8e45f5

Browse files
htyu authored and tstellar committed
[CSSPGO] Unblock optimizations with pseudo probe instrumentation.
The IR/MIR pseudo probe intrinsics don't get materialized into real machine instructions and therefore don't incur runtime cost directly. However, they come with an indirect cost by blocking certain optimizations. Some of the blocking is intentional (such as blocking code merge) for better counts quality, while the rest is accidental. This change unblocks perf-critical optimizations that do not affect counts quality. They include:
1. IR InstCombine, sinking load operations to shorten lifetimes.
2. MIR LiveRangeShrink, similar to #1.
3. MIR TwoAddressInstructionPass, i.e., the opeq transform.
4. MIR function argument copy elision.
5. IR stack protection (not perf-critical, but nice to have).
Reviewed By: wmi
Differential Revision: https://reviews.llvm.org/D95982
1 parent 1071279 commit e8e45f5

15 files changed

+209
-13
lines changed

llvm/include/llvm/CodeGen/MachineInstr.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1156,6 +1156,10 @@ class MachineInstr
11561156
return getOpcode() == TargetOpcode::CFI_INSTRUCTION;
11571157
}
11581158

1159+
bool isPseudoProbe() const {
1160+
return getOpcode() == TargetOpcode::PSEUDO_PROBE;
1161+
}
1162+
11591163
// True if the instruction represents a position in the function.
11601164
bool isPosition() const { return isLabel() || isCFIInstruction(); }
11611165

@@ -1165,6 +1169,9 @@ class MachineInstr
11651169
bool isDebugInstr() const {
11661170
return isDebugValue() || isDebugLabel() || isDebugRef();
11671171
}
1172+
bool isDebugOrPseudoInstr() const {
1173+
return isDebugInstr() || isPseudoProbe();
1174+
}
11681175

11691176
bool isDebugOffsetImm() const { return getDebugOffset().isImm(); }
11701177

llvm/include/llvm/IR/Instruction.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -654,6 +654,9 @@ class Instruction : public User,
654654
/// llvm.lifetime.end marker.
655655
bool isLifetimeStartOrEnd() const;
656656

657+
/// Return true if the instruction is a DbgInfoIntrinsic or PseudoProbeInst.
658+
bool isDebugOrPseudoInst() const;
659+
657660
/// Return a pointer to the next non-debug instruction in the same basic
658661
/// block as 'this', or nullptr if no such instruction exists. Skip any pseudo
659662
/// operations if \c SkipPseudoOp is true.

llvm/lib/CodeGen/LiveRangeShrink.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,8 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) {
156156
// If MI has side effects, it should become a barrier for code motion.
157157
// IOM is rebuilt from the next instruction to prevent later
158158
// instructions from being moved before this MI.
159-
if (MI.hasUnmodeledSideEffects() && Next != MBB.end()) {
159+
if (MI.hasUnmodeledSideEffects() && !MI.isPseudoProbe() &&
160+
Next != MBB.end()) {
160161
BuildInstOrderMap(Next, IOM);
161162
SawStore = false;
162163
}

llvm/lib/CodeGen/MachineInstr.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1462,7 +1462,8 @@ bool MachineInstr::hasUnmodeledSideEffects() const {
14621462
}
14631463

14641464
bool MachineInstr::isLoadFoldBarrier() const {
1465-
return mayStore() || isCall() || hasUnmodeledSideEffects();
1465+
return mayStore() || isCall() ||
1466+
(hasUnmodeledSideEffects() && !isPseudoProbe());
14661467
}
14671468

14681469
/// allDefsAreDead - Return true if all the defs of this instruction are dead.

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9660,8 +9660,9 @@ findArgumentCopyElisionCandidates(const DataLayout &DL,
96609660
// We will look through cast uses, so ignore them completely.
96619661
if (I.isCast())
96629662
continue;
9663-
// Ignore debug info intrinsics, they don't escape or store to allocas.
9664-
if (isa<DbgInfoIntrinsic>(I))
9663+
// Ignore debug info and pseudo op intrinsics, they don't escape or store
9664+
// to allocas.
9665+
if (I.isDebugOrPseudoInst())
96659666
continue;
96669667
// This is an unknown instruction. Assume it escapes or writes to all
96679668
// static alloca operands.

llvm/lib/CodeGen/StackProtector.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ bool StackProtector::HasAddressTaken(const Instruction *AI,
192192
// Ignore intrinsics that do not become real instructions.
193193
// TODO: Narrow this to intrinsics that have store-like effects.
194194
const auto *CI = cast<CallInst>(I);
195-
if (!isa<DbgInfoIntrinsic>(CI) && !CI->isLifetimeStartOrEnd())
195+
if (!CI->isDebugOrPseudoInst() && !CI->isLifetimeStartOrEnd())
196196
return true;
197197
break;
198198
}

llvm/lib/CodeGen/TwoAddressInstructionPass.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -801,8 +801,8 @@ bool TwoAddressInstructionPass::rescheduleMIBelowKill(
801801
MachineBasicBlock::iterator KillPos = KillMI;
802802
++KillPos;
803803
for (MachineInstr &OtherMI : make_range(End, KillPos)) {
804-
// Debug instructions cannot be counted against the limit.
805-
if (OtherMI.isDebugInstr())
804+
// Debug or pseudo instructions cannot be counted against the limit.
805+
if (OtherMI.isDebugOrPseudoInstr())
806806
continue;
807807
if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost.
808808
return false;
@@ -974,8 +974,8 @@ bool TwoAddressInstructionPass::rescheduleKillAboveMI(
974974
unsigned NumVisited = 0;
975975
for (MachineInstr &OtherMI :
976976
make_range(mi, MachineBasicBlock::iterator(KillMI))) {
977-
// Debug instructions cannot be counted against the limit.
978-
if (OtherMI.isDebugInstr())
977+
// Debug or pseudo instructions cannot be counted against the limit.
978+
if (OtherMI.isDebugOrPseudoInstr())
979979
continue;
980980
if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost.
981981
return false;

llvm/lib/IR/Instruction.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -651,6 +651,10 @@ bool Instruction::isLifetimeStartOrEnd() const {
651651
return ID == Intrinsic::lifetime_start || ID == Intrinsic::lifetime_end;
652652
}
653653

654+
bool Instruction::isDebugOrPseudoInst() const {
655+
return isa<DbgInfoIntrinsic>(this) || isa<PseudoProbeInst>(this);
656+
}
657+
654658
const Instruction *
655659
Instruction::getNextNonDebugInstruction(bool SkipPseudoOp) const {
656660
for (const Instruction *I = getNextNode(); I; I = I->getNextNode())

llvm/lib/Transforms/IPO/FunctionAttrs.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,13 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
149149
if (isNoModRef(MRI))
150150
continue;
151151

152+
// A pseudo probe call shouldn't change any function attribute since it
153+
// doesn't translate to a real instruction. It comes with a memory access
154+
// tag to prevent it from being removed by optimizations without blocking
155+
// other instructions from being optimized.
156+
if (isa<PseudoProbeInst>(I))
157+
continue;
158+
152159
if (!AliasAnalysis::onlyAccessesArgPointees(MRB)) {
153160
// The call could access any memory. If that includes writes, note it.
154161
if (isModSet(MRI))

llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -592,8 +592,14 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
592592
BasicBlock::iterator BBI = L->getIterator(), E = L->getParent()->end();
593593

594594
for (++BBI; BBI != E; ++BBI)
595-
if (BBI->mayWriteToMemory())
595+
if (BBI->mayWriteToMemory()) {
596+
// Calls that only access inaccessible memory do not block sinking the
597+
// load.
598+
if (auto *CB = dyn_cast<CallBase>(BBI))
599+
if (CB->onlyAccessesInaccessibleMemory())
600+
continue;
596601
return false;
602+
}
597603

598604
// Check for non-address taken alloca. If not address-taken already, it isn't
599605
// profitable to do this xform.

0 commit comments

Comments
 (0)