Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions llvm/include/llvm/IR/Instructions.h
Original file line number Diff line number Diff line change
Expand Up @@ -2822,6 +2822,11 @@ class PHINode : public Instruction {
/// non-undef value.
bool hasConstantOrUndefValue() const;

/// If the specified PHI node (possibly via other PHI nodes) merges together
/// the same or identical (i.e. Instruction::isIdenticalTo() returns true)
/// values, return one of the values, otherwise return null.
Value *hasIdenticalValue();

/// If the PHI node is complete which means all of its parent's predecessors
/// have incoming value in this PHI, return true, otherwise return false.
bool isComplete() const {
Expand Down
5 changes: 5 additions & 0 deletions llvm/include/llvm/Transforms/IPO/LowerTypeTests.h
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,11 @@ class LowerTypeTestsPass : public PassInfoMixin<LowerTypeTestsPass> {
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};

class SimplifyTypeTestsPass : public PassInfoMixin<SimplifyTypeTestsPass> {
public:
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};

} // end namespace llvm

#endif // LLVM_TRANSFORMS_IPO_LOWERTYPETESTS_H
17 changes: 13 additions & 4 deletions llvm/lib/CodeGen/CodeGenPrepare.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7739,9 +7739,14 @@ bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {

for (Use *U : reverse(OpsToSink)) {
auto *UI = cast<Instruction>(U->get());
if (isa<PHINode>(UI))
continue;
if (UI->getParent() == TargetBB) {
if (auto *PN = dyn_cast<PHINode>(UI)) {
auto *I0 = dyn_cast<Instruction>(PN->hasIdenticalValue());
if (!I0)
continue;
if (I0->getParent() == TargetBB &&
InstOrdering[I0] < InstOrdering[InsertPoint])
InsertPoint = I0;
} else if (UI->getParent() == TargetBB) {
if (InstOrdering[UI] < InstOrdering[InsertPoint])
InsertPoint = UI;
continue;
Expand All @@ -7753,7 +7758,11 @@ bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
DenseMap<Instruction *, Instruction *> NewInstructions;
for (Use *U : ToReplace) {
auto *UI = cast<Instruction>(U->get());
Instruction *NI = UI->clone();
Instruction *NI;
if (auto *PN = dyn_cast<PHINode>(UI))
NI = cast<Instruction>(PN->hasIdenticalValue())->clone();
else
NI = UI->clone();

if (IsHugeFunc) {
// Now we clone an instruction, its operands' defs may sink to this BB
Expand Down
35 changes: 35 additions & 0 deletions llvm/lib/IR/Instructions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
#include <cassert>
#include <cstdint>
#include <optional>
#include <set>
#include <vector>

using namespace llvm;
Expand Down Expand Up @@ -239,6 +240,40 @@ bool PHINode::hasConstantOrUndefValue() const {
return true;
}

/// If the specified PHI node (possibly via other PHI nodes) merges together the
/// same or identical (i.e. Instruction::isIdenticalTo() returns true) values,
/// return one of the values, otherwise return null.
Value *PHINode::hasIdenticalValue() {
std::vector<PHINode *> Worklist;
std::set<PHINode *> Seen;
Value *Result = nullptr;
Worklist.push_back(this);
while (!Worklist.empty()) {
PHINode *PN = Worklist.back();
Worklist.pop_back();
if (!Seen.insert(PN).second)
continue;
for (Value *V : PN->incoming_values()) {
if (auto *PN = dyn_cast<PHINode>(V)) {
Worklist.push_back(PN);
continue;
}
if (!Result) {
Result = V;
continue;
}
if (V == Result)
continue;
if (auto *I = dyn_cast<Instruction>(V))
if (auto *ResultI = dyn_cast<Instruction>(Result))
if (I->isIdenticalTo(ResultI))
continue;
return nullptr;
}
}
return Result;
}

//===----------------------------------------------------------------------===//
// LandingPadInst Implementation
//===----------------------------------------------------------------------===//
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Passes/PassBuilderPipelines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1288,6 +1288,9 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
// and argument promotion.
MPM.addPass(DeadArgumentEliminationPass());

if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink)
MPM.addPass(SimplifyTypeTestsPass());

if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink)
MPM.addPass(CoroCleanupPass());

Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Passes/PassRegistry.def
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ MODULE_PASS("jmc-instrumenter", JMCInstrumenterPass())
MODULE_PASS("lower-emutls", LowerEmuTLSPass())
MODULE_PASS("lower-global-dtors", LowerGlobalDtorsPass())
MODULE_PASS("lower-ifunc", LowerIFuncPass())
MODULE_PASS("simplify-type-tests", SimplifyTypeTestsPass())
MODULE_PASS("lowertypetests", LowerTypeTestsPass())
MODULE_PASS("fatlto-cleanup", FatLtoCleanup())
MODULE_PASS("pgo-force-function-attrs", PGOForceFunctionAttrsPass(PGOOpt ? PGOOpt->ColdOptType : PGOOptions::ColdFuncOpt::Default))
Expand Down
35 changes: 26 additions & 9 deletions llvm/lib/Target/X86/X86TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7132,10 +7132,8 @@ bool X86TTIImpl::isProfitableToSinkOperands(Instruction *I,
using namespace llvm::PatternMatch;

FixedVectorType *VTy = dyn_cast<FixedVectorType>(I->getType());
if (!VTy)
return false;

if (I->getOpcode() == Instruction::Mul &&
if (VTy && I->getOpcode() == Instruction::Mul &&
VTy->getElementType()->isIntegerTy(64)) {
for (auto &Op : I->operands()) {
// Make sure we are not already sinking this operand
Expand All @@ -7159,9 +7157,6 @@ bool X86TTIImpl::isProfitableToSinkOperands(Instruction *I,
return !Ops.empty();
}

// A uniform shift amount in a vector shift or funnel shift may be much
// cheaper than a generic variable vector shift, so make that pattern visible
// to SDAG by sinking the shuffle instruction next to the shift.
int ShiftAmountOpNum = -1;
if (I->isShift())
ShiftAmountOpNum = 1;
Expand All @@ -7170,16 +7165,38 @@ bool X86TTIImpl::isProfitableToSinkOperands(Instruction *I,
II->getIntrinsicID() == Intrinsic::fshr)
ShiftAmountOpNum = 2;
}

if (ShiftAmountOpNum == -1)
return false;
auto *ShiftAmountUse = &I->getOperandUse(ShiftAmountOpNum);

Value *ShiftAmount = ShiftAmountUse->get();
if (auto *PN = dyn_cast<PHINode>(ShiftAmount)) {
ShiftAmount = PN->hasIdenticalValue();
if (!ShiftAmount)
return false;
}

auto *Shuf = dyn_cast<ShuffleVectorInst>(I->getOperand(ShiftAmountOpNum));
// A uniform shift amount in a vector shift or funnel shift may be much
// cheaper than a generic variable vector shift, so make that pattern visible
// to SDAG by sinking the shuffle instruction next to the shift.
auto *Shuf = dyn_cast<ShuffleVectorInst>(ShiftAmount);
if (Shuf && getSplatIndex(Shuf->getShuffleMask()) >= 0 &&
isVectorShiftByScalarCheap(I->getType())) {
Ops.push_back(&I->getOperandUse(ShiftAmountOpNum));
Ops.push_back(ShiftAmountUse);
return true;
}

// Casts taking a constant expression (generally derived from a global
// variable address) as an operand are profitable to sink because they appear
// as subexpressions in the instruction sequence generated by the
// LowerTypeTests pass which is expected to pattern match to the rotate
// instruction's immediate operand.
if (auto *CI = dyn_cast<CastInst>(ShiftAmount)) {
if (isa<ConstantExpr>(CI->getOperand(0))) {
Ops.push_back(ShiftAmountUse);
return true;
}
}

return false;
}
74 changes: 74 additions & 0 deletions llvm/lib/Transforms/IPO/LowerTypeTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
Expand Down Expand Up @@ -2478,3 +2479,76 @@ PreservedAnalyses LowerTypeTestsPass::run(Module &M,
return PreservedAnalyses::all();
return PreservedAnalyses::none();
}

PreservedAnalyses SimplifyTypeTestsPass::run(Module &M,
ModuleAnalysisManager &AM) {
bool Changed = false;
// Figure out whether inlining has exposed a constant address to a lowered
// type test, and remove the test if so and the address is known to pass the
// test. Unfortunately this pass ends up needing to reverse engineer what
// LowerTypeTests did; this is currently inherent to the design of ThinLTO
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a more extensive comment with what this is looking for and why? I don't look at lower type test output often so I don't recall offhand what e.g. it would have looked like without inlining vs with.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

// importing where LowerTypeTests needs to run at the start.
for (auto &GV : M.globals()) {
if (!GV.getName().starts_with("__typeid_") ||
!GV.getName().ends_with("_global_addr"))
continue;
auto *MD = MDString::get(M.getContext(),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a comment on this conversion? Figured it out by adding up the chars myself but it would be good to make it explicit.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would just use strlen("__typeid_") and strlen("_global_addr")

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added an example, I think that should make it clear enough.

GV.getName().substr(9, GV.getName().size() - 21));
auto MaySimplifyPtr = [&](Value *Ptr) {
if (auto *GV = dyn_cast<GlobalValue>(Ptr))
if (auto *CFIGV = M.getNamedValue((GV->getName() + ".cfi").str()))
Ptr = CFIGV;
return isKnownTypeIdMember(MD, M.getDataLayout(), Ptr, 0);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are there cases where the GV will not have a ".cfi" extension? I notice the test has that extension.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, if it's a function defined outside the LTO unit the jump table entry will be named foo.cfi_jt and the external function will be named foo. Since functions inside the LTO unit are the more common case I only handled them here.

};
auto MaySimplifyInt = [&](Value *Op) {
auto *PtrAsInt = dyn_cast<ConstantExpr>(Op);
if (!PtrAsInt || PtrAsInt->getOpcode() != Instruction::PtrToInt)
return false;
return MaySimplifyPtr(PtrAsInt->getOperand(0));
};
for (User *U : make_early_inc_range(GV.users())) {
if (auto *CI = dyn_cast<ICmpInst>(U)) {
if (CI->getPredicate() == CmpInst::ICMP_EQ &&
MaySimplifyPtr(CI->getOperand(0))) {
// This is an equality comparison (TypeTestResolution::Single case in
// lowerTypeTestCall). In this case we just replace the comparison
// with true.
CI->replaceAllUsesWith(ConstantInt::getTrue(M.getContext()));
CI->eraseFromParent();
Changed = true;
}
}
auto *CE = dyn_cast<ConstantExpr>(U);
if (!CE || CE->getOpcode() != Instruction::PtrToInt)
continue;
for (Use &U : make_early_inc_range(CE->uses())) {
auto *CE = dyn_cast<ConstantExpr>(U.getUser());
if (U.getOperandNo() == 1 && CE &&
CE->getOpcode() == Instruction::Sub &&
MaySimplifyInt(CE->getOperand(0))) {
// This is a computation of PtrOffset as generated by
// LowerTypeTestsModule::lowerTypeTestCall above. If
// isKnownTypeIdMember passes we just pretend it evaluated to 0. This
// should cause later passes to remove the range and alignment checks.
// The bitset checks won't be removed but those are uncommon.
CE->replaceAllUsesWith(ConstantInt::get(CE->getType(), 0));
Changed = true;
}
auto *CI = dyn_cast<ICmpInst>(U.getUser());
if (U.getOperandNo() == 1 && CI &&
CI->getPredicate() == CmpInst::ICMP_EQ &&
MaySimplifyInt(CI->getOperand(0))) {
// This is an equality comparison. Unlike in the case above it
// remained as an integer compare.
CI->replaceAllUsesWith(ConstantInt::getTrue(M.getContext()));
CI->eraseFromParent();
Changed = true;
}
}
}
}

if (!Changed)
return PreservedAnalyses::all();
return PreservedAnalyses::none();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is that true? E.g. the CFG doesn't change, right? Could we do what we do in HWASan: https://github.com/llvm/llvm-project/blob/main/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp#L485?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, I updated this to preserve the CFG related analyses.

}
1 change: 1 addition & 0 deletions llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@
; CHECK-O-NEXT: Invalidating analysis: ShouldNotRunFunctionPassesAnalysis
; CHECK-O-NEXT: Invalidating analysis: InlineAdvisorAnalysis
; CHECK-O-NEXT: Running pass: DeadArgumentEliminationPass
; CHECK-O-NEXT: Running pass: SimplifyTypeTestsPass
; CHECK-O-NEXT: Running pass: CoroCleanupPass
; CHECK-POSTLINK-O-NEXT: Running pass: GlobalOptPass
; CHECK-POSTLINK-O-NEXT: Running pass: GlobalDCEPass
Expand Down
1 change: 1 addition & 0 deletions llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@
; CHECK-O-NEXT: Invalidating analysis: ShouldNotRunFunctionPassesAnalysis
; CHECK-O-NEXT: Invalidating analysis: InlineAdvisorAnalysis
; CHECK-O-NEXT: Running pass: DeadArgumentEliminationPass
; CHECK-O-NEXT: Running pass: SimplifyTypeTestsPass
; CHECK-O-NEXT: Running pass: CoroCleanupPass
; CHECK-O-NEXT: Running pass: GlobalOptPass
; CHECK-O-NEXT: Running pass: GlobalDCEPass
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@
; CHECK-O-NEXT: Invalidating analysis: ShouldNotRunFunctionPassesAnalysis
; CHECK-O-NEXT: Invalidating analysis: InlineAdvisorAnalysis
; CHECK-O-NEXT: Running pass: DeadArgumentEliminationPass
; CHECK-O-NEXT: Running pass: SimplifyTypeTestsPass
; CHECK-O-NEXT: Running pass: CoroCleanupPass
; CHECK-O-NEXT: Running pass: GlobalOptPass
; CHECK-O-NEXT: Running pass: GlobalDCEPass
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
; Make sure that if a phi with identical inputs gets created it gets undone by CodeGenPrepare.

; RUN: opt -codegenprepare -S < %s | FileCheck %s

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@__typeid__ZTS1S_global_addr = external hidden global [0 x i8], code_model "small"
@__typeid__ZTS1S_align = external hidden global [0 x i8], !absolute_symbol !0
@__typeid__ZTS1S_size_m1 = external hidden global [0 x i8], !absolute_symbol !1

; Check that we recover the third pair of zexts from the phi.

; CHECK: define void @f4
define void @f4(i1 noundef zeroext %0, ptr noundef %1, ptr noundef %2, ptr noundef %3) #1 {
br i1 %0, label %5, label %18

5:
%6 = load ptr, ptr %1, align 8
%7 = ptrtoint ptr %6 to i64
%8 = sub i64 %7, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
; CHECK: zext {{.*}} @__typeid__ZTS1S_align
%9 = zext nneg i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8) to i64
%10 = lshr i64 %8, %9
; CHECK: zext {{.*}} @__typeid__ZTS1S_align
%11 = zext nneg i8 sub (i8 64, i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8)) to i64
%12 = shl i64 %8, %11
%13 = or i64 %10, %12
%14 = icmp ugt i64 %13, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
br i1 %14, label %15, label %16

15:
tail call void @llvm.ubsantrap(i8 2) #5
unreachable

16:
%17 = load ptr, ptr %6, align 8
tail call void %17(ptr noundef nonnull align 8 dereferenceable(8) %1)
br label %31

18:
%19 = load ptr, ptr %2, align 8
%20 = ptrtoint ptr %19 to i64
%21 = sub i64 %20, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
; CHECK: zext {{.*}} @__typeid__ZTS1S_align
%22 = zext nneg i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8) to i64
%23 = lshr i64 %21, %22
; CHECK: zext {{.*}} @__typeid__ZTS1S_align
%24 = zext nneg i8 sub (i8 64, i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8)) to i64
%25 = shl i64 %21, %24
%26 = or i64 %23, %25
%27 = icmp ugt i64 %26, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
br i1 %27, label %28, label %29

28:
tail call void @llvm.ubsantrap(i8 2) #5
unreachable

29:
%30 = load ptr, ptr %19, align 8
tail call void %30(ptr noundef nonnull align 8 dereferenceable(8) %2)
br label %31

31:
%32 = phi i64 [ %24, %29 ], [ %11, %16 ]
%33 = phi i64 [ %22, %29 ], [ %9, %16 ]
%34 = load ptr, ptr %3, align 8
%35 = ptrtoint ptr %34 to i64
%36 = sub i64 %35, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
; CHECK: zext {{.*}} @__typeid__ZTS1S_align
%37 = lshr i64 %36, %33
; CHECK: zext {{.*}} @__typeid__ZTS1S_align
%38 = shl i64 %36, %32
%39 = or i64 %37, %38
%40 = icmp ugt i64 %39, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
br i1 %40, label %41, label %42

41:
tail call void @llvm.ubsantrap(i8 2) #5
unreachable

42:
%43 = load ptr, ptr %34, align 8
tail call void %43(ptr noundef nonnull align 8 dereferenceable(8) %3)
ret void
}

declare i1 @llvm.type.test(ptr, metadata)
declare void @llvm.ubsantrap(i8 immarg)

!0 = !{i64 0, i64 256}
!1 = !{i64 0, i64 128}
Loading
Loading