Skip to content

Commit b74ffea

Browse files
committed
[𝘀𝗽𝗿] initial version
Created using spr 1.3.6-beta.1
2 parents 782a9e9 + 852b18f commit b74ffea

File tree

6 files changed

+349
-13
lines changed

6 files changed

+349
-13
lines changed

llvm/include/llvm/IR/Instructions.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2822,6 +2822,11 @@ class PHINode : public Instruction {
28222822
/// non-undef value.
28232823
bool hasConstantOrUndefValue() const;
28242824

2825+
/// If the specified PHI node (possibly via other PHI nodes) merges together
2826+
/// the same or identical (i.e. Instruction::isIdenticalTo() returns true)
2827+
/// values, return one of the values, otherwise return null.
2828+
Value *hasIdenticalValue();
2829+
28252830
/// If the PHI node is complete which means all of its parent's predecessors
28262831
/// have incoming value in this PHI, return true, otherwise return false.
28272832
bool isComplete() const {

llvm/lib/CodeGen/CodeGenPrepare.cpp

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7739,9 +7739,14 @@ bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
77397739

77407740
for (Use *U : reverse(OpsToSink)) {
77417741
auto *UI = cast<Instruction>(U->get());
7742-
if (isa<PHINode>(UI))
7743-
continue;
7744-
if (UI->getParent() == TargetBB) {
7742+
if (auto *PN = dyn_cast<PHINode>(UI)) {
7743+
auto *I0 = dyn_cast<Instruction>(PN->hasIdenticalValue());
7744+
if (!I0)
7745+
continue;
7746+
if (I0->getParent() == TargetBB &&
7747+
InstOrdering[I0] < InstOrdering[InsertPoint])
7748+
InsertPoint = I0;
7749+
} else if (UI->getParent() == TargetBB) {
77457750
if (InstOrdering[UI] < InstOrdering[InsertPoint])
77467751
InsertPoint = UI;
77477752
continue;
@@ -7753,7 +7758,11 @@ bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
77537758
DenseMap<Instruction *, Instruction *> NewInstructions;
77547759
for (Use *U : ToReplace) {
77557760
auto *UI = cast<Instruction>(U->get());
7756-
Instruction *NI = UI->clone();
7761+
Instruction *NI;
7762+
if (auto *PN = dyn_cast<PHINode>(UI))
7763+
NI = cast<Instruction>(PN->hasIdenticalValue())->clone();
7764+
else
7765+
NI = UI->clone();
77577766

77587767
if (IsHugeFunc) {
77597768
// Now we clone an instruction, its operands' defs may sink to this BB

llvm/lib/IR/Instructions.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
#include <cassert>
4949
#include <cstdint>
5050
#include <optional>
51+
#include <set>
5152
#include <vector>
5253

5354
using namespace llvm;
@@ -239,6 +240,40 @@ bool PHINode::hasConstantOrUndefValue() const {
239240
return true;
240241
}
241242

243+
/// If the specified PHI node (possibly via other PHI nodes) merges together the
244+
/// same or identical (i.e. Instruction::isIdenticalTo() returns true) values,
245+
/// return one of the values, otherwise return null.
246+
Value *PHINode::hasIdenticalValue() {
247+
std::vector<PHINode *> Worklist;
248+
std::set<PHINode *> Seen;
249+
Value *Result = nullptr;
250+
Worklist.push_back(this);
251+
while (!Worklist.empty()) {
252+
PHINode *PN = Worklist.back();
253+
Worklist.pop_back();
254+
if (!Seen.insert(PN).second)
255+
continue;
256+
for (Value *V : PN->incoming_values()) {
257+
if (auto *PN = dyn_cast<PHINode>(V)) {
258+
Worklist.push_back(PN);
259+
continue;
260+
}
261+
if (!Result) {
262+
Result = V;
263+
continue;
264+
}
265+
if (V == Result)
266+
continue;
267+
if (auto *I = dyn_cast<Instruction>(V))
268+
if (auto *ResultI = dyn_cast<Instruction>(Result))
269+
if (I->isIdenticalTo(ResultI))
270+
continue;
271+
return nullptr;
272+
}
273+
}
274+
return Result;
275+
}
276+
242277
//===----------------------------------------------------------------------===//
243278
// LandingPadInst Implementation
244279
//===----------------------------------------------------------------------===//

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7132,10 +7132,8 @@ bool X86TTIImpl::isProfitableToSinkOperands(Instruction *I,
71327132
using namespace llvm::PatternMatch;
71337133

71347134
FixedVectorType *VTy = dyn_cast<FixedVectorType>(I->getType());
7135-
if (!VTy)
7136-
return false;
71377135

7138-
if (I->getOpcode() == Instruction::Mul &&
7136+
if (VTy && I->getOpcode() == Instruction::Mul &&
71397137
VTy->getElementType()->isIntegerTy(64)) {
71407138
for (auto &Op : I->operands()) {
71417139
// Make sure we are not already sinking this operand
@@ -7159,9 +7157,6 @@ bool X86TTIImpl::isProfitableToSinkOperands(Instruction *I,
71597157
return !Ops.empty();
71607158
}
71617159

7162-
// A uniform shift amount in a vector shift or funnel shift may be much
7163-
// cheaper than a generic variable vector shift, so make that pattern visible
7164-
// to SDAG by sinking the shuffle instruction next to the shift.
71657160
int ShiftAmountOpNum = -1;
71667161
if (I->isShift())
71677162
ShiftAmountOpNum = 1;
@@ -7170,16 +7165,38 @@ bool X86TTIImpl::isProfitableToSinkOperands(Instruction *I,
71707165
II->getIntrinsicID() == Intrinsic::fshr)
71717166
ShiftAmountOpNum = 2;
71727167
}
7173-
71747168
if (ShiftAmountOpNum == -1)
71757169
return false;
7170+
auto *ShiftAmountUse = &I->getOperandUse(ShiftAmountOpNum);
7171+
7172+
Value *ShiftAmount = ShiftAmountUse->get();
7173+
if (auto *PN = dyn_cast<PHINode>(ShiftAmount)) {
7174+
ShiftAmount = PN->hasIdenticalValue();
7175+
if (!ShiftAmount)
7176+
return false;
7177+
}
71767178

7177-
auto *Shuf = dyn_cast<ShuffleVectorInst>(I->getOperand(ShiftAmountOpNum));
7179+
// A uniform shift amount in a vector shift or funnel shift may be much
7180+
// cheaper than a generic variable vector shift, so make that pattern visible
7181+
// to SDAG by sinking the shuffle instruction next to the shift.
7182+
auto *Shuf = dyn_cast<ShuffleVectorInst>(ShiftAmount);
71787183
if (Shuf && getSplatIndex(Shuf->getShuffleMask()) >= 0 &&
71797184
isVectorShiftByScalarCheap(I->getType())) {
7180-
Ops.push_back(&I->getOperandUse(ShiftAmountOpNum));
7185+
Ops.push_back(ShiftAmountUse);
71817186
return true;
71827187
}
71837188

7189+
// Casts taking a constant expression (generally derived from a global
7190+
// variable address) as an operand are profitable to sink because they appear
7191+
// as subexpressions in the instruction sequence generated by the
7192+
// LowerTypeTests pass which is expected to pattern match to the rotate
7193+
// instruction's immediate operand.
7194+
if (auto *CI = dyn_cast<CastInst>(ShiftAmount)) {
7195+
if (isa<ConstantExpr>(CI->getOperand(0))) {
7196+
Ops.push_back(ShiftAmountUse);
7197+
return true;
7198+
}
7199+
}
7200+
71847201
return false;
71857202
}
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
; Make sure that if a phi with identical inputs gets created it gets undone by CodeGenPrepare.
2+
3+
; RUN: opt -codegenprepare -S < %s | FileCheck %s
4+
5+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
6+
target triple = "x86_64-unknown-linux-gnu"
7+
8+
@__typeid__ZTS1S_global_addr = external hidden global [0 x i8], code_model "small"
9+
@__typeid__ZTS1S_align = external hidden global [0 x i8], !absolute_symbol !0
10+
@__typeid__ZTS1S_size_m1 = external hidden global [0 x i8], !absolute_symbol !1
11+
12+
; Check that we recover the third pair of zexts from the phi.
13+
14+
; CHECK: define void @f4
15+
; @f4 holds three copies of the same check-then-indirect-call sequence:
; subtract @__typeid__ZTS1S_global_addr from a loaded pointer, `or` together an
; lshr and a shl of that difference, compare the result (ugt) against
; @__typeid__ZTS1S_size_m1, and call @llvm.ubsantrap if it is greater.
; Blocks %5 and %18 define their shift amounts with local zext instructions;
; block %31 receives its shift amounts through phis of those zexts.
define void @f4(i1 noundef zeroext %0, ptr noundef %1, ptr noundef %2, ptr noundef %3) #1 {
16+
br i1 %0, label %5, label %18
17+
18+
; Taken when %0 is true: run the sequence on the pointer loaded from %1.
5:
19+
%6 = load ptr, ptr %1, align 8
20+
%7 = ptrtoint ptr %6 to i64
21+
%8 = sub i64 %7, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
22+
; CHECK: zext {{.*}} @__typeid__ZTS1S_align
23+
%9 = zext nneg i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8) to i64
24+
%10 = lshr i64 %8, %9
25+
; CHECK: zext {{.*}} @__typeid__ZTS1S_align
26+
%11 = zext nneg i8 sub (i8 64, i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8)) to i64
27+
%12 = shl i64 %8, %11
28+
%13 = or i64 %10, %12
29+
%14 = icmp ugt i64 %13, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
30+
br i1 %14, label %15, label %16
31+
32+
; Comparison was true: trap.
15:
33+
tail call void @llvm.ubsantrap(i8 2) #5
34+
unreachable
35+
36+
; Comparison was false: call through the pointer loaded from %6, then join.
16:
37+
%17 = load ptr, ptr %6, align 8
38+
tail call void %17(ptr noundef nonnull align 8 dereferenceable(8) %1)
39+
br label %31
40+
41+
; Taken when %0 is false: same sequence on the pointer loaded from %2.
18:
42+
%19 = load ptr, ptr %2, align 8
43+
%20 = ptrtoint ptr %19 to i64
44+
%21 = sub i64 %20, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
45+
; CHECK: zext {{.*}} @__typeid__ZTS1S_align
46+
%22 = zext nneg i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8) to i64
47+
%23 = lshr i64 %21, %22
48+
; CHECK: zext {{.*}} @__typeid__ZTS1S_align
49+
%24 = zext nneg i8 sub (i8 64, i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8)) to i64
50+
%25 = shl i64 %21, %24
51+
%26 = or i64 %23, %25
52+
%27 = icmp ugt i64 %26, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
53+
br i1 %27, label %28, label %29
54+
55+
; Comparison was true: trap.
28:
56+
tail call void @llvm.ubsantrap(i8 2) #5
57+
unreachable
58+
59+
; Comparison was false: call through the pointer loaded from %19, then join.
29:
60+
%30 = load ptr, ptr %19, align 8
61+
tail call void %30(ptr noundef nonnull align 8 dereferenceable(8) %2)
62+
br label %31
63+
64+
; Join of %16 and %29. %32 merges the two "64 - align" zexts (%11, %24) and
; %33 merges the two "align" zexts (%9, %22); the shifts below use these phis
; as their shift amounts instead of a zext defined in this block.
31:
65+
%32 = phi i64 [ %24, %29 ], [ %11, %16 ]
66+
%33 = phi i64 [ %22, %29 ], [ %9, %16 ]
67+
%34 = load ptr, ptr %3, align 8
68+
%35 = ptrtoint ptr %34 to i64
69+
%36 = sub i64 %35, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
70+
; CHECK: zext {{.*}} @__typeid__ZTS1S_align
71+
%37 = lshr i64 %36, %33
72+
; CHECK: zext {{.*}} @__typeid__ZTS1S_align
73+
%38 = shl i64 %36, %32
74+
%39 = or i64 %37, %38
75+
%40 = icmp ugt i64 %39, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
76+
br i1 %40, label %41, label %42
77+
78+
; Comparison was true: trap.
41:
79+
tail call void @llvm.ubsantrap(i8 2) #5
80+
unreachable
81+
82+
; Comparison was false: call through the pointer loaded from %34 and return.
42:
83+
%43 = load ptr, ptr %34, align 8
84+
tail call void %43(ptr noundef nonnull align 8 dereferenceable(8) %3)
85+
ret void
86+
}
87+
88+
declare i1 @llvm.type.test(ptr, metadata)
89+
declare void @llvm.ubsantrap(i8 immarg)
90+
91+
!0 = !{i64 0, i64 256}
92+
!1 = !{i64 0, i64 128}

0 commit comments

Comments
 (0)