Skip to content

Commit 852b18f

Browse files
committed
[𝘀𝗽𝗿] changes to main this commit is based on
Created using spr 1.3.6-beta.1 [skip ci]
1 parent 782a9e9 commit 852b18f

File tree

2 files changed

+197
-9
lines changed

2 files changed

+197
-9
lines changed

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7132,10 +7132,8 @@ bool X86TTIImpl::isProfitableToSinkOperands(Instruction *I,
71327132
using namespace llvm::PatternMatch;
71337133

71347134
FixedVectorType *VTy = dyn_cast<FixedVectorType>(I->getType());
7135-
if (!VTy)
7136-
return false;
71377135

7138-
if (I->getOpcode() == Instruction::Mul &&
7136+
if (VTy && I->getOpcode() == Instruction::Mul &&
71397137
VTy->getElementType()->isIntegerTy(64)) {
71407138
for (auto &Op : I->operands()) {
71417139
// Make sure we are not already sinking this operand
@@ -7159,9 +7157,6 @@ bool X86TTIImpl::isProfitableToSinkOperands(Instruction *I,
71597157
return !Ops.empty();
71607158
}
71617159

7162-
// A uniform shift amount in a vector shift or funnel shift may be much
7163-
// cheaper than a generic variable vector shift, so make that pattern visible
7164-
// to SDAG by sinking the shuffle instruction next to the shift.
71657160
int ShiftAmountOpNum = -1;
71667161
if (I->isShift())
71677162
ShiftAmountOpNum = 1;
@@ -7170,16 +7165,31 @@ bool X86TTIImpl::isProfitableToSinkOperands(Instruction *I,
71707165
II->getIntrinsicID() == Intrinsic::fshr)
71717166
ShiftAmountOpNum = 2;
71727167
}
7173-
71747168
if (ShiftAmountOpNum == -1)
71757169
return false;
7170+
auto *ShiftAmount = &I->getOperandUse(ShiftAmountOpNum);
71767171

7177-
auto *Shuf = dyn_cast<ShuffleVectorInst>(I->getOperand(ShiftAmountOpNum));
7172+
// A uniform shift amount in a vector shift or funnel shift may be much
7173+
// cheaper than a generic variable vector shift, so make that pattern visible
7174+
// to SDAG by sinking the shuffle instruction next to the shift.
7175+
auto *Shuf = dyn_cast<ShuffleVectorInst>(ShiftAmount);
71787176
if (Shuf && getSplatIndex(Shuf->getShuffleMask()) >= 0 &&
71797177
isVectorShiftByScalarCheap(I->getType())) {
7180-
Ops.push_back(&I->getOperandUse(ShiftAmountOpNum));
7178+
Ops.push_back(ShiftAmount);
71817179
return true;
71827180
}
71837181

7182+
// Casts taking a constant expression (generally derived from a global
7183+
// variable address) as an operand are profitable to sink because they appear
7184+
// as subexpressions in the instruction sequence generated by the
7185+
// LowerTypeTests pass which is expected to pattern match to the rotate
7186+
// instruction's immediate operand.
7187+
if (auto *CI = dyn_cast<CastInst>(ShiftAmount)) {
7188+
if (isa<ConstantExpr>(CI->getOperand(0))) {
7189+
Ops.push_back(ShiftAmount);
7190+
return true;
7191+
}
7192+
}
7193+
71847194
return false;
71857195
}
Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
; Make sure that if optimizations hoist or CSE zext(const) it gets undone by CodeGenPrepare.
2+
3+
; This IR is normally generated by LowerTypeTests during ThinLTO importing
4+
; so it will go through the ThinLTO pass pipeline.
5+
; RUN: opt -passes='thinlto<O0>' -S < %s | opt -codegenprepare -S | FileCheck %s
6+
; RUN: opt -passes='thinlto<O1>' -S < %s | opt -codegenprepare -S | FileCheck %s
7+
; RUN: opt -passes='thinlto<O2>' -S < %s | opt -codegenprepare -S | FileCheck %s
8+
; RUN: opt -passes='thinlto<O3>' -S < %s | opt -codegenprepare -S | FileCheck %s
9+
10+
; Also check the regular pipelines for completeness.
11+
; RUN: opt -O0 -S < %s | opt -codegenprepare -S | FileCheck %s
12+
; RUN: opt -O1 -S < %s | opt -codegenprepare -S | FileCheck %s
13+
; RUN: opt -O2 -S < %s | opt -codegenprepare -S | FileCheck %s
14+
; RUN: opt -O3 -S < %s | opt -codegenprepare -S | FileCheck %s
15+
; RUN: opt -Os -S < %s | opt -codegenprepare -S | FileCheck %s
16+
; RUN: opt -Oz -S < %s | opt -codegenprepare -S | FileCheck %s
17+
18+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
19+
target triple = "x86_64-unknown-linux-gnu"
20+
21+
@__typeid__ZTS1S_global_addr = external hidden global [0 x i8], code_model "small"
22+
@__typeid__ZTS1S_align = external hidden global [0 x i8], !absolute_symbol !0
23+
@__typeid__ZTS1S_size_m1 = external hidden global [0 x i8], !absolute_symbol !1
24+
25+
; Check that we still have two pairs of zexts (non dominating case).
26+
27+
; CHECK: define void @f1
28+
; f1 — non-dominating case: the two type-test sequences live in sibling
; branches (%4 and %17), so neither zext pair dominates the other.  Each
; sequence is the LowerTypeTests-style check: offset from the typeid base,
; rotate right by the alignment (lshr/shl/or with (64 - align)), then a
; range compare against size_m1.  The CHECK lines verify both zext pairs
; survive next to their shifts after CodeGenPrepare.
define void @f1(i1 noundef zeroext %0, ptr noundef %1, ptr noundef %2) {
  br i1 %0, label %4, label %17

4:
  %5 = load ptr, ptr %1, align 8
  %6 = ptrtoint ptr %5 to i64
  ; byte offset of the vtable pointer from the typeid region base
  %7 = sub i64 %6, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
; CHECK: zext {{.*}} @__typeid__ZTS1S_align
  %8 = zext i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8) to i64
  %9 = lshr i64 %7, %8
; CHECK: zext {{.*}} @__typeid__ZTS1S_align
  ; rotate: (%7 >> align) | (%7 << (64 - align))
  %10 = zext i8 sub (i8 64, i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8)) to i64
  %11 = shl i64 %7, %10
  %12 = or i64 %9, %11
  %13 = icmp ule i64 %12, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
  br i1 %13, label %15, label %14

14:
  ; type-test failure path
  call void @llvm.ubsantrap(i8 2)
  unreachable

15:
  %16 = load ptr, ptr %5, align 8
  call void %16(ptr noundef nonnull align 8 dereferenceable(8) %1)
  br label %30

17:
  ; second, independent check of the same shape on the other operand
  %18 = load ptr, ptr %2, align 8
  %19 = ptrtoint ptr %18 to i64
  %20 = sub i64 %19, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
; CHECK: zext {{.*}} @__typeid__ZTS1S_align
  %21 = zext i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8) to i64
  %22 = lshr i64 %20, %21
; CHECK: zext {{.*}} @__typeid__ZTS1S_align
  %23 = zext i8 sub (i8 64, i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8)) to i64
  %24 = shl i64 %20, %23
  %25 = or i64 %22, %24
  %26 = icmp ule i64 %25, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
  br i1 %26, label %28, label %27

27:
  ; NOTE(review): #6 references an attribute group not defined in this file
  ; — confirm the test parses, or drop the stray group reference.
  call void @llvm.ubsantrap(i8 2) #6
  unreachable

28:
  %29 = load ptr, ptr %18, align 8
  call void %29(ptr noundef nonnull align 8 dereferenceable(8) %2)
  br label %30

30:
  ret void
}
80+
81+
; Check that we still have two pairs of zexts (dominating case).
82+
83+
; CHECK: define void @f2
84+
; f2 — dominating case: the first type-test sequence (entry block)
; dominates the second one (block %16), so GVN/CSE in the ThinLTO
; pipeline is liable to merge the zexts.  The CHECK lines verify that
; CodeGenPrepare undoes that and both zext pairs remain local to their
; shifts.
define void @f2(i1 noundef zeroext %0, ptr noundef %1, ptr noundef %2) {
  %4 = load ptr, ptr %1, align 8
  %5 = ptrtoint ptr %4 to i64
  ; offset from typeid base, then rotate right by align, then range check
  %6 = sub i64 %5, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
; CHECK: zext {{.*}} @__typeid__ZTS1S_align
  %7 = zext i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8) to i64
  %8 = lshr i64 %6, %7
; CHECK: zext {{.*}} @__typeid__ZTS1S_align
  %9 = zext i8 sub (i8 64, i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8)) to i64
  %10 = shl i64 %6, %9
  %11 = or i64 %8, %10
  %12 = icmp ule i64 %11, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
  br i1 %12, label %14, label %13

13:                                               ; preds = %3
  call void @llvm.ubsantrap(i8 2)
  unreachable

14:                                               ; preds = %3
  %15 = load ptr, ptr %4, align 8
  call void %15(ptr noundef nonnull align 8 dereferenceable(8) %1)
  br i1 %0, label %16, label %29

16:                                               ; preds = %14
  ; second sequence, dominated by the first — its zexts are the CSE targets
  %17 = load ptr, ptr %2, align 8
  %18 = ptrtoint ptr %17 to i64
  %19 = sub i64 %18, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
; CHECK: zext {{.*}} @__typeid__ZTS1S_align
  %20 = zext i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8) to i64
  %21 = lshr i64 %19, %20
; CHECK: zext {{.*}} @__typeid__ZTS1S_align
  %22 = zext i8 sub (i8 64, i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8)) to i64
  %23 = shl i64 %19, %22
  %24 = or i64 %21, %23
  %25 = icmp ule i64 %24, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
  br i1 %25, label %27, label %26

26:                                               ; preds = %16
  ; NOTE(review): #6 references an attribute group not defined in this file
  ; — confirm the test parses, or drop the stray group reference.
  call void @llvm.ubsantrap(i8 2) #6
  unreachable

27:                                               ; preds = %16
  %28 = load ptr, ptr %17, align 8
  call void %28(ptr noundef nonnull align 8 dereferenceable(8) %2)
  br label %29

29:                                               ; preds = %27, %14
  ret void
}
133+
134+
; Check that the zexts aren't moved to the preheader (or anywhere else)
135+
; and stay in the same basic block.
136+
137+
; CHECK: define void @f3
138+
; f3 — loop case: the type-test sequence sits in a loop body (%4), so
; LICM is liable to hoist the loop-invariant zexts into the preheader.
; The CHECK lines verify the zexts stay in the same basic block as the
; phi, i.e. are not moved out of the loop by the pipeline + CodeGenPrepare.
define void @f3(ptr noundef readonly captures(address) %0, ptr noundef readnone captures(address) %1) {
  %3 = icmp eq ptr %0, %1
  br i1 %3, label %21, label %4

4:
; CHECK: = phi
  ; loop over [%0, %1) in 8-byte (pointer-sized) steps
  %5 = phi ptr [ %19, %17 ], [ %0, %2 ]
  %6 = load ptr, ptr %5, align 8
  %7 = load ptr, ptr %6, align 8
  %8 = ptrtoint ptr %7 to i64
  ; offset from typeid base, rotate right by align, range check (see f1)
  %9 = sub i64 %8, ptrtoint (ptr @__typeid__ZTS1S_global_addr to i64)
; CHECK: zext {{.*}} @__typeid__ZTS1S_align
  %10 = zext i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8) to i64
  %11 = lshr i64 %9, %10
; CHECK: zext {{.*}} @__typeid__ZTS1S_align
  %12 = zext i8 sub (i8 64, i8 ptrtoint (ptr @__typeid__ZTS1S_align to i8)) to i64
  %13 = shl i64 %9, %12
  %14 = or i64 %11, %13
  %15 = icmp ule i64 %14, ptrtoint (ptr @__typeid__ZTS1S_size_m1 to i64)
  br i1 %15, label %17, label %16

16:
  call void @llvm.ubsantrap(i8 2)
  unreachable

17:
  %18 = load ptr, ptr %7, align 8
  call void %18(ptr noundef nonnull align 8 dereferenceable(8) %6)
  %19 = getelementptr inbounds nuw i8, ptr %5, i64 8
  %20 = icmp eq ptr %19, %1
  br i1 %20, label %21, label %4

21:
  ret void
}
173+
174+
declare i1 @llvm.type.test(ptr, metadata)
175+
declare void @llvm.ubsantrap(i8 immarg)
176+
177+
!0 = !{i64 0, i64 256}
178+
!1 = !{i64 0, i64 128}

0 commit comments

Comments
 (0)