Skip to content

Commit 2e44ffd

Browse files
committed
[CVP] Implement type narrowing for LShr
Implements type narrowing for LShr. The treatment is analogous to the type narrowing of UDiv. Since LShr is a relatively cheap instruction, the narrowing occurs only if the following conditions hold: i) all the users of the LShr instruction are already TruncInst; ii) the narrowing is carried out to the largest TruncInst following the LShr instruction. Additionally, the function optimizes the cases where the result of the LShr instruction is guaranteed to vanish or be equal to poison.
1 parent 44e162b commit 2e44ffd

File tree

3 files changed

+51
-8
lines changed

3 files changed

+51
-8
lines changed

llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1134,6 +1134,19 @@ static bool narrowLShr(BinaryOperator *LShr, LazyValueInfo *LVI) {
11341134
return true;
11351135
}
11361136

1137+
// Since LShr returns poison if the shift is larger than or equal to the bit
1138+
// width of the argument, we must make sure that the maximal possible value
1139+
// for the shift is smaller than the new width after narrowing. Otherwise some
1140+
// shifts that originally vanish would result in poison after the narrowing.
1141+
uint64_t MaxShiftValue64 = ShiftRange.getUnsignedMax().getZExtValue();
1142+
unsigned MaxShiftValue =
1143+
MaxShiftValue64 < std::numeric_limits<unsigned>::max()
1144+
? static_cast<unsigned>(MaxShiftValue64)
1145+
: std::numeric_limits<unsigned>::max();
1146+
1147+
if (OrigWidth <= MaxShiftValue)
1148+
return false;
1149+
11371150
// That's how many bits we need.
11381151
unsigned MaxActiveBits =
11391152
std::max(MaxActiveBitsInArg, ShiftRange.getActiveBits());
@@ -1165,6 +1178,10 @@ static bool narrowLShr(BinaryOperator *LShr, LazyValueInfo *LVI) {
11651178
}
11661179
}
11671180

1181+
// See comment above MaxShiftValue.
1182+
if (NewWidth <= MaxShiftValue)
1183+
return false;
1184+
11681185
// We are ready to truncate.
11691186
IRBuilder<> B(LShr);
11701187
Type *TruncTy = RetTy->getWithNewBitWidth(NewWidth);

llvm/test/Transforms/CorrelatedValuePropagation/lshr-plus-instcombine.ll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,14 @@
77
; CHECK-LABEL: @trunc_test1
88
; CHECK-NEXT: [[A1:%.*]] = lshr i32 [[A:%.*]], 16
99
; CHECK-NEXT: [[CARG:%.*]] = trunc nuw i32 [[A1]] to i16
10-
; CHECK-NEXT: [[CSHIFT:%.*]] = trunc i32 [[B:%.*]] to i16
10+
; CHECK-NEXT: [[B1:%.*]] = trunc i32 [[B:%.*]] to i16
11+
; CHECK-NEXT: [[CSHIFT:%.*]] = and i16 [[B1]], 15
1112
; CHECK-NEXT: [[C1:%.*]] = lshr i16 [[CARG]], [[CSHIFT]]
1213
; CHECK-NEXT: ret i16 [[C1]]
1314

1415
define i16 @trunc_test1(i32 %a, i32 %b) {
1516
%a.eff.trunc = lshr i32 %a, 16
16-
%b.eff.trunc = and i32 %b, 65535
17+
%b.eff.trunc = and i32 %b, 15
1718
%c = lshr i32 %a.eff.trunc, %b.eff.trunc
1819
%c.trunc = trunc i32 %c to i16
1920
ret i16 %c.trunc

llvm/test/Transforms/CorrelatedValuePropagation/lshr.ll

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -50,19 +50,19 @@ entry:
5050
br i1 %cmp, label %bb, label %exit
5151

5252
bb:
53-
; CHECK: lshr i16
53+
; CHECK: lshr i32
5454
%shr = lshr i32 %m, %n
5555
br label %exit
5656

5757
exit:
5858
ret void
5959
}
6060

61-
; CHECK-LABEL: @test4(
62-
define void @test4(i32 %m, i32 %n) {
61+
; CHECK-LABEL: @test3a(
62+
define void @test3a(i32 %m, i32 %n) {
6363
entry:
6464
%cmp1 = icmp ult i32 %m, 65535
65-
%cmp2 = icmp ule i32 %n, 65536
65+
%cmp2 = icmp ult i32 %n, 17
6666
%cmp = and i1 %cmp1, %cmp2
6767
br i1 %cmp, label %bb, label %exit
6868

@@ -75,6 +75,23 @@ exit:
7575
ret void
7676
}
7777

78+
; CHECK-LABEL: @test3b(
79+
define void @test3b(i32 %m, i32 %n) {
80+
entry:
81+
%cmp1 = icmp ult i32 %m, 65535
82+
%cmp2 = icmp ult i32 %n, 16
83+
%cmp = and i1 %cmp1, %cmp2
84+
br i1 %cmp, label %bb, label %exit
85+
86+
bb:
87+
; CHECK: lshr i16
88+
%shr = lshr i32 %m, %n
89+
br label %exit
90+
91+
exit:
92+
ret void
93+
}
94+
7895
; CHECK-LABEL: @test5
7996
define void @test5(i32 %n) {
8097
%trunc = and i32 %n, 65535
@@ -83,6 +100,14 @@ define void @test5(i32 %n) {
83100
ret void
84101
}
85102

103+
; CHECK-LABEL: @test5a
104+
define void @test5a(i32 %n) {
105+
%trunc = and i32 %n, 65535
106+
; CHECK: lshr i16
107+
%shr = lshr i32 %trunc, 15
108+
ret void
109+
}
110+
86111
; CHECK-LABEL: @test6
87112
define void @test6(i32 %n) {
88113
entry:
@@ -107,7 +132,7 @@ exit:
107132

108133
; CHECK-LABEL: @trunc_test1
109134
; CHECK-NEXT: [[A1:%.*]] = lshr i32 [[A:%.*]], 16
110-
; CHECK-NEXT: [[B1:%.*]] = and i32 [[B:%.*]], 65535
135+
; CHECK-NEXT: [[B1:%.*]] = and i32 [[B:%.*]], 15
111136
; CHECK-NEXT: [[A2:%.*]] = trunc i32 [[A1]] to i16
112137
; CHECK-NEXT: [[B2:%.*]] = trunc i32 [[B1]] to i16
113138
; CHECK-NEXT: [[C1:%.*]] = lshr i16 [[A2]], [[B2]]
@@ -117,7 +142,7 @@ exit:
117142

118143
define i16 @trunc_test1(i32 %a, i32 %b) {
119144
%a.eff.trunc = lshr i32 %a, 16
120-
%b.eff.trunc = and i32 %b, 65535
145+
%b.eff.trunc = and i32 %b, 15
121146
%c = lshr i32 %a.eff.trunc, %b.eff.trunc
122147
%c.trunc = trunc i32 %c to i16
123148
ret i16 %c.trunc

0 commit comments

Comments
 (0)