Skip to content

Commit b3c5491

Browse files
authored
InstCombine: Stop transforming EQ/NE comparisons of SHR against 0 into ULT/UGT if the SHR has >1 use
This is a small code size optimization that lets us avoid emitting both a shift and a comparison against a non-zero constant when we need the shifted value anyway. On most architectures the zero comparison is cheaper than a comparison against a non-zero constant (or free if the shift sets flags). Although this change appears to remove the optimization entirely, we continue to do this transform when the shift has one use: the code below the removed code canonicalizes the shift into an `and`, and the PR10267 case in InstCombinerImpl::foldICmpAndConstConst then transforms the comparison of that `and` into a ult/ugt. Added test cases to verify this explicitly. Per [1], this reduces clang .text size by 0.09% and dynamic instruction count by 0.01%. [1] https://llvm-compile-time-tracker.com/compare.php?from=1f38d49ebe96417e368a567efa4d650b8a9ac30f&to=0873787a12b8f2eab019d8211ace4bccc1807343&stat=size-text Reviewers: nikic, dtcxzyw Reviewed By: dtcxzyw Pull Request: #168007
1 parent 3fb98e7 commit b3c5491

File tree

7 files changed

+57
-29
lines changed

7 files changed

+57
-29
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2638,16 +2638,6 @@ Instruction *InstCombinerImpl::foldICmpShrConstant(ICmpInst &Cmp,
26382638
if (Shr->isExact())
26392639
return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, C << ShAmtVal));
26402640

2641-
if (C.isZero()) {
2642-
// == 0 is u< 1.
2643-
if (Pred == CmpInst::ICMP_EQ)
2644-
return new ICmpInst(CmpInst::ICMP_ULT, X,
2645-
ConstantInt::get(ShrTy, (C + 1).shl(ShAmtVal)));
2646-
else
2647-
return new ICmpInst(CmpInst::ICMP_UGT, X,
2648-
ConstantInt::get(ShrTy, (C + 1).shl(ShAmtVal) - 1));
2649-
}
2650-
26512641
if (Shr->hasOneUse()) {
26522642
// Canonicalize the shift into an 'and':
26532643
// icmp eq/ne (shr X, ShAmt), C --> icmp eq/ne (and X, HiMask), (C << ShAmt)

llvm/test/Analysis/ValueTracking/known-power-of-two-urem.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,7 @@ define i64 @known_power_of_two_urem_loop_lshr(i64 %size, i64 %a) {
228228
; CHECK-NEXT: [[UREM:%.*]] = and i64 [[SIZE:%.*]], [[TMP0]]
229229
; CHECK-NEXT: [[ADD]] = add nuw i64 [[SUM]], [[UREM]]
230230
; CHECK-NEXT: [[I]] = lshr i64 [[PHI]], 1
231-
; CHECK-NEXT: [[ICMP_NOT:%.*]] = icmp ult i64 [[PHI]], 2
231+
; CHECK-NEXT: [[ICMP_NOT:%.*]] = icmp eq i64 [[I]], 0
232232
; CHECK-NEXT: br i1 [[ICMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
233233
; CHECK: for.end:
234234
; CHECK-NEXT: ret i64 [[SUM]]
@@ -328,7 +328,7 @@ define i64 @known_power_of_two_urem_loop_ashr_negative_2(i64 %size, i64 %a) {
328328
; CHECK-NEXT: [[UREM:%.*]] = urem i64 [[SIZE:%.*]], [[PHI]]
329329
; CHECK-NEXT: [[ADD]] = add nsw i64 [[SUM]], [[UREM]]
330330
; CHECK-NEXT: [[I]] = ashr i64 [[PHI]], 2
331-
; CHECK-NEXT: [[ICMP_NOT:%.*]] = icmp ult i64 [[PHI]], 4
331+
; CHECK-NEXT: [[ICMP_NOT:%.*]] = icmp eq i64 [[I]], 0
332332
; CHECK-NEXT: br i1 [[ICMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
333333
; CHECK: for.end:
334334
; CHECK-NEXT: ret i64 [[SUM]]

llvm/test/Transforms/InstCombine/icmp-shr.ll

Lines changed: 45 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -579,7 +579,7 @@ define i1 @ashr_ugt_0(i4 %x) {
579579
define i1 @ashr_ugt_0_multiuse(i4 %x, ptr %p) {
580580
; CHECK-LABEL: @ashr_ugt_0_multiuse(
581581
; CHECK-NEXT: [[S:%.*]] = ashr i4 [[X:%.*]], 1
582-
; CHECK-NEXT: [[R:%.*]] = icmp ugt i4 [[X]], 1
582+
; CHECK-NEXT: [[R:%.*]] = icmp ne i4 [[S]], 0
583583
; CHECK-NEXT: store i4 [[S]], ptr [[P:%.*]], align 1
584584
; CHECK-NEXT: ret i1 [[R]]
585585
;
@@ -934,7 +934,7 @@ define i1 @lshr_eq_0_multiuse(i8 %x) {
934934
; CHECK-LABEL: @lshr_eq_0_multiuse(
935935
; CHECK-NEXT: [[S:%.*]] = lshr i8 [[X:%.*]], 2
936936
; CHECK-NEXT: call void @use(i8 [[S]])
937-
; CHECK-NEXT: [[C:%.*]] = icmp ult i8 [[X]], 4
937+
; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[S]], 0
938938
; CHECK-NEXT: ret i1 [[C]]
939939
;
940940
%s = lshr i8 %x, 2
@@ -947,7 +947,7 @@ define i1 @lshr_ne_0_multiuse(i8 %x) {
947947
; CHECK-LABEL: @lshr_ne_0_multiuse(
948948
; CHECK-NEXT: [[S:%.*]] = lshr i8 [[X:%.*]], 2
949949
; CHECK-NEXT: call void @use(i8 [[S]])
950-
; CHECK-NEXT: [[C:%.*]] = icmp ugt i8 [[X]], 3
950+
; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[S]], 0
951951
; CHECK-NEXT: ret i1 [[C]]
952952
;
953953
%s = lshr i8 %x, 2
@@ -960,7 +960,7 @@ define i1 @ashr_eq_0_multiuse(i8 %x) {
960960
; CHECK-LABEL: @ashr_eq_0_multiuse(
961961
; CHECK-NEXT: [[S:%.*]] = ashr i8 [[X:%.*]], 2
962962
; CHECK-NEXT: call void @use(i8 [[S]])
963-
; CHECK-NEXT: [[C:%.*]] = icmp ult i8 [[X]], 4
963+
; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[S]], 0
964964
; CHECK-NEXT: ret i1 [[C]]
965965
;
966966
%s = ashr i8 %x, 2
@@ -973,7 +973,7 @@ define i1 @ashr_ne_0_multiuse(i8 %x) {
973973
; CHECK-LABEL: @ashr_ne_0_multiuse(
974974
; CHECK-NEXT: [[S:%.*]] = ashr i8 [[X:%.*]], 2
975975
; CHECK-NEXT: call void @use(i8 [[S]])
976-
; CHECK-NEXT: [[C:%.*]] = icmp ugt i8 [[X]], 3
976+
; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[S]], 0
977977
; CHECK-NEXT: ret i1 [[C]]
978978
;
979979
%s = ashr i8 %x, 2
@@ -982,6 +982,46 @@ define i1 @ashr_ne_0_multiuse(i8 %x) {
982982
ret i1 %c
983983
}
984984

985+
define i1 @lshr_eq_0(i8 %x) {
986+
; CHECK-LABEL: @lshr_eq_0(
987+
; CHECK-NEXT: [[C:%.*]] = icmp ult i8 [[X:%.*]], 4
988+
; CHECK-NEXT: ret i1 [[C]]
989+
;
990+
%s = lshr i8 %x, 2
991+
%c = icmp eq i8 %s, 0
992+
ret i1 %c
993+
}
994+
995+
define i1 @lshr_ne_0(i8 %x) {
996+
; CHECK-LABEL: @lshr_ne_0(
997+
; CHECK-NEXT: [[C:%.*]] = icmp ugt i8 [[X:%.*]], 3
998+
; CHECK-NEXT: ret i1 [[C]]
999+
;
1000+
%s = lshr i8 %x, 2
1001+
%c = icmp ne i8 %s, 0
1002+
ret i1 %c
1003+
}
1004+
1005+
define i1 @ashr_eq_0(i8 %x) {
1006+
; CHECK-LABEL: @ashr_eq_0(
1007+
; CHECK-NEXT: [[C:%.*]] = icmp ult i8 [[X:%.*]], 4
1008+
; CHECK-NEXT: ret i1 [[C]]
1009+
;
1010+
%s = ashr i8 %x, 2
1011+
%c = icmp eq i8 %s, 0
1012+
ret i1 %c
1013+
}
1014+
1015+
define i1 @ashr_ne_0(i8 %x) {
1016+
; CHECK-LABEL: @ashr_ne_0(
1017+
; CHECK-NEXT: [[C:%.*]] = icmp ugt i8 [[X:%.*]], 3
1018+
; CHECK-NEXT: ret i1 [[C]]
1019+
;
1020+
%s = ashr i8 %x, 2
1021+
%c = icmp ne i8 %s, 0
1022+
ret i1 %c
1023+
}
1024+
9851025
define i1 @lshr_exact_eq_0_multiuse(i8 %x) {
9861026
; CHECK-LABEL: @lshr_exact_eq_0_multiuse(
9871027
; CHECK-NEXT: [[S:%.*]] = lshr exact i8 [[X:%.*]], 2

llvm/test/Transforms/LoopVectorize/induction.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -914,7 +914,7 @@ define float @scalarize_induction_variable_02(ptr %a, ptr %b, i64 %n) {
914914
; IND-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
915915
; IND-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 3
916916
; IND-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
917-
; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 [[N]], 9
917+
; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i64 [[TMP1]], 0
918918
; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
919919
; IND: vector.ph:
920920
; IND-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 4611686018427387902

llvm/test/Transforms/LoopVectorize/loop-scalars.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ define void @scalar_store(ptr %a, ptr %b, i64 %n) {
6565
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
6666
; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
6767
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
68-
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 [[N]], 3
68+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i64 [[TMP1]], 0
6969
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
7070
; CHECK: vector.ph:
7171
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 9223372036854775806
@@ -125,7 +125,7 @@ define void @expansion(ptr %a, ptr %b, i64 %n) {
125125
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
126126
; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
127127
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
128-
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 [[N]], 3
128+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i64 [[TMP1]], 0
129129
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
130130
; CHECK: vector.ph:
131131
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 9223372036854775806

llvm/test/Transforms/PhaseOrdering/ARM/arm_mean_q7.ll

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,11 @@ target triple = "thumbv6m-none-none-eabi"
99
define void @arm_mean_q7(ptr noundef %pSrc, i32 noundef %blockSize, ptr noundef %pResult) #0 {
1010
; CHECK-LABEL: @arm_mean_q7(
1111
; CHECK-NEXT: entry:
12-
; CHECK-NEXT: [[CMP_NOT10:%.*]] = icmp ult i32 [[BLOCKSIZE:%.*]], 16
13-
; CHECK-NEXT: br i1 [[CMP_NOT10]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
14-
; CHECK: while.body.preheader:
15-
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[BLOCKSIZE]], 4
16-
; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
12+
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[BLOCKSIZE:%.*]], 4
13+
; CHECK-NEXT: [[CMP_NOT10:%.*]] = icmp eq i32 [[SHR]], 0
14+
; CHECK-NEXT: br i1 [[CMP_NOT10]], label [[WHILE_END:%.*]], label [[WHILE_BODY:%.*]]
1715
; CHECK: while.body:
18-
; CHECK-NEXT: [[SUM_013:%.*]] = phi i32 [ [[TMP2:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
16+
; CHECK-NEXT: [[SUM_013:%.*]] = phi i32 [ [[TMP2:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER:%.*]] ]
1917
; CHECK-NEXT: [[PSRC_ADDR_012:%.*]] = phi ptr [ [[ADD_PTR:%.*]], [[WHILE_BODY]] ], [ [[PSRC:%.*]], [[WHILE_BODY_PREHEADER]] ]
2018
; CHECK-NEXT: [[BLKCNT_011:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[SHR]], [[WHILE_BODY_PREHEADER]] ]
2119
; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[PSRC_ADDR_012]], align 1
@@ -30,8 +28,8 @@ define void @arm_mean_q7(ptr noundef %pSrc, i32 noundef %blockSize, ptr noundef
3028
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[PSRC]], i32 [[TMP3]]
3129
; CHECK-NEXT: br label [[WHILE_END]]
3230
; CHECK: while.end:
33-
; CHECK-NEXT: [[PSRC_ADDR_0_LCSSA:%.*]] = phi ptr [ [[PSRC]], [[ENTRY:%.*]] ], [ [[SCEVGEP]], [[WHILE_END_LOOPEXIT]] ]
34-
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP2]], [[WHILE_END_LOOPEXIT]] ]
31+
; CHECK-NEXT: [[PSRC_ADDR_0_LCSSA:%.*]] = phi ptr [ [[PSRC]], [[WHILE_BODY_PREHEADER]] ], [ [[SCEVGEP]], [[WHILE_END_LOOPEXIT]] ]
32+
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[WHILE_BODY_PREHEADER]] ], [ [[TMP2]], [[WHILE_END_LOOPEXIT]] ]
3533
; CHECK-NEXT: [[AND:%.*]] = and i32 [[BLOCKSIZE]], 15
3634
; CHECK-NEXT: [[CMP2_NOT15:%.*]] = icmp eq i32 [[AND]], 0
3735
; CHECK-NEXT: br i1 [[CMP2_NOT15]], label [[WHILE_END5:%.*]], label [[VECTOR_BODY:%.*]]

llvm/test/Transforms/PhaseOrdering/X86/ctlz-loop.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ define i32 @ctlz_loop_with_abs(i32 %n) {
3232
; CHECK-NEXT: [[I_02:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
3333
; CHECK-NEXT: [[TMP1]] = lshr i32 [[N_ADDR_03]], 1
3434
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_02]], 1
35-
; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp samesign ult i32 [[N_ADDR_03]], 2
35+
; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP1]], 0
3636
; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END]], label [[WHILE_BODY]]
3737
; CHECK: while.end:
3838
; CHECK-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC]], [[WHILE_BODY]] ]

0 commit comments

Comments
 (0)