Skip to content

Commit 55f236a

Browse files
committed
[InstCombine] Optimize sub(sext(add(x,y)),sext(add(x,z))).
This pattern is often encountered in Flang-generated LLVM IR, for example, in the counts of the loops generated for array expressions like `a(x:x+y)` or `a(x+z:x+z)` or their variations. In order to compute the loop count, Flang needs to subtract the lower bound of the array slice from the upper bound of the array slice. To avoid signed wraparound, it sign-extends the original values (which may be of any user data type) to `i64`. This peephole is really helpful in CPU2017/548.exchange2, where there are multiple statements like this: ``` block(row+1:row+2, 7:9, i7) = block(row+1:row+2, 7:9, i7) - 10 ``` Although this is just a 2x3-iteration loop nest, LLVM cannot figure that out and ends up aggressively vectorizing the inner loop (with a vector epilog and a scalar remainder). This, in turn, causes problems for LSR, which ends up creating too many loop-carried values in the loop containing the above statement; these then cause too many spills/reloads. Alive2: https://alive2.llvm.org/ce/z/gLgfYX Related to #143219.
1 parent 7f69cd5 commit 55f236a

File tree

2 files changed

+145
-0
lines changed

2 files changed

+145
-0
lines changed

llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2807,6 +2807,62 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
28072807
if (Instruction *Res = foldBinOpOfSelectAndCastOfSelectCondition(I))
28082808
return Res;
28092809

2810+
// (sub[ nsw][ nuw] (sext (add nsw (X, Y)), sext (X))) --> (sext (Y))
2811+
{
2812+
Value *Add0;
2813+
if (match(Op0, m_SExt(m_Value(Add0))) &&
2814+
match(Add0, m_Add(m_Value(X), m_Value(Y))) &&
2815+
match(Op1, m_SExt(m_Specific(X)))) {
2816+
auto *OBO0 = cast<OverflowingBinaryOperator>(Add0);
2817+
if (OBO0->hasNoSignedWrap()) {
2818+
// Non-constant Y requires new SExt.
2819+
unsigned numOfNewInstrs = !isa<Constant>(Y) ? 1 : 0;
2820+
// Check if we can trade some of the old instructions for the new ones.
2821+
unsigned numOfDeadInstrs = 0;
2822+
numOfDeadInstrs += Op0->hasOneUse() ? 1 : 0;
2823+
numOfDeadInstrs += Op1->hasOneUse() ? 1 : 0;
2824+
numOfDeadInstrs += Add0->hasOneUse() ? 1 : 0;
2825+
if (numOfDeadInstrs >= numOfNewInstrs) {
2826+
Value *SExtY = Builder.CreateSExt(Y, I.getType());
2827+
return replaceInstUsesWith(I, SExtY);
2828+
}
2829+
}
2830+
}
2831+
}
2832+
2833+
// (sub[ nsw][ nuw] (sext (add nsw (X, Y)), sext (add nsw (X, Z)))) -->
2834+
// (sub[ nsw] (sext (Y), sext (Z)))
2835+
{
2836+
Value *Z, *Add0, *Add1;
2837+
if (match(Op0, m_SExt(m_Value(Add0))) &&
2838+
match(Add0, m_Add(m_Value(X), m_Value(Y))) &&
2839+
match(Op1, m_SExt(m_Value(Add1))) &&
2840+
match(Add1, m_Add(m_Specific(X), m_Value(Z)))) {
2841+
auto *OBO0 = cast<OverflowingBinaryOperator>(Add0);
2842+
auto *OBO1 = cast<OverflowingBinaryOperator>(Add1);
2843+
if (OBO0->hasNoSignedWrap() && OBO1->hasNoSignedWrap()) {
2844+
unsigned numOfNewInstrs = 0;
2845+
// Non-constant Y, Z require new SExt.
2846+
numOfNewInstrs += !isa<Constant>(Y) ? 1 : 0;
2847+
numOfNewInstrs += !isa<Constant>(Z) ? 1 : 0;
2848+
// Check if we can trade some of the old instructions for the new ones.
2849+
unsigned numOfDeadInstrs = 0;
2850+
numOfDeadInstrs += Op0->hasOneUse() ? 1 : 0;
2851+
numOfDeadInstrs += Op1->hasOneUse() ? 1 : 0;
2852+
numOfDeadInstrs += Add0->hasOneUse() ? 1 : 0;
2853+
numOfDeadInstrs += Add1->hasOneUse() ? 1 : 0;
2854+
if (numOfDeadInstrs >= numOfNewInstrs) {
2855+
Value *SExtY = Builder.CreateSExt(Y, I.getType());
2856+
Value *SExtZ = Builder.CreateSExt(Z, I.getType());
2857+
Value *Sub = Builder.CreateSub(SExtY, SExtZ, "",
2858+
/* HasNUW */ false,
2859+
/* HasNSW */ I.hasNoSignedWrap());
2860+
return replaceInstUsesWith(I, Sub);
2861+
}
2862+
}
2863+
}
2864+
}
2865+
28102866
return TryToNarrowDeduceFlags();
28112867
}
28122868

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
3+
4+
define i64 @src_2add_2sext_sub(i32 %x, i32 %y, i32 %z) {
5+
; CHECK-LABEL: define i64 @src_2add_2sext_sub(
6+
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[Z:%.*]]) {
7+
; CHECK-NEXT: [[SEXT1:%.*]] = sext i32 [[Y]] to i64
8+
; CHECK-NEXT: [[SEXT2:%.*]] = sext i32 [[Z]] to i64
9+
; CHECK-NEXT: [[SUB:%.*]] = sub nsw i64 [[SEXT1]], [[SEXT2]]
10+
; CHECK-NEXT: ret i64 [[SUB]]
11+
;
12+
%add1 = add nsw i32 %x, %y
13+
%add2 = add nsw i32 %x, %z
14+
%sext1 = sext i32 %add1 to i64
15+
%sext2 = sext i32 %add2 to i64
16+
%sub = sub i64 %sext1, %sext2
17+
ret i64 %sub
18+
}
19+
20+
define i64 @src_2add_2sext_sub_nsw(i32 %x, i32 %y, i32 %z) {
21+
; CHECK-LABEL: define i64 @src_2add_2sext_sub_nsw(
22+
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[Z:%.*]]) {
23+
; CHECK-NEXT: [[SEXT1:%.*]] = sext i32 [[Y]] to i64
24+
; CHECK-NEXT: [[SEXT2:%.*]] = sext i32 [[Z]] to i64
25+
; CHECK-NEXT: [[SUB:%.*]] = sub nsw i64 [[SEXT1]], [[SEXT2]]
26+
; CHECK-NEXT: ret i64 [[SUB]]
27+
;
28+
%add1 = add nsw i32 %x, %y
29+
%add2 = add nsw i32 %x, %z
30+
%sext1 = sext i32 %add1 to i64
31+
%sext2 = sext i32 %add2 to i64
32+
%sub = sub nsw i64 %sext1, %sext2
33+
ret i64 %sub
34+
}
35+
36+
define i64 @src_2add_2sext_sub_nuw(i32 %x, i32 %y, i32 %z) {
37+
; CHECK-LABEL: define i64 @src_2add_2sext_sub_nuw(
38+
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[Z:%.*]]) {
39+
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[Y]] to i64
40+
; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[Z]] to i64
41+
; CHECK-NEXT: [[SUB:%.*]] = sub nsw i64 [[TMP1]], [[TMP2]]
42+
; CHECK-NEXT: ret i64 [[SUB]]
43+
;
44+
%add1 = add nsw i32 %x, %y
45+
%add2 = add nsw i32 %x, %z
46+
%sext1 = sext i32 %add1 to i64
47+
%sext2 = sext i32 %add2 to i64
48+
%sub = sub nuw i64 %sext1, %sext2
49+
ret i64 %sub
50+
}
51+
52+
define i64 @src_x_add_2sext_sub(i32 %x, i32 %y) {
53+
; CHECK-LABEL: define i64 @src_x_add_2sext_sub(
54+
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
55+
; CHECK-NEXT: [[SUB:%.*]] = sext i32 [[Y]] to i64
56+
; CHECK-NEXT: ret i64 [[SUB]]
57+
;
58+
%add1 = add nsw i32 %x, %y
59+
%sext1 = sext i32 %add1 to i64
60+
%sext2 = sext i32 %x to i64
61+
%sub = sub i64 %sext1, %sext2
62+
ret i64 %sub
63+
}
64+
65+
define i64 @src_x_add_2sext_sub_nsw(i32 %x, i32 %y) {
66+
; CHECK-LABEL: define i64 @src_x_add_2sext_sub_nsw(
67+
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
68+
; CHECK-NEXT: [[SUB:%.*]] = sext i32 [[Y]] to i64
69+
; CHECK-NEXT: ret i64 [[SUB]]
70+
;
71+
%add1 = add nsw i32 %x, %y
72+
%sext1 = sext i32 %add1 to i64
73+
%sext2 = sext i32 %x to i64
74+
%sub = sub nsw i64 %sext1, %sext2
75+
ret i64 %sub
76+
}
77+
78+
define i64 @src_x_add_2sext_sub_nuw(i32 %x, i32 %y) {
79+
; CHECK-LABEL: define i64 @src_x_add_2sext_sub_nuw(
80+
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
81+
; CHECK-NEXT: [[SUB:%.*]] = sext i32 [[Y]] to i64
82+
; CHECK-NEXT: ret i64 [[SUB]]
83+
;
84+
%add1 = add nsw i32 %x, %y
85+
%sext1 = sext i32 %add1 to i64
86+
%sext2 = sext i32 %x to i64
87+
%sub = sub nuw i64 %sext1, %sext2
88+
ret i64 %sub
89+
}

0 commit comments

Comments
 (0)