Skip to content

Commit 053436d

Browse files
xortator memfrob
authored and committed
Revert "[Codegenprepare][X86] Use usub with overflow opt for IV increment"
This reverts commit 3d15b7e7dfc3e2cefc47791d1e8d95909e937842. We've found an internal failure, need to analyze.
1 parent 68b3d0b commit 053436d

File tree

4 files changed

+38
-68
lines changed

4 files changed

+38
-68
lines changed

llvm/lib/CodeGen/CodeGenPrepare.cpp

Lines changed: 2 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1284,29 +1284,7 @@ bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
12841284
Value *Arg0, Value *Arg1,
12851285
CmpInst *Cmp,
12861286
Intrinsic::ID IID) {
1287-
auto isIVIncrement = [this, &Cmp](BinaryOperator *BO) {
1288-
auto *PN = dyn_cast<PHINode>(BO->getOperand(0));
1289-
if (!PN)
1290-
return false;
1291-
const Loop *L = LI->getLoopFor(BO->getParent());
1292-
if (!L || L->getHeader() != PN->getParent() || !L->getLoopLatch())
1293-
return false;
1294-
if (PN->getIncomingValueForBlock(L->getLoopLatch()) != BO)
1295-
return false;
1296-
if (auto *Step = dyn_cast<Instruction>(BO->getOperand(1)))
1297-
if (L->contains(Step->getParent()))
1298-
return false;
1299-
// IV increment may have other users than the IV. We do not want to make
1300-
// dominance queries to analyze the legality of moving it towards the cmp,
1301-
// so just check that there is no other users.
1302-
if (!BO->hasOneUse())
1303-
return false;
1304-
// Do not risk on moving increment into a child loop.
1305-
if (LI->getLoopFor(Cmp->getParent()) != L)
1306-
return false;
1307-
return true;
1308-
};
1309-
if (BO->getParent() != Cmp->getParent() && !isIVIncrement(BO)) {
1287+
if (BO->getParent() != Cmp->getParent()) {
13101288
// We used to use a dominator tree here to allow multi-block optimization.
13111289
// But that was problematic because:
13121290
// 1. It could cause a perf regression by hoisting the math op into the
@@ -1317,16 +1295,9 @@ bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
13171295
// This is because we recompute the DT on every change in the main CGP
13181296
// run-loop. The recomputing is probably unnecessary in many cases, so if
13191297
// that was fixed, using a DT here would be ok.
1320-
//
1321-
// There is one important particular case we still want to handle: if BO is
1322-
// the IV increment. Important properties that make it profitable:
1323-
// - We can speculate IV increment anywhere in the loop (as long as the
1324-
// indvar Phi is its only user);
1325-
// - Upon computing Cmp, we effectively compute something equivalent to the
1326-
// IV increment (despite it loops differently in the IR). So moving it up
1327-
// to the cmp point does not really increase register pressure.
13281298
return false;
13291299
}
1300+
13301301
// We allow matching the canonical IR (add X, C) back to (usubo X, -C).
13311302
if (BO->getOpcode() == Instruction::Add &&
13321303
IID == Intrinsic::usub_with_overflow) {

llvm/test/CodeGen/X86/2020_12_02_decrementing_loop.ll

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -89,16 +89,15 @@ failure: ; preds = %backedge
8989
define i32 @test_02(i32* %p, i64 %len, i32 %x) {
9090
; CHECK-LABEL: test_02:
9191
; CHECK: ## %bb.0: ## %entry
92-
; CHECK-NEXT: movq %rsi, %rax
9392
; CHECK-NEXT: .p2align 4, 0x90
9493
; CHECK-NEXT: LBB2_1: ## %loop
9594
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
96-
; CHECK-NEXT: subq $1, %rax
97-
; CHECK-NEXT: jb LBB2_4
95+
; CHECK-NEXT: testq %rsi, %rsi
96+
; CHECK-NEXT: je LBB2_4
9897
; CHECK-NEXT: ## %bb.2: ## %backedge
9998
; CHECK-NEXT: ## in Loop: Header=BB2_1 Depth=1
10099
; CHECK-NEXT: cmpl %edx, -4(%rdi,%rsi,4)
101-
; CHECK-NEXT: movq %rax, %rsi
100+
; CHECK-NEXT: leaq -1(%rsi), %rsi
102101
; CHECK-NEXT: jne LBB2_1
103102
; CHECK-NEXT: ## %bb.3: ## %failure
104103
; CHECK-NEXT: ud2
@@ -133,16 +132,15 @@ failure: ; preds = %backedge
133132
define i32 @test_03(i32* %p, i64 %len, i32 %x) {
134133
; CHECK-LABEL: test_03:
135134
; CHECK: ## %bb.0: ## %entry
136-
; CHECK-NEXT: movq %rsi, %rax
137135
; CHECK-NEXT: .p2align 4, 0x90
138136
; CHECK-NEXT: LBB3_1: ## %loop
139137
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
140-
; CHECK-NEXT: subq $1, %rax
141-
; CHECK-NEXT: jb LBB3_4
138+
; CHECK-NEXT: testq %rsi, %rsi
139+
; CHECK-NEXT: je LBB3_4
142140
; CHECK-NEXT: ## %bb.2: ## %backedge
143141
; CHECK-NEXT: ## in Loop: Header=BB3_1 Depth=1
144142
; CHECK-NEXT: cmpl %edx, -4(%rdi,%rsi,4)
145-
; CHECK-NEXT: movq %rax, %rsi
143+
; CHECK-NEXT: leaq -1(%rsi), %rsi
146144
; CHECK-NEXT: jne LBB3_1
147145
; CHECK-NEXT: ## %bb.3: ## %failure
148146
; CHECK-NEXT: ud2

llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll

Lines changed: 22 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,11 @@ define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r
1616
; GENERIC-NEXT: movl (%rdx), %eax
1717
; GENERIC-NEXT: movl 4(%rdx), %ebx
1818
; GENERIC-NEXT: decl %ecx
19-
; GENERIC-NEXT: leaq 20(%rdx), %r11
19+
; GENERIC-NEXT: leaq 20(%rdx), %r14
2020
; GENERIC-NEXT: movq _Te0@{{.*}}(%rip), %r9
2121
; GENERIC-NEXT: movq _Te1@{{.*}}(%rip), %r8
2222
; GENERIC-NEXT: movq _Te3@{{.*}}(%rip), %r10
23-
; GENERIC-NEXT: movq %rcx, %r14
23+
; GENERIC-NEXT: movq %rcx, %r11
2424
; GENERIC-NEXT: .p2align 4, 0x90
2525
; GENERIC-NEXT: LBB0_1: ## %bb
2626
; GENERIC-NEXT: ## =>This Inner Loop Header: Depth=1
@@ -32,29 +32,30 @@ define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r
3232
; GENERIC-NEXT: movzbl %bpl, %ebp
3333
; GENERIC-NEXT: movl (%r8,%rbp,4), %ebp
3434
; GENERIC-NEXT: xorl (%r9,%rax,4), %ebp
35-
; GENERIC-NEXT: xorl -12(%r11), %ebp
35+
; GENERIC-NEXT: xorl -12(%r14), %ebp
3636
; GENERIC-NEXT: shrl $24, %ebx
3737
; GENERIC-NEXT: movl (%r10,%rdi,4), %edi
3838
; GENERIC-NEXT: xorl (%r9,%rbx,4), %edi
39-
; GENERIC-NEXT: xorl -8(%r11), %edi
39+
; GENERIC-NEXT: xorl -8(%r14), %edi
4040
; GENERIC-NEXT: movl %ebp, %eax
4141
; GENERIC-NEXT: shrl $24, %eax
4242
; GENERIC-NEXT: movl (%r9,%rax,4), %eax
43-
; GENERIC-NEXT: subq $1, %r14
44-
; GENERIC-NEXT: jb LBB0_3
43+
; GENERIC-NEXT: testq %r11, %r11
44+
; GENERIC-NEXT: je LBB0_3
4545
; GENERIC-NEXT: ## %bb.2: ## %bb1
4646
; GENERIC-NEXT: ## in Loop: Header=BB0_1 Depth=1
4747
; GENERIC-NEXT: movl %edi, %ebx
4848
; GENERIC-NEXT: shrl $16, %ebx
4949
; GENERIC-NEXT: movzbl %bl, %ebx
5050
; GENERIC-NEXT: xorl (%r8,%rbx,4), %eax
51-
; GENERIC-NEXT: xorl -4(%r11), %eax
51+
; GENERIC-NEXT: xorl -4(%r14), %eax
5252
; GENERIC-NEXT: shrl $24, %edi
5353
; GENERIC-NEXT: movzbl %bpl, %ebx
5454
; GENERIC-NEXT: movl (%r10,%rbx,4), %ebx
5555
; GENERIC-NEXT: xorl (%r9,%rdi,4), %ebx
56-
; GENERIC-NEXT: xorl (%r11), %ebx
57-
; GENERIC-NEXT: addq $16, %r11
56+
; GENERIC-NEXT: xorl (%r14), %ebx
57+
; GENERIC-NEXT: decq %r11
58+
; GENERIC-NEXT: addq $16, %r14
5859
; GENERIC-NEXT: jmp LBB0_1
5960
; GENERIC-NEXT: LBB0_3: ## %bb2
6061
; GENERIC-NEXT: shlq $4, %rcx
@@ -98,12 +99,12 @@ define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r
9899
; ATOM-NEXT: ## kill: def $ecx killed $ecx def $rcx
99100
; ATOM-NEXT: movl (%rdx), %r15d
100101
; ATOM-NEXT: movl 4(%rdx), %eax
101-
; ATOM-NEXT: leaq 20(%rdx), %r11
102+
; ATOM-NEXT: leaq 20(%rdx), %r14
102103
; ATOM-NEXT: movq _Te0@{{.*}}(%rip), %r9
103104
; ATOM-NEXT: movq _Te1@{{.*}}(%rip), %r8
104105
; ATOM-NEXT: movq _Te3@{{.*}}(%rip), %r10
105106
; ATOM-NEXT: decl %ecx
106-
; ATOM-NEXT: movq %rcx, %r14
107+
; ATOM-NEXT: movq %rcx, %r11
107108
; ATOM-NEXT: .p2align 4, 0x90
108109
; ATOM-NEXT: LBB0_1: ## %bb
109110
; ATOM-NEXT: ## =>This Inner Loop Header: Depth=1
@@ -117,27 +118,28 @@ define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r
117118
; ATOM-NEXT: movzbl %r15b, %edi
118119
; ATOM-NEXT: xorl (%r9,%rbp,4), %ebx
119120
; ATOM-NEXT: movl (%r10,%rdi,4), %edi
120-
; ATOM-NEXT: xorl -12(%r11), %ebx
121+
; ATOM-NEXT: xorl -12(%r14), %ebx
121122
; ATOM-NEXT: xorl (%r9,%rax,4), %edi
122123
; ATOM-NEXT: movl %ebx, %eax
123-
; ATOM-NEXT: xorl -8(%r11), %edi
124+
; ATOM-NEXT: xorl -8(%r14), %edi
124125
; ATOM-NEXT: shrl $24, %eax
125126
; ATOM-NEXT: movl (%r9,%rax,4), %r15d
126-
; ATOM-NEXT: subq $1, %r14
127+
; ATOM-NEXT: testq %r11, %r11
127128
; ATOM-NEXT: movl %edi, %eax
128-
; ATOM-NEXT: jb LBB0_3
129+
; ATOM-NEXT: je LBB0_3
129130
; ATOM-NEXT: ## %bb.2: ## %bb1
130131
; ATOM-NEXT: ## in Loop: Header=BB0_1 Depth=1
131132
; ATOM-NEXT: shrl $16, %eax
132133
; ATOM-NEXT: shrl $24, %edi
133-
; ATOM-NEXT: movzbl %al, %eax
134-
; ATOM-NEXT: xorl (%r8,%rax,4), %r15d
134+
; ATOM-NEXT: decq %r11
135+
; ATOM-NEXT: movzbl %al, %ebp
135136
; ATOM-NEXT: movzbl %bl, %eax
136137
; ATOM-NEXT: movl (%r10,%rax,4), %eax
137-
; ATOM-NEXT: xorl -4(%r11), %r15d
138+
; ATOM-NEXT: xorl (%r8,%rbp,4), %r15d
138139
; ATOM-NEXT: xorl (%r9,%rdi,4), %eax
139-
; ATOM-NEXT: xorl (%r11), %eax
140-
; ATOM-NEXT: addq $16, %r11
140+
; ATOM-NEXT: xorl -4(%r14), %r15d
141+
; ATOM-NEXT: xorl (%r14), %eax
142+
; ATOM-NEXT: addq $16, %r14
141143
; ATOM-NEXT: jmp LBB0_1
142144
; ATOM-NEXT: LBB0_3: ## %bb2
143145
; ATOM-NEXT: shrl $16, %eax

llvm/test/CodeGen/X86/usub_inc_iv.ll

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -102,19 +102,18 @@ define i32 @test_02(i32* %p, i64 %len, i32 %x) {
102102
; CHECK-NEXT: entry:
103103
; CHECK-NEXT: br label [[LOOP:%.*]]
104104
; CHECK: loop:
105-
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[MATH:%.*]], [[BACKEDGE:%.*]] ], [ [[LEN:%.*]], [[ENTRY:%.*]] ]
106-
; CHECK-NEXT: [[TMP0:%.*]] = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 [[IV]], i64 1)
107-
; CHECK-NEXT: [[MATH]] = extractvalue { i64, i1 } [[TMP0]], 0
108-
; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1
109-
; CHECK-NEXT: br i1 [[OV]], label [[EXIT:%.*]], label [[BACKEDGE]]
105+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ [[LEN:%.*]], [[ENTRY:%.*]] ]
106+
; CHECK-NEXT: [[COND_1:%.*]] = icmp eq i64 [[IV]], 0
107+
; CHECK-NEXT: br i1 [[COND_1]], label [[EXIT:%.*]], label [[BACKEDGE]]
110108
; CHECK: backedge:
111109
; CHECK-NEXT: [[SUNKADDR:%.*]] = mul i64 [[IV]], 4
112-
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[P:%.*]] to i8*
113-
; CHECK-NEXT: [[SUNKADDR1:%.*]] = getelementptr i8, i8* [[TMP1]], i64 [[SUNKADDR]]
110+
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to i8*
111+
; CHECK-NEXT: [[SUNKADDR1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 [[SUNKADDR]]
114112
; CHECK-NEXT: [[SUNKADDR2:%.*]] = getelementptr i8, i8* [[SUNKADDR1]], i64 -4
115-
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[SUNKADDR2]] to i32*
116-
; CHECK-NEXT: [[LOADED:%.*]] = load atomic i32, i32* [[TMP2]] unordered, align 4
113+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[SUNKADDR2]] to i32*
114+
; CHECK-NEXT: [[LOADED:%.*]] = load atomic i32, i32* [[TMP1]] unordered, align 4
117115
; CHECK-NEXT: [[COND_2:%.*]] = icmp eq i32 [[LOADED]], [[X:%.*]]
116+
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], -1
118117
; CHECK-NEXT: br i1 [[COND_2]], label [[FAILURE:%.*]], label [[LOOP]]
119118
; CHECK: exit:
120119
; CHECK-NEXT: ret i32 -1

0 commit comments

Comments
 (0)