Skip to content

Commit 5b20453

Browse files
authored
[CodeGenPrepare] sinkCmpExpression - don't sink larger than legal integer comparisons (llvm#166778)
A generic alternative to llvm#166564: assume that expanding an integer comparison is expensive when its operands are wider than the largest legal integer type, and therefore avoid sinking such a comparison if it is also used in the current basic block or by any PHI node. Fixes llvm#166534.
1 parent bba40ab commit 5b20453

File tree

3 files changed

+53
-80
lines changed

3 files changed

+53
-80
lines changed

llvm/lib/CodeGen/CodeGenPrepare.cpp

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1839,14 +1839,27 @@ bool CodeGenPrepare::unfoldPowerOf2Test(CmpInst *Cmp) {
18391839
/// lose; some adjustment may be wanted there.
18401840
///
18411841
/// Return true if any changes are made.
1842-
static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
1842+
static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI,
1843+
const DataLayout &DL) {
18431844
if (TLI.hasMultipleConditionRegisters(EVT::getEVT(Cmp->getType())))
18441845
return false;
18451846

18461847
// Avoid sinking soft-FP comparisons, since this can move them into a loop.
18471848
if (TLI.useSoftFloat() && isa<FCmpInst>(Cmp))
18481849
return false;
18491850

1851+
bool UsedInPhiOrCurrentBlock = any_of(Cmp->users(), [Cmp](User *U) {
1852+
return isa<PHINode>(U) ||
1853+
cast<Instruction>(U)->getParent() == Cmp->getParent();
1854+
});
1855+
1856+
// Avoid sinking larger than legal integer comparisons unless they are ONLY
1857+
// used in another BB.
1858+
if (UsedInPhiOrCurrentBlock && Cmp->getOperand(0)->getType()->isIntegerTy() &&
1859+
Cmp->getOperand(0)->getType()->getScalarSizeInBits() >
1860+
DL.getLargestLegalIntTypeSizeInBits())
1861+
return false;
1862+
18501863
// Only insert a cmp in each block once.
18511864
DenseMap<BasicBlock *, CmpInst *> InsertedCmps;
18521865

@@ -2224,7 +2237,7 @@ bool CodeGenPrepare::optimizeURem(Instruction *Rem) {
22242237
}
22252238

22262239
bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
2227-
if (sinkCmpExpression(Cmp, *TLI))
2240+
if (sinkCmpExpression(Cmp, *TLI, *DL))
22282241
return true;
22292242

22302243
if (combineToUAddWithOverflow(Cmp, ModifiedDT))

llvm/test/CodeGen/RISCV/overflow-intrinsics.ll

Lines changed: 22 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@ define i64 @uaddo3_math_overflow_used(i64 %a, i64 %b, ptr %res) nounwind ssp {
232232
ret i64 %Q
233233
}
234234

235-
; TODO? CGP sinks the compare before we have a chance to form the overflow intrinsic.
235+
; Ensure CGP doesn't sink the compare before we have a chance to form the overflow intrinsic.
236236

237237
define i64 @uaddo4(i64 %a, i64 %b, i1 %c) nounwind ssp {
238238
; RV32-LABEL: uaddo4:
@@ -1076,41 +1076,37 @@ define i1 @usubo_ult_cmp_dominates_i64(i64 %x, i64 %y, ptr %p, i1 %cond) {
10761076
; RV32-NEXT: .cfi_offset s4, -24
10771077
; RV32-NEXT: .cfi_offset s5, -28
10781078
; RV32-NEXT: .cfi_offset s6, -32
1079-
; RV32-NEXT: mv s5, a5
1080-
; RV32-NEXT: mv s3, a1
1079+
; RV32-NEXT: mv s1, a5
1080+
; RV32-NEXT: mv s4, a1
10811081
; RV32-NEXT: andi a1, a5, 1
1082-
; RV32-NEXT: beqz a1, .LBB32_8
1082+
; RV32-NEXT: beqz a1, .LBB32_6
10831083
; RV32-NEXT: # %bb.1: # %t
10841084
; RV32-NEXT: mv s0, a4
1085-
; RV32-NEXT: mv s2, a3
1086-
; RV32-NEXT: mv s1, a2
1087-
; RV32-NEXT: mv s4, a0
1088-
; RV32-NEXT: beq s3, a3, .LBB32_3
1085+
; RV32-NEXT: mv s3, a3
1086+
; RV32-NEXT: mv s2, a2
1087+
; RV32-NEXT: mv s5, a0
1088+
; RV32-NEXT: beq s4, a3, .LBB32_3
10891089
; RV32-NEXT: # %bb.2: # %t
1090-
; RV32-NEXT: sltu s6, s3, s2
1090+
; RV32-NEXT: sltu s6, s4, s3
10911091
; RV32-NEXT: j .LBB32_4
10921092
; RV32-NEXT: .LBB32_3:
1093-
; RV32-NEXT: sltu s6, s4, s1
1093+
; RV32-NEXT: sltu s6, s5, s2
10941094
; RV32-NEXT: .LBB32_4: # %t
10951095
; RV32-NEXT: mv a0, s6
10961096
; RV32-NEXT: call call
1097-
; RV32-NEXT: beqz s6, .LBB32_8
1097+
; RV32-NEXT: beqz s6, .LBB32_6
10981098
; RV32-NEXT: # %bb.5: # %end
1099-
; RV32-NEXT: sltu a1, s4, s1
1100-
; RV32-NEXT: mv a0, a1
1101-
; RV32-NEXT: beq s3, s2, .LBB32_7
1102-
; RV32-NEXT: # %bb.6: # %end
1103-
; RV32-NEXT: sltu a0, s3, s2
1104-
; RV32-NEXT: .LBB32_7: # %end
1105-
; RV32-NEXT: sub a2, s3, s2
1106-
; RV32-NEXT: sub a3, s4, s1
1107-
; RV32-NEXT: sub a2, a2, a1
1108-
; RV32-NEXT: sw a3, 0(s0)
1109-
; RV32-NEXT: sw a2, 4(s0)
1110-
; RV32-NEXT: j .LBB32_9
1111-
; RV32-NEXT: .LBB32_8: # %f
1112-
; RV32-NEXT: mv a0, s5
1113-
; RV32-NEXT: .LBB32_9: # %f
1099+
; RV32-NEXT: sltu a0, s5, s2
1100+
; RV32-NEXT: sub a1, s4, s3
1101+
; RV32-NEXT: sub a2, s5, s2
1102+
; RV32-NEXT: sub a1, a1, a0
1103+
; RV32-NEXT: sw a2, 0(s0)
1104+
; RV32-NEXT: sw a1, 4(s0)
1105+
; RV32-NEXT: mv a0, s6
1106+
; RV32-NEXT: j .LBB32_7
1107+
; RV32-NEXT: .LBB32_6: # %f
1108+
; RV32-NEXT: mv a0, s1
1109+
; RV32-NEXT: .LBB32_7: # %f
11141110
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
11151111
; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
11161112
; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload

llvm/test/CodeGen/X86/pr166534.ll

Lines changed: 16 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -7,100 +7,64 @@
77
define void @pr166534(ptr %pa, ptr %pb, ptr %pc, ptr %pd) {
88
; SSE2-LABEL: pr166534:
99
; SSE2: # %bb.0: # %entry
10-
; SSE2-NEXT: movq (%rdi), %rax
11-
; SSE2-NEXT: movq 8(%rdi), %r8
1210
; SSE2-NEXT: movdqu (%rdi), %xmm0
13-
; SSE2-NEXT: movq (%rsi), %r9
14-
; SSE2-NEXT: movq 8(%rsi), %rdi
1511
; SSE2-NEXT: movdqu (%rsi), %xmm1
1612
; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
1713
; SSE2-NEXT: pmovmskb %xmm1, %esi
18-
; SSE2-NEXT: xorl %r10d, %r10d
14+
; SSE2-NEXT: xorl %eax, %eax
1915
; SSE2-NEXT: cmpl $65535, %esi # imm = 0xFFFF
20-
; SSE2-NEXT: sete %r10b
21-
; SSE2-NEXT: orq %r10, (%rdx)
16+
; SSE2-NEXT: sete %al
17+
; SSE2-NEXT: orq %rax, (%rdx)
2218
; SSE2-NEXT: cmpl $65535, %esi # imm = 0xFFFF
2319
; SSE2-NEXT: jne .LBB0_2
2420
; SSE2-NEXT: # %bb.1: # %if.then
25-
; SSE2-NEXT: xorq %r9, %rax
26-
; SSE2-NEXT: xorq %rdi, %r8
27-
; SSE2-NEXT: xorl %edx, %edx
28-
; SSE2-NEXT: orq %rax, %r8
29-
; SSE2-NEXT: sete %dl
30-
; SSE2-NEXT: orq %rdx, (%rcx)
21+
; SSE2-NEXT: orq %rax, (%rcx)
3122
; SSE2-NEXT: .LBB0_2: # %if.end
3223
; SSE2-NEXT: retq
3324
;
3425
; SSE4-LABEL: pr166534:
3526
; SSE4: # %bb.0: # %entry
36-
; SSE4-NEXT: movq (%rdi), %rax
37-
; SSE4-NEXT: movq 8(%rdi), %r8
3827
; SSE4-NEXT: movdqu (%rdi), %xmm0
39-
; SSE4-NEXT: movq (%rsi), %r9
40-
; SSE4-NEXT: movq 8(%rsi), %rdi
4128
; SSE4-NEXT: movdqu (%rsi), %xmm1
4229
; SSE4-NEXT: pxor %xmm0, %xmm1
43-
; SSE4-NEXT: xorl %esi, %esi
30+
; SSE4-NEXT: xorl %eax, %eax
4431
; SSE4-NEXT: ptest %xmm1, %xmm1
45-
; SSE4-NEXT: sete %sil
46-
; SSE4-NEXT: orq %rsi, (%rdx)
32+
; SSE4-NEXT: sete %al
33+
; SSE4-NEXT: orq %rax, (%rdx)
4734
; SSE4-NEXT: ptest %xmm1, %xmm1
4835
; SSE4-NEXT: jne .LBB0_2
4936
; SSE4-NEXT: # %bb.1: # %if.then
50-
; SSE4-NEXT: xorq %r9, %rax
51-
; SSE4-NEXT: xorq %rdi, %r8
52-
; SSE4-NEXT: xorl %edx, %edx
53-
; SSE4-NEXT: orq %rax, %r8
54-
; SSE4-NEXT: sete %dl
55-
; SSE4-NEXT: orq %rdx, (%rcx)
37+
; SSE4-NEXT: orq %rax, (%rcx)
5638
; SSE4-NEXT: .LBB0_2: # %if.end
5739
; SSE4-NEXT: retq
5840
;
5941
; AVX2-LABEL: pr166534:
6042
; AVX2: # %bb.0: # %entry
61-
; AVX2-NEXT: movq (%rdi), %rax
62-
; AVX2-NEXT: movq 8(%rdi), %r8
6343
; AVX2-NEXT: vmovdqu (%rdi), %xmm0
64-
; AVX2-NEXT: movq (%rsi), %rdi
6544
; AVX2-NEXT: vpxor (%rsi), %xmm0, %xmm0
66-
; AVX2-NEXT: movq 8(%rsi), %rsi
67-
; AVX2-NEXT: xorl %r9d, %r9d
45+
; AVX2-NEXT: xorl %eax, %eax
6846
; AVX2-NEXT: vptest %xmm0, %xmm0
69-
; AVX2-NEXT: sete %r9b
70-
; AVX2-NEXT: orq %r9, (%rdx)
47+
; AVX2-NEXT: sete %al
48+
; AVX2-NEXT: orq %rax, (%rdx)
7149
; AVX2-NEXT: vptest %xmm0, %xmm0
7250
; AVX2-NEXT: jne .LBB0_2
7351
; AVX2-NEXT: # %bb.1: # %if.then
74-
; AVX2-NEXT: xorq %rdi, %rax
75-
; AVX2-NEXT: xorq %rsi, %r8
76-
; AVX2-NEXT: xorl %edx, %edx
77-
; AVX2-NEXT: orq %rax, %r8
78-
; AVX2-NEXT: sete %dl
79-
; AVX2-NEXT: orq %rdx, (%rcx)
52+
; AVX2-NEXT: orq %rax, (%rcx)
8053
; AVX2-NEXT: .LBB0_2: # %if.end
8154
; AVX2-NEXT: retq
8255
;
8356
; AVX512-LABEL: pr166534:
8457
; AVX512: # %bb.0: # %entry
85-
; AVX512-NEXT: movq (%rdi), %rax
86-
; AVX512-NEXT: movq 8(%rdi), %r8
8758
; AVX512-NEXT: vmovdqu (%rdi), %xmm0
88-
; AVX512-NEXT: movq (%rsi), %r9
89-
; AVX512-NEXT: movq 8(%rsi), %rdi
9059
; AVX512-NEXT: vpxor (%rsi), %xmm0, %xmm0
91-
; AVX512-NEXT: xorl %esi, %esi
60+
; AVX512-NEXT: xorl %eax, %eax
9261
; AVX512-NEXT: vptest %xmm0, %xmm0
93-
; AVX512-NEXT: sete %sil
94-
; AVX512-NEXT: orq %rsi, (%rdx)
62+
; AVX512-NEXT: sete %al
63+
; AVX512-NEXT: orq %rax, (%rdx)
9564
; AVX512-NEXT: vptest %xmm0, %xmm0
9665
; AVX512-NEXT: jne .LBB0_2
9766
; AVX512-NEXT: # %bb.1: # %if.then
98-
; AVX512-NEXT: xorq %r9, %rax
99-
; AVX512-NEXT: xorq %rdi, %r8
100-
; AVX512-NEXT: xorl %edx, %edx
101-
; AVX512-NEXT: orq %rax, %r8
102-
; AVX512-NEXT: sete %dl
103-
; AVX512-NEXT: orq %rdx, (%rcx)
67+
; AVX512-NEXT: orq %rax, (%rcx)
10468
; AVX512-NEXT: .LBB0_2: # %if.end
10569
; AVX512-NEXT: retq
10670
entry:

0 commit comments

Comments
 (0)