Skip to content

Commit 000a106

Browse files
author
git apple-llvm automerger
committed
Merge commit '5b204530629c' from llvm.org/main into next
2 parents f73be3f + 5b20453 commit 000a106

File tree

3 files changed

+53
-80
lines changed

3 files changed

+53
-80
lines changed

llvm/lib/CodeGen/CodeGenPrepare.cpp

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1839,14 +1839,27 @@ bool CodeGenPrepare::unfoldPowerOf2Test(CmpInst *Cmp) {
18391839
/// lose; some adjustment may be wanted there.
18401840
///
18411841
/// Return true if any changes are made.
1842-
static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
1842+
static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI,
1843+
const DataLayout &DL) {
18431844
if (TLI.hasMultipleConditionRegisters(EVT::getEVT(Cmp->getType())))
18441845
return false;
18451846

18461847
// Avoid sinking soft-FP comparisons, since this can move them into a loop.
18471848
if (TLI.useSoftFloat() && isa<FCmpInst>(Cmp))
18481849
return false;
18491850

1851+
bool UsedInPhiOrCurrentBlock = any_of(Cmp->users(), [Cmp](User *U) {
1852+
return isa<PHINode>(U) ||
1853+
cast<Instruction>(U)->getParent() == Cmp->getParent();
1854+
});
1855+
1856+
// Avoid sinking larger than legal integer comparisons unless it's ONLY used in
1857+
// another BB.
1858+
if (UsedInPhiOrCurrentBlock && Cmp->getOperand(0)->getType()->isIntegerTy() &&
1859+
Cmp->getOperand(0)->getType()->getScalarSizeInBits() >
1860+
DL.getLargestLegalIntTypeSizeInBits())
1861+
return false;
1862+
18501863
// Only insert a cmp in each block once.
18511864
DenseMap<BasicBlock *, CmpInst *> InsertedCmps;
18521865

@@ -2224,7 +2237,7 @@ bool CodeGenPrepare::optimizeURem(Instruction *Rem) {
22242237
}
22252238

22262239
bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
2227-
if (sinkCmpExpression(Cmp, *TLI))
2240+
if (sinkCmpExpression(Cmp, *TLI, *DL))
22282241
return true;
22292242

22302243
if (combineToUAddWithOverflow(Cmp, ModifiedDT))

llvm/test/CodeGen/RISCV/overflow-intrinsics.ll

Lines changed: 22 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@ define i64 @uaddo3_math_overflow_used(i64 %a, i64 %b, ptr %res) nounwind ssp {
232232
ret i64 %Q
233233
}
234234

235-
; TODO? CGP sinks the compare before we have a chance to form the overflow intrinsic.
235+
; Ensure CGP doesn't sink the compare before we have a chance to form the overflow intrinsic.
236236

237237
define i64 @uaddo4(i64 %a, i64 %b, i1 %c) nounwind ssp {
238238
; RV32-LABEL: uaddo4:
@@ -1076,41 +1076,37 @@ define i1 @usubo_ult_cmp_dominates_i64(i64 %x, i64 %y, ptr %p, i1 %cond) {
10761076
; RV32-NEXT: .cfi_offset s4, -24
10771077
; RV32-NEXT: .cfi_offset s5, -28
10781078
; RV32-NEXT: .cfi_offset s6, -32
1079-
; RV32-NEXT: mv s5, a5
1080-
; RV32-NEXT: mv s3, a1
1079+
; RV32-NEXT: mv s1, a5
1080+
; RV32-NEXT: mv s4, a1
10811081
; RV32-NEXT: andi a1, a5, 1
1082-
; RV32-NEXT: beqz a1, .LBB32_8
1082+
; RV32-NEXT: beqz a1, .LBB32_6
10831083
; RV32-NEXT: # %bb.1: # %t
10841084
; RV32-NEXT: mv s0, a4
1085-
; RV32-NEXT: mv s2, a3
1086-
; RV32-NEXT: mv s1, a2
1087-
; RV32-NEXT: mv s4, a0
1088-
; RV32-NEXT: beq s3, a3, .LBB32_3
1085+
; RV32-NEXT: mv s3, a3
1086+
; RV32-NEXT: mv s2, a2
1087+
; RV32-NEXT: mv s5, a0
1088+
; RV32-NEXT: beq s4, a3, .LBB32_3
10891089
; RV32-NEXT: # %bb.2: # %t
1090-
; RV32-NEXT: sltu s6, s3, s2
1090+
; RV32-NEXT: sltu s6, s4, s3
10911091
; RV32-NEXT: j .LBB32_4
10921092
; RV32-NEXT: .LBB32_3:
1093-
; RV32-NEXT: sltu s6, s4, s1
1093+
; RV32-NEXT: sltu s6, s5, s2
10941094
; RV32-NEXT: .LBB32_4: # %t
10951095
; RV32-NEXT: mv a0, s6
10961096
; RV32-NEXT: call call
1097-
; RV32-NEXT: beqz s6, .LBB32_8
1097+
; RV32-NEXT: beqz s6, .LBB32_6
10981098
; RV32-NEXT: # %bb.5: # %end
1099-
; RV32-NEXT: sltu a1, s4, s1
1100-
; RV32-NEXT: mv a0, a1
1101-
; RV32-NEXT: beq s3, s2, .LBB32_7
1102-
; RV32-NEXT: # %bb.6: # %end
1103-
; RV32-NEXT: sltu a0, s3, s2
1104-
; RV32-NEXT: .LBB32_7: # %end
1105-
; RV32-NEXT: sub a2, s3, s2
1106-
; RV32-NEXT: sub a3, s4, s1
1107-
; RV32-NEXT: sub a2, a2, a1
1108-
; RV32-NEXT: sw a3, 0(s0)
1109-
; RV32-NEXT: sw a2, 4(s0)
1110-
; RV32-NEXT: j .LBB32_9
1111-
; RV32-NEXT: .LBB32_8: # %f
1112-
; RV32-NEXT: mv a0, s5
1113-
; RV32-NEXT: .LBB32_9: # %f
1099+
; RV32-NEXT: sltu a0, s5, s2
1100+
; RV32-NEXT: sub a1, s4, s3
1101+
; RV32-NEXT: sub a2, s5, s2
1102+
; RV32-NEXT: sub a1, a1, a0
1103+
; RV32-NEXT: sw a2, 0(s0)
1104+
; RV32-NEXT: sw a1, 4(s0)
1105+
; RV32-NEXT: mv a0, s6
1106+
; RV32-NEXT: j .LBB32_7
1107+
; RV32-NEXT: .LBB32_6: # %f
1108+
; RV32-NEXT: mv a0, s1
1109+
; RV32-NEXT: .LBB32_7: # %f
11141110
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
11151111
; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
11161112
; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload

llvm/test/CodeGen/X86/pr166534.ll

Lines changed: 16 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -7,100 +7,64 @@
77
define void @pr166534(ptr %pa, ptr %pb, ptr %pc, ptr %pd) {
88
; SSE2-LABEL: pr166534:
99
; SSE2: # %bb.0: # %entry
10-
; SSE2-NEXT: movq (%rdi), %rax
11-
; SSE2-NEXT: movq 8(%rdi), %r8
1210
; SSE2-NEXT: movdqu (%rdi), %xmm0
13-
; SSE2-NEXT: movq (%rsi), %r9
14-
; SSE2-NEXT: movq 8(%rsi), %rdi
1511
; SSE2-NEXT: movdqu (%rsi), %xmm1
1612
; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
1713
; SSE2-NEXT: pmovmskb %xmm1, %esi
18-
; SSE2-NEXT: xorl %r10d, %r10d
14+
; SSE2-NEXT: xorl %eax, %eax
1915
; SSE2-NEXT: cmpl $65535, %esi # imm = 0xFFFF
20-
; SSE2-NEXT: sete %r10b
21-
; SSE2-NEXT: orq %r10, (%rdx)
16+
; SSE2-NEXT: sete %al
17+
; SSE2-NEXT: orq %rax, (%rdx)
2218
; SSE2-NEXT: cmpl $65535, %esi # imm = 0xFFFF
2319
; SSE2-NEXT: jne .LBB0_2
2420
; SSE2-NEXT: # %bb.1: # %if.then
25-
; SSE2-NEXT: xorq %r9, %rax
26-
; SSE2-NEXT: xorq %rdi, %r8
27-
; SSE2-NEXT: xorl %edx, %edx
28-
; SSE2-NEXT: orq %rax, %r8
29-
; SSE2-NEXT: sete %dl
30-
; SSE2-NEXT: orq %rdx, (%rcx)
21+
; SSE2-NEXT: orq %rax, (%rcx)
3122
; SSE2-NEXT: .LBB0_2: # %if.end
3223
; SSE2-NEXT: retq
3324
;
3425
; SSE4-LABEL: pr166534:
3526
; SSE4: # %bb.0: # %entry
36-
; SSE4-NEXT: movq (%rdi), %rax
37-
; SSE4-NEXT: movq 8(%rdi), %r8
3827
; SSE4-NEXT: movdqu (%rdi), %xmm0
39-
; SSE4-NEXT: movq (%rsi), %r9
40-
; SSE4-NEXT: movq 8(%rsi), %rdi
4128
; SSE4-NEXT: movdqu (%rsi), %xmm1
4229
; SSE4-NEXT: pxor %xmm0, %xmm1
43-
; SSE4-NEXT: xorl %esi, %esi
30+
; SSE4-NEXT: xorl %eax, %eax
4431
; SSE4-NEXT: ptest %xmm1, %xmm1
45-
; SSE4-NEXT: sete %sil
46-
; SSE4-NEXT: orq %rsi, (%rdx)
32+
; SSE4-NEXT: sete %al
33+
; SSE4-NEXT: orq %rax, (%rdx)
4734
; SSE4-NEXT: ptest %xmm1, %xmm1
4835
; SSE4-NEXT: jne .LBB0_2
4936
; SSE4-NEXT: # %bb.1: # %if.then
50-
; SSE4-NEXT: xorq %r9, %rax
51-
; SSE4-NEXT: xorq %rdi, %r8
52-
; SSE4-NEXT: xorl %edx, %edx
53-
; SSE4-NEXT: orq %rax, %r8
54-
; SSE4-NEXT: sete %dl
55-
; SSE4-NEXT: orq %rdx, (%rcx)
37+
; SSE4-NEXT: orq %rax, (%rcx)
5638
; SSE4-NEXT: .LBB0_2: # %if.end
5739
; SSE4-NEXT: retq
5840
;
5941
; AVX2-LABEL: pr166534:
6042
; AVX2: # %bb.0: # %entry
61-
; AVX2-NEXT: movq (%rdi), %rax
62-
; AVX2-NEXT: movq 8(%rdi), %r8
6343
; AVX2-NEXT: vmovdqu (%rdi), %xmm0
64-
; AVX2-NEXT: movq (%rsi), %rdi
6544
; AVX2-NEXT: vpxor (%rsi), %xmm0, %xmm0
66-
; AVX2-NEXT: movq 8(%rsi), %rsi
67-
; AVX2-NEXT: xorl %r9d, %r9d
45+
; AVX2-NEXT: xorl %eax, %eax
6846
; AVX2-NEXT: vptest %xmm0, %xmm0
69-
; AVX2-NEXT: sete %r9b
70-
; AVX2-NEXT: orq %r9, (%rdx)
47+
; AVX2-NEXT: sete %al
48+
; AVX2-NEXT: orq %rax, (%rdx)
7149
; AVX2-NEXT: vptest %xmm0, %xmm0
7250
; AVX2-NEXT: jne .LBB0_2
7351
; AVX2-NEXT: # %bb.1: # %if.then
74-
; AVX2-NEXT: xorq %rdi, %rax
75-
; AVX2-NEXT: xorq %rsi, %r8
76-
; AVX2-NEXT: xorl %edx, %edx
77-
; AVX2-NEXT: orq %rax, %r8
78-
; AVX2-NEXT: sete %dl
79-
; AVX2-NEXT: orq %rdx, (%rcx)
52+
; AVX2-NEXT: orq %rax, (%rcx)
8053
; AVX2-NEXT: .LBB0_2: # %if.end
8154
; AVX2-NEXT: retq
8255
;
8356
; AVX512-LABEL: pr166534:
8457
; AVX512: # %bb.0: # %entry
85-
; AVX512-NEXT: movq (%rdi), %rax
86-
; AVX512-NEXT: movq 8(%rdi), %r8
8758
; AVX512-NEXT: vmovdqu (%rdi), %xmm0
88-
; AVX512-NEXT: movq (%rsi), %r9
89-
; AVX512-NEXT: movq 8(%rsi), %rdi
9059
; AVX512-NEXT: vpxor (%rsi), %xmm0, %xmm0
91-
; AVX512-NEXT: xorl %esi, %esi
60+
; AVX512-NEXT: xorl %eax, %eax
9261
; AVX512-NEXT: vptest %xmm0, %xmm0
93-
; AVX512-NEXT: sete %sil
94-
; AVX512-NEXT: orq %rsi, (%rdx)
62+
; AVX512-NEXT: sete %al
63+
; AVX512-NEXT: orq %rax, (%rdx)
9564
; AVX512-NEXT: vptest %xmm0, %xmm0
9665
; AVX512-NEXT: jne .LBB0_2
9766
; AVX512-NEXT: # %bb.1: # %if.then
98-
; AVX512-NEXT: xorq %r9, %rax
99-
; AVX512-NEXT: xorq %rdi, %r8
100-
; AVX512-NEXT: xorl %edx, %edx
101-
; AVX512-NEXT: orq %rax, %r8
102-
; AVX512-NEXT: sete %dl
103-
; AVX512-NEXT: orq %rdx, (%rcx)
67+
; AVX512-NEXT: orq %rax, (%rcx)
10468
; AVX512-NEXT: .LBB0_2: # %if.end
10569
; AVX512-NEXT: retq
10670
entry:

0 commit comments

Comments
 (0)