Commit a5b02e1
[X86] Add EltsFromConsecutiveLoads test for infinite loop if we match reverse(vzload(ptr)) patterns (#170889)
This was fixed by #170852 - previously we'd get stuck in a loop: shuffle(movq(p),undef,1,0) -> build_vector(0, p[0]) -> shuffle(movq(p),undef,1,0) -> ...
1 parent a5c751e commit a5b02e1
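
For context, a minimal LLVM IR sketch of the shape the message describes (a hypothetical reduction, not the committed test below; the function name is illustrative): a partial <2 x i32> load is widened with zeros into a VZEXT_LOAD-shaped value whose loaded lanes are then reversed.

; Hypothetical reduction of reverse(vzload(ptr)); not part of this commit.
define <4 x i32> @reverse_vzload_sketch(ptr %p) {
  ; 64-bit load widened with zeros - lowers to a vzload (movq) pattern.
  %v = load <2 x i32>, ptr %p, align 4
  %w = shufflevector <2 x i32> %v, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; Reversing the loaded lanes is the shuffle that used to re-trigger
  ; EltsFromConsecutiveLoads and ping-pong with build_vector.
  %r = shufflevector <4 x i32> %w, <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 2, i32 3>
  ret <4 x i32> %r
}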

1 file changed: +90 -3 lines changed

llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll

Lines changed: 90 additions & 3 deletions
@@ -1,9 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512
 ;
 ; 32-bit SSE tests to make sure we do reasonable things.
 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse | FileCheck %s --check-prefixes=X86-SSE,X86-SSE1
@@ -1637,3 +1637,90 @@ define <4 x i32> @load_i32_zext_i128_v4i32(ptr %ptr) {
   %3 = bitcast i128 %2 to <4 x i32>
   ret <4 x i32> %3
 }
+
+; Don't attempt to reverse a partial VZEXT_LOAD
+define <4 x i32> @no_reverse_vzload(ptr %p0) nounwind {
+; SSE2-LABEL: no_reverse_vzload:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: paddd %xmm1, %xmm1
+; SSE2-NEXT: punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
+; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: no_reverse_vzload:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
+; SSE41-NEXT: pxor %xmm2, %xmm2
+; SSE41-NEXT: paddd %xmm1, %xmm1
+; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
+; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: no_reverse_vzload:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm2
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
+; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: no_reverse_vzload:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
+; AVX2-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: no_reverse_vzload:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
+; AVX512-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; AVX512-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+;
+; X86-SSE1-LABEL: no_reverse_vzload:
+; X86-SSE1: # %bb.0:
+; X86-SSE1-NEXT: pushl %ebx
+; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-SSE1-NEXT: xorl %ecx, %ecx
+; X86-SSE1-NEXT: cmpl $0, (%edx)
+; X86-SSE1-NEXT: setg %cl
+; X86-SSE1-NEXT: negl %ecx
+; X86-SSE1-NEXT: xorl %ebx, %ebx
+; X86-SSE1-NEXT: cmpl $0, 4(%edx)
+; X86-SSE1-NEXT: setg %bl
+; X86-SSE1-NEXT: negl %ebx
+; X86-SSE1-NEXT: movl %ebx, 4(%eax)
+; X86-SSE1-NEXT: movl %ecx, (%eax)
+; X86-SSE1-NEXT: movl $0, 12(%eax)
+; X86-SSE1-NEXT: movl $0, 8(%eax)
+; X86-SSE1-NEXT: popl %ebx
+; X86-SSE1-NEXT: retl $4
+;
+; X86-SSE41-LABEL: no_reverse_vzload:
+; X86-SSE41: # %bb.0:
+; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE41-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X86-SSE41-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0]
+; X86-SSE41-NEXT: pxor %xmm2, %xmm2
+; X86-SSE41-NEXT: paddd %xmm1, %xmm1
+; X86-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
+; X86-SSE41-NEXT: pcmpgtd %xmm1, %xmm0
+; X86-SSE41-NEXT: retl
+  %i0 = load <2 x i32>, ptr %p0, align 4
+  %i1 = shufflevector <2 x i32> %i0, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+  %i2 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %i1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+  %i3 = shl <4 x i32> %i2, <i32 4, i32 4, i32 1, i32 1>
+  %i4 = shufflevector <4 x i32> %i1, <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+  %i5 = icmp slt <4 x i32> %i3, %i4
+  %i6 = sext <4 x i1> %i5 to <4 x i32>
+  ret <4 x i32> %i6
+}
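
As the NOTE line at the top of the file says, the CHECK lines are autogenerated. A typical way to regenerate them after a codegen change, assuming a locally built llc (the build path is illustrative):

python llvm/utils/update_llc_test_checks.py --llc-binary build/bin/llc \
    llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll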
