@@ -1990,38 +1990,38 @@ define <8 x half> @maxnum_v8f16(<8 x half> %0, <8 x half> %1) #0 {
19901990define void @pr63114 () {
19911991; CHECK-LIBCALL-LABEL: pr63114:
19921992; CHECK-LIBCALL: # %bb.0:
1993- ; CHECK-LIBCALL-NEXT: movdqu (%rax), %xmm4
1994- ; CHECK-LIBCALL-NEXT: pshuflw {{.*#+}} xmm0 = xmm4[0,1,3,3,4,5,6,7]
1995- ; CHECK-LIBCALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,1 ]
1996- ; CHECK-LIBCALL-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,0,65535,65535,65535,65535]
1997- ; CHECK-LIBCALL-NEXT: pand %xmm1 , %xmm0
1998- ; CHECK-LIBCALL-NEXT: movq {{.*#+}} xmm2 = [0,0,0,15360,0,0,0,0]
1999- ; CHECK-LIBCALL-NEXT: por % xmm2, % xmm0
2000- ; CHECK-LIBCALL-NEXT: movdqa {{.*#+}} xmm3 = [65535,65535,65535,65535,65535,65535,65535,0]
2001- ; CHECK-LIBCALL-NEXT: pand %xmm3, %xmm0
2002- ; CHECK-LIBCALL-NEXT: movdqa {{.*#+}} xmm5 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,60]
2003- ; CHECK-LIBCALL-NEXT: por %xmm5, % xmm0
2004- ; CHECK-LIBCALL-NEXT: pshufhw {{.*#+}} xmm6 = xmm4[0,1,2,3,4,5,7,7]
2005- ; CHECK-LIBCALL-NEXT: pshufd {{.*#+}} xmm6 = xmm6[0,2,2,3]
2006- ; CHECK-LIBCALL-NEXT: pand %xmm1 , %xmm6
2007- ; CHECK-LIBCALL-NEXT: por %xmm2 , %xmm6
2008- ; CHECK-LIBCALL-NEXT: pand %xmm3 , %xmm6
2009- ; CHECK-LIBCALL-NEXT: por %xmm5, %xmm6
2010- ; CHECK-LIBCALL-NEXT: pshufhw {{.*#+}} xmm7 = xmm4[0,1,2,3,5,5,5,5 ]
2011- ; CHECK-LIBCALL-NEXT: shufps {{.*#+}} xmm4 = xmm4 [0,3,0,3 ]
2012- ; CHECK-LIBCALL-NEXT: pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,5,5,5,5 ]
2013- ; CHECK-LIBCALL-NEXT: pand %xmm1, % xmm4
2014- ; CHECK-LIBCALL-NEXT: por %xmm2, % xmm4
2015- ; CHECK-LIBCALL-NEXT: pand %xmm3, %xmm4
2016- ; CHECK-LIBCALL-NEXT: por %xmm5, %xmm4
2017- ; CHECK-LIBCALL-NEXT: pand %xmm1, %xmm7
2018- ; CHECK-LIBCALL-NEXT: por %xmm2, %xmm7
2019- ; CHECK-LIBCALL-NEXT: pand %xmm3, %xmm7
2020- ; CHECK-LIBCALL-NEXT: por %xmm5, %xmm7
2021- ; CHECK-LIBCALL-NEXT: movdqu %xmm7, 0
2022- ; CHECK-LIBCALL-NEXT: movdqu %xmm4, 32
2023- ; CHECK-LIBCALL-NEXT: movdqu %xmm6, 48
2024- ; CHECK-LIBCALL-NEXT: movdqu %xmm0, 16
1993+ ; CHECK-LIBCALL-NEXT: movups (%rax), %xmm0
1994+ ; CHECK-LIBCALL-NEXT: movaps % xmm0, %xmm1
1995+ ; CHECK-LIBCALL-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3 ]
1996+ ; CHECK-LIBCALL-NEXT: movaps %xmm0, %xmm3
1997+ ; CHECK-LIBCALL-NEXT: psrld $16 , %xmm3
1998+ ; CHECK-LIBCALL-NEXT: movaps %xmm0, % xmm2
1999+ ; CHECK-LIBCALL-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1], xmm0[1]
2000+ ; CHECK-LIBCALL-NEXT: movaps %xmm0, %xmm5
2001+ ; CHECK-LIBCALL-NEXT: psrldq {{.*#+}} xmm5 = xmm5[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2002+ ; CHECK-LIBCALL-NEXT: movaps %xmm0, %xmm4
2003+ ; CHECK-LIBCALL-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1], xmm0[1,1]
2004+ ; CHECK-LIBCALL-NEXT: movaps %xmm0, % xmm6
2005+ ; CHECK-LIBCALL-NEXT: psrldq {{.*#+}} xmm6 = xmm6[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2006+ ; CHECK-LIBCALL-NEXT: movaps %xmm0 , %xmm7
2007+ ; CHECK-LIBCALL-NEXT: psrlq $48 , %xmm7
2008+ ; CHECK-LIBCALL-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip) , %xmm8
2009+ ; CHECK-LIBCALL-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm8[0],xmm7[1],xmm8[1],xmm7[2],xmm8[2],xmm7[3],xmm8[3]
2010+ ; CHECK-LIBCALL-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm8[0],xmm0[1],xmm8[1],xmm0[2],xmm8[2],xmm0[3],xmm8[3 ]
2011+ ; CHECK-LIBCALL-NEXT: shufps {{.*#+}} xmm0 = xmm0 [0,0],xmm7[0,0 ]
2012+ ; CHECK-LIBCALL-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm8[0],xmm6[1],xmm8[1],xmm6[2],xmm8[2],xmm6[3],xmm8[3 ]
2013+ ; CHECK-LIBCALL-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm8[0],xmm4[1],xmm8[1],xmm4[2],xmm8[2],xmm4[3],xmm8[3]
2014+ ; CHECK-LIBCALL-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,0],xmm6[0,0]
2015+ ; CHECK-LIBCALL-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm8[0],xmm5[1],xmm8[1],xmm5[2],xmm8[2],xmm5[3],xmm8[3]
2016+ ; CHECK-LIBCALL-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm8[0],xmm2[1],xmm8[1],xmm2[2],xmm8[2],xmm2[3],xmm8[3]
2017+ ; CHECK-LIBCALL-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm5[0,0]
2018+ ; CHECK-LIBCALL-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1],xmm3[2],xmm8[2],xmm3[3],xmm8[3]
2019+ ; CHECK-LIBCALL-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm8[0],xmm1[1],xmm8[1],xmm1[2],xmm8[2],xmm1[3],xmm8[3]
2020+ ; CHECK-LIBCALL-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm3[0,0]
2021+ ; CHECK-LIBCALL-NEXT: movups %xmm1, 32
2022+ ; CHECK-LIBCALL-NEXT: movups %xmm2, 48
2023+ ; CHECK-LIBCALL-NEXT: movups %xmm4, 0
2024+ ; CHECK-LIBCALL-NEXT: movups %xmm0, 16
20252025; CHECK-LIBCALL-NEXT: retq
20262026;
20272027; BWON-F16C-LABEL: pr63114:
@@ -2055,38 +2055,43 @@ define void @pr63114() {
20552055;
20562056; CHECK-I686-LABEL: pr63114:
20572057; CHECK-I686: # %bb.0:
2058- ; CHECK-I686-NEXT: movdqu (%eax), %xmm6
2059- ; CHECK-I686-NEXT: pshuflw {{.*#+}} xmm0 = xmm6[0,1,3,3,4,5,6,7]
2060- ; CHECK-I686-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,1]
2061- ; CHECK-I686-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,0,65535,65535,65535,65535]
2062- ; CHECK-I686-NEXT: pand %xmm1, %xmm0
2063- ; CHECK-I686-NEXT: movq {{.*#+}} xmm2 = [0,0,0,15360,0,0,0,0]
2064- ; CHECK-I686-NEXT: por %xmm2, %xmm0
2065- ; CHECK-I686-NEXT: movdqa {{.*#+}} xmm3 = [65535,65535,65535,65535,65535,65535,65535,0]
2066- ; CHECK-I686-NEXT: pand %xmm3, %xmm0
2067- ; CHECK-I686-NEXT: movdqa {{.*#+}} xmm4 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,60]
2068- ; CHECK-I686-NEXT: por %xmm4, %xmm0
2069- ; CHECK-I686-NEXT: pshufhw {{.*#+}} xmm5 = xmm6[0,1,2,3,4,5,7,7]
2070- ; CHECK-I686-NEXT: pshufd {{.*#+}} xmm5 = xmm5[0,2,2,3]
2071- ; CHECK-I686-NEXT: pand %xmm1, %xmm5
2072- ; CHECK-I686-NEXT: por %xmm2, %xmm5
2073- ; CHECK-I686-NEXT: pand %xmm3, %xmm5
2074- ; CHECK-I686-NEXT: por %xmm4, %xmm5
2075- ; CHECK-I686-NEXT: pshufhw {{.*#+}} xmm7 = xmm6[0,1,2,3,5,5,5,5]
2076- ; CHECK-I686-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,3,0,3]
2077- ; CHECK-I686-NEXT: pshufhw {{.*#+}} xmm6 = xmm6[0,1,2,3,5,5,5,5]
2078- ; CHECK-I686-NEXT: pand %xmm1, %xmm6
2079- ; CHECK-I686-NEXT: por %xmm2, %xmm6
2080- ; CHECK-I686-NEXT: pand %xmm3, %xmm6
2081- ; CHECK-I686-NEXT: por %xmm4, %xmm6
2082- ; CHECK-I686-NEXT: pand %xmm1, %xmm7
2083- ; CHECK-I686-NEXT: por %xmm2, %xmm7
2084- ; CHECK-I686-NEXT: pand %xmm3, %xmm7
2085- ; CHECK-I686-NEXT: por %xmm4, %xmm7
2086- ; CHECK-I686-NEXT: movdqu %xmm7, 0
2087- ; CHECK-I686-NEXT: movdqu %xmm6, 32
2088- ; CHECK-I686-NEXT: movdqu %xmm5, 48
2089- ; CHECK-I686-NEXT: movdqu %xmm0, 16
2058+ ; CHECK-I686-NEXT: subl $28, %esp
2059+ ; CHECK-I686-NEXT: .cfi_def_cfa_offset 32
2060+ ; CHECK-I686-NEXT: movdqu (%eax), %xmm0
2061+ ; CHECK-I686-NEXT: movdqa %xmm0, %xmm3
2062+ ; CHECK-I686-NEXT: movdqa %xmm0, (%esp) # 16-byte Spill
2063+ ; CHECK-I686-NEXT: movdqa %xmm0, %xmm2
2064+ ; CHECK-I686-NEXT: movdqa %xmm0, %xmm5
2065+ ; CHECK-I686-NEXT: movdqa %xmm0, %xmm4
2066+ ; CHECK-I686-NEXT: movdqa %xmm0, %xmm7
2067+ ; CHECK-I686-NEXT: psrlq $48, %xmm7
2068+ ; CHECK-I686-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}, %xmm6
2069+ ; CHECK-I686-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm6[0],xmm7[1],xmm6[1],xmm7[2],xmm6[2],xmm7[3],xmm6[3]
2070+ ; CHECK-I686-NEXT: movdqa %xmm0, %xmm1
2071+ ; CHECK-I686-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3]
2072+ ; CHECK-I686-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm7[0,0]
2073+ ; CHECK-I686-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1,1,1]
2074+ ; CHECK-I686-NEXT: psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2075+ ; CHECK-I686-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3]
2076+ ; CHECK-I686-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm6[0],xmm4[1],xmm6[1],xmm4[2],xmm6[2],xmm4[3],xmm6[3]
2077+ ; CHECK-I686-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,0],xmm1[0,0]
2078+ ; CHECK-I686-NEXT: punpckhqdq {{.*#+}} xmm2 = xmm2[1,1]
2079+ ; CHECK-I686-NEXT: psrldq {{.*#+}} xmm5 = xmm5[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2080+ ; CHECK-I686-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3]
2081+ ; CHECK-I686-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm6[0],xmm2[1],xmm6[1],xmm2[2],xmm6[2],xmm2[3],xmm6[3]
2082+ ; CHECK-I686-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm5[0,0]
2083+ ; CHECK-I686-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,3,3,3]
2084+ ; CHECK-I686-NEXT: movdqa (%esp), %xmm1 # 16-byte Reload
2085+ ; CHECK-I686-NEXT: psrld $16, %xmm1
2086+ ; CHECK-I686-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3]
2087+ ; CHECK-I686-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm6[0],xmm3[1],xmm6[1],xmm3[2],xmm6[2],xmm3[3],xmm6[3]
2088+ ; CHECK-I686-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,0],xmm1[0,0]
2089+ ; CHECK-I686-NEXT: movups %xmm3, 32
2090+ ; CHECK-I686-NEXT: movups %xmm2, 48
2091+ ; CHECK-I686-NEXT: movups %xmm4, 0
2092+ ; CHECK-I686-NEXT: movups %xmm0, 16
2093+ ; CHECK-I686-NEXT: addl $28, %esp
2094+ ; CHECK-I686-NEXT: .cfi_def_cfa_offset 4
20902095; CHECK-I686-NEXT: retl
20912096 %1 = load <24 x half >, ptr poison, align 2
20922097 %2 = shufflevector <24 x half > %1 , <24 x half > poison, <8 x i32 > <i32 2 , i32 5 , i32 8 , i32 11 , i32 14 , i32 17 , i32 20 , i32 23 >
0 commit comments