@@ -88,53 +88,47 @@ define <4 x i32> @combine_pmaddwd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
8888 ret <4 x i32 > %4
8989}
9090
91- ; TODO
92- define i32 @combine_pmaddwd_constant () {
91+ ; TODO: presumably fold to a constant - lane [2] = (-5*13)+(6*-15) = -155 = 4294967141 (i32 -155 written as unsigned)
92+ define < 4 x i32 > @combine_pmaddwd_constant () {
9393; SSE-LABEL: combine_pmaddwd_constant:
9494; SSE: # %bb.0:
9595; SSE-NEXT: pmovsxbw {{.*#+}} xmm0 = [65535,2,3,65532,65531,6,7,65528]
9696; SSE-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [65531,7,65527,65525,13,65521,17,65517]
97- ; SSE-NEXT: pextrd $2, %xmm0, %eax
9897; SSE-NEXT: retq
9998;
10099; AVX-LABEL: combine_pmaddwd_constant:
101100; AVX: # %bb.0:
102101; AVX-NEXT: vpmovsxbw {{.*#+}} xmm0 = [65535,2,3,65532,65531,6,7,65528]
103102; AVX-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [65531,7,65527,65525,13,65521,17,65517]
104- ; AVX-NEXT: vpextrd $2, %xmm0, %eax
105103; AVX-NEXT: retq
106104 %1 = call <4 x i32 > @llvm.x86.sse2.pmadd.wd (<8 x i16 > <i16 -1 , i16 2 , i16 3 , i16 -4 , i16 -5 , i16 6 , i16 7 , i16 -8 >, <8 x i16 > <i16 -5 , i16 7 , i16 -9 , i16 -11 , i16 13 , i16 -15 , i16 17 , i16 -19 >) ; both operands are constant vectors; the expected-output lines above show the negative i16 values as unsigned (65535 = -1)
107- %2 = extractelement <4 x i32 > %1 , i32 2 ; (-5*13)+(6*-15) = -155
108- ret i32 %2
105+ ret <4 x i32 > %1 ; return all four result lanes
109106}
110107
111108; Ensure we don't assume pmaddwd performs add nsw: each product here is 0x40000000, so adding the pair exceeds the signed i32 maximum.
112- define i32 @combine_pmaddwd_constant_nsw () {
109+ ; TODO: presumably fold to a constant - every lane = (-32768*-32768)+(-32768*-32768) = 0x80000000 = 2147483648
110+ define <4 x i32 > @combine_pmaddwd_constant_nsw () {
113111; SSE-LABEL: combine_pmaddwd_constant_nsw:
114112; SSE: # %bb.0:
115113; SSE-NEXT: movdqa {{.*#+}} xmm0 = [32768,32768,32768,32768,32768,32768,32768,32768]
116114; SSE-NEXT: pmaddwd %xmm0, %xmm0
117- ; SSE-NEXT: movd %xmm0, %eax
118115; SSE-NEXT: retq
119116;
120117; AVX1-LABEL: combine_pmaddwd_constant_nsw:
121118; AVX1: # %bb.0:
122119; AVX1-NEXT: vbroadcastss {{.*#+}} xmm0 = [32768,32768,32768,32768,32768,32768,32768,32768]
123120; AVX1-NEXT: vpmaddwd %xmm0, %xmm0, %xmm0
124- ; AVX1-NEXT: vmovd %xmm0, %eax
125121; AVX1-NEXT: retq
126122;
127123; AVX2-LABEL: combine_pmaddwd_constant_nsw:
128124; AVX2: # %bb.0:
129125; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm0 = [32768,32768,32768,32768,32768,32768,32768,32768]
130126; AVX2-NEXT: vpmaddwd %xmm0, %xmm0, %xmm0
131- ; AVX2-NEXT: vmovd %xmm0, %eax
132127; AVX2-NEXT: retq
133128 %1 = insertelement <8 x i16 > undef , i16 32768 , i32 0 ; i16 32768 is the bit pattern of -32768
134129 %2 = shufflevector <8 x i16 > %1 , <8 x i16 > undef , <8 x i32 > zeroinitializer ; all-zero mask: copy element 0 into all 8 lanes
135130 %3 = call <4 x i32 > @llvm.x86.sse2.pmadd.wd (<8 x i16 > %2 , <8 x i16 > %2 ) ; the splat is multiplied by itself, then adjacent products are added
136- %4 = extractelement <4 x i32 > %3 , i32 0 ; (-32768*-32768)+(-32768*-32768) = 0x80000000
137- ret i32 %4
131+ ret <4 x i32 > %3 ; return all four result lanes
138132}
139133
140134define <8 x i16 > @combine_pmaddubsw_zero (<16 x i8 > %a0 , <16 x i8 > %a1 ) {
0 commit comments