@@ -135,40 +135,39 @@ define i8 @v8i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i64> %d) {
 define i8 @v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x double> %d) {
 ; SSE-LABEL: v8f64:
 ; SSE: # BB#0:
+; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm8
 ; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm9
 ; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm10
-; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm8
 ; SSE-NEXT: movapd {{[0-9]+}}(%rsp), %xmm11
-; SSE-NEXT: cmpltpd %xmm3, %xmm7
-; SSE-NEXT: cmpltpd %xmm2, %xmm6
-; SSE-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,2],xmm7[0,2]
-; SSE-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,4,5,6,7,0,1,4,5,8,9,12,13]
-; SSE-NEXT: pshufb %xmm2, %xmm6
 ; SSE-NEXT: cmpltpd %xmm1, %xmm5
 ; SSE-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,2,2,3]
 ; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm5[0,2,2,3,4,5,6,7]
 ; SSE-NEXT: cmpltpd %xmm0, %xmm4
 ; SSE-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,2,2,3]
 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm4[0,2,2,3,4,5,6,7]
 ; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm6[4,5,6,7]
+; SSE-NEXT: cmpltpd %xmm3, %xmm7
+; SSE-NEXT: cmpltpd %xmm2, %xmm6
+; SSE-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,2],xmm7[0,2]
+; SSE-NEXT: packssdw %xmm6, %xmm6
+; SSE-NEXT: pblendw {{.*#+}} xmm6 = xmm0[0,1,2,3],xmm6[4,5,6,7]
 ; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm11
-; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm8
-; SSE-NEXT: shufps {{.*#+}} xmm8 = xmm8[0,2],xmm11[0,2]
-; SSE-NEXT: pshufb %xmm2, %xmm8
+; SSE-NEXT: shufps {{.*#+}} xmm11 = xmm11[0,2,2,3]
+; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm11[0,2,2,3,4,5,6,7]
 ; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm10
 ; SSE-NEXT: shufps {{.*#+}} xmm10 = xmm10[0,2,2,3]
 ; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm10[0,2,2,3,4,5,6,7]
+; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
 ; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm9
-; SSE-NEXT: shufps {{.*#+}} xmm9 = xmm9[0,2,2,3]
-; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm9[0,2,2,3,4,5,6,7]
-; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm8[4,5,6,7]
-; SSE-NEXT: pand %xmm0, %xmm2
-; SSE-NEXT: psllw $15, %xmm2
-; SSE-NEXT: psraw $15, %xmm2
-; SSE-NEXT: packsswb %xmm0, %xmm2
-; SSE-NEXT: pmovmskb %xmm2, %eax
+; SSE-NEXT: cmpltpd {{[0-9]+}}(%rsp), %xmm8
+; SSE-NEXT: shufps {{.*#+}} xmm8 = xmm8[0,2],xmm9[0,2]
+; SSE-NEXT: packssdw %xmm8, %xmm8
+; SSE-NEXT: pblendw {{.*#+}} xmm8 = xmm1[0,1,2,3],xmm8[4,5,6,7]
+; SSE-NEXT: pand %xmm6, %xmm8
+; SSE-NEXT: psllw $15, %xmm8
+; SSE-NEXT: psraw $15, %xmm8
+; SSE-NEXT: packsswb %xmm0, %xmm8
+; SSE-NEXT: pmovmskb %xmm8, %eax
 ; SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
 ; SSE-NEXT: ret{{[l|q]}}
 ;
@@ -718,37 +717,23 @@ define i16 @v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x floa
 ; SSE-LABEL: v16f32:
 ; SSE: # BB#0:
 ; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm8
-; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm10
 ; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm9
+; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm10
 ; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm11
 ; SSE-NEXT: cmpltps %xmm3, %xmm7
-; SSE-NEXT: movdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; SSE-NEXT: pshufb %xmm3, %xmm7
 ; SSE-NEXT: cmpltps %xmm2, %xmm6
-; SSE-NEXT: pshufb %xmm3, %xmm6
-; SSE-NEXT: punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm7[0]
-; SSE-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; SSE-NEXT: pshufb %xmm2, %xmm6
+; SSE-NEXT: packssdw %xmm7, %xmm6
 ; SSE-NEXT: cmpltps %xmm1, %xmm5
-; SSE-NEXT: pshufb %xmm3, %xmm5
 ; SSE-NEXT: cmpltps %xmm0, %xmm4
-; SSE-NEXT: pshufb %xmm3, %xmm4
-; SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm5[0]
-; SSE-NEXT: pshufb %xmm2, %xmm4
-; SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm6[0]
+; SSE-NEXT: packssdw %xmm5, %xmm4
+; SSE-NEXT: packsswb %xmm6, %xmm4
 ; SSE-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm11
-; SSE-NEXT: pshufb %xmm3, %xmm11
-; SSE-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm9
-; SSE-NEXT: pshufb %xmm3, %xmm9
-; SSE-NEXT: punpcklqdq {{.*#+}} xmm9 = xmm9[0],xmm11[0]
-; SSE-NEXT: pshufb %xmm2, %xmm9
 ; SSE-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm10
-; SSE-NEXT: pshufb %xmm3, %xmm10
+; SSE-NEXT: packssdw %xmm11, %xmm10
+; SSE-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm9
 ; SSE-NEXT: cmpltps {{[0-9]+}}(%rsp), %xmm8
-; SSE-NEXT: pshufb %xmm3, %xmm8
-; SSE-NEXT: punpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm10[0]
-; SSE-NEXT: pshufb %xmm2, %xmm8
-; SSE-NEXT: punpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm9[0]
+; SSE-NEXT: packssdw %xmm9, %xmm8
+; SSE-NEXT: packsswb %xmm10, %xmm8
 ; SSE-NEXT: pand %xmm4, %xmm8
 ; SSE-NEXT: pmovmskb %xmm8, %eax
 ; SSE-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
@@ -759,22 +744,17 @@ define i16 @v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x floa
 ; AVX12-NEXT: vcmpltps %ymm1, %ymm3, %ymm1
 ; AVX12-NEXT: vextractf128 $1, %ymm1, %xmm3
 ; AVX12-NEXT: vpackssdw %xmm3, %xmm1, %xmm1
-; AVX12-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; AVX12-NEXT: vpshufb %xmm3, %xmm1, %xmm1
 ; AVX12-NEXT: vcmpltps %ymm0, %ymm2, %ymm0
 ; AVX12-NEXT: vextractf128 $1, %ymm0, %xmm2
 ; AVX12-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
-; AVX12-NEXT: vpshufb %xmm3, %xmm0, %xmm0
-; AVX12-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX12-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
 ; AVX12-NEXT: vcmpltps %ymm5, %ymm7, %ymm1
 ; AVX12-NEXT: vextractf128 $1, %ymm1, %xmm2
 ; AVX12-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
-; AVX12-NEXT: vpshufb %xmm3, %xmm1, %xmm1
 ; AVX12-NEXT: vcmpltps %ymm4, %ymm6, %ymm2
-; AVX12-NEXT: vextractf128 $1, %ymm2, %xmm4
-; AVX12-NEXT: vpackssdw %xmm4, %xmm2, %xmm2
-; AVX12-NEXT: vpshufb %xmm3, %xmm2, %xmm2
-; AVX12-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX12-NEXT: vextractf128 $1, %ymm2, %xmm3
+; AVX12-NEXT: vpackssdw %xmm3, %xmm2, %xmm2
+; AVX12-NEXT: vpacksswb %xmm1, %xmm2, %xmm1
 ; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
 ; AVX12-NEXT: vpmovmskb %xmm0, %eax
 ; AVX12-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
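
For context, the check lines above cover a compare-and-bitcast pattern of roughly the following shape. This is a minimal sketch reconstructed from the @v8f64 signature and the cmpltpd/pand/pmovmskb sequence; the exact fcmp predicate (shown here as ogt) and the operand order in the real test are assumptions.

define i8 @v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x double> %d) {
  ; lane-wise compare of both operand pairs (assumed predicate: ogt)
  %x0 = fcmp ogt <8 x double> %a, %b
  %x1 = fcmp ogt <8 x double> %c, %d
  ; AND the two <8 x i1> masks, then bitcast the mask vector to a scalar i8
  %y = and <8 x i1> %x0, %x1
  %res = bitcast <8 x i1> %y to i8
  ret i8 %res
}

Because each compare lane is all-ones or all-zeros, packing with signed saturation (packssdw/packsswb) preserves the mask exactly, which is why the new output can drop the pshufb shuffle-mask constants.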