@@ -170,25 +170,26 @@ define <2 x i64> @var_shuffle_zero_v2i64(<2 x i64> %v, <2 x i64> %indices) nounw
 ; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512-NEXT: vpmovsxbq {{.*#+}} xmm2 = [3,3]
 ; AVX512-NEXT: vpcmpnleuq %zmm2, %zmm1, %k1
-; AVX512-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; AVX512-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1}
-; AVX512-NEXT: vpaddq %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vpermilpd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1}
+; AVX512-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX512-NEXT: vpblendmq %zmm3, %zmm1, %zmm3 {%k1}
+; AVX512-NEXT: vpaddq %xmm3, %xmm3, %xmm3
+; AVX512-NEXT: vpermilpd %xmm3, %xmm0, %xmm0
+; AVX512-NEXT: vpcmpleuq %zmm2, %zmm1, %k1
+; AVX512-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
 ;
 ; AVX512VL-LABEL: var_shuffle_zero_v2i64:
 ; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %k1
-; AVX512VL-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; AVX512VL-NEXT: vmovdqa64 %xmm2, %xmm1 {%k1}
-; AVX512VL-NEXT: vpaddq %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vpermilpd %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1}
+; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [3,3]
+; AVX512VL-NEXT: vpcmpnleuq %xmm2, %xmm1, %k1
+; AVX512VL-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX512VL-NEXT: vpblendmq %xmm3, %xmm1, %xmm3 {%k1}
+; AVX512VL-NEXT: vpaddq %xmm3, %xmm3, %xmm3
+; AVX512VL-NEXT: vpermilpd %xmm3, %xmm0, %xmm0
+; AVX512VL-NEXT: vpcmpleuq %xmm2, %xmm1, %k1
+; AVX512VL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VL-NEXT: retq
   %cmp = icmp ugt <2 x i64> %indices, <i64 3, i64 3>
   %or = select <2 x i1> %cmp, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %indices
@@ -355,24 +356,26 @@ define <4 x i32> @var_shuffle_zero_v4i32(<4 x i32> %v, <4 x i32> %indices) nounw
 ; AVX512-LABEL: var_shuffle_zero_v4i32:
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
-; AVX512-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %k1
-; AVX512-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; AVX512-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
-; AVX512-NEXT: vpermilps %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [3,3,3,3]
+; AVX512-NEXT: vpcmpnleud %zmm2, %zmm1, %k1
+; AVX512-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX512-NEXT: vpblendmd %zmm3, %zmm1, %zmm3 {%k1}
+; AVX512-NEXT: vpermilps %xmm3, %xmm0, %xmm0
+; AVX512-NEXT: vpcmpleud %zmm2, %zmm1, %k1
+; AVX512-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
 ;
 ; AVX512VL-LABEL: var_shuffle_zero_v4i32:
 ; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %k1
-; AVX512VL-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; AVX512VL-NEXT: vmovdqa32 %xmm2, %xmm1 {%k1}
-; AVX512VL-NEXT: vpermilps %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1}
+; AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [3,3,3,3]
+; AVX512VL-NEXT: vpcmpnleud %xmm2, %xmm1, %k1
+; AVX512VL-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX512VL-NEXT: vpblendmd %xmm3, %xmm1, %xmm3 {%k1}
+; AVX512VL-NEXT: vpermilps %xmm3, %xmm0, %xmm0
+; AVX512VL-NEXT: vpcmpleud %xmm2, %xmm1, %k1
+; AVX512VL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
 ; AVX512VL-NEXT: retq
   %cmp = icmp ugt <4 x i32> %indices, <i32 3, i32 3, i32 3, i32 3>
   %or = select <4 x i1> %cmp, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %indices
@@ -600,12 +603,12 @@ define <8 x i16> @var_shuffle_zero_v8i16(<8 x i16> %v, <8 x i16> %indices) nounw
 ;
 ; AVX512VL-LABEL: var_shuffle_zero_v8i16:
 ; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpcmpnleuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
-; AVX512VL-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; AVX512VL-NEXT: vmovdqu16 %xmm2, %xmm1 {%k1}
-; AVX512VL-NEXT: vpermw %xmm0, %xmm1, %xmm0
-; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vmovdqu16 %xmm1, %xmm0 {%k1}
+; AVX512VL-NEXT: vpbroadcastw {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7]
+; AVX512VL-NEXT: vpcmpnleuw %xmm2, %xmm1, %k1
+; AVX512VL-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX512VL-NEXT: vpcmpleuw %xmm2, %xmm1, %k2
+; AVX512VL-NEXT: vmovdqu16 %xmm3, %xmm1 {%k1}
+; AVX512VL-NEXT: vpermw %xmm0, %xmm1, %xmm0 {%k2} {z}
 ; AVX512VL-NEXT: retq
   %cmp = icmp ugt <8 x i16> %indices, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
   %or = select <8 x i1> %cmp, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %indices
@@ -923,12 +926,12 @@ define <16 x i8> @var_shuffle_zero_v16i8(<16 x i8> %v, <16 x i8> %indices) nounw
 ;
 ; AVX512VL-LABEL: var_shuffle_zero_v16i8:
 ; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpcmpnleub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
-; AVX512VL-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; AVX512VL-NEXT: vmovdqu8 %xmm2, %xmm1 {%k1}
-; AVX512VL-NEXT: vpshufb %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vmovdqu8 %xmm1, %xmm0 {%k1}
+; AVX512VL-NEXT: vpbroadcastb {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VL-NEXT: vpcmpnleub %xmm2, %xmm1, %k1
+; AVX512VL-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX512VL-NEXT: vpcmpleub %xmm2, %xmm1, %k2
+; AVX512VL-NEXT: vmovdqu8 %xmm3, %xmm1 {%k1}
+; AVX512VL-NEXT: vpshufb %xmm1, %xmm0, %xmm0 {%k2} {z}
 ; AVX512VL-NEXT: retq
   %cmp = icmp ugt <16 x i8> %indices, <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>
   %or = select <16 x i1> %cmp, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %indices
@@ -1139,25 +1142,25 @@ define <2 x double> @var_shuffle_zero_v2f64(<2 x double> %v, <2 x i64> %indices)
 ; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512-NEXT: vpmovsxbq {{.*#+}} xmm2 = [3,3]
 ; AVX512-NEXT: vpcmpnleuq %zmm2, %zmm1, %k1
-; AVX512-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; AVX512-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1}
-; AVX512-NEXT: vpaddq %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vpermilpd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vmovapd %zmm1, %zmm0 {%k1}
+; AVX512-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX512-NEXT: vpblendmq %zmm3, %zmm1, %zmm3 {%k1}
+; AVX512-NEXT: vpaddq %xmm3, %xmm3, %xmm3
+; AVX512-NEXT: vpermilpd %xmm3, %xmm0, %xmm0
+; AVX512-NEXT: vpcmpleuq %zmm2, %zmm1, %k1
+; AVX512-NEXT: vmovapd %zmm0, %zmm0 {%k1} {z}
 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
 ;
 ; AVX512VL-LABEL: var_shuffle_zero_v2f64:
 ; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpcmpnleuq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %k1
-; AVX512VL-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; AVX512VL-NEXT: vmovdqa64 %xmm2, %xmm1 {%k1}
-; AVX512VL-NEXT: vpaddq %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vpermilpd %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vmovapd %xmm1, %xmm0 {%k1}
+; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm2 = [3,3]
+; AVX512VL-NEXT: vpcmpnleuq %xmm2, %xmm1, %k1
+; AVX512VL-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX512VL-NEXT: vpblendmq %xmm3, %xmm1, %xmm3 {%k1}
+; AVX512VL-NEXT: vpaddq %xmm3, %xmm3, %xmm3
+; AVX512VL-NEXT: vpcmpleuq %xmm2, %xmm1, %k1
+; AVX512VL-NEXT: vpermilpd %xmm3, %xmm0, %xmm0 {%k1} {z}
 ; AVX512VL-NEXT: retq
   %cmp = icmp ugt <2 x i64> %indices, <i64 3, i64 3>
   %or = select <2 x i1> %cmp, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %indices
@@ -1324,24 +1327,25 @@ define <4 x float> @var_shuffle_zero_v4f32(<4 x float> %v, <4 x i32> %indices) n
 ; AVX512-LABEL: var_shuffle_zero_v4f32:
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
-; AVX512-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %k1
-; AVX512-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; AVX512-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
-; AVX512-NEXT: vpermilps %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vmovaps %zmm1, %zmm0 {%k1}
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [3,3,3,3]
+; AVX512-NEXT: vpcmpnleud %zmm2, %zmm1, %k1
+; AVX512-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX512-NEXT: vpblendmd %zmm3, %zmm1, %zmm3 {%k1}
+; AVX512-NEXT: vpermilps %xmm3, %xmm0, %xmm0
+; AVX512-NEXT: vpcmpleud %zmm2, %zmm1, %k1
+; AVX512-NEXT: vmovaps %zmm0, %zmm0 {%k1} {z}
 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
 ;
 ; AVX512VL-LABEL: var_shuffle_zero_v4f32:
 ; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpcmpnleud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %k1
-; AVX512VL-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; AVX512VL-NEXT: vmovdqa32 %xmm2, %xmm1 {%k1}
-; AVX512VL-NEXT: vpermilps %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vmovaps %xmm1, %xmm0 {%k1}
+; AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [3,3,3,3]
+; AVX512VL-NEXT: vpcmpnleud %xmm2, %xmm1, %k1
+; AVX512VL-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX512VL-NEXT: vpcmpleud %xmm2, %xmm1, %k2
+; AVX512VL-NEXT: vmovdqa32 %xmm3, %xmm1 {%k1}
+; AVX512VL-NEXT: vpermilps %xmm1, %xmm0, %xmm0 {%k2} {z}
 ; AVX512VL-NEXT: retq
   %cmp = icmp ugt <4 x i32> %indices, <i32 3, i32 3, i32 3, i32 3>
   %or = select <4 x i1> %cmp, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %indices