@@ -2563,15 +2563,15 @@ define <64 x float> @test_mul8x8_f32(<64 x float> %a0, <64 x float> %a1) nounwin
25632563; AVX512F-NEXT: vbroadcastss %xmm4, %ymm12
25642564; AVX512F-NEXT: vmulps %ymm0, %ymm12, %ymm12
25652565; AVX512F-NEXT: vmovshdup {{.*#+}} xmm13 = xmm4[1,1,3,3]
2566- ; AVX512F-NEXT: vbroadcastsd %xmm13, %ymm13
2566+ ; AVX512F-NEXT: vbroadcastss %xmm13, %ymm13
25672567; AVX512F-NEXT: vmulps %ymm13, %ymm11, %ymm13
25682568; AVX512F-NEXT: vaddps %ymm13, %ymm12, %ymm12
2569- ; AVX512F-NEXT: vshufps {{.*#+}} xmm13 = xmm4[2,2,2,2 ]
2570- ; AVX512F-NEXT: vbroadcastsd %xmm13, %ymm13
2569+ ; AVX512F-NEXT: vshufpd {{.*#+}} xmm13 = xmm4[1,0 ]
2570+ ; AVX512F-NEXT: vbroadcastss %xmm13, %ymm13
25712571; AVX512F-NEXT: vmulps %ymm1, %ymm13, %ymm13
25722572; AVX512F-NEXT: vaddps %ymm13, %ymm12, %ymm12
25732573; AVX512F-NEXT: vshufps {{.*#+}} xmm13 = xmm4[3,3,3,3]
2574- ; AVX512F-NEXT: vbroadcastsd %xmm13, %ymm13
2574+ ; AVX512F-NEXT: vbroadcastss %xmm13, %ymm13
25752575; AVX512F-NEXT: vmulps %ymm13, %ymm10, %ymm13
25762576; AVX512F-NEXT: vaddps %ymm13, %ymm12, %ymm12
25772577; AVX512F-NEXT: vextractf128 $1, %ymm4, %xmm13
@@ -2627,15 +2627,15 @@ define <64 x float> @test_mul8x8_f32(<64 x float> %a0, <64 x float> %a1) nounwin
26272627; AVX512F-NEXT: vbroadcastss %xmm5, %ymm13
26282628; AVX512F-NEXT: vmulps %ymm0, %ymm13, %ymm13
26292629; AVX512F-NEXT: vmovshdup {{.*#+}} xmm14 = xmm5[1,1,3,3]
2630- ; AVX512F-NEXT: vbroadcastsd %xmm14, %ymm14
2630+ ; AVX512F-NEXT: vbroadcastss %xmm14, %ymm14
26312631; AVX512F-NEXT: vmulps %ymm14, %ymm11, %ymm14
26322632; AVX512F-NEXT: vaddps %ymm14, %ymm13, %ymm13
2633- ; AVX512F-NEXT: vshufps {{.*#+}} xmm14 = xmm5[2,2,2,2 ]
2634- ; AVX512F-NEXT: vbroadcastsd %xmm14, %ymm14
2633+ ; AVX512F-NEXT: vshufpd {{.*#+}} xmm14 = xmm5[1,0 ]
2634+ ; AVX512F-NEXT: vbroadcastss %xmm14, %ymm14
26352635; AVX512F-NEXT: vmulps %ymm1, %ymm14, %ymm14
26362636; AVX512F-NEXT: vaddps %ymm14, %ymm13, %ymm13
26372637; AVX512F-NEXT: vshufps {{.*#+}} xmm14 = xmm5[3,3,3,3]
2638- ; AVX512F-NEXT: vbroadcastsd %xmm14, %ymm14
2638+ ; AVX512F-NEXT: vbroadcastss %xmm14, %ymm14
26392639; AVX512F-NEXT: vmulps %ymm14, %ymm10, %ymm14
26402640; AVX512F-NEXT: vaddps %ymm14, %ymm13, %ymm13
26412641; AVX512F-NEXT: vextractf128 $1, %ymm5, %xmm14
@@ -2689,15 +2689,15 @@ define <64 x float> @test_mul8x8_f32(<64 x float> %a0, <64 x float> %a1) nounwin
26892689; AVX512F-NEXT: vbroadcastss %xmm6, %ymm12
26902690; AVX512F-NEXT: vmulps %ymm0, %ymm12, %ymm12
26912691; AVX512F-NEXT: vmovshdup {{.*#+}} xmm14 = xmm6[1,1,3,3]
2692- ; AVX512F-NEXT: vbroadcastsd %xmm14, %ymm14
2692+ ; AVX512F-NEXT: vbroadcastss %xmm14, %ymm14
26932693; AVX512F-NEXT: vmulps %ymm14, %ymm11, %ymm14
26942694; AVX512F-NEXT: vaddps %ymm14, %ymm12, %ymm12
2695- ; AVX512F-NEXT: vshufps {{.*#+}} xmm14 = xmm6[2,2,2,2 ]
2696- ; AVX512F-NEXT: vbroadcastsd %xmm14, %ymm14
2695+ ; AVX512F-NEXT: vshufpd {{.*#+}} xmm14 = xmm6[1,0 ]
2696+ ; AVX512F-NEXT: vbroadcastss %xmm14, %ymm14
26972697; AVX512F-NEXT: vmulps %ymm1, %ymm14, %ymm14
26982698; AVX512F-NEXT: vaddps %ymm14, %ymm12, %ymm12
26992699; AVX512F-NEXT: vshufps {{.*#+}} xmm14 = xmm6[3,3,3,3]
2700- ; AVX512F-NEXT: vbroadcastsd %xmm14, %ymm14
2700+ ; AVX512F-NEXT: vbroadcastss %xmm14, %ymm14
27012701; AVX512F-NEXT: vmulps %ymm14, %ymm10, %ymm14
27022702; AVX512F-NEXT: vaddps %ymm14, %ymm12, %ymm12
27032703; AVX512F-NEXT: vextractf128 $1, %ymm6, %xmm14
@@ -2753,15 +2753,15 @@ define <64 x float> @test_mul8x8_f32(<64 x float> %a0, <64 x float> %a1) nounwin
27532753; AVX512F-NEXT: vbroadcastss %xmm7, %ymm12
27542754; AVX512F-NEXT: vmulps %ymm0, %ymm12, %ymm12
27552755; AVX512F-NEXT: vmovshdup {{.*#+}} xmm15 = xmm7[1,1,3,3]
2756- ; AVX512F-NEXT: vbroadcastsd %xmm15, %ymm15
2756+ ; AVX512F-NEXT: vbroadcastss %xmm15, %ymm15
27572757; AVX512F-NEXT: vmulps %ymm15, %ymm11, %ymm15
27582758; AVX512F-NEXT: vaddps %ymm15, %ymm12, %ymm12
2759- ; AVX512F-NEXT: vshufps {{.*#+}} xmm15 = xmm7[2,2,2,2 ]
2760- ; AVX512F-NEXT: vbroadcastsd %xmm15, %ymm15
2759+ ; AVX512F-NEXT: vshufpd {{.*#+}} xmm15 = xmm7[1,0 ]
2760+ ; AVX512F-NEXT: vbroadcastss %xmm15, %ymm15
27612761; AVX512F-NEXT: vmulps %ymm1, %ymm15, %ymm15
27622762; AVX512F-NEXT: vaddps %ymm15, %ymm12, %ymm12
27632763; AVX512F-NEXT: vshufps {{.*#+}} xmm15 = xmm7[3,3,3,3]
2764- ; AVX512F-NEXT: vbroadcastsd %xmm15, %ymm15
2764+ ; AVX512F-NEXT: vbroadcastss %xmm15, %ymm15
27652765; AVX512F-NEXT: vmulps %ymm15, %ymm10, %ymm15
27662766; AVX512F-NEXT: vaddps %ymm15, %ymm12, %ymm12
27672767; AVX512F-NEXT: vextractf128 $1, %ymm7, %xmm15
@@ -2828,15 +2828,15 @@ define <64 x float> @test_mul8x8_f32(<64 x float> %a0, <64 x float> %a1) nounwin
28282828; AVX512VL-NEXT: vbroadcastss %xmm4, %ymm12
28292829; AVX512VL-NEXT: vmulps %ymm0, %ymm12, %ymm12
28302830; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm13 = xmm4[1,1,3,3]
2831- ; AVX512VL-NEXT: vbroadcastsd %xmm13, %ymm13
2831+ ; AVX512VL-NEXT: vbroadcastss %xmm13, %ymm13
28322832; AVX512VL-NEXT: vmulps %ymm13, %ymm11, %ymm13
28332833; AVX512VL-NEXT: vaddps %ymm13, %ymm12, %ymm12
2834- ; AVX512VL-NEXT: vshufps {{.*#+}} xmm13 = xmm4[2,2,2,2 ]
2835- ; AVX512VL-NEXT: vbroadcastsd %xmm13, %ymm13
2834+ ; AVX512VL-NEXT: vshufpd {{.*#+}} xmm13 = xmm4[1,0 ]
2835+ ; AVX512VL-NEXT: vbroadcastss %xmm13, %ymm13
28362836; AVX512VL-NEXT: vmulps %ymm1, %ymm13, %ymm13
28372837; AVX512VL-NEXT: vaddps %ymm13, %ymm12, %ymm12
28382838; AVX512VL-NEXT: vshufps {{.*#+}} xmm13 = xmm4[3,3,3,3]
2839- ; AVX512VL-NEXT: vbroadcastsd %xmm13, %ymm13
2839+ ; AVX512VL-NEXT: vbroadcastss %xmm13, %ymm13
28402840; AVX512VL-NEXT: vmulps %ymm13, %ymm10, %ymm13
28412841; AVX512VL-NEXT: vaddps %ymm13, %ymm12, %ymm12
28422842; AVX512VL-NEXT: vextractf128 $1, %ymm4, %xmm13
@@ -2890,15 +2890,15 @@ define <64 x float> @test_mul8x8_f32(<64 x float> %a0, <64 x float> %a1) nounwin
28902890; AVX512VL-NEXT: vbroadcastss %xmm5, %ymm13
28912891; AVX512VL-NEXT: vmulps %ymm0, %ymm13, %ymm13
28922892; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm14 = xmm5[1,1,3,3]
2893- ; AVX512VL-NEXT: vbroadcastsd %xmm14, %ymm14
2893+ ; AVX512VL-NEXT: vbroadcastss %xmm14, %ymm14
28942894; AVX512VL-NEXT: vmulps %ymm14, %ymm11, %ymm14
28952895; AVX512VL-NEXT: vaddps %ymm14, %ymm13, %ymm13
2896- ; AVX512VL-NEXT: vshufps {{.*#+}} xmm14 = xmm5[2,2,2,2 ]
2897- ; AVX512VL-NEXT: vbroadcastsd %xmm14, %ymm14
2896+ ; AVX512VL-NEXT: vshufpd {{.*#+}} xmm14 = xmm5[1,0 ]
2897+ ; AVX512VL-NEXT: vbroadcastss %xmm14, %ymm14
28982898; AVX512VL-NEXT: vmulps %ymm1, %ymm14, %ymm14
28992899; AVX512VL-NEXT: vaddps %ymm14, %ymm13, %ymm13
29002900; AVX512VL-NEXT: vshufps {{.*#+}} xmm14 = xmm5[3,3,3,3]
2901- ; AVX512VL-NEXT: vbroadcastsd %xmm14, %ymm14
2901+ ; AVX512VL-NEXT: vbroadcastss %xmm14, %ymm14
29022902; AVX512VL-NEXT: vmulps %ymm14, %ymm10, %ymm14
29032903; AVX512VL-NEXT: vaddps %ymm14, %ymm13, %ymm13
29042904; AVX512VL-NEXT: vextractf128 $1, %ymm5, %xmm14
@@ -2952,15 +2952,15 @@ define <64 x float> @test_mul8x8_f32(<64 x float> %a0, <64 x float> %a1) nounwin
29522952; AVX512VL-NEXT: vbroadcastss %xmm6, %ymm14
29532953; AVX512VL-NEXT: vmulps %ymm0, %ymm14, %ymm14
29542954; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm15 = xmm6[1,1,3,3]
2955- ; AVX512VL-NEXT: vbroadcastsd %xmm15, %ymm15
2955+ ; AVX512VL-NEXT: vbroadcastss %xmm15, %ymm15
29562956; AVX512VL-NEXT: vmulps %ymm15, %ymm11, %ymm15
29572957; AVX512VL-NEXT: vaddps %ymm15, %ymm14, %ymm14
2958- ; AVX512VL-NEXT: vshufps {{.*#+}} xmm15 = xmm6[2,2,2,2 ]
2959- ; AVX512VL-NEXT: vbroadcastsd %xmm15, %ymm15
2958+ ; AVX512VL-NEXT: vshufpd {{.*#+}} xmm15 = xmm6[1,0 ]
2959+ ; AVX512VL-NEXT: vbroadcastss %xmm15, %ymm15
29602960; AVX512VL-NEXT: vmulps %ymm1, %ymm15, %ymm15
29612961; AVX512VL-NEXT: vaddps %ymm15, %ymm14, %ymm14
29622962; AVX512VL-NEXT: vshufps {{.*#+}} xmm15 = xmm6[3,3,3,3]
2963- ; AVX512VL-NEXT: vbroadcastsd %xmm15, %ymm15
2963+ ; AVX512VL-NEXT: vbroadcastss %xmm15, %ymm15
29642964; AVX512VL-NEXT: vmulps %ymm15, %ymm10, %ymm15
29652965; AVX512VL-NEXT: vaddps %ymm15, %ymm14, %ymm14
29662966; AVX512VL-NEXT: vextractf128 $1, %ymm6, %xmm15
@@ -3014,15 +3014,15 @@ define <64 x float> @test_mul8x8_f32(<64 x float> %a0, <64 x float> %a1) nounwin
30143014; AVX512VL-NEXT: vbroadcastss %xmm7, %ymm15
30153015; AVX512VL-NEXT: vmulps %ymm0, %ymm15, %ymm15
30163016; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm16 = xmm7[1,1,3,3]
3017- ; AVX512VL-NEXT: vbroadcastsd %xmm16, %ymm16
3017+ ; AVX512VL-NEXT: vbroadcastss %xmm16, %ymm16
30183018; AVX512VL-NEXT: vmulps %ymm16, %ymm11, %ymm16
30193019; AVX512VL-NEXT: vaddps %ymm16, %ymm15, %ymm15
3020- ; AVX512VL-NEXT: vshufps {{.*#+}} xmm16 = xmm7[2,2,2,2 ]
3021- ; AVX512VL-NEXT: vbroadcastsd %xmm16, %ymm16
3020+ ; AVX512VL-NEXT: vshufpd {{.*#+}} xmm16 = xmm7[1,0 ]
3021+ ; AVX512VL-NEXT: vbroadcastss %xmm16, %ymm16
30223022; AVX512VL-NEXT: vmulps %ymm16, %ymm1, %ymm16
30233023; AVX512VL-NEXT: vaddps %ymm16, %ymm15, %ymm15
30243024; AVX512VL-NEXT: vshufps {{.*#+}} xmm16 = xmm7[3,3,3,3]
3025- ; AVX512VL-NEXT: vbroadcastsd %xmm16, %ymm16
3025+ ; AVX512VL-NEXT: vbroadcastss %xmm16, %ymm16
30263026; AVX512VL-NEXT: vmulps %ymm16, %ymm10, %ymm16
30273027; AVX512VL-NEXT: vaddps %ymm16, %ymm15, %ymm15
30283028; AVX512VL-NEXT: vextractf32x4 $1, %ymm7, %xmm16
0 commit comments