@@ -14,9 +14,9 @@ define <8 x float> @concat_rsqrt_v8f32_v4f32(<4 x float> %a0, <4 x float> %a1) {
1414;
1515; AVX-LABEL: concat_rsqrt_v8f32_v4f32:
1616; AVX: # %bb.0:
17- ; AVX-NEXT: vrsqrtps %xmm0, %xmm0
18- ; AVX-NEXT: vrsqrtps %xmm1, %xmm1
17+ ; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
1918; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
19+ ; AVX-NEXT: vrsqrtps %ymm0, %ymm0
2020; AVX-NEXT: retq
2121 %v0 = call <4 x float > @llvm.x86.sse.rsqrt.ps (<4 x float > %a0 )
2222 %v1 = call <4 x float > @llvm.x86.sse.rsqrt.ps (<4 x float > %a1 )
@@ -36,23 +36,23 @@ define <16 x float> @concat_rsqrt_v16f32_v4f32(<4 x float> %a0, <4 x float> %a1,
3636;
3737; AVX1OR2-LABEL: concat_rsqrt_v16f32_v4f32:
3838; AVX1OR2: # %bb.0:
39- ; AVX1OR2-NEXT: vrsqrtps %xmm0, %xmm0
40- ; AVX1OR2-NEXT: vrsqrtps %xmm1, %xmm1
41- ; AVX1OR2-NEXT: vrsqrtps %xmm2, %xmm2
42- ; AVX1OR2-NEXT: vrsqrtps %xmm3, %xmm3
39+ ; AVX1OR2-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
40+ ; AVX1OR2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
4341; AVX1OR2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
42+ ; AVX1OR2-NEXT: vrsqrtps %ymm0, %ymm0
4443; AVX1OR2-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
44+ ; AVX1OR2-NEXT: vrsqrtps %ymm1, %ymm1
4545; AVX1OR2-NEXT: retq
4646;
4747; AVX512-LABEL: concat_rsqrt_v16f32_v4f32:
4848; AVX512: # %bb.0:
49- ; AVX512-NEXT: vrsqrtps %xmm0, %xmm0
50- ; AVX512-NEXT: vrsqrtps %xmm1, %xmm1
51- ; AVX512-NEXT: vrsqrtps %xmm2, %xmm2
52- ; AVX512-NEXT: vrsqrtps %xmm3, %xmm3
53- ; AVX512-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
49+ ; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
50+ ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
5451; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
55- ; AVX512-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
52+ ; AVX512-NEXT: vrsqrtps %ymm0, %ymm0
53+ ; AVX512-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
54+ ; AVX512-NEXT: vrsqrtps %ymm1, %ymm1
55+ ; AVX512-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
5656; AVX512-NEXT: retq
5757 %v0 = call <4 x float > @llvm.x86.sse.rsqrt.ps (<4 x float > %a0 )
5858 %v1 = call <4 x float > @llvm.x86.sse.rsqrt.ps (<4 x float > %a1 )
0 commit comments