; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX,AVX1OR2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX,AVX512

; Two <2 x double> llvm.sqrt results concatenated into one <4 x double>.
; The SSE checks keep one sqrtpd per 128-bit input; the AVX checks expect the
; inputs to be joined with vinsertf128 first so that a single 256-bit vsqrtpd
; performs both square roots.
define <4 x double> @concat_sqrt_v4f64_v2f64(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: concat_sqrt_v4f64_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    sqrtpd %xmm0, %xmm0
; SSE-NEXT:    sqrtpd %xmm1, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: concat_sqrt_v4f64_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    vsqrtpd %ymm0, %ymm0
; AVX-NEXT:    retq
  %v0 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a0)
  %v1 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a1)
  %res = shufflevector <2 x double> %v0, <2 x double> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x double> %res
}
26+
827define <8 x float > @concat_sqrt_v8f32_v4f32 (<4 x float > %a0 , <4 x float > %a1 ) {
928; SSE-LABEL: concat_sqrt_v8f32_v4f32:
1029; SSE: # %bb.0:
@@ -24,6 +43,44 @@ define <8 x float> @concat_sqrt_v8f32_v4f32(<4 x float> %a0, <4 x float> %a1) {
2443 ret <8 x float > %res
2544}
2645
; Four <2 x double> llvm.sqrt results concatenated (pairwise, then once more)
; into one <8 x double>.
; Expected lowering per run configuration, as recorded in the checks below:
;   - SSE checks: four independent 128-bit sqrtpd.
;   - x86-64-v3 checks: each pair is joined with vinsertf128 and square-rooted
;     by one 256-bit vsqrtpd (two vsqrtpd in total).
;   - x86-64-v4 checks: all four inputs are joined (vinsertf128 twice, then
;     vinsertf64x4) and a single 512-bit vsqrtpd is issued.
define <8 x double> @concat_sqrt_v8f64_v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> %a3) {
; SSE-LABEL: concat_sqrt_v8f64_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    sqrtpd %xmm0, %xmm0
; SSE-NEXT:    sqrtpd %xmm1, %xmm1
; SSE-NEXT:    sqrtpd %xmm2, %xmm2
; SSE-NEXT:    sqrtpd %xmm3, %xmm3
; SSE-NEXT:    retq
;
; AVX1OR2-LABEL: concat_sqrt_v8f64_v2f64:
; AVX1OR2:       # %bb.0:
; AVX1OR2-NEXT:    # kill: def $xmm2 killed $xmm2 def $ymm2
; AVX1OR2-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX1OR2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1OR2-NEXT:    vsqrtpd %ymm0, %ymm0
; AVX1OR2-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm1
; AVX1OR2-NEXT:    vsqrtpd %ymm1, %ymm1
; AVX1OR2-NEXT:    retq
;
; AVX512-LABEL: concat_sqrt_v8f64_v2f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    # kill: def $xmm2 killed $xmm2 def $ymm2
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX512-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512-NEXT:    vsqrtpd %zmm0, %zmm0
; AVX512-NEXT:    retq
  %v0 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a0)
  %v1 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a1)
  %v2 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a2)
  %v3 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a3)
  %r01 = shufflevector <2 x double> %v0, <2 x double> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %r23 = shufflevector <2 x double> %v2, <2 x double> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = shufflevector <4 x double> %r01, <4 x double> %r23, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x double> %res
}
83+
2784define <16 x float > @concat_sqrt_v16f32_v4f32 (<4 x float > %a0 , <4 x float > %a1 , <4 x float > %a2 , <4 x float > %a3 ) {
2885; SSE-LABEL: concat_sqrt_v16f32_v4f32:
2986; SSE: # %bb.0:
@@ -62,6 +119,33 @@ define <16 x float> @concat_sqrt_v16f32_v4f32(<4 x float> %a0, <4 x float> %a1,
62119 ret <16 x float > %res
63120}
64121
; Two <4 x double> llvm.sqrt results concatenated into one <8 x double>.
; Expected lowering per run configuration, as recorded in the checks below:
;   - SSE checks: four 128-bit sqrtpd over xmm0..xmm3.
;   - x86-64-v3 checks: the two 256-bit vsqrtpd are left as they are.
;   - x86-64-v4 checks: the inputs are joined with vinsertf64x4 and a single
;     512-bit vsqrtpd is issued.
define <8 x double> @concat_sqrt_v8f64_v4f64(<4 x double> %a0, <4 x double> %a1) {
; SSE-LABEL: concat_sqrt_v8f64_v4f64:
; SSE:       # %bb.0:
; SSE-NEXT:    sqrtpd %xmm0, %xmm0
; SSE-NEXT:    sqrtpd %xmm1, %xmm1
; SSE-NEXT:    sqrtpd %xmm2, %xmm2
; SSE-NEXT:    sqrtpd %xmm3, %xmm3
; SSE-NEXT:    retq
;
; AVX1OR2-LABEL: concat_sqrt_v8f64_v4f64:
; AVX1OR2:       # %bb.0:
; AVX1OR2-NEXT:    vsqrtpd %ymm0, %ymm0
; AVX1OR2-NEXT:    vsqrtpd %ymm1, %ymm1
; AVX1OR2-NEXT:    retq
;
; AVX512-LABEL: concat_sqrt_v8f64_v4f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512-NEXT:    vsqrtpd %zmm0, %zmm0
; AVX512-NEXT:    retq
  %v0 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %a0)
  %v1 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %a1)
  %res = shufflevector <4 x double> %v0, <4 x double> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x double> %res
}
148+
65149define <16 x float > @concat_sqrt_v16f32_v8f32 (<8 x float > %a0 , <8 x float > %a1 ) {
66150; SSE-LABEL: concat_sqrt_v16f32_v8f32:
67151; SSE: # %bb.0:
0 commit comments