@@ -977,3 +977,65 @@ define <16 x i32> @blend_of_permutes_v16i32(<8 x i64> %a0, <8x i64> %a1) {
977977 %r = shufflevector <16 x i32 > %x0 , <16 x i32 > %x1 , <16 x i32 > <i32 0 , i32 17 , i32 2 , i32 19 , i32 20 , i32 5 , i32 6 , i32 23 , i32 8 , i32 25 , i32 10 , i32 27 , i32 28 , i32 13 , i32 14 , i32 31 >
978978 ret <16 x i32 > %r
979979}
980+
; Codegen test: four 128-bit variable permutes whose results are concatenated
; into a single <8 x double>.
;
; Inputs:  %a0..%a3 - the four <2 x double> vectors to be permuted.
;          %m       - one <8 x i64> control vector; each consecutive pair of
;                     elements is used as the control operand for one input.
; Result:  <8 x double> laid out as permuted %a0, %a1, %a2, %a3 in that order.
;
; The assertions below record the expected llc output for the 32-bit and
; 64-bit run lines separately: both expect four xmm-width vpermilpd
; instructions followed by vinsertf128/vinsertf64x4 to rebuild the zmm result.
; In the 32-bit output the fourth vector and the control operands are read
; relative to %ebp; in the 64-bit output the control vector arrives in %zmm4
; and its upper 128-bit pieces are extracted first.
; NOTE(review): the assertion block looks machine-generated - presumably it
; should be regenerated with the test-update script rather than edited by
; hand; confirm against the header of the full file.
981+ define <8 x double > @concat_vpermilvar_v8f64_v2f64 (<2 x double > %a0 , <2 x double > %a1 , <2 x double > %a2 , <2 x double > %a3 , <8 x i64 > %m ) nounwind {
982+ ; X86-LABEL: concat_vpermilvar_v8f64_v2f64:
983+ ; X86: # %bb.0:
984+ ; X86-NEXT: pushl %ebp
985+ ; X86-NEXT: movl %esp, %ebp
986+ ; X86-NEXT: andl $-64, %esp
987+ ; X86-NEXT: subl $64, %esp
988+ ; X86-NEXT: vmovapd 8(%ebp), %xmm3
989+ ; X86-NEXT: vpermilpd 72(%ebp), %xmm0, %xmm0
990+ ; X86-NEXT: vpermilpd 88(%ebp), %xmm1, %xmm1
991+ ; X86-NEXT: vpermilpd 104(%ebp), %xmm2, %xmm2
992+ ; X86-NEXT: vpermilpd 120(%ebp), %xmm3, %xmm3
993+ ; X86-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
994+ ; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
995+ ; X86-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
996+ ; X86-NEXT: movl %ebp, %esp
997+ ; X86-NEXT: popl %ebp
998+ ; X86-NEXT: retl
999+ ;
1000+ ; X64-LABEL: concat_vpermilvar_v8f64_v2f64:
1001+ ; X64: # %bb.0:
1002+ ; X64-NEXT: vextractf128 $1, %ymm4, %xmm5
1003+ ; X64-NEXT: vextractf32x4 $2, %zmm4, %xmm6
1004+ ; X64-NEXT: vextractf32x4 $3, %zmm4, %xmm7
1005+ ; X64-NEXT: vpermilpd %xmm4, %xmm0, %xmm0
1006+ ; X64-NEXT: vpermilpd %xmm5, %xmm1, %xmm1
1007+ ; X64-NEXT: vpermilpd %xmm6, %xmm2, %xmm2
1008+ ; X64-NEXT: vpermilpd %xmm7, %xmm3, %xmm3
1009+ ; X64-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
1010+ ; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1011+ ; X64-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
1012+ ; X64-NEXT: retq
; Step 1: slice the 8-element control vector into four 2-element controls
; (elements 0-1, 2-3, 4-5 and 6-7 of %m).
1013+ %m0 = shufflevector <8 x i64 > %m , <8 x i64 > poison, <2 x i32 > <i32 0 , i32 1 >
1014+ %m1 = shufflevector <8 x i64 > %m , <8 x i64 > poison, <2 x i32 > <i32 2 , i32 3 >
1015+ %m2 = shufflevector <8 x i64 > %m , <8 x i64 > poison, <2 x i32 > <i32 4 , i32 5 >
1016+ %m3 = shufflevector <8 x i64 > %m , <8 x i64 > poison, <2 x i32 > <i32 6 , i32 7 >
; Step 2: permute each 128-bit input with its own control slice.
1017+ %v0 = tail call noundef <2 x double > @llvm.x86.avx.vpermilvar.pd (<2 x double > %a0 , <2 x i64 > %m0 )
1018+ %v1 = tail call noundef <2 x double > @llvm.x86.avx.vpermilvar.pd (<2 x double > %a1 , <2 x i64 > %m1 )
1019+ %v2 = tail call noundef <2 x double > @llvm.x86.avx.vpermilvar.pd (<2 x double > %a2 , <2 x i64 > %m2 )
1020+ %v3 = tail call noundef <2 x double > @llvm.x86.avx.vpermilvar.pd (<2 x double > %a3 , <2 x i64 > %m3 )
; Step 3: identity-mask shuffles that concatenate v0|v1 -> %lo, v2|v3 -> %hi,
; then %lo|%hi -> the 8-element result.
1021+ %lo = shufflevector <2 x double > %v0 , <2 x double > %v1 , <4 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 >
1022+ %hi = shufflevector <2 x double > %v2 , <2 x double > %v3 , <4 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 >
1023+ %res = shufflevector <4 x double > %lo , <4 x double > %hi , <8 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 >
1024+ ret <8 x double > %res
1025+ }
1026+
; Codegen test: two 256-bit variable permutes whose results are concatenated
; into a single <8 x double>.
;
; Inputs:  %a0, %a1 - the two <4 x double> vectors to be permuted.
;          %m       - one <8 x i64> control vector; elements 0-3 control %a0
;                     and elements 4-7 control %a1.
; Result:  <8 x double> laid out as permuted %a0 followed by permuted %a1.
;
; A single shared assertion prefix is used here; the final pattern accepts
; either a retl or a retq return. The expected output extracts the upper
; 256 bits of the control vector (%zmm2), performs two ymm-width vpermilpd
; instructions, and rebuilds the zmm result with vinsertf64x4.
; NOTE(review): the assertion block looks machine-generated - presumably it
; should be regenerated with the test-update script rather than edited by
; hand; confirm against the header of the full file.
1027+ define <8 x double > @concat_vpermilvar_v8f64_v4f64 (<4 x double > %a0 , <4 x double > %a1 , <8 x i64 > %m ) nounwind {
1028+ ; CHECK-LABEL: concat_vpermilvar_v8f64_v4f64:
1029+ ; CHECK: # %bb.0:
1030+ ; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3
1031+ ; CHECK-NEXT: vpermilpd %ymm2, %ymm0, %ymm0
1032+ ; CHECK-NEXT: vpermilpd %ymm3, %ymm1, %ymm1
1033+ ; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
1034+ ; CHECK-NEXT: ret{{[l|q]}}
; Step 1: split the control vector into its low (0-3) and high (4-7) halves.
1035+ %m0 = shufflevector <8 x i64 > %m , <8 x i64 > poison, <4 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 >
1036+ %m1 = shufflevector <8 x i64 > %m , <8 x i64 > poison, <4 x i32 > <i32 4 , i32 5 , i32 6 , i32 7 >
; Step 2: permute each 256-bit input with its own control half.
1037+ %v0 = tail call noundef <4 x double > @llvm.x86.avx.vpermilvar.pd.256 (<4 x double > %a0 , <4 x i64 > %m0 )
1038+ %v1 = tail call noundef <4 x double > @llvm.x86.avx.vpermilvar.pd.256 (<4 x double > %a1 , <4 x i64 > %m1 )
; Step 3: identity-mask shuffle that concatenates %v0|%v1 into the result.
1039+ %res = shufflevector <4 x double > %v0 , <4 x double > %v1 , <8 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 >
1040+ ret <8 x double > %res
1041+ }
0 commit comments