@@ -1808,6 +1808,49 @@ define <4 x double> @broadcast_v4f64_0000_from_v2i64(<2 x i64> %a0) {
18081808 ret <4 x double > %3
18091809}
18101810
1811+ ; PR114959
; Test case tagged PR114959: interleaves two unaligned <2 x double> loads into
; a single <4 x double> result.
;   %lo is loaded from %src, %hi from %src + 32 bytes (both align 1), so the
;   two 16-byte loads are not adjacent in memory.
;   The shuffle mask <0,2,1,3> produces { lo[0], hi[0], lo[1], hi[1] }, which
;   matches the "0213" in the function name.
; The per-prefix blocks below are FileCheck expectations for the emitted
; assembly: AVX1OR2, AVX512VL-SLOW and AVX512VL-FAST-PERLANE expect
; vmovlhps + vunpckhpd + vinsertf128, while AVX512VL-FAST-ALL expects a single
; vpermi2pd driven by the index vector [0,4,1,5].
; NOTE(review): "PR114959" is presumably the upstream issue number, and the
; "broadcasts" suffix is not explained by the visible IR (plain loads only) -
; confirm both against the original report.
; NOTE(review): the CHECK lines look tool-generated; presumably they should be
; regenerated rather than edited by hand - confirm against the file header.
1812+ define <4 x double > @concat_v4f64_0213_broadcasts (ptr %src ) {
1813+ ; AVX1OR2-LABEL: concat_v4f64_0213_broadcasts:
1814+ ; AVX1OR2: # %bb.0:
1815+ ; AVX1OR2-NEXT: vmovups (%rdi), %xmm0
1816+ ; AVX1OR2-NEXT: vmovups 32(%rdi), %xmm1
1817+ ; AVX1OR2-NEXT: vmovlhps {{.*#+}} xmm2 = xmm0[0],xmm1[0]
1818+ ; AVX1OR2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
1819+ ; AVX1OR2-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
1820+ ; AVX1OR2-NEXT: retq
1821+ ;
1822+ ; AVX512VL-SLOW-LABEL: concat_v4f64_0213_broadcasts:
1823+ ; AVX512VL-SLOW: # %bb.0:
1824+ ; AVX512VL-SLOW-NEXT: vmovups (%rdi), %xmm0
1825+ ; AVX512VL-SLOW-NEXT: vmovups 32(%rdi), %xmm1
1826+ ; AVX512VL-SLOW-NEXT: vmovlhps {{.*#+}} xmm2 = xmm0[0],xmm1[0]
1827+ ; AVX512VL-SLOW-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
1828+ ; AVX512VL-SLOW-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
1829+ ; AVX512VL-SLOW-NEXT: retq
1830+ ;
1831+ ; AVX512VL-FAST-ALL-LABEL: concat_v4f64_0213_broadcasts:
1832+ ; AVX512VL-FAST-ALL: # %bb.0:
1833+ ; AVX512VL-FAST-ALL-NEXT: vmovupd (%rdi), %xmm1
1834+ ; AVX512VL-FAST-ALL-NEXT: vmovupd 32(%rdi), %xmm2
1835+ ; AVX512VL-FAST-ALL-NEXT: vmovapd {{.*#+}} ymm0 = [0,4,1,5]
1836+ ; AVX512VL-FAST-ALL-NEXT: vpermi2pd %ymm2, %ymm1, %ymm0
1837+ ; AVX512VL-FAST-ALL-NEXT: retq
1838+ ;
1839+ ; AVX512VL-FAST-PERLANE-LABEL: concat_v4f64_0213_broadcasts:
1840+ ; AVX512VL-FAST-PERLANE: # %bb.0:
1841+ ; AVX512VL-FAST-PERLANE-NEXT: vmovups (%rdi), %xmm0
1842+ ; AVX512VL-FAST-PERLANE-NEXT: vmovups 32(%rdi), %xmm1
1843+ ; AVX512VL-FAST-PERLANE-NEXT: vmovlhps {{.*#+}} xmm2 = xmm0[0],xmm1[0]
1844+ ; AVX512VL-FAST-PERLANE-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
1845+ ; AVX512VL-FAST-PERLANE-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
1846+ ; AVX512VL-FAST-PERLANE-NEXT: retq
1847+ %src.hi = getelementptr inbounds i8 , ptr %src , i64 32
1848+ %lo = load <2 x double >, ptr %src , align 1
1849+ %hi = load <2 x double >, ptr %src.hi , align 1
; Mask indices 0-1 select elements of %lo and 2-3 select elements of %hi.
1850+ %shuffle = shufflevector <2 x double > %lo , <2 x double > %hi , <4 x i32 > <i32 0 , i32 2 , i32 1 , i32 3 >
1851+ ret <4 x double > %shuffle
1852+ }
1853+
18111854define <4 x double > @bitcast_v4f64_0426 (<4 x double > %a , <4 x double > %b ) {
18121855; ALL-LABEL: bitcast_v4f64_0426:
18131856; ALL: # %bb.0:
0 commit comments