@@ -846,6 +846,26 @@ define <32 x i8> @concat_pshufb_unnecessary(<16 x i8> %a0, <16 x i8> %a1, <16 x
846846 ret <32 x i8 > %res
847847}
848848
; Partial-use test for a variable permute. The llvm.x86.avx2.permps call
; shuffles %a0 with the constant index vector <3,1,1,0,0,0,7,7>; the
; shufflevector that follows keeps only elements 0-3 of that result and takes
; its elements 4-7 from %a1 (mask values 12-15 address the second operand).
; The checks for the AVX2 run expect a vshufps on xmm0 with [3,1,1,0] followed
; by a vblendps against ymm1; the checks for the AVX512 run expect a single
; vpermt2ps whose index constant is [3,1,1,0,20,21,22,23].
849+ define <8 x float > @demandedelts_vpermps (<8 x float > %a0 , <8 x float > %a1 ) {
850+ ; AVX2-LABEL: demandedelts_vpermps:
851+ ; AVX2: # %bb.0:
852+ ; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,1,1,0]
853+ ; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
854+ ; AVX2-NEXT: ret{{[l|q]}}
855+ ;
856+ ; AVX512-LABEL: demandedelts_vpermps:
857+ ; AVX512: # %bb.0:
858+ ; AVX512-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
859+ ; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
860+ ; AVX512-NEXT: vpmovsxbd {{.*#+}} ymm2 = [3,1,1,0,20,21,22,23]
861+ ; AVX512-NEXT: vpermt2ps %zmm1, %zmm2, %zmm0
862+ ; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
863+ ; AVX512-NEXT: ret{{[l|q]}}
; The last four index entries (0,0,7,7) produce elements 4-7 of %lo, which the
; shufflevector below never reads.
864+ %lo = call <8 x float > @llvm.x86.avx2.permps (<8 x float > %a0 , <8 x i32 > <i32 3 , i32 1 , i32 1 , i32 0 , i32 0 , i32 0 , i32 7 , i32 7 >)
865+ %hi = shufflevector <8 x float > %lo , <8 x float > %a1 , <8 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 12 , i32 13 , i32 14 , i32 15 >
866+ ret <8 x float > %hi
867+ }
868+
849869define <8 x i32 > @constant_fold_permd () {
850870; AVX2-LABEL: constant_fold_permd:
851871; AVX2: # %bb.0:
0 commit comments