@@ -7376,6 +7376,160 @@ define <16 x i16> @shuffle_v16i16_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_1
73767376 ret <16 x i16 > %shuffle
73777377}
73787378
; Splat test: every one of the 16 result elements takes element 8 of %x
; (mask is splat(i32 8); the second shuffle operand is poison and never read).
; In the expectations below, element 8 is reached as word 0 of the upper
; 128-bit half: most prefixes extract that half (vextractf128/vextracti128 $1)
; and then broadcast its first word. Only AVX512VL-FAST-CROSSLANE instead uses
; a single vpermw with a broadcast index vector of 8s.
; NOTE(review): the CHECK lines look like update_llc_test_checks.py output --
; confirm against the file header and regenerate rather than hand-editing.
7379+ define <16 x i16 > @shuffle_v16i16_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08 (<16 x i16 > noundef %x ) {
7380+ ; AVX1-LABEL: shuffle_v16i16_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08:
7381+ ; AVX1: # %bb.0:
7382+ ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
7383+ ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
7384+ ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
7385+ ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7386+ ; AVX1-NEXT: retq
7387+ ;
7388+ ; AVX2-LABEL: shuffle_v16i16_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08:
7389+ ; AVX2: # %bb.0:
7390+ ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
7391+ ; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
7392+ ; AVX2-NEXT: retq
7393+ ;
7394+ ; AVX512VL-SLOW-LABEL: shuffle_v16i16_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08:
7395+ ; AVX512VL-SLOW: # %bb.0:
7396+ ; AVX512VL-SLOW-NEXT: vextracti128 $1, %ymm0, %xmm0
7397+ ; AVX512VL-SLOW-NEXT: vpbroadcastw %xmm0, %ymm0
7398+ ; AVX512VL-SLOW-NEXT: retq
7399+ ;
7400+ ; AVX512VL-FAST-CROSSLANE-LABEL: shuffle_v16i16_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08:
7401+ ; AVX512VL-FAST-CROSSLANE: # %bb.0:
7402+ ; AVX512VL-FAST-CROSSLANE-NEXT: vpbroadcastw {{.*#+}} ymm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
7403+ ; AVX512VL-FAST-CROSSLANE-NEXT: vpermw %ymm0, %ymm1, %ymm0
7404+ ; AVX512VL-FAST-CROSSLANE-NEXT: retq
7405+ ;
7406+ ; AVX512VL-FAST-PERLANE-LABEL: shuffle_v16i16_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08:
7407+ ; AVX512VL-FAST-PERLANE: # %bb.0:
7408+ ; AVX512VL-FAST-PERLANE-NEXT: vextracti128 $1, %ymm0, %xmm0
7409+ ; AVX512VL-FAST-PERLANE-NEXT: vpbroadcastw %xmm0, %ymm0
7410+ ; AVX512VL-FAST-PERLANE-NEXT: retq
7411+ ;
7412+ ; XOPAVX1-LABEL: shuffle_v16i16_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08:
7413+ ; XOPAVX1: # %bb.0:
7414+ ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
7415+ ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
7416+ ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
7417+ ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7418+ ; XOPAVX1-NEXT: retq
7419+ ;
7420+ ; XOPAVX2-LABEL: shuffle_v16i16_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08:
7421+ ; XOPAVX2: # %bb.0:
7422+ ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
7423+ ; XOPAVX2-NEXT: vpbroadcastw %xmm0, %ymm0
7424+ ; XOPAVX2-NEXT: retq
7425+ %r = shufflevector <16 x i16 > %x , <16 x i16 > poison, <16 x i32 > splat(i32 8 )
7426+ ret <16 x i16 > %r
7427+ }
7428+
; Splat test: every one of the 16 result elements takes element 11 of %x
; (mask is splat(i32 11); the second shuffle operand is poison and never read).
; In the expectations below, element 11 is word 3 of the upper 128-bit half:
;   * AVX1 / XOPAVX1 extract the upper half, replicate word 3 with
;     vpshuflw + vpshufd, then rebuild the 256-bit value with vinsertf128.
;   * AVX2, AVX512VL-SLOW, AVX512VL-FAST-PERLANE and XOPAVX2 replicate word 3
;     inside each half with a ymm vpshuflw, then copy qword 2 everywhere with
;     vpermq [2,2,2,2].
;   * AVX512VL-FAST-CROSSLANE uses one vpermw with a broadcast index of 11.
; NOTE(review): the CHECK lines look like update_llc_test_checks.py output --
; confirm against the file header and regenerate rather than hand-editing.
7429+ define <16 x i16 > @shuffle_v16i16_11_11_11_11_11_11_11_11_11_11_11_11_11_11_11_11 (<16 x i16 > noundef %x ) {
7430+ ; AVX1-LABEL: shuffle_v16i16_11_11_11_11_11_11_11_11_11_11_11_11_11_11_11_11:
7431+ ; AVX1: # %bb.0:
7432+ ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
7433+ ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
7434+ ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
7435+ ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7436+ ; AVX1-NEXT: retq
7437+ ;
7438+ ; AVX2-LABEL: shuffle_v16i16_11_11_11_11_11_11_11_11_11_11_11_11_11_11_11_11:
7439+ ; AVX2: # %bb.0:
7440+ ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,3,3,3,4,5,6,7,11,11,11,11,12,13,14,15]
7441+ ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
7442+ ; AVX2-NEXT: retq
7443+ ;
7444+ ; AVX512VL-SLOW-LABEL: shuffle_v16i16_11_11_11_11_11_11_11_11_11_11_11_11_11_11_11_11:
7445+ ; AVX512VL-SLOW: # %bb.0:
7446+ ; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,3,3,3,4,5,6,7,11,11,11,11,12,13,14,15]
7447+ ; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
7448+ ; AVX512VL-SLOW-NEXT: retq
7449+ ;
7450+ ; AVX512VL-FAST-CROSSLANE-LABEL: shuffle_v16i16_11_11_11_11_11_11_11_11_11_11_11_11_11_11_11_11:
7451+ ; AVX512VL-FAST-CROSSLANE: # %bb.0:
7452+ ; AVX512VL-FAST-CROSSLANE-NEXT: vpbroadcastw {{.*#+}} ymm1 = [11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11]
7453+ ; AVX512VL-FAST-CROSSLANE-NEXT: vpermw %ymm0, %ymm1, %ymm0
7454+ ; AVX512VL-FAST-CROSSLANE-NEXT: retq
7455+ ;
7456+ ; AVX512VL-FAST-PERLANE-LABEL: shuffle_v16i16_11_11_11_11_11_11_11_11_11_11_11_11_11_11_11_11:
7457+ ; AVX512VL-FAST-PERLANE: # %bb.0:
7458+ ; AVX512VL-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,3,3,3,4,5,6,7,11,11,11,11,12,13,14,15]
7459+ ; AVX512VL-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
7460+ ; AVX512VL-FAST-PERLANE-NEXT: retq
7461+ ;
7462+ ; XOPAVX1-LABEL: shuffle_v16i16_11_11_11_11_11_11_11_11_11_11_11_11_11_11_11_11:
7463+ ; XOPAVX1: # %bb.0:
7464+ ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
7465+ ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
7466+ ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
7467+ ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7468+ ; XOPAVX1-NEXT: retq
7469+ ;
7470+ ; XOPAVX2-LABEL: shuffle_v16i16_11_11_11_11_11_11_11_11_11_11_11_11_11_11_11_11:
7471+ ; XOPAVX2: # %bb.0:
7472+ ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,3,3,3,4,5,6,7,11,11,11,11,12,13,14,15]
7473+ ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
7474+ ; XOPAVX2-NEXT: retq
7475+ %r = shufflevector <16 x i16 > %x , <16 x i16 > poison, <16 x i32 > splat(i32 11 )
7476+ ret <16 x i16 > %r
7477+ }
7478+
7479+ ; PR129276
; Splat test: every one of the 16 result elements takes element 15 (the last
; element) of %x; the second shuffle operand is poison and never read.
; In the expectations below, element 15 is word 7 of the upper 128-bit half.
; The check prefixes used here are finer-grained for AVX2 and coarser for
; AVX512VL than a plain AVX2 / AVX512VL-SLOW / AVX512VL-FAST-* split:
;   * AVX1 / XOPAVX1: extract upper half, vpshufhw + vpshufd, vinsertf128.
;   * AVX2-SLOW / XOPAVX2: vpshufhw, vpshufd, then vpermq [2,2,2,2].
;   * AVX2-FAST-ALL: vpshufhw, then vpermd with a broadcast dword index of 6.
;   * AVX2-FAST-PERLANE: a single vpshufb picking bytes 30,31, then vpermq.
;   * AVX512VL (one shared prefix): vpermw with a broadcast index of 15.
; NOTE(review): the CHECK lines look like update_llc_test_checks.py output --
; confirm against the file header and regenerate rather than hand-editing.
7480+ define <16 x i16 > @shuffle_v16i16_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15 (<16 x i16 > noundef %x ) {
7481+ ; AVX1-LABEL: shuffle_v16i16_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15:
7482+ ; AVX1: # %bb.0:
7483+ ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
7484+ ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
7485+ ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
7486+ ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7487+ ; AVX1-NEXT: retq
7488+ ;
7489+ ; AVX2-SLOW-LABEL: shuffle_v16i16_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15:
7490+ ; AVX2-SLOW: # %bb.0:
7491+ ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
7492+ ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,2,2,2,6,6,6,6]
7493+ ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
7494+ ; AVX2-SLOW-NEXT: retq
7495+ ;
7496+ ; AVX2-FAST-ALL-LABEL: shuffle_v16i16_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15:
7497+ ; AVX2-FAST-ALL: # %bb.0:
7498+ ; AVX2-FAST-ALL-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
7499+ ; AVX2-FAST-ALL-NEXT: vpbroadcastd {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6]
7500+ ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
7501+ ; AVX2-FAST-ALL-NEXT: retq
7502+ ;
7503+ ; AVX2-FAST-PERLANE-LABEL: shuffle_v16i16_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15:
7504+ ; AVX2-FAST-PERLANE: # %bb.0:
7505+ ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,30,31,30,31,30,31,30,31,u,u,u,u,u,u,u,u]
7506+ ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
7507+ ; AVX2-FAST-PERLANE-NEXT: retq
7508+ ;
7509+ ; AVX512VL-LABEL: shuffle_v16i16_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15:
7510+ ; AVX512VL: # %bb.0:
7511+ ; AVX512VL-NEXT: vpbroadcastw {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
7512+ ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
7513+ ; AVX512VL-NEXT: retq
7514+ ;
7515+ ; XOPAVX1-LABEL: shuffle_v16i16_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15:
7516+ ; XOPAVX1: # %bb.0:
7517+ ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
7518+ ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
7519+ ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
7520+ ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
7521+ ; XOPAVX1-NEXT: retq
7522+ ;
7523+ ; XOPAVX2-LABEL: shuffle_v16i16_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15:
7524+ ; XOPAVX2: # %bb.0:
7525+ ; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
7526+ ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,2,2,2,6,6,6,6]
7527+ ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
7528+ ; XOPAVX2-NEXT: retq
7529+ %r = shufflevector <16 x i16 > %x , <16 x i16 > poison, <16 x i32 > splat(i32 15 )
7530+ ret <16 x i16 > %r
7531+ }
7532+
73797533define <16 x i16 > @insert_v16i16_0elt_into_zero_vector (ptr %ptr ) {
73807534; ALL-LABEL: insert_v16i16_0elt_into_zero_vector:
73817535; ALL: # %bb.0:
0 commit comments