@@ -2028,6 +2028,57 @@ define <32 x i8> @shuffle_v32i8_32_01_34_03_36_05_38_07_40_09_42_11_44_13_46_15_
2028
2028
ret <32 x i8 > %shuffle
2029
2029
}
2030
2030
2031
+ ; PR159670
; Shuffle-lowering test. Per the mask on the shufflevector below (indices 0-31
; select from %a, 32-63 from %b), each 128-bit half of the result interleaves
; the low 8 bytes of %a's half with the high 8 bytes of %b's matching half:
;   result[0..15]  = a[0],b[8],a[1],b[9],...,a[7],b[15]
;   result[16..31] = a[16],b[24],a[17],b[25],...,a[23],b[31]
; Expected lowering per check prefix:
;   AVX1 / XOPAVX1               - per-half vpshufd + vpunpcklbw, rejoined
;                                  with vinsertf128
;   AVX2 / AVX512VLBW / XOPAVX2  - one vpblendd followed by one vpshufb
;   AVX512VLVBMI                 - one vpermt2b using a constant index vector
; NOTE(review): the CHECK lines look autogenerated (presumably by
; utils/update_llc_test_checks.py) - regenerate them rather than hand-editing.
; The RUN lines that define these prefixes are outside this view.
2032
+ define <32 x i8 > @shuffle_v32i8_00_40_01_41_02_42_03_43_04_44_05_45_06_46_07_47_16_56_17_57_18_58_19_59_20_60_21_61_22_62_23_63 (<32 x i8 > %a , <32 x i8 > %b ) {
2033
+ ; AVX1-LABEL: shuffle_v32i8_00_40_01_41_02_42_03_43_04_44_05_45_06_46_07_47_16_56_17_57_18_58_19_59_20_60_21_61_22_62_23_63:
2034
+ ; AVX1: # %bb.0:
2035
+ ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2036
+ ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
2037
+ ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
2038
+ ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
2039
+ ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
2040
+ ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2041
+ ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2042
+ ; AVX1-NEXT: retq
2043
+ ;
2044
+ ; AVX2-LABEL: shuffle_v32i8_00_40_01_41_02_42_03_43_04_44_05_45_06_46_07_47_16_56_17_57_18_58_19_59_20_60_21_61_22_62_23_63:
2045
+ ; AVX2: # %bb.0:
2046
+ ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
2047
+ ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8,1,9,2,10,3,11,4,12,5,13,6,14,7,15,16,24,17,25,18,26,19,27,20,28,21,29,22,30,23,31]
2048
+ ; AVX2-NEXT: retq
2049
+ ;
2050
+ ; AVX512VLBW-LABEL: shuffle_v32i8_00_40_01_41_02_42_03_43_04_44_05_45_06_46_07_47_16_56_17_57_18_58_19_59_20_60_21_61_22_62_23_63:
2051
+ ; AVX512VLBW: # %bb.0:
2052
+ ; AVX512VLBW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
2053
+ ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8,1,9,2,10,3,11,4,12,5,13,6,14,7,15,16,24,17,25,18,26,19,27,20,28,21,29,22,30,23,31]
2054
+ ; AVX512VLBW-NEXT: retq
2055
+ ;
2056
+ ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_40_01_41_02_42_03_43_04_44_05_45_06_46_07_47_16_56_17_57_18_58_19_59_20_60_21_61_22_62_23_63:
2057
+ ; AVX512VLVBMI: # %bb.0:
2058
+ ; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm2 = [0,40,1,41,2,42,3,43,4,44,5,45,6,46,7,47,16,56,17,57,18,58,19,59,20,60,21,61,22,62,23,63]
2059
+ ; AVX512VLVBMI-NEXT: vpermt2b %ymm1, %ymm2, %ymm0
2060
+ ; AVX512VLVBMI-NEXT: retq
2061
+ ;
2062
+ ; XOPAVX1-LABEL: shuffle_v32i8_00_40_01_41_02_42_03_43_04_44_05_45_06_46_07_47_16_56_17_57_18_58_19_59_20_60_21_61_22_62_23_63:
2063
+ ; XOPAVX1: # %bb.0:
2064
+ ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2065
+ ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
2066
+ ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
2067
+ ; XOPAVX1-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
2068
+ ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
2069
+ ; XOPAVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2070
+ ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2071
+ ; XOPAVX1-NEXT: retq
2072
+ ;
2073
+ ; XOPAVX2-LABEL: shuffle_v32i8_00_40_01_41_02_42_03_43_04_44_05_45_06_46_07_47_16_56_17_57_18_58_19_59_20_60_21_61_22_62_23_63:
2074
+ ; XOPAVX2: # %bb.0:
2075
+ ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
2076
+ ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8,1,9,2,10,3,11,4,12,5,13,6,14,7,15,16,24,17,25,18,26,19,27,20,28,21,29,22,30,23,31]
2077
+ ; XOPAVX2-NEXT: retq
2078
+ %shuffle = shufflevector <32 x i8 > %a , <32 x i8 > %b , <32 x i32 > <i32 0 , i32 40 , i32 1 , i32 41 , i32 2 , i32 42 , i32 3 , i32 43 , i32 4 , i32 44 , i32 5 , i32 45 , i32 6 , i32 46 , i32 7 , i32 47 , i32 16 , i32 56 , i32 17 , i32 57 , i32 18 , i32 58 , i32 19 , i32 59 , i32 20 , i32 60 , i32 21 , i32 61 , i32 22 , i32 62 , i32 23 , i32 63 >
2079
+ ret <32 x i8 > %shuffle
2080
+ }
2081
+
2031
2082
; PR27780 - https://bugs.llvm.org/show_bug.cgi?id=27780
2032
2083
2033
2084
define <32 x i8 > @load_fold_pblendvb (ptr %px , <32 x i8 > %y ) {
0 commit comments