@@ -1101,6 +1101,121 @@ define <64 x i8> @shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_
11011101 ret <64 x i8 > %r
11021102}
11031103
; Selects bytes 61..124 of the 128-byte concatenation {%a1 (low), %a0 (high)}:
; a concatenated byte shift by 61 across the two 512-bit operands
; (61 mod 16 = 13, matching the vpalignr immediate patterns below).
; Expected codegen: AVX512F/AVX512DQ lower per 256-bit half via
; vperm2i128 + vpalignr; AVX512BW/AVX512VBMI use a single 512-bit
; valignq + vpalignr pair.
; NOTE(review): CHECK lines appear autogenerated (update_llc_test_checks.py
; style) — prefer regenerating over hand-editing them.
define <64 x i8> @shuffle_v64i8_61_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124(<64 x i8> %a0, <64 x i8> %a1) {
; AVX512F-LABEL: shuffle_v64i8_61_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
; AVX512F-NEXT:    vperm2i128 {{.*#+}} ymm3 = ymm0[2,3],ymm2[0,1]
; AVX512F-NEXT:    vpalignr {{.*#+}} ymm2 = ymm3[13,14,15],ymm2[0,1,2,3,4,5,6,7,8,9,10,11,12],ymm3[29,30,31],ymm2[16,17,18,19,20,21,22,23,24,25,26,27,28]
; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; AVX512F-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm1[2,3],ymm0[0,1]
; AVX512F-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],ymm1[29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28]
; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuffle_v64i8_61_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    valignq {{.*#+}} zmm1 = zmm1[6,7],zmm0[0,1,2,3,4,5]
; AVX512BW-NEXT:    vpalignr {{.*#+}} zmm0 = zmm1[13,14,15],zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zmm1[29,30,31],zmm0[16,17,18,19,20,21,22,23,24,25,26,27,28],zmm1[45,46,47],zmm0[32,33,34,35,36,37,38,39,40,41,42,43,44],zmm1[61,62,63],zmm0[48,49,50,51,52,53,54,55,56,57,58,59,60]
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: shuffle_v64i8_61_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
; AVX512DQ-NEXT:    vperm2i128 {{.*#+}} ymm3 = ymm0[2,3],ymm2[0,1]
; AVX512DQ-NEXT:    vpalignr {{.*#+}} ymm2 = ymm3[13,14,15],ymm2[0,1,2,3,4,5,6,7,8,9,10,11,12],ymm3[29,30,31],ymm2[16,17,18,19,20,21,22,23,24,25,26,27,28]
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; AVX512DQ-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm1[2,3],ymm0[0,1]
; AVX512DQ-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],ymm1[29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28]
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_61_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    valignq {{.*#+}} zmm1 = zmm1[6,7],zmm0[0,1,2,3,4,5]
; AVX512VBMI-NEXT:    vpalignr {{.*#+}} zmm0 = zmm1[13,14,15],zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zmm1[29,30,31],zmm0[16,17,18,19,20,21,22,23,24,25,26,27,28],zmm1[45,46,47],zmm0[32,33,34,35,36,37,38,39,40,41,42,43,44],zmm1[61,62,63],zmm0[48,49,50,51,52,53,54,55,56,57,58,59,60]
; AVX512VBMI-NEXT:    retq
  ; Mask indices 61..124: 61..63 from %a1, 64..124 map to %a0[0..60].
  %r = shufflevector <64 x i8> %a1, <64 x i8> %a0, <64 x i32> <i32 61, i32 62, i32 63, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72, i32 73, i32 74, i32 75, i32 76, i32 77, i32 78, i32 79, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86, i32 87, i32 88, i32 89, i32 90, i32 91, i32 92, i32 93, i32 94, i32 95, i32 96, i32 97, i32 98, i32 99, i32 100, i32 101, i32 102, i32 103, i32 104, i32 105, i32 106, i32 107, i32 108, i32 109, i32 110, i32 111, i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118, i32 119, i32 120, i32 121, i32 122, i32 123, i32 124>
  ret <64 x i8> %r
}
1141+
; PR79799
; Selects bytes 62..125 of the 128-byte concatenation {%a1 (low), %a0 (high)}:
; a concatenated byte shift by 62. Because 62 is even (word-aligned), the
; AVX512BW/AVX512VBMI checks expect a word-granularity vpermt2w lowering
; instead of valignq+vpalignr; AVX512F/AVX512DQ split per 256-bit half via
; vperm2i128 + vpalignr (immediate 14 = 62 mod 16).
; NOTE(review): CHECK lines appear autogenerated — regenerate, don't hand-edit.
define <64 x i8> @shuffle_v64i8_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125(<64 x i8> %a0, <64 x i8> %a1) {
; AVX512F-LABEL: shuffle_v64i8_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
; AVX512F-NEXT:    vperm2i128 {{.*#+}} ymm3 = ymm0[2,3],ymm2[0,1]
; AVX512F-NEXT:    vpalignr {{.*#+}} ymm2 = ymm3[14,15],ymm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm3[30,31],ymm2[16,17,18,19,20,21,22,23,24,25,26,27,28,29]
; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; AVX512F-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm1[2,3],ymm0[0,1]
; AVX512F-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm1[30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29]
; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuffle_v64i8_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpmovsxbw {{.*#+}} zmm2 = [63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
; AVX512BW-NEXT:    vpermt2w %zmm1, %zmm2, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: shuffle_v64i8_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
; AVX512DQ-NEXT:    vperm2i128 {{.*#+}} ymm3 = ymm0[2,3],ymm2[0,1]
; AVX512DQ-NEXT:    vpalignr {{.*#+}} ymm2 = ymm3[14,15],ymm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm3[30,31],ymm2[16,17,18,19,20,21,22,23,24,25,26,27,28,29]
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; AVX512DQ-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm1[2,3],ymm0[0,1]
; AVX512DQ-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm1[30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29]
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_62_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vpmovsxbw {{.*#+}} zmm2 = [63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
; AVX512VBMI-NEXT:    vpermt2w %zmm1, %zmm2, %zmm0
; AVX512VBMI-NEXT:    retq
  ; Mask indices 62..125: 62..63 from %a1, 64..125 map to %a0[0..61].
  %r = shufflevector <64 x i8> %a1, <64 x i8> %a0, <64 x i32> <i32 62, i32 63, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72, i32 73, i32 74, i32 75, i32 76, i32 77, i32 78, i32 79, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86, i32 87, i32 88, i32 89, i32 90, i32 91, i32 92, i32 93, i32 94, i32 95, i32 96, i32 97, i32 98, i32 99, i32 100, i32 101, i32 102, i32 103, i32 104, i32 105, i32 106, i32 107, i32 108, i32 109, i32 110, i32 111, i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118, i32 119, i32 120, i32 121, i32 122, i32 123, i32 124, i32 125>
  ret <64 x i8> %r
}
1180+
; Selects bytes 63..126 of the 128-byte concatenation {%a1 (low), %a0 (high)}:
; a concatenated byte shift by 63 (vpalignr immediate 15 = 63 mod 16).
; Expected codegen: AVX512F/AVX512DQ lower per 256-bit half via
; vperm2i128 + vpalignr; AVX512BW/AVX512VBMI use a single 512-bit
; valignq + vpalignr pair.
; NOTE(review): CHECK lines appear autogenerated — regenerate, don't hand-edit.
define <64 x i8> @shuffle_v64i8_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125_126(<64 x i8> %a0, <64 x i8> %a1) {
; AVX512F-LABEL: shuffle_v64i8_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125_126:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
; AVX512F-NEXT:    vperm2i128 {{.*#+}} ymm3 = ymm0[2,3],ymm2[0,1]
; AVX512F-NEXT:    vpalignr {{.*#+}} ymm2 = ymm3[15],ymm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm3[31],ymm2[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; AVX512F-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm1[2,3],ymm0[0,1]
; AVX512F-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: shuffle_v64i8_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125_126:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    valignq {{.*#+}} zmm1 = zmm1[6,7],zmm0[0,1,2,3,4,5]
; AVX512BW-NEXT:    vpalignr {{.*#+}} zmm0 = zmm1[15],zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zmm1[31],zmm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30],zmm1[47],zmm0[32,33,34,35,36,37,38,39,40,41,42,43,44,45,46],zmm1[63],zmm0[48,49,50,51,52,53,54,55,56,57,58,59,60,61,62]
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: shuffle_v64i8_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125_126:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
; AVX512DQ-NEXT:    vperm2i128 {{.*#+}} ymm3 = ymm0[2,3],ymm2[0,1]
; AVX512DQ-NEXT:    vpalignr {{.*#+}} ymm2 = ymm3[15],ymm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm3[31],ymm2[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; AVX512DQ-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm1[2,3],ymm0[0,1]
; AVX512DQ-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512VBMI-LABEL: shuffle_v64i8_63_64_65_66_67_68_69_70_71_72_73_74_75_76_77_78_79_80_81_82_83_84_85_86_87_88_89_90_91_92_93_94_95_96_97_98_99_100_101_102_103_104_105_106_107_108_109_110_111_112_113_114_115_116_117_118_119_120_121_122_123_124_125_126:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    valignq {{.*#+}} zmm1 = zmm1[6,7],zmm0[0,1,2,3,4,5]
; AVX512VBMI-NEXT:    vpalignr {{.*#+}} zmm0 = zmm1[15],zmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zmm1[31],zmm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30],zmm1[47],zmm0[32,33,34,35,36,37,38,39,40,41,42,43,44,45,46],zmm1[63],zmm0[48,49,50,51,52,53,54,55,56,57,58,59,60,61,62]
; AVX512VBMI-NEXT:    retq
  ; Mask indices 63..126: index 63 from %a1, 64..126 map to %a0[0..62].
  %r = shufflevector <64 x i8> %a1, <64 x i8> %a0, <64 x i32> <i32 63, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72, i32 73, i32 74, i32 75, i32 76, i32 77, i32 78, i32 79, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86, i32 87, i32 88, i32 89, i32 90, i32 91, i32 92, i32 93, i32 94, i32 95, i32 96, i32 97, i32 98, i32 99, i32 100, i32 101, i32 102, i32 103, i32 104, i32 105, i32 106, i32 107, i32 108, i32 109, i32 110, i32 111, i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118, i32 119, i32 120, i32 121, i32 122, i32 123, i32 124, i32 125, i32 126>
  ret <64 x i8> %r
}
1218+
11041219define <64 x i8 > @shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_42_66_68_72_74_78_80_84_86_90_92_96_98_102_104_108_110_114_116_120_122_126 (<64 x i8 > %a0 , <64 x i8 > %a1 ) {
11051220; AVX512F-LABEL: shuffle_v64i8_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_32_33_34_35_36_37_38_39_40_41_42_66_68_72_74_78_80_84_86_90_92_96_98_102_104_108_110_114_116_120_122_126:
11061221; AVX512F: # %bb.0:
0 commit comments