@@ -1228,13 +1228,14 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %
12281228;
12291229; AVX512VBMI2-LABEL: splatvar_funnnel_v32i8:
12301230; AVX512VBMI2: # %bb.0:
1231- ; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm3 = [0,2,4,6,8,10,12,14,64,66,68,70,72,74,76,78,16,18,20,22,24,26,28,30,80,82,84,86,88,90,92,94]
1232- ; AVX512VBMI2-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
1233- ; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
1234- ; AVX512VBMI2-NEXT: vpsrlw %xmm2, %ymm4, %ymm4
1235- ; AVX512VBMI2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
1236- ; AVX512VBMI2-NEXT: vpsrlw %xmm2, %ymm0, %ymm0
1237- ; AVX512VBMI2-NEXT: vpermt2b %zmm4, %zmm3, %zmm0
1231+ ; AVX512VBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
1232+ ; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1233+ ; AVX512VBMI2-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,64,1,65,2,66,3,67,4,68,5,69,6,70,7,71,16,80,17,81,18,82,19,83,20,84,21,85,22,86,23,87,8,72,9,73,10,74,11,75,12,76,13,77,14,78,15,79,24,88,25,89,26,90,27,91,28,92,29,93,30,94,31,95]
1234+ ; AVX512VBMI2-NEXT: vpermi2b %zmm0, %zmm1, %zmm3
1235+ ; AVX512VBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
1236+ ; AVX512VBMI2-NEXT: vpsrlw %xmm0, %zmm3, %zmm0
1237+ ; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,2,4,6,8,10,12,14,32,34,36,38,40,42,44,46,16,18,20,22,24,26,28,30,48,50,52,54,56,58,60,62]
1238+ ; AVX512VBMI2-NEXT: vpermb %zmm0, %zmm1, %zmm0
12381239; AVX512VBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
12391240; AVX512VBMI2-NEXT: retq
12401241;
@@ -1251,16 +1252,29 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %
12511252; AVX512VLBW-NEXT: vpackuswb %ymm3, %ymm0, %ymm0
12521253; AVX512VLBW-NEXT: retq
12531254;
1254- ; AVX10-LABEL: splatvar_funnnel_v32i8:
1255- ; AVX10: # %bb.0:
1256- ; AVX10-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
1257- ; AVX10-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
1258- ; AVX10-NEXT: vpsrlw %xmm2, %ymm3, %ymm3
1259- ; AVX10-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
1260- ; AVX10-NEXT: vpsrlw %xmm2, %ymm0, %ymm1
1261- ; AVX10-NEXT: vmovdqa {{.*#+}} ymm0 = [0,2,4,6,8,10,12,14,32,34,36,38,40,42,44,46,16,18,20,22,24,26,28,30,48,50,52,54,56,58,60,62]
1262- ; AVX10-NEXT: vpermi2b %ymm3, %ymm1, %ymm0
1263- ; AVX10-NEXT: retq
1255+ ; AVX512VLVBMI2-LABEL: splatvar_funnnel_v32i8:
1256+ ; AVX512VLVBMI2: # %bb.0:
1257+ ; AVX512VLVBMI2-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
1258+ ; AVX512VLVBMI2-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
1259+ ; AVX512VLVBMI2-NEXT: vmovdqa64 {{.*#+}} zmm3 = [8,72,9,73,10,74,11,75,12,76,13,77,14,78,15,79,24,88,25,89,26,90,27,91,28,92,29,93,30,94,31,95,0,64,1,65,2,66,3,67,4,68,5,69,6,70,7,71,16,80,17,81,18,82,19,83,20,84,21,85,22,86,23,87]
1260+ ; AVX512VLVBMI2-NEXT: vpermi2b %zmm0, %zmm1, %zmm3
1261+ ; AVX512VLVBMI2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
1262+ ; AVX512VLVBMI2-NEXT: vpsrlw %xmm0, %zmm3, %zmm0
1263+ ; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} ymm1 = [32,34,36,38,40,42,44,46,0,2,4,6,8,10,12,14,48,50,52,54,56,58,60,62,16,18,20,22,24,26,28,30]
1264+ ; AVX512VLVBMI2-NEXT: vpermb %zmm0, %zmm1, %zmm0
1265+ ; AVX512VLVBMI2-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
1266+ ; AVX512VLVBMI2-NEXT: retq
1267+ ;
1268+ ; AVX10_256-LABEL: splatvar_funnnel_v32i8:
1269+ ; AVX10_256: # %bb.0:
1270+ ; AVX10_256-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
1271+ ; AVX10_256-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
1272+ ; AVX10_256-NEXT: vpsrlw %xmm2, %ymm3, %ymm3
1273+ ; AVX10_256-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
1274+ ; AVX10_256-NEXT: vpsrlw %xmm2, %ymm0, %ymm1
1275+ ; AVX10_256-NEXT: vmovdqa {{.*#+}} ymm0 = [0,2,4,6,8,10,12,14,32,34,36,38,40,42,44,46,16,18,20,22,24,26,28,30,48,50,52,54,56,58,60,62]
1276+ ; AVX10_256-NEXT: vpermi2b %ymm3, %ymm1, %ymm0
1277+ ; AVX10_256-NEXT: retq
12641278;
12651279; XOPAVX1-LABEL: splatvar_funnnel_v32i8:
12661280; XOPAVX1: # %bb.0:
0 commit comments