@@ -1105,19 +1105,18 @@ define void @load_i16_stride2_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1) no
11051105; AVX512-VL-NEXT: vmovdqa64 64(%rdi), %zmm1
11061106; AVX512-VL-NEXT: vmovdqa64 128(%rdi), %zmm2
11071107; AVX512-VL-NEXT: vmovdqa64 192(%rdi), %zmm3
1108- ; AVX512-VL-NEXT: vpmovdw %zmm1 , %ymm4
1109- ; AVX512-VL-NEXT: vpsrld $16, %zmm1, %zmm1
1110- ; AVX512-VL-NEXT: vpsrld $16, %zmm0 , %zmm5
1111- ; AVX512-VL-NEXT: vpsrld $16, %zmm3, %zmm6
1112- ; AVX512-VL-NEXT: vpsrld $16, %zmm2, %zmm7
1108+ ; AVX512-VL-NEXT: vpsrld $16, %zmm0 , %zmm4
1109+ ; AVX512-VL-NEXT: vpsrld $16, %zmm1, %zmm5
1110+ ; AVX512-VL-NEXT: vpsrld $16, %zmm2 , %zmm6
1111+ ; AVX512-VL-NEXT: vpsrld $16, %zmm3, %zmm7
1112+ ; AVX512-VL-NEXT: vpmovdw %zmm1, 32(%rsi)
11131113; AVX512-VL-NEXT: vpmovdw %zmm0, (%rsi)
1114- ; AVX512-VL-NEXT: vmovdqa %ymm4, 32(%rsi)
1115- ; AVX512-VL-NEXT: vpmovdw %zmm2, 64(%rsi)
11161114; AVX512-VL-NEXT: vpmovdw %zmm3, 96(%rsi)
1117- ; AVX512-VL-NEXT: vpmovdw %zmm7, 64(%rdx)
1118- ; AVX512-VL-NEXT: vpmovdw %zmm6, 96(%rdx)
1119- ; AVX512-VL-NEXT: vpmovdw %zmm5, (%rdx)
1120- ; AVX512-VL-NEXT: vpmovdw %zmm1, 32(%rdx)
1115+ ; AVX512-VL-NEXT: vpmovdw %zmm2, 64(%rsi)
1116+ ; AVX512-VL-NEXT: vpmovdw %zmm7, 96(%rdx)
1117+ ; AVX512-VL-NEXT: vpmovdw %zmm6, 64(%rdx)
1118+ ; AVX512-VL-NEXT: vpmovdw %zmm5, 32(%rdx)
1119+ ; AVX512-VL-NEXT: vpmovdw %zmm4, (%rdx)
11211120; AVX512-VL-NEXT: vzeroupper
11221121; AVX512-VL-NEXT: retq
11231122;
@@ -1127,19 +1126,18 @@ define void @load_i16_stride2_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1) no
11271126; AVX512-FCP-NEXT: vmovdqa64 64(%rdi), %zmm1
11281127; AVX512-FCP-NEXT: vmovdqa64 128(%rdi), %zmm2
11291128; AVX512-FCP-NEXT: vmovdqa64 192(%rdi), %zmm3
1130- ; AVX512-FCP-NEXT: vpmovdw %zmm1 , %ymm4
1131- ; AVX512-FCP-NEXT: vpsrld $16, %zmm1, %zmm1
1132- ; AVX512-FCP-NEXT: vpsrld $16, %zmm0 , %zmm5
1133- ; AVX512-FCP-NEXT: vpsrld $16, %zmm3, %zmm6
1134- ; AVX512-FCP-NEXT: vpsrld $16, %zmm2, %zmm7
1129+ ; AVX512-FCP-NEXT: vpsrld $16, %zmm0 , %zmm4
1130+ ; AVX512-FCP-NEXT: vpsrld $16, %zmm1, %zmm5
1131+ ; AVX512-FCP-NEXT: vpsrld $16, %zmm2 , %zmm6
1132+ ; AVX512-FCP-NEXT: vpsrld $16, %zmm3, %zmm7
1133+ ; AVX512-FCP-NEXT: vpmovdw %zmm1, 32(%rsi)
11351134; AVX512-FCP-NEXT: vpmovdw %zmm0, (%rsi)
1136- ; AVX512-FCP-NEXT: vmovdqa %ymm4, 32(%rsi)
1137- ; AVX512-FCP-NEXT: vpmovdw %zmm2, 64(%rsi)
11381135; AVX512-FCP-NEXT: vpmovdw %zmm3, 96(%rsi)
1139- ; AVX512-FCP-NEXT: vpmovdw %zmm7, 64(%rdx)
1140- ; AVX512-FCP-NEXT: vpmovdw %zmm6, 96(%rdx)
1141- ; AVX512-FCP-NEXT: vpmovdw %zmm5, (%rdx)
1142- ; AVX512-FCP-NEXT: vpmovdw %zmm1, 32(%rdx)
1136+ ; AVX512-FCP-NEXT: vpmovdw %zmm2, 64(%rsi)
1137+ ; AVX512-FCP-NEXT: vpmovdw %zmm7, 96(%rdx)
1138+ ; AVX512-FCP-NEXT: vpmovdw %zmm6, 64(%rdx)
1139+ ; AVX512-FCP-NEXT: vpmovdw %zmm5, 32(%rdx)
1140+ ; AVX512-FCP-NEXT: vpmovdw %zmm4, (%rdx)
11431141; AVX512-FCP-NEXT: vzeroupper
11441142; AVX512-FCP-NEXT: retq
11451143;
@@ -1149,19 +1147,18 @@ define void @load_i16_stride2_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1) no
11491147; AVX512DQ-NEXT: vmovdqa64 64(%rdi), %zmm1
11501148; AVX512DQ-NEXT: vmovdqa64 128(%rdi), %zmm2
11511149; AVX512DQ-NEXT: vmovdqa64 192(%rdi), %zmm3
1152- ; AVX512DQ-NEXT: vpmovdw %zmm1 , %ymm4
1153- ; AVX512DQ-NEXT: vpsrld $16, %zmm1, %zmm1
1154- ; AVX512DQ-NEXT: vpsrld $16, %zmm0 , %zmm5
1155- ; AVX512DQ-NEXT: vpsrld $16, %zmm3, %zmm6
1156- ; AVX512DQ-NEXT: vpsrld $16, %zmm2, %zmm7
1150+ ; AVX512DQ-NEXT: vpsrld $16, %zmm0 , %zmm4
1151+ ; AVX512DQ-NEXT: vpsrld $16, %zmm1, %zmm5
1152+ ; AVX512DQ-NEXT: vpsrld $16, %zmm2 , %zmm6
1153+ ; AVX512DQ-NEXT: vpsrld $16, %zmm3, %zmm7
1154+ ; AVX512DQ-NEXT: vpmovdw %zmm1, 32(%rsi)
11571155; AVX512DQ-NEXT: vpmovdw %zmm0, (%rsi)
1158- ; AVX512DQ-NEXT: vmovdqa %ymm4, 32(%rsi)
1159- ; AVX512DQ-NEXT: vpmovdw %zmm2, 64(%rsi)
11601156; AVX512DQ-NEXT: vpmovdw %zmm3, 96(%rsi)
1161- ; AVX512DQ-NEXT: vpmovdw %zmm7, 64(%rdx)
1162- ; AVX512DQ-NEXT: vpmovdw %zmm6, 96(%rdx)
1163- ; AVX512DQ-NEXT: vpmovdw %zmm5, (%rdx)
1164- ; AVX512DQ-NEXT: vpmovdw %zmm1, 32(%rdx)
1157+ ; AVX512DQ-NEXT: vpmovdw %zmm2, 64(%rsi)
1158+ ; AVX512DQ-NEXT: vpmovdw %zmm7, 96(%rdx)
1159+ ; AVX512DQ-NEXT: vpmovdw %zmm6, 64(%rdx)
1160+ ; AVX512DQ-NEXT: vpmovdw %zmm5, 32(%rdx)
1161+ ; AVX512DQ-NEXT: vpmovdw %zmm4, (%rdx)
11651162; AVX512DQ-NEXT: vzeroupper
11661163; AVX512DQ-NEXT: retq
11671164;
@@ -1171,19 +1168,18 @@ define void @load_i16_stride2_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1) no
11711168; AVX512DQ-FCP-NEXT: vmovdqa64 64(%rdi), %zmm1
11721169; AVX512DQ-FCP-NEXT: vmovdqa64 128(%rdi), %zmm2
11731170; AVX512DQ-FCP-NEXT: vmovdqa64 192(%rdi), %zmm3
1174- ; AVX512DQ-FCP-NEXT: vpmovdw %zmm1 , %ymm4
1175- ; AVX512DQ-FCP-NEXT: vpsrld $16, %zmm1, %zmm1
1176- ; AVX512DQ-FCP-NEXT: vpsrld $16, %zmm0 , %zmm5
1177- ; AVX512DQ-FCP-NEXT: vpsrld $16, %zmm3, %zmm6
1178- ; AVX512DQ-FCP-NEXT: vpsrld $16, %zmm2, %zmm7
1171+ ; AVX512DQ-FCP-NEXT: vpsrld $16, %zmm0 , %zmm4
1172+ ; AVX512DQ-FCP-NEXT: vpsrld $16, %zmm1, %zmm5
1173+ ; AVX512DQ-FCP-NEXT: vpsrld $16, %zmm2 , %zmm6
1174+ ; AVX512DQ-FCP-NEXT: vpsrld $16, %zmm3, %zmm7
1175+ ; AVX512DQ-FCP-NEXT: vpmovdw %zmm1, 32(%rsi)
11791176; AVX512DQ-FCP-NEXT: vpmovdw %zmm0, (%rsi)
1180- ; AVX512DQ-FCP-NEXT: vmovdqa %ymm4, 32(%rsi)
1181- ; AVX512DQ-FCP-NEXT: vpmovdw %zmm2, 64(%rsi)
11821177; AVX512DQ-FCP-NEXT: vpmovdw %zmm3, 96(%rsi)
1183- ; AVX512DQ-FCP-NEXT: vpmovdw %zmm7, 64(%rdx)
1184- ; AVX512DQ-FCP-NEXT: vpmovdw %zmm6, 96(%rdx)
1185- ; AVX512DQ-FCP-NEXT: vpmovdw %zmm5, (%rdx)
1186- ; AVX512DQ-FCP-NEXT: vpmovdw %zmm1, 32(%rdx)
1178+ ; AVX512DQ-FCP-NEXT: vpmovdw %zmm2, 64(%rsi)
1179+ ; AVX512DQ-FCP-NEXT: vpmovdw %zmm7, 96(%rdx)
1180+ ; AVX512DQ-FCP-NEXT: vpmovdw %zmm6, 64(%rdx)
1181+ ; AVX512DQ-FCP-NEXT: vpmovdw %zmm5, 32(%rdx)
1182+ ; AVX512DQ-FCP-NEXT: vpmovdw %zmm4, (%rdx)
11871183; AVX512DQ-FCP-NEXT: vzeroupper
11881184; AVX512DQ-FCP-NEXT: retq
11891185;
0 commit comments