@@ -937,6 +937,77 @@ define <16 x i32> @zext_mulhuw_v16i16_lshr(<16 x i16> %a, <16 x i16> %b) {
   ret <16 x i32> %d
 }
 
+; PR109790
+define <16 x i16> @zext_mulhuw_v16i16_negative_constant(<16 x i16> %a) {
+; SSE-LABEL: zext_mulhuw_v16i16_negative_constant:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [32767,32767,32767,32767,32767,32767,32767,32767]
+; SSE-NEXT:    pand %xmm2, %xmm1
+; SSE-NEXT:    pand %xmm2, %xmm0
+; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [64536,64536,64536,64536,64536,64536,64536,64536]
+; SSE-NEXT:    pmulhw %xmm2, %xmm0
+; SSE-NEXT:    pmulhw %xmm2, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: zext_mulhuw_v16i16_negative_constant:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX-NEXT:    vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536]
+; AVX-NEXT:    retq
+  %k = and <16 x i16> %a, <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>
+  %x = zext nneg <16 x i16> %k to <16 x i32>
+  %m = mul nsw <16 x i32> %x, <i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000, i32 -1000>
+  %s = lshr <16 x i32> %m, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+  %t = trunc nuw <16 x i32> %s to <16 x i16>
+  ret <16 x i16> %t
+}
+
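Note on the test above: the and with 32767 clears each lane's sign bit, so sign- and zero-extension of %k agree, and -1000 is representable as an i16 (it appears as the unsigned immediate 64536 = 0xFC18 in the checks). That is what makes the signed high-half multiply pmulhw a valid lowering for a pattern written with zext. A minimal scalar sketch of one lane, assuming this reading of the pattern (the helper name is invented, not part of the test file):

    #include <stdint.h>

    /* Scalar model of one lane of zext_mulhuw_v16i16_negative_constant.
       Masking to 15 bits keeps k non-negative as an i16, so the signed
       16x16->32 product (whose high half PMULHW returns) equals the
       zext-then-multiply product the IR performs. */
    static uint16_t mulh_neg1000_lane(uint16_t a) {
        uint16_t k = a & 0x7FFF;              /* and %a, 32767             */
        int32_t  m = (int32_t)k * -1000;      /* mul nsw on the zext value */
        return (uint16_t)((uint32_t)m >> 16); /* lshr 16, then trunc       */
    }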
+; PR109790
+define <16 x i16> @zext_mulhuw_v16i16_positive_constant(<16 x i16> %a) {
+; SSE-LABEL: zext_mulhuw_v16i16_positive_constant:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [32767,32767,32767,32767,32767,32767,32767,32767]
+; SSE-NEXT:    pand %xmm2, %xmm1
+; SSE-NEXT:    pand %xmm2, %xmm0
+; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [1000,1000,1000,1000,1000,1000,1000,1000]
+; SSE-NEXT:    pmulhw %xmm2, %xmm0
+; SSE-NEXT:    pmulhw %xmm2, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX2-LABEL: zext_mulhuw_v16i16_positive_constant:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm2 = [1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0]
+; AVX2-NEXT:    vpmulhuw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpmulhuw %ymm2, %ymm1, %ymm1
+; AVX2-NEXT:    vpackusdw %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: zext_mulhuw_v16i16_positive_constant:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512F-NEXT:    vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000]
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: zext_mulhuw_v16i16_positive_constant:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512BW-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; AVX512BW-NEXT:    vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 # [1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0,1000,0]
+; AVX512BW-NEXT:    vpmovdw %zmm0, %ymm0
+; AVX512BW-NEXT:    retq
+  %k = and <16 x i16> %a, <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>
+  %x = zext nneg <16 x i16> %k to <16 x i32>
+  %m = mul nuw nsw <16 x i32> %x, <i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000, i32 1000>
+  %s = lshr <16 x i32> %m, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+  %t = trunc nuw nsw <16 x i32> %s to <16 x i16>
+  ret <16 x i16> %t
+}
+
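The positive-constant variant differs only in the sign of the multiplier: per lane it is the sketch above with -1000 replaced by 1000, and since 32767 * 1000 fits comfortably in 31 bits the signed and unsigned high halves coincide. The checks record a split in the current codegen: SSE and AVX512F fold the whole pattern to pmulhw against a splat of 1000, while AVX2 and AVX512BW still widen each half to 32-bit lanes, multiply with vpmulhuw, and narrow again via vpackusdw/vpermq or vpmovdw.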
 define <16 x i32> @mulhsw_v16i16_lshr(<16 x i16> %a, <16 x i16> %b) {
 ; SSE2-LABEL: mulhsw_v16i16_lshr:
 ; SSE2:       # %bb.0:
@@ -2056,3 +2127,4 @@ define <8 x i16> @sse2_pmulhu_w_const(<8 x i16> %a0, <8 x i16> %a1) {
   ret <8 x i16> %res
 }
 declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>)
+