@@ -937,6 +937,65 @@ define <16 x i32> @zext_mulhuw_v16i16_lshr(<16 x i16> %a, <16 x i16> %b) {
937937 ret <16 x i32 > %d
938938}
939939
940+ ; PR109790 - high 16 bits of (zext nneg (and x, 32767)) * -1000, computed in i32 lanes and truncated back to <16 x i16>, then stored through the sret pointer. The checks below expect pmulhw/vpmulhw on SSE/AVX2 and a zero-extend to 32-bit lanes + vpmulld/vpmaddwd + vpsrld $16 on AVX512.
941+ define void @PR109790 (ptr sret ([32 x i8 ]) %ret , ptr %a ) {
942+ ; SSE-LABEL: PR109790:
943+ ; SSE: # %bb.0:
944+ ; SSE-NEXT: movq %rdi, %rax
945+ ; SSE-NEXT: movdqa {{.*#+}} xmm0 = [32767,32767,32767,32767,32767,32767,32767,32767]
946+ ; SSE-NEXT: movdqa (%rsi), %xmm1
947+ ; SSE-NEXT: pand %xmm0, %xmm1
948+ ; SSE-NEXT: pand 16(%rsi), %xmm0
949+ ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [64536,64536,64536,64536,64536,64536,64536,64536]
950+ ; SSE-NEXT: pmulhw %xmm2, %xmm0
951+ ; SSE-NEXT: pmulhw %xmm2, %xmm1
952+ ; SSE-NEXT: movdqa %xmm1, (%rdi)
953+ ; SSE-NEXT: movdqa %xmm0, 16(%rdi)
954+ ; SSE-NEXT: retq
955+ ;
956+ ; AVX2-LABEL: PR109790:
957+ ; AVX2: # %bb.0:
958+ ; AVX2-NEXT: movq %rdi, %rax
959+ ; AVX2-NEXT: vmovdqa (%rsi), %ymm0
960+ ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
961+ ; AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536,64536]
962+ ; AVX2-NEXT: vmovdqa %ymm0, (%rdi)
963+ ; AVX2-NEXT: vzeroupper
964+ ; AVX2-NEXT: retq
965+ ;
966+ ; AVX512F-LABEL: PR109790:
967+ ; AVX512F: # %bb.0:
968+ ; AVX512F-NEXT: movq %rdi, %rax
969+ ; AVX512F-NEXT: vmovdqa (%rsi), %ymm0
970+ ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
971+ ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
972+ ; AVX512F-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
973+ ; AVX512F-NEXT: vpsrld $16, %zmm0, %zmm0
974+ ; AVX512F-NEXT: vpmovdw %zmm0, (%rdi)
975+ ; AVX512F-NEXT: vzeroupper
976+ ; AVX512F-NEXT: retq
977+ ;
978+ ; AVX512BW-LABEL: PR109790:
979+ ; AVX512BW: # %bb.0:
980+ ; AVX512BW-NEXT: movq %rdi, %rax
981+ ; AVX512BW-NEXT: vmovdqa (%rsi), %ymm0
982+ ; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
983+ ; AVX512BW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
984+ ; AVX512BW-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 # [64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0,64536,0]
985+ ; AVX512BW-NEXT: vpsrld $16, %zmm0, %zmm0
986+ ; AVX512BW-NEXT: vpmovdw %zmm0, (%rdi)
987+ ; AVX512BW-NEXT: vzeroupper
988+ ; AVX512BW-NEXT: retq
989+ %load = load <16 x i16 >, ptr %a , align 32
990+ %and = and <16 x i16 > %load , <i16 32767 , i16 32767 , i16 32767 , i16 32767 , i16 32767 , i16 32767 , i16 32767 , i16 32767 , i16 32767 , i16 32767 , i16 32767 , i16 32767 , i16 32767 , i16 32767 , i16 32767 , i16 32767 > ; clear bit 15 of every lane
991+ %ext = zext nneg <16 x i16 > %and to <16 x i32 > ; nneg holds: every lane is non-negative after the mask
992+ %mul = mul nsw <16 x i32 > %ext , <i32 -1000 , i32 -1000 , i32 -1000 , i32 -1000 , i32 -1000 , i32 -1000 , i32 -1000 , i32 -1000 , i32 -1000 , i32 -1000 , i32 -1000 , i32 -1000 , i32 -1000 , i32 -1000 , i32 -1000 , i32 -1000 > ; negative multiplier in every lane
993+ %srl = lshr <16 x i32 > %mul , <i32 16 , i32 16 , i32 16 , i32 16 , i32 16 , i32 16 , i32 16 , i32 16 , i32 16 , i32 16 , i32 16 , i32 16 , i32 16 , i32 16 , i32 16 , i32 16 > ; bring bits 31:16 of each product down to bits 15:0
994+ %res = trunc nuw <16 x i32 > %srl to <16 x i16 > ; the dropped upper 16 bits are zero after the lshr
995+ store <16 x i16 > %res , ptr %ret , align 32
996+ ret void
997+ }
998+
940999; PR109790
9411000define <16 x i16 > @zext_mulhuw_v16i16_negative_constant (<16 x i16 > %a ) {
9421001; SSE-LABEL: zext_mulhuw_v16i16_negative_constant:
0 commit comments