Skip to content

Commit 4ee4bc3

Browse files
committed
[InstCombine][X86] Add zero/undef arg handling for PMULH/PMULHU/PMULHRS intrinsics
1 parent c20695a commit 4ee4bc3

File tree

4 files changed

+71
-72
lines changed

4 files changed

+71
-72
lines changed

llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -502,6 +502,27 @@ static Value *simplifyX86pack(IntrinsicInst &II,
502502
return Builder.CreateTrunc(Shuffle, ResTy);
503503
}
504504

505+
/// Try to fold a PMULH-family intrinsic call whose result is already decided
/// by one of its two operands.
///
/// \param II      The intrinsic call; its result type and first operand type
///                are both cast to FixedVectorType and asserted to be the
///                same type with 16-bit scalar elements.
/// \param Builder Not used by the current folds.
/// \returns An all-zero constant of the result type when either operand is
///          undef or an all-zero aggregate; nullptr when no fold applies.
static Value *simplifyX86pmulh(IntrinsicInst &II,
                               InstCombiner::BuilderTy &Builder) {
  auto *ResTy = cast<FixedVectorType>(II.getType());
  Value *LHS = II.getArgOperand(0);
  Value *RHS = II.getArgOperand(1);
  [[maybe_unused]] auto *ArgTy = cast<FixedVectorType>(LHS->getType());
  assert(ArgTy == ResTy && ResTy->getScalarSizeInBits() == 16 &&
         "Unexpected PMULH types");

  // Either kind of operand pins the result to zero:
  //  * undef  - must fold to zero rather than undef, because the other
  //             operand could still be zero;
  //  * zero   - a multiply by zero.
  auto ForcesZeroResult = [](const Value *Op) {
    return isa<UndefValue>(Op) || isa<ConstantAggregateZero>(Op);
  };
  if (ForcesZeroResult(LHS) || ForcesZeroResult(RHS))
    return ConstantAggregateZero::get(ResTy);

  // TODO: Constant folding.
  return nullptr;
}
525+
505526
static Value *simplifyX86pmadd(IntrinsicInst &II,
506527
InstCombiner::BuilderTy &Builder,
507528
bool IsPMADDWD) {
@@ -2568,6 +2589,20 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
25682589
}
25692590
break;
25702591

2592+
case Intrinsic::x86_sse2_pmulh_w:
2593+
case Intrinsic::x86_avx2_pmulh_w:
2594+
case Intrinsic::x86_avx512_pmulh_w_512:
2595+
case Intrinsic::x86_sse2_pmulhu_w:
2596+
case Intrinsic::x86_avx2_pmulhu_w:
2597+
case Intrinsic::x86_avx512_pmulhu_w_512:
2598+
case Intrinsic::x86_ssse3_pmul_hr_sw_128:
2599+
case Intrinsic::x86_avx2_pmul_hr_sw:
2600+
case Intrinsic::x86_avx512_pmul_hr_sw_512:
2601+
if (Value *V = simplifyX86pmulh(II, IC.Builder)) {
2602+
return IC.replaceInstUsesWith(II, V);
2603+
}
2604+
break;
2605+
25712606
case Intrinsic::x86_sse2_pmadd_wd:
25722607
case Intrinsic::x86_avx2_pmadd_wd:
25732608
case Intrinsic::x86_avx512_pmaddw_d_512:

llvm/test/Transforms/InstCombine/X86/x86-pmulh.ll

Lines changed: 12 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -7,53 +7,47 @@
77

88
define <8 x i16> @undef_pmulh_128(<8 x i16> %a0) {
99
; CHECK-LABEL: @undef_pmulh_128(
10-
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> [[A0:%.*]], <8 x i16> undef)
11-
; CHECK-NEXT: ret <8 x i16> [[TMP1]]
10+
; CHECK-NEXT: ret <8 x i16> zeroinitializer
1211
;
1312
%1 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> undef)
1413
ret <8 x i16> %1
1514
}
1615

1716
define <8 x i16> @undef_pmulh_128_commute(<8 x i16> %a0) {
1817
; CHECK-LABEL: @undef_pmulh_128_commute(
19-
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> undef, <8 x i16> [[A0:%.*]])
20-
; CHECK-NEXT: ret <8 x i16> [[TMP1]]
18+
; CHECK-NEXT: ret <8 x i16> zeroinitializer
2119
;
2220
%1 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> undef, <8 x i16> %a0)
2321
ret <8 x i16> %1
2422
}
2523

2624
define <16 x i16> @undef_pmulh_256(<16 x i16> %a0) {
2725
; CHECK-LABEL: @undef_pmulh_256(
28-
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> [[A0:%.*]], <16 x i16> undef)
29-
; CHECK-NEXT: ret <16 x i16> [[TMP1]]
26+
; CHECK-NEXT: ret <16 x i16> zeroinitializer
3027
;
3128
%1 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %a0, <16 x i16> undef)
3229
ret <16 x i16> %1
3330
}
3431

3532
define <16 x i16> @undef_pmulh_256_commute(<16 x i16> %a0) {
3633
; CHECK-LABEL: @undef_pmulh_256_commute(
37-
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> undef, <16 x i16> [[A0:%.*]])
38-
; CHECK-NEXT: ret <16 x i16> [[TMP1]]
34+
; CHECK-NEXT: ret <16 x i16> zeroinitializer
3935
;
4036
%1 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> undef, <16 x i16> %a0)
4137
ret <16 x i16> %1
4238
}
4339

4440
define <32 x i16> @undef_pmulh_512(<32 x i16> %a0) {
4541
; CHECK-LABEL: @undef_pmulh_512(
46-
; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> [[A0:%.*]], <32 x i16> undef)
47-
; CHECK-NEXT: ret <32 x i16> [[TMP1]]
42+
; CHECK-NEXT: ret <32 x i16> zeroinitializer
4843
;
4944
%1 = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> %a0, <32 x i16> undef)
5045
ret <32 x i16> %1
5146
}
5247

5348
define <32 x i16> @undef_pmulh_512_commute(<32 x i16> %a0) {
5449
; CHECK-LABEL: @undef_pmulh_512_commute(
55-
; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> undef, <32 x i16> [[A0:%.*]])
56-
; CHECK-NEXT: ret <32 x i16> [[TMP1]]
50+
; CHECK-NEXT: ret <32 x i16> zeroinitializer
5751
;
5852
%1 = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> undef, <32 x i16> %a0)
5953
ret <32 x i16> %1
@@ -65,53 +59,47 @@ define <32 x i16> @undef_pmulh_512_commute(<32 x i16> %a0) {
6559

6660
define <8 x i16> @zero_pmulh_128(<8 x i16> %a0) {
6761
; CHECK-LABEL: @zero_pmulh_128(
68-
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> [[A0:%.*]], <8 x i16> zeroinitializer)
69-
; CHECK-NEXT: ret <8 x i16> [[TMP1]]
62+
; CHECK-NEXT: ret <8 x i16> zeroinitializer
7063
;
7164
%1 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> zeroinitializer)
7265
ret <8 x i16> %1
7366
}
7467

7568
define <8 x i16> @zero_pmulh_128_commute(<8 x i16> %a0) {
7669
; CHECK-LABEL: @zero_pmulh_128_commute(
77-
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> zeroinitializer, <8 x i16> [[A0:%.*]])
78-
; CHECK-NEXT: ret <8 x i16> [[TMP1]]
70+
; CHECK-NEXT: ret <8 x i16> zeroinitializer
7971
;
8072
%1 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> zeroinitializer, <8 x i16> %a0)
8173
ret <8 x i16> %1
8274
}
8375

8476
define <16 x i16> @zero_pmulh_256(<16 x i16> %a0) {
8577
; CHECK-LABEL: @zero_pmulh_256(
86-
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> [[A0:%.*]], <16 x i16> zeroinitializer)
87-
; CHECK-NEXT: ret <16 x i16> [[TMP1]]
78+
; CHECK-NEXT: ret <16 x i16> zeroinitializer
8879
;
8980
%1 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %a0, <16 x i16> zeroinitializer)
9081
ret <16 x i16> %1
9182
}
9283

9384
define <16 x i16> @zero_pmulh_256_commute(<16 x i16> %a0) {
9485
; CHECK-LABEL: @zero_pmulh_256_commute(
95-
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> zeroinitializer, <16 x i16> [[A0:%.*]])
96-
; CHECK-NEXT: ret <16 x i16> [[TMP1]]
86+
; CHECK-NEXT: ret <16 x i16> zeroinitializer
9787
;
9888
%1 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> zeroinitializer, <16 x i16> %a0)
9989
ret <16 x i16> %1
10090
}
10191

10292
define <32 x i16> @zero_pmulh_512(<32 x i16> %a0) {
10393
; CHECK-LABEL: @zero_pmulh_512(
104-
; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> [[A0:%.*]], <32 x i16> zeroinitializer)
105-
; CHECK-NEXT: ret <32 x i16> [[TMP1]]
94+
; CHECK-NEXT: ret <32 x i16> zeroinitializer
10695
;
10796
%1 = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> %a0, <32 x i16> zeroinitializer)
10897
ret <32 x i16> %1
10998
}
11099

111100
define <32 x i16> @zero_pmulh_512_commute(<32 x i16> %a0) {
112101
; CHECK-LABEL: @zero_pmulh_512_commute(
113-
; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> zeroinitializer, <32 x i16> [[A0:%.*]])
114-
; CHECK-NEXT: ret <32 x i16> [[TMP1]]
102+
; CHECK-NEXT: ret <32 x i16> zeroinitializer
115103
;
116104
%1 = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> zeroinitializer, <32 x i16> %a0)
117105
ret <32 x i16> %1

llvm/test/Transforms/InstCombine/X86/x86-pmulhrs.ll

Lines changed: 12 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -7,53 +7,47 @@
77

88
define <8 x i16> @undef_pmulh_128(<8 x i16> %a0) {
99
; CHECK-LABEL: @undef_pmulh_128(
10-
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> [[A0:%.*]], <8 x i16> undef)
11-
; CHECK-NEXT: ret <8 x i16> [[TMP1]]
10+
; CHECK-NEXT: ret <8 x i16> zeroinitializer
1211
;
1312
%1 = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> undef)
1413
ret <8 x i16> %1
1514
}
1615

1716
define <8 x i16> @undef_pmulh_128_commute(<8 x i16> %a0) {
1817
; CHECK-LABEL: @undef_pmulh_128_commute(
19-
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> undef, <8 x i16> [[A0:%.*]])
20-
; CHECK-NEXT: ret <8 x i16> [[TMP1]]
18+
; CHECK-NEXT: ret <8 x i16> zeroinitializer
2119
;
2220
%1 = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> undef, <8 x i16> %a0)
2321
ret <8 x i16> %1
2422
}
2523

2624
define <16 x i16> @undef_pmulh_256(<16 x i16> %a0) {
2725
; CHECK-LABEL: @undef_pmulh_256(
28-
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> [[A0:%.*]], <16 x i16> undef)
29-
; CHECK-NEXT: ret <16 x i16> [[TMP1]]
26+
; CHECK-NEXT: ret <16 x i16> zeroinitializer
3027
;
3128
%1 = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %a0, <16 x i16> undef)
3229
ret <16 x i16> %1
3330
}
3431

3532
define <16 x i16> @undef_pmulh_256_commute(<16 x i16> %a0) {
3633
; CHECK-LABEL: @undef_pmulh_256_commute(
37-
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> undef, <16 x i16> [[A0:%.*]])
38-
; CHECK-NEXT: ret <16 x i16> [[TMP1]]
34+
; CHECK-NEXT: ret <16 x i16> zeroinitializer
3935
;
4036
%1 = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> undef, <16 x i16> %a0)
4137
ret <16 x i16> %1
4238
}
4339

4440
define <32 x i16> @undef_pmulh_512(<32 x i16> %a0) {
4541
; CHECK-LABEL: @undef_pmulh_512(
46-
; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> [[A0:%.*]], <32 x i16> undef)
47-
; CHECK-NEXT: ret <32 x i16> [[TMP1]]
42+
; CHECK-NEXT: ret <32 x i16> zeroinitializer
4843
;
4944
%1 = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> %a0, <32 x i16> undef)
5045
ret <32 x i16> %1
5146
}
5247

5348
define <32 x i16> @undef_pmulh_512_commute(<32 x i16> %a0) {
5449
; CHECK-LABEL: @undef_pmulh_512_commute(
55-
; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> undef, <32 x i16> [[A0:%.*]])
56-
; CHECK-NEXT: ret <32 x i16> [[TMP1]]
50+
; CHECK-NEXT: ret <32 x i16> zeroinitializer
5751
;
5852
%1 = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> undef, <32 x i16> %a0)
5953
ret <32 x i16> %1
@@ -65,53 +59,47 @@ define <32 x i16> @undef_pmulh_512_commute(<32 x i16> %a0) {
6559

6660
define <8 x i16> @zero_pmulh_128(<8 x i16> %a0) {
6761
; CHECK-LABEL: @zero_pmulh_128(
68-
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> [[A0:%.*]], <8 x i16> zeroinitializer)
69-
; CHECK-NEXT: ret <8 x i16> [[TMP1]]
62+
; CHECK-NEXT: ret <8 x i16> zeroinitializer
7063
;
7164
%1 = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> zeroinitializer)
7265
ret <8 x i16> %1
7366
}
7467

7568
define <8 x i16> @zero_pmulh_128_commute(<8 x i16> %a0) {
7669
; CHECK-LABEL: @zero_pmulh_128_commute(
77-
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> zeroinitializer, <8 x i16> [[A0:%.*]])
78-
; CHECK-NEXT: ret <8 x i16> [[TMP1]]
70+
; CHECK-NEXT: ret <8 x i16> zeroinitializer
7971
;
8072
%1 = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> zeroinitializer, <8 x i16> %a0)
8173
ret <8 x i16> %1
8274
}
8375

8476
define <16 x i16> @zero_pmulh_256(<16 x i16> %a0) {
8577
; CHECK-LABEL: @zero_pmulh_256(
86-
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> [[A0:%.*]], <16 x i16> zeroinitializer)
87-
; CHECK-NEXT: ret <16 x i16> [[TMP1]]
78+
; CHECK-NEXT: ret <16 x i16> zeroinitializer
8879
;
8980
%1 = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %a0, <16 x i16> zeroinitializer)
9081
ret <16 x i16> %1
9182
}
9283

9384
define <16 x i16> @zero_pmulh_256_commute(<16 x i16> %a0) {
9485
; CHECK-LABEL: @zero_pmulh_256_commute(
95-
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> zeroinitializer, <16 x i16> [[A0:%.*]])
96-
; CHECK-NEXT: ret <16 x i16> [[TMP1]]
86+
; CHECK-NEXT: ret <16 x i16> zeroinitializer
9787
;
9888
%1 = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> zeroinitializer, <16 x i16> %a0)
9989
ret <16 x i16> %1
10090
}
10191

10292
define <32 x i16> @zero_pmulh_512(<32 x i16> %a0) {
10393
; CHECK-LABEL: @zero_pmulh_512(
104-
; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> [[A0:%.*]], <32 x i16> zeroinitializer)
105-
; CHECK-NEXT: ret <32 x i16> [[TMP1]]
94+
; CHECK-NEXT: ret <32 x i16> zeroinitializer
10695
;
10796
%1 = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> %a0, <32 x i16> zeroinitializer)
10897
ret <32 x i16> %1
10998
}
11099

111100
define <32 x i16> @zero_pmulh_512_commute(<32 x i16> %a0) {
112101
; CHECK-LABEL: @zero_pmulh_512_commute(
113-
; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> zeroinitializer, <32 x i16> [[A0:%.*]])
114-
; CHECK-NEXT: ret <32 x i16> [[TMP1]]
102+
; CHECK-NEXT: ret <32 x i16> zeroinitializer
115103
;
116104
%1 = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> zeroinitializer, <32 x i16> %a0)
117105
ret <32 x i16> %1

0 commit comments

Comments
 (0)