Skip to content

Commit 6338a61

Browse files
committed
Implement SplitOpsAndApply
1 parent 8042037 commit 6338a61

File tree

2 files changed

+26
-3
lines changed

2 files changed

+26
-3
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4462,6 +4462,7 @@ SDValue SplitOpsAndApply(SelectionDAG &DAG, const X86Subtarget &Subtarget,
44624462
unsigned NumSubs = 1;
44634463
if ((CheckBWI && Subtarget.useBWIRegs()) ||
44644464
(!CheckBWI && Subtarget.useAVX512Regs())) {
4465+
// if (0) {
44654466
if (VT.getSizeInBits() > 512) {
44664467
NumSubs = VT.getSizeInBits() / 512;
44674468
assert((VT.getSizeInBits() % 512) == 0 && "Illegal vector size");
@@ -57967,6 +57968,8 @@ static SDValue pushAddIntoCmovOfConsts(SDNode *N, const SDLoc &DL,
5796757968
Cmov.getOperand(3));
5796857969
}
5796957970

57971+
// Attempt to turn ADD(MUL(x, y), acc) -> VPMADD52L
57972+
// when the upper 12 bits of x, y and MUL(x, y) are known to be 0.
5797057973
static SDValue matchVPMADD52(SDNode *N, SelectionDAG &DAG, const SDLoc &DL,
5797157974
EVT VT, const X86Subtarget &Subtarget) {
5797257975
using namespace SDPatternMatch;
@@ -57990,7 +57993,16 @@ static SDValue matchVPMADD52(SDNode *N, SelectionDAG &DAG, const SDLoc &DL,
5799057993
KnownMul.countMinLeadingZeros() < 12)
5799157994
return SDValue();
5799257995

57993-
return DAG.getNode(X86ISD::VPMADD52L, DL, VT, Acc, X, Y);
57996+
auto VPMADD52Builder = [](SelectionDAG &G, SDLoc DL,
57997+
ArrayRef<SDValue> SubOps) {
57998+
EVT SubVT = SubOps[0].getValueType();
57999+
assert(SubVT.getScalarSizeInBits() == 64);
58000+
return G.getNode(X86ISD::VPMADD52L, DL, SubVT, SubOps[0] /*Acc*/,
58001+
SubOps[1] /*X*/, SubOps[2] /*Y*/);
58002+
};
58003+
58004+
return SplitOpsAndApply(DAG, Subtarget, DL, VT, {Acc, X, Y}, VPMADD52Builder,
58005+
/*CheckBWI*/ false);
5799458006
}
5799558007

5799658008
static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,

llvm/test/CodeGen/X86/ifma-combine-vpmadd52.ll

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
21
; RUN: llc < %s -O1 -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64
32

43
; 67108863 == (1 << 26) - 1
@@ -200,7 +199,19 @@ entry:
200199
ret <4 x i64> %res
201200
}
202201

202+
define <16 x i64> @v16_test_split(<16 x i64> %a, <16 x i64> %b, <16 x i64> %acc) #1 {
203+
; X64-LABEL: v16_test_split:
204+
; X64: vpmadd52luq
205+
; X64: vpmadd52luq
206+
; X64: ret
207+
entry:
208+
%a26 = and <16 x i64> %a, splat (i64 67108863)
209+
%b26 = and <16 x i64> %b, splat (i64 67108863)
210+
%mul = mul <16 x i64> %a26, %b26
211+
%res = add <16 x i64> %acc, %mul
212+
ret <16 x i64> %res
213+
}
214+
203215
attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable "target-features"="+avx,+avx2,+avx512dq,+avx512f,+avx512ifma,-avx512vl,+cmov,+crc32,+evex512,+cx8,+f16c,+fma,+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" "tune-cpu"="generic" }
204216
attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable "target-features"="+avx,+avx2,+avx512dq,+avx512f,+avx512ifma,+avx512vl,+cmov,+crc32,+cx8,+f16c,+fma,+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" "tune-cpu"="generic" }
205217
attributes #2 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable "target-features"="+avx,+avx2,+avx512dq,+avx512f,+avx512vl,+avxifma,+cmov,+crc32,+cx8,+f16c,+fma,+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" "tune-cpu"="generic" }
206-
attributes #3 = { "target-features"="+avx512dq,+avx512f,+avx512ifma,+avx512vl,-evex512" }

0 commit comments

Comments
 (0)