-
Notifications
You must be signed in to change notification settings - Fork 15k
[RISCV] Match fmaxnum and fminnum to reduction ops. #159244
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
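This patch teaches the RISC-V backend to match fmaxnum and fminnum operations on elements extracted from a vector to vector reductions (VECREDUCE_FMAX and VECREDUCE_FMIN).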
|
@llvm/pr-subscribers-backend-risc-v Author: Jim Lin (tclin914) Changes: This patch tries to match fmaxnum and fminnum to vector reductions. Full diff: https://github.com/llvm/llvm-project/pull/159244.diff 2 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 5485b916c2031..c3f39caeb4293 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -15137,6 +15137,10 @@ static unsigned getVecReduceOpcode(unsigned Opc) {
case ISD::FADD:
// Note: This is the associative form of the generic reduction opcode.
return ISD::VECREDUCE_FADD;
+ case ISD::FMAXNUM:
+ return ISD::VECREDUCE_FMAX;
+ case ISD::FMINNUM:
+ return ISD::VECREDUCE_FMIN;
}
}
@@ -15165,13 +15169,22 @@ combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
const EVT VT = N->getValueType(0);
const unsigned Opc = N->getOpcode();
- // For FADD, we only handle the case with reassociation allowed. We
- // could handle strict reduction order, but at the moment, there's no
- // known reason to, and the complexity isn't worth it.
- // TODO: Handle fminnum and fmaxnum here
- if (!VT.isInteger() &&
- (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
- return SDValue();
+ if (!VT.isInteger()) {
+ switch (Opc) {
+ default:
+ return SDValue();
+ case ISD::FADD:
+ // For FADD, we only handle the case with reassociation allowed. We
+ // could handle strict reduction order, but at the moment, there's no
+ // known reason to, and the complexity isn't worth it.
+ if (!N->getFlags().hasAllowReassociation())
+ return SDValue();
+ break;
+ case ISD::FMAXNUM:
+ case ISD::FMINNUM:
+ break;
+ }
+ }
const unsigned ReduceOpc = getVecReduceOpcode(Opc);
assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll
index bf8baafc4a25d..754941eb93e01 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll
@@ -904,3 +904,33 @@ define float @reduce_fadd_4xi32_non_associative2(ptr %p) {
%fadd2 = fadd fast float %fadd1, %e3
ret float %fadd2
}
+
+define float @reduce_fmaxnum_16xf32_prefix2(ptr %p) {
+; CHECK-LABEL: reduce_fmaxnum_16xf32_prefix2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vfredmax.vs v8, v8, v8
+; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: ret
+ %v = load <16 x float>, ptr %p, align 256
+ %e0 = extractelement <16 x float> %v, i32 0
+ %e1 = extractelement <16 x float> %v, i32 1
+ %fmax0 = call float @llvm.maxnum.f32(float %e0, float %e1)
+ ret float %fmax0
+}
+
+define float @reduce_fminnum_16xf32_prefix2(ptr %p) {
+; CHECK-LABEL: reduce_fminnum_16xf32_prefix2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vfredmin.vs v8, v8, v8
+; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: ret
+ %v = load <16 x float>, ptr %p, align 256
+ %e0 = extractelement <16 x float> %v, i32 0
+ %e1 = extractelement <16 x float> %v, i32 1
+ %fmax0 = call float @llvm.minnum.f32(float %e0, float %e1)
+ ret float %fmax0
+}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
This patch tries to match fmaxnum and fminnum to vector reductions.