-
Notifications
You must be signed in to change notification settings - Fork 15.4k
release/20.x: [HEXAGON] Fix semantics of ordered FP compares (#131089) #131270
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@iajbar What do you think about merging this PR to the release branch? |
|
@llvm/pr-subscribers-backend-hexagon Author: None (llvmbot) Changes: Backport d642eec. Requested by: @androm3da. Full diff: https://github.com/llvm/llvm-project/pull/131270.diff (2 files affected):
diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td
index cba5ff1ab0d9b..244f204539c89 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatterns.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td
@@ -721,11 +721,6 @@ def: OpR_RR_pat<A2_vcmpwgtu, setugt, v2i1, V2I32>;
def: OpR_RR_pat<F2_sfcmpeq, seteq, i1, F32>;
def: OpR_RR_pat<F2_sfcmpgt, setgt, i1, F32>;
def: OpR_RR_pat<F2_sfcmpge, setge, i1, F32>;
-def: OpR_RR_pat<F2_sfcmpeq, setoeq, i1, F32>;
-def: OpR_RR_pat<F2_sfcmpgt, setogt, i1, F32>;
-def: OpR_RR_pat<F2_sfcmpge, setoge, i1, F32>;
-def: OpR_RR_pat<F2_sfcmpgt, RevCmp<setolt>, i1, F32>;
-def: OpR_RR_pat<F2_sfcmpge, RevCmp<setole>, i1, F32>;
def: OpR_RR_pat<F2_sfcmpgt, RevCmp<setlt>, i1, F32>;
def: OpR_RR_pat<F2_sfcmpge, RevCmp<setle>, i1, F32>;
def: OpR_RR_pat<F2_sfcmpuo, setuo, i1, F32>;
@@ -733,11 +728,6 @@ def: OpR_RR_pat<F2_sfcmpuo, setuo, i1, F32>;
def: OpR_RR_pat<F2_dfcmpeq, seteq, i1, F64>;
def: OpR_RR_pat<F2_dfcmpgt, setgt, i1, F64>;
def: OpR_RR_pat<F2_dfcmpge, setge, i1, F64>;
-def: OpR_RR_pat<F2_dfcmpeq, setoeq, i1, F64>;
-def: OpR_RR_pat<F2_dfcmpgt, setogt, i1, F64>;
-def: OpR_RR_pat<F2_dfcmpge, setoge, i1, F64>;
-def: OpR_RR_pat<F2_dfcmpgt, RevCmp<setolt>, i1, F64>;
-def: OpR_RR_pat<F2_dfcmpge, RevCmp<setole>, i1, F64>;
def: OpR_RR_pat<F2_dfcmpgt, RevCmp<setlt>, i1, F64>;
def: OpR_RR_pat<F2_dfcmpge, RevCmp<setle>, i1, F64>;
def: OpR_RR_pat<F2_dfcmpuo, setuo, i1, F64>;
@@ -900,15 +890,35 @@ def: OpmR_RR_pat<Cmpud<F2_dfcmpge>, RevCmp<setule>, i1, F64>;
def: OpmR_RR_pat<Cmpud<F2_dfcmpgt>, RevCmp<setult>, i1, F64>;
def: OpmR_RR_pat<Cmpudn<F2_dfcmpeq>, setune, i1, F64>;
-def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setone, i1, F32>;
-def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setne, i1, F32>;
+class T4<InstHexagon MI1, InstHexagon MI2, InstHexagon MI3, InstHexagon MI4>
+ : OutPatFrag<(ops node:$Rs, node:$Rt),
+ (MI1 (MI2 (MI3 $Rs, $Rt), (MI4 $Rs, $Rt)))>;
-def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setone, i1, F64>;
-def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setne, i1, F64>;
+class Cmpof<InstHexagon MI>: T3<C2_andn, MI, F2_sfcmpuo>;
+class Cmpod<InstHexagon MI>: T3<C2_andn, MI, F2_dfcmpuo>;
+
+class Cmpofn<InstHexagon MI>: T4<C2_not, C2_or, MI, F2_sfcmpuo>;
+class Cmpodn<InstHexagon MI>: T4<C2_not, C2_or, MI, F2_dfcmpuo>;
+
+def: OpmR_RR_pat<Cmpof<F2_sfcmpeq>, setoeq, i1, F32>;
+def: OpmR_RR_pat<Cmpof<F2_sfcmpge>, setoge, i1, F32>;
+def: OpmR_RR_pat<Cmpof<F2_sfcmpgt>, setogt, i1, F32>;
+def: OpmR_RR_pat<Cmpof<F2_sfcmpge>, RevCmp<setole>, i1, F32>;
+def: OpmR_RR_pat<Cmpof<F2_sfcmpgt>, RevCmp<setolt>, i1, F32>;
+def: OpmR_RR_pat<Cmpofn<F2_sfcmpeq>, setone, i1, F32>;
+
+def: OpmR_RR_pat<Cmpod<F2_dfcmpeq>, setoeq, i1, F64>;
+def: OpmR_RR_pat<Cmpod<F2_dfcmpge>, setoge, i1, F64>;
+def: OpmR_RR_pat<Cmpod<F2_dfcmpgt>, setogt, i1, F64>;
+def: OpmR_RR_pat<Cmpod<F2_dfcmpge>, RevCmp<setole>, i1, F64>;
+def: OpmR_RR_pat<Cmpod<F2_dfcmpgt>, RevCmp<setolt>, i1, F64>;
+def: OpmR_RR_pat<Cmpodn<F2_dfcmpeq>, setone, i1, F64>;
def: OpmR_RR_pat<Outn<F2_sfcmpuo>, seto, i1, F32>;
def: OpmR_RR_pat<Outn<F2_dfcmpuo>, seto, i1, F64>;
+def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setne, i1, F32>;
+def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setne, i1, F64>;
// --(6) Select ----------------------------------------------------------
//
diff --git a/llvm/test/CodeGen/Hexagon/fcmp-nan.ll b/llvm/test/CodeGen/Hexagon/fcmp-nan.ll
new file mode 100644
index 0000000000000..1469402911601
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/fcmp-nan.ll
@@ -0,0 +1,189 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+;
+; Test that all FP ordered compare instructions generate the correct
+; post-processing to accommodate NaNs.
+;
+; Specifically for ordered FP compares, we have to check if one of
+; the operands was a NaN to conform to the semantics of the ordered
+; fcmp bitcode instruction
+;
+target triple = "hexagon"
+
+;
+; Functions for float:
+;
+
+;
+; CHECK-DAG: [[REG0:p([0-3])]] = sfcmp.eq(r0,r1)
+; CHECK-DAG: [[REG1:p([0-3])]] = sfcmp.uo(r0,r1)
+; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]])
+; CHECK: r0 = mux([[REG2]],#1,#0)
+;
+define i32 @compare_oeq_f(float %val, float %val2) local_unnamed_addr #0 {
+entry:
+ %cmpinf = fcmp oeq float %val, %val2
+ %0 = zext i1 %cmpinf to i32
+ ret i32 %0
+}
+
+
+;
+; CHECK-DAG: [[REG0:p([0-3])]] = sfcmp.eq(r0,r1)
+; CHECK-DAG: [[REG1:p([0-3])]] = sfcmp.uo(r0,r1)
+; CHECK: [[REG2:p([0-3])]] = or([[REG0]],[[REG1]])
+; CHECK: r0 = mux([[REG2]],#0,#1)
+;
+define i32 @compare_one_f(float %val, float %val2) local_unnamed_addr #0 {
+entry:
+ %cmpinf = fcmp one float %val, %val2
+ %0 = zext i1 %cmpinf to i32
+ ret i32 %0
+}
+
+
+;
+; CHECK-DAG: [[REG0:p([0-3])]] = sfcmp.gt(r0,r1)
+; CHECK-DAG: [[REG1:p([0-3])]] = sfcmp.uo(r0,r1)
+; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]])
+; CHECK: r0 = mux([[REG2]],#1,#0)
+;
+define i32 @compare_ogt_f(float %val, float %val2) local_unnamed_addr #0 {
+entry:
+ %cmpinf = fcmp ogt float %val, %val2
+ %0 = zext i1 %cmpinf to i32
+ ret i32 %0
+}
+
+
+;
+; CHECK-DAG: [[REG0:p([0-3])]] = sfcmp.ge(r1,r0)
+; CHECK-DAG: [[REG1:p([0-3])]] = sfcmp.uo(r1,r0)
+; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]])
+; CHECK: r0 = mux([[REG2]],#1,#0)
+;
+define i32 @compare_ole_f(float %val, float %val2) local_unnamed_addr #0 {
+entry:
+ %cmpinf = fcmp ole float %val, %val2
+ %0 = zext i1 %cmpinf to i32
+ ret i32 %0
+}
+
+
+
+;
+; CHECK-DAG: [[REG0:p([0-3])]] = sfcmp.ge(r0,r1)
+; CHECK-DAG: [[REG1:p([0-3])]] = sfcmp.uo(r0,r1)
+; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]])
+; CHECK: r0 = mux([[REG2]],#1,#0)
+;
+define i32 @compare_oge_f(float %val, float %val2) local_unnamed_addr #0 {
+entry:
+ %cmpinf = fcmp oge float %val, %val2
+ %0 = zext i1 %cmpinf to i32
+ ret i32 %0
+}
+
+
+;
+; CHECK-DAG: [[REG0:p([0-3])]] = sfcmp.gt(r1,r0)
+; CHECK-DAG: [[REG1:p([0-3])]] = sfcmp.uo(r1,r0)
+; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]])
+; CHECK: r0 = mux([[REG2]],#1,#0)
+;
+define i32 @compare_olt_f(float %val, float %val2) local_unnamed_addr #0 {
+entry:
+ %cmpinf = fcmp olt float %val, %val2
+ %0 = zext i1 %cmpinf to i32
+ ret i32 %0
+}
+
+
+
+;
+; Functions for double:
+;
+
+;
+; CHECK-DAG: [[REG0:p([0-3])]] = dfcmp.eq(r1:0,r3:2)
+; CHECK-DAG: [[REG1:p([0-3])]] = dfcmp.uo(r1:0,r3:2)
+; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]])
+; CHECK: r0 = mux([[REG2]],#1,#0)
+;
+define i32 @compare_oeq_d(double %val, double %val2) local_unnamed_addr #0 {
+entry:
+ %cmpinf = fcmp oeq double %val, %val2
+ %0 = zext i1 %cmpinf to i32
+ ret i32 %0
+}
+
+
+;
+; CHECK-DAG: [[REG0:p([0-3])]] = dfcmp.eq(r1:0,r3:2)
+; CHECK-DAG: [[REG1:p([0-3])]] = dfcmp.uo(r1:0,r3:2)
+; CHECK: [[REG2:p([0-3])]] = or([[REG0]],[[REG1]])
+; CHECK: r0 = mux([[REG2]],#0,#1)
+;
+define i32 @compare_one_d(double %val, double %val2) local_unnamed_addr #0 {
+entry:
+ %cmpinf = fcmp one double %val, %val2
+ %0 = zext i1 %cmpinf to i32
+ ret i32 %0
+}
+
+
+
+;
+; CHECK-DAG: [[REG0:p([0-3])]] = dfcmp.gt(r1:0,r3:2)
+; CHECK-DAG: [[REG1:p([0-3])]] = dfcmp.uo(r1:0,r3:2)
+; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]])
+; CHECK: r0 = mux([[REG2]],#1,#0)
+;
+define i32 @compare_ogt_d(double %val, double %val2) local_unnamed_addr #0 {
+entry:
+ %cmpinf = fcmp ogt double %val, %val2
+ %0 = zext i1 %cmpinf to i32
+ ret i32 %0
+}
+
+
+;
+; CHECK-DAG: [[REG0:p([0-3])]] = dfcmp.ge(r3:2,r1:0)
+; CHECK-DAG: [[REG1:p([0-3])]] = dfcmp.uo(r3:2,r1:0)
+; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]])
+; CHECK: r0 = mux([[REG2]],#1,#0)
+;
+define i32 @compare_ole_d(double %val, double %val2) local_unnamed_addr #0 {
+entry:
+ %cmpinf = fcmp ole double %val, %val2
+ %0 = zext i1 %cmpinf to i32
+ ret i32 %0
+}
+
+
+;
+; CHECK-DAG: [[REG0:p([0-3])]] = dfcmp.ge(r1:0,r3:2)
+; CHECK-DAG: [[REG1:p([0-3])]] = dfcmp.uo(r1:0,r3:2)
+; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]])
+; CHECK: r0 = mux([[REG2]],#1,#0)
+;
+define i32 @compare_oge_d(double %val, double %val2) local_unnamed_addr #0 {
+entry:
+ %cmpinf = fcmp oge double %val, %val2
+ %0 = zext i1 %cmpinf to i32
+ ret i32 %0
+}
+
+
+;
+; CHECK-DAG: [[REG0:p([0-3])]] = dfcmp.gt(r3:2,r1:0)
+; CHECK-DAG: [[REG1:p([0-3])]] = dfcmp.uo(r3:2,r1:0)
+; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]])
+; CHECK: r0 = mux([[REG2]],#1,#0)
+;
+define i32 @compare_olt_d(double %val, double %val2) local_unnamed_addr #0 {
+entry:
+ %cmpinf = fcmp olt double %val, %val2
+ %0 = zext i1 %cmpinf to i32
+ ret i32 %0
+}
+
|
|
@iajbar can you review the cherry-pick of this floating point comparison fix? |
For the ordered FP compare bitcode instructions, the Hexagon backend was assuming that no operand could be a NaN. This assumption is flawed. This patch fixes the code generation to produce an unordered compare (sfcmp.uo / dfcmp.uo) and the appropriate predicate logic operations (and/or/not) to account for the case when an operand to an FP compare is a NaN. Fix for llvm#129391 Co-authored-by: aankit-quic <[email protected]> (cherry picked from commit d642eec)
|
@androm3da (or anyone else). If you would like to add a note about this fix in the release notes (completely optional). Please reply to this comment with a one or two sentence description of the fix. When you are done, please add the release:note label to this PR. |
Backport d642eec
Requested by: @androm3da