Skip to content

Conversation

@llvmbot
Copy link
Member

@llvmbot llvmbot commented Mar 14, 2025

Backport d642eec

Requested by: @androm3da

@llvmbot
Copy link
Member Author

llvmbot commented Mar 14, 2025

@iajbar What do you think about merging this PR to the release branch?

@llvmbot
Copy link
Member Author

llvmbot commented Mar 14, 2025

@llvm/pr-subscribers-backend-hexagon

Author: None (llvmbot)

Changes

Backport d642eec

Requested by: @androm3da


Full diff: https://github.com/llvm/llvm-project/pull/131270.diff

2 Files Affected:

  • (modified) llvm/lib/Target/Hexagon/HexagonPatterns.td (+24-14)
  • (added) llvm/test/CodeGen/Hexagon/fcmp-nan.ll (+189)
diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td
index cba5ff1ab0d9b..244f204539c89 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatterns.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td
@@ -721,11 +721,6 @@ def: OpR_RR_pat<A2_vcmpwgtu,  setugt,         v2i1, V2I32>;
 def: OpR_RR_pat<F2_sfcmpeq,   seteq,          i1, F32>;
 def: OpR_RR_pat<F2_sfcmpgt,   setgt,          i1, F32>;
 def: OpR_RR_pat<F2_sfcmpge,   setge,          i1, F32>;
-def: OpR_RR_pat<F2_sfcmpeq,   setoeq,         i1, F32>;
-def: OpR_RR_pat<F2_sfcmpgt,   setogt,         i1, F32>;
-def: OpR_RR_pat<F2_sfcmpge,   setoge,         i1, F32>;
-def: OpR_RR_pat<F2_sfcmpgt,   RevCmp<setolt>, i1, F32>;
-def: OpR_RR_pat<F2_sfcmpge,   RevCmp<setole>, i1, F32>;
 def: OpR_RR_pat<F2_sfcmpgt,   RevCmp<setlt>,  i1, F32>;
 def: OpR_RR_pat<F2_sfcmpge,   RevCmp<setle>,  i1, F32>;
 def: OpR_RR_pat<F2_sfcmpuo,   setuo,          i1, F32>;
@@ -733,11 +728,6 @@ def: OpR_RR_pat<F2_sfcmpuo,   setuo,          i1, F32>;
 def: OpR_RR_pat<F2_dfcmpeq,   seteq,          i1, F64>;
 def: OpR_RR_pat<F2_dfcmpgt,   setgt,          i1, F64>;
 def: OpR_RR_pat<F2_dfcmpge,   setge,          i1, F64>;
-def: OpR_RR_pat<F2_dfcmpeq,   setoeq,         i1, F64>;
-def: OpR_RR_pat<F2_dfcmpgt,   setogt,         i1, F64>;
-def: OpR_RR_pat<F2_dfcmpge,   setoge,         i1, F64>;
-def: OpR_RR_pat<F2_dfcmpgt,   RevCmp<setolt>, i1, F64>;
-def: OpR_RR_pat<F2_dfcmpge,   RevCmp<setole>, i1, F64>;
 def: OpR_RR_pat<F2_dfcmpgt,   RevCmp<setlt>,  i1, F64>;
 def: OpR_RR_pat<F2_dfcmpge,   RevCmp<setle>,  i1, F64>;
 def: OpR_RR_pat<F2_dfcmpuo,   setuo,          i1, F64>;
@@ -900,15 +890,35 @@ def: OpmR_RR_pat<Cmpud<F2_dfcmpge>,  RevCmp<setule>, i1, F64>;
 def: OpmR_RR_pat<Cmpud<F2_dfcmpgt>,  RevCmp<setult>, i1, F64>;
 def: OpmR_RR_pat<Cmpudn<F2_dfcmpeq>, setune,         i1, F64>;
 
-def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setone, i1, F32>;
-def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setne,  i1, F32>;
+class T4<InstHexagon MI1, InstHexagon MI2, InstHexagon MI3, InstHexagon MI4>
+  : OutPatFrag<(ops node:$Rs, node:$Rt),
+               (MI1 (MI2 (MI3 $Rs, $Rt), (MI4 $Rs, $Rt)))>;
 
-def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setone, i1, F64>;
-def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setne,  i1, F64>;
+class Cmpof<InstHexagon MI>: T3<C2_andn, MI,  F2_sfcmpuo>;
+class Cmpod<InstHexagon MI>: T3<C2_andn, MI,  F2_dfcmpuo>;
+
+class Cmpofn<InstHexagon MI>: T4<C2_not,  C2_or, MI,  F2_sfcmpuo>;
+class Cmpodn<InstHexagon MI>: T4<C2_not,  C2_or, MI,  F2_dfcmpuo>;
+
+def: OpmR_RR_pat<Cmpof<F2_sfcmpeq>,  setoeq,         i1, F32>;
+def: OpmR_RR_pat<Cmpof<F2_sfcmpge>,  setoge,         i1, F32>;
+def: OpmR_RR_pat<Cmpof<F2_sfcmpgt>,  setogt,         i1, F32>;
+def: OpmR_RR_pat<Cmpof<F2_sfcmpge>,  RevCmp<setole>, i1, F32>;
+def: OpmR_RR_pat<Cmpof<F2_sfcmpgt>,  RevCmp<setolt>, i1, F32>;
+def: OpmR_RR_pat<Cmpofn<F2_sfcmpeq>, setone,         i1, F32>;
+
+def: OpmR_RR_pat<Cmpod<F2_dfcmpeq>,  setoeq,         i1, F64>;
+def: OpmR_RR_pat<Cmpod<F2_dfcmpge>,  setoge,         i1, F64>;
+def: OpmR_RR_pat<Cmpod<F2_dfcmpgt>,  setogt,         i1, F64>;
+def: OpmR_RR_pat<Cmpod<F2_dfcmpge>,  RevCmp<setole>, i1, F64>;
+def: OpmR_RR_pat<Cmpod<F2_dfcmpgt>,  RevCmp<setolt>, i1, F64>;
+def: OpmR_RR_pat<Cmpodn<F2_dfcmpeq>, setone,         i1, F64>;
 
 def: OpmR_RR_pat<Outn<F2_sfcmpuo>, seto,   i1, F32>;
 def: OpmR_RR_pat<Outn<F2_dfcmpuo>, seto,   i1, F64>;
 
+def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setne,  i1, F32>;
+def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setne,  i1, F64>;
 
 // --(6) Select ----------------------------------------------------------
 //
diff --git a/llvm/test/CodeGen/Hexagon/fcmp-nan.ll b/llvm/test/CodeGen/Hexagon/fcmp-nan.ll
new file mode 100644
index 0000000000000..1469402911601
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/fcmp-nan.ll
@@ -0,0 +1,189 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+;
+; Test that all FP ordered compare instructions generate the correct
+; post-processing to accommodate NaNs.
+;
+; Specifically for ordered FP compares, we have to check if one of
+; the operands was a NaN to conform to the semantics of the ordered
+; fcmp bitcode instruction
+;
+target triple = "hexagon"
+
+;
+; Functions for float:
+;
+
+;
+; CHECK-DAG: [[REG0:p([0-3])]] = sfcmp.eq(r0,r1)
+; CHECK-DAG: [[REG1:p([0-3])]] = sfcmp.uo(r0,r1)
+; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]])
+; CHECK: r0 = mux([[REG2]],#1,#0)
+;
+define i32 @compare_oeq_f(float %val, float %val2) local_unnamed_addr #0 {
+entry:
+  %cmpinf = fcmp oeq float %val, %val2
+  %0 = zext i1 %cmpinf to i32
+  ret i32 %0
+}
+
+
+;
+; CHECK-DAG: [[REG0:p([0-3])]] = sfcmp.eq(r0,r1)
+; CHECK-DAG: [[REG1:p([0-3])]] = sfcmp.uo(r0,r1)
+; CHECK: [[REG2:p([0-3])]] = or([[REG0]],[[REG1]])
+; CHECK: r0 = mux([[REG2]],#0,#1)
+;
+define i32 @compare_one_f(float %val, float %val2) local_unnamed_addr #0 {
+entry:
+  %cmpinf = fcmp one float %val, %val2
+  %0 = zext i1 %cmpinf to i32
+  ret i32 %0
+}
+
+
+;
+; CHECK-DAG: [[REG0:p([0-3])]] = sfcmp.gt(r0,r1)
+; CHECK-DAG: [[REG1:p([0-3])]] = sfcmp.uo(r0,r1)
+; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]])
+; CHECK: r0 = mux([[REG2]],#1,#0)
+;
+define i32 @compare_ogt_f(float %val, float %val2) local_unnamed_addr #0 {
+entry:
+  %cmpinf = fcmp ogt float %val, %val2
+  %0 = zext i1 %cmpinf to i32
+  ret i32 %0
+}
+
+
+;
+; CHECK-DAG: [[REG0:p([0-3])]] = sfcmp.ge(r1,r0)
+; CHECK-DAG: [[REG1:p([0-3])]] = sfcmp.uo(r1,r0)
+; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]])
+; CHECK: r0 = mux([[REG2]],#1,#0)
+;
+define i32 @compare_ole_f(float %val, float %val2) local_unnamed_addr #0 {
+entry:
+  %cmpinf = fcmp ole float %val, %val2
+  %0 = zext i1 %cmpinf to i32
+  ret i32 %0
+}
+
+
+
+;
+; CHECK-DAG: [[REG0:p([0-3])]] = sfcmp.ge(r0,r1)
+; CHECK-DAG: [[REG1:p([0-3])]] = sfcmp.uo(r0,r1)
+; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]])
+; CHECK: r0 = mux([[REG2]],#1,#0)
+;
+define i32 @compare_oge_f(float %val, float %val2) local_unnamed_addr #0 {
+entry:
+  %cmpinf = fcmp oge float %val, %val2
+  %0 = zext i1 %cmpinf to i32
+  ret i32 %0
+}
+
+
+;
+; CHECK-DAG: [[REG0:p([0-3])]] = sfcmp.gt(r1,r0)
+; CHECK-DAG: [[REG1:p([0-3])]] = sfcmp.uo(r1,r0)
+; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]])
+; CHECK: r0 = mux([[REG2]],#1,#0)
+;
+define i32 @compare_olt_f(float %val, float %val2) local_unnamed_addr #0 {
+entry:
+  %cmpinf = fcmp olt float %val, %val2
+  %0 = zext i1 %cmpinf to i32
+  ret i32 %0
+}
+
+
+
+;
+; Functions for double:
+;
+
+;
+; CHECK-DAG: [[REG0:p([0-3])]] = dfcmp.eq(r1:0,r3:2)
+; CHECK-DAG: [[REG1:p([0-3])]] = dfcmp.uo(r1:0,r3:2)
+; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]])
+; CHECK: r0 = mux([[REG2]],#1,#0)
+;
+define i32 @compare_oeq_d(double %val, double %val2) local_unnamed_addr #0 {
+entry:
+  %cmpinf = fcmp oeq double %val, %val2
+  %0 = zext i1 %cmpinf to i32
+  ret i32 %0
+}
+
+
+;
+; CHECK-DAG: [[REG0:p([0-3])]] = dfcmp.eq(r1:0,r3:2)
+; CHECK-DAG: [[REG1:p([0-3])]] = dfcmp.uo(r1:0,r3:2)
+; CHECK: [[REG2:p([0-3])]] = or([[REG0]],[[REG1]])
+; CHECK: r0 = mux([[REG2]],#0,#1)
+;
+define i32 @compare_one_d(double %val, double %val2) local_unnamed_addr #0 {
+entry:
+  %cmpinf = fcmp one double %val, %val2
+  %0 = zext i1 %cmpinf to i32
+  ret i32 %0
+}
+
+
+
+;
+; CHECK-DAG: [[REG0:p([0-3])]] = dfcmp.gt(r1:0,r3:2)
+; CHECK-DAG: [[REG1:p([0-3])]] = dfcmp.uo(r1:0,r3:2)
+; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]])
+; CHECK: r0 = mux([[REG2]],#1,#0)
+;
+define i32 @compare_ogt_d(double %val, double %val2) local_unnamed_addr #0 {
+entry:
+  %cmpinf = fcmp ogt double %val, %val2
+  %0 = zext i1 %cmpinf to i32
+  ret i32 %0
+}
+
+
+;
+; CHECK-DAG: [[REG0:p([0-3])]] = dfcmp.ge(r3:2,r1:0)
+; CHECK-DAG: [[REG1:p([0-3])]] = dfcmp.uo(r3:2,r1:0)
+; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]])
+; CHECK: r0 = mux([[REG2]],#1,#0)
+;
+define i32 @compare_ole_d(double %val, double %val2) local_unnamed_addr #0 {
+entry:
+  %cmpinf = fcmp ole double %val, %val2
+  %0 = zext i1 %cmpinf to i32
+  ret i32 %0
+}
+
+
+;
+; CHECK-DAG: [[REG0:p([0-3])]] = dfcmp.ge(r1:0,r3:2)
+; CHECK-DAG: [[REG1:p([0-3])]] = dfcmp.uo(r1:0,r3:2)
+; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]])
+; CHECK: r0 = mux([[REG2]],#1,#0)
+;
+define i32 @compare_oge_d(double %val, double %val2) local_unnamed_addr #0 {
+entry:
+  %cmpinf = fcmp oge double %val, %val2
+  %0 = zext i1 %cmpinf to i32
+  ret i32 %0
+}
+
+
+;
+; CHECK-DAG: [[REG0:p([0-3])]] = dfcmp.gt(r3:2,r1:0)
+; CHECK-DAG: [[REG1:p([0-3])]] = dfcmp.uo(r3:2,r1:0)
+; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]])
+; CHECK: r0 = mux([[REG2]],#1,#0)
+;
+define i32 @compare_olt_d(double %val, double %val2) local_unnamed_addr #0 {
+entry:
+  %cmpinf = fcmp olt double %val, %val2
+  %0 = zext i1 %cmpinf to i32
+  ret i32 %0
+}
+

@tstellar tstellar moved this from Needs Triage to Needs Review in LLVM Release Status Mar 17, 2025
@androm3da
Copy link
Member

@iajbar can you review the cherry-pick of this floating point comparison fix?

@github-project-automation github-project-automation bot moved this from Needs Review to Needs Merge in LLVM Release Status Mar 27, 2025
For the ordered FP compare bitcode instructions, the Hexagon backend was
assuming that no operand could be a NaN. This assumption is flawed. This
patch fixes the code generation to produce fpcmp.uo and appropriate
bit comparison operators to account for the case when an operand to an FP
compare is a NaN.

Fix for llvm#129391

Co-authored-by: aankit-quic <[email protected]>
(cherry picked from commit d642eec)
@tstellar tstellar merged commit d60baf3 into llvm:release/20.x Mar 27, 2025
7 of 9 checks passed
@github-project-automation github-project-automation bot moved this from Needs Merge to Done in LLVM Release Status Mar 27, 2025
@github-actions
Copy link

@androm3da (or anyone else): if you would like to add a note about this fix in the release notes (completely optional), please reply to this comment with a one- or two-sentence description of the fix. When you are done, please add the release:note label to this PR.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

Development

Successfully merging this pull request may close these issues.

4 participants