From d60baf3d47863083079e840efc8912d37399076f Mon Sep 17 00:00:00 2001 From: aankit-ca Date: Thu, 13 Mar 2025 12:48:31 -0700 Subject: [PATCH] [HEXAGON] Fix semantics of ordered FP compares (#131089) For the ordered FP compare bitcode instructions, the Hexagon backend was assuming that no operand could be a NaN. This assumption is flawed. This patch fixes the code-generation to produce fpcmp.uo and the appropriate bit comparison operators to account for the case when an operand to an FP compare is a NaN. Fix for https://github.com/llvm/llvm-project/issues/129391 Co-authored-by: aankit-quic (cherry picked from commit d642eec78fc94ef3c5266dc0b10b8c51ea046e7a) --- llvm/lib/Target/Hexagon/HexagonPatterns.td | 38 +++-- llvm/test/CodeGen/Hexagon/fcmp-nan.ll | 189 +++++++++++++++++++++ 2 files changed, 213 insertions(+), 14 deletions(-) create mode 100644 llvm/test/CodeGen/Hexagon/fcmp-nan.ll diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td index cba5ff1ab0d9b..244f204539c89 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatterns.td +++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td @@ -721,11 +721,6 @@ def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; -def: OpR_RR_pat; -def: OpR_RR_pat; -def: OpR_RR_pat; -def: OpR_RR_pat, i1, F32>; -def: OpR_RR_pat, i1, F32>; def: OpR_RR_pat, i1, F32>; def: OpR_RR_pat, i1, F32>; def: OpR_RR_pat; @@ -733,11 +728,6 @@ def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; def: OpR_RR_pat; -def: OpR_RR_pat; -def: OpR_RR_pat; -def: OpR_RR_pat; -def: OpR_RR_pat, i1, F64>; -def: OpR_RR_pat, i1, F64>; def: OpR_RR_pat, i1, F64>; def: OpR_RR_pat, i1, F64>; def: OpR_RR_pat; @@ -900,15 +890,35 @@ def: OpmR_RR_pat, RevCmp, i1, F64>; def: OpmR_RR_pat, RevCmp, i1, F64>; def: OpmR_RR_pat, setune, i1, F64>; -def: OpmR_RR_pat, setone, i1, F32>; -def: OpmR_RR_pat, setne, i1, F32>; +class T4 + : OutPatFrag<(ops node:$Rs, node:$Rt), + (MI1 (MI2 (MI3 $Rs, $Rt), (MI4 $Rs, $Rt)))>; -def: OpmR_RR_pat, 
setone, i1, F64>; -def: OpmR_RR_pat, setne, i1, F64>; +class Cmpof: T3; +class Cmpod: T3; + +class Cmpofn: T4; +class Cmpodn: T4; + +def: OpmR_RR_pat, setoeq, i1, F32>; +def: OpmR_RR_pat, setoge, i1, F32>; +def: OpmR_RR_pat, setogt, i1, F32>; +def: OpmR_RR_pat, RevCmp, i1, F32>; +def: OpmR_RR_pat, RevCmp, i1, F32>; +def: OpmR_RR_pat, setone, i1, F32>; + +def: OpmR_RR_pat, setoeq, i1, F64>; +def: OpmR_RR_pat, setoge, i1, F64>; +def: OpmR_RR_pat, setogt, i1, F64>; +def: OpmR_RR_pat, RevCmp, i1, F64>; +def: OpmR_RR_pat, RevCmp, i1, F64>; +def: OpmR_RR_pat, setone, i1, F64>; def: OpmR_RR_pat, seto, i1, F32>; def: OpmR_RR_pat, seto, i1, F64>; +def: OpmR_RR_pat, setne, i1, F32>; +def: OpmR_RR_pat, setne, i1, F64>; // --(6) Select ---------------------------------------------------------- // diff --git a/llvm/test/CodeGen/Hexagon/fcmp-nan.ll b/llvm/test/CodeGen/Hexagon/fcmp-nan.ll new file mode 100644 index 0000000000000..1469402911601 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/fcmp-nan.ll @@ -0,0 +1,189 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s +; +; Test that all FP ordered compare instructions generate the correct +; post-processing to accommodate NaNs. 
+; +; Specifically for ordered FP compares, we have to check if one of +; the operands was a NaN to conform to the semantics of the ordered +; fcmp bitcode instruction +; +target triple = "hexagon" + +; +; Functions for float: +; + +; +; CHECK-DAG: [[REG0:p([0-3])]] = sfcmp.eq(r0,r1) +; CHECK-DAG: [[REG1:p([0-3])]] = sfcmp.uo(r0,r1) +; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]]) +; CHECK: r0 = mux([[REG2]],#1,#0) +; +define i32 @compare_oeq_f(float %val, float %val2) local_unnamed_addr #0 { +entry: + %cmpinf = fcmp oeq float %val, %val2 + %0 = zext i1 %cmpinf to i32 + ret i32 %0 +} + + +; +; CHECK-DAG: [[REG0:p([0-3])]] = sfcmp.eq(r0,r1) +; CHECK-DAG: [[REG1:p([0-3])]] = sfcmp.uo(r0,r1) +; CHECK: [[REG2:p([0-3])]] = or([[REG0]],[[REG1]]) +; CHECK: r0 = mux([[REG2]],#0,#1) +; +define i32 @compare_one_f(float %val, float %val2) local_unnamed_addr #0 { +entry: + %cmpinf = fcmp one float %val, %val2 + %0 = zext i1 %cmpinf to i32 + ret i32 %0 +} + + +; +; CHECK-DAG: [[REG0:p([0-3])]] = sfcmp.gt(r0,r1) +; CHECK-DAG: [[REG1:p([0-3])]] = sfcmp.uo(r0,r1) +; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]]) +; CHECK: r0 = mux([[REG2]],#1,#0) +; +define i32 @compare_ogt_f(float %val, float %val2) local_unnamed_addr #0 { +entry: + %cmpinf = fcmp ogt float %val, %val2 + %0 = zext i1 %cmpinf to i32 + ret i32 %0 +} + + +; +; CHECK-DAG: [[REG0:p([0-3])]] = sfcmp.ge(r1,r0) +; CHECK-DAG: [[REG1:p([0-3])]] = sfcmp.uo(r1,r0) +; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]]) +; CHECK: r0 = mux([[REG2]],#1,#0) +; +define i32 @compare_ole_f(float %val, float %val2) local_unnamed_addr #0 { +entry: + %cmpinf = fcmp ole float %val, %val2 + %0 = zext i1 %cmpinf to i32 + ret i32 %0 +} + + + +; +; CHECK-DAG: [[REG0:p([0-3])]] = sfcmp.ge(r0,r1) +; CHECK-DAG: [[REG1:p([0-3])]] = sfcmp.uo(r0,r1) +; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]]) +; CHECK: r0 = mux([[REG2]],#1,#0) +; +define i32 @compare_oge_f(float %val, float %val2) local_unnamed_addr #0 { +entry: + %cmpinf = fcmp 
oge float %val, %val2 + %0 = zext i1 %cmpinf to i32 + ret i32 %0 +} + + +; +; CHECK-DAG: [[REG0:p([0-3])]] = sfcmp.gt(r1,r0) +; CHECK-DAG: [[REG1:p([0-3])]] = sfcmp.uo(r1,r0) +; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]]) +; CHECK: r0 = mux([[REG2]],#1,#0) +; +define i32 @compare_olt_f(float %val, float %val2) local_unnamed_addr #0 { +entry: + %cmpinf = fcmp olt float %val, %val2 + %0 = zext i1 %cmpinf to i32 + ret i32 %0 +} + + + +; +; Functions for double: +; + +; +; CHECK-DAG: [[REG0:p([0-3])]] = dfcmp.eq(r1:0,r3:2) +; CHECK-DAG: [[REG1:p([0-3])]] = dfcmp.uo(r1:0,r3:2) +; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]]) +; CHECK: r0 = mux([[REG2]],#1,#0) +; +define i32 @compare_oeq_d(double %val, double %val2) local_unnamed_addr #0 { +entry: + %cmpinf = fcmp oeq double %val, %val2 + %0 = zext i1 %cmpinf to i32 + ret i32 %0 +} + + +; +; CHECK-DAG: [[REG0:p([0-3])]] = dfcmp.eq(r1:0,r3:2) +; CHECK-DAG: [[REG1:p([0-3])]] = dfcmp.uo(r1:0,r3:2) +; CHECK: [[REG2:p([0-3])]] = or([[REG0]],[[REG1]]) +; CHECK: r0 = mux([[REG2]],#0,#1) +; +define i32 @compare_one_d(double %val, double %val2) local_unnamed_addr #0 { +entry: + %cmpinf = fcmp one double %val, %val2 + %0 = zext i1 %cmpinf to i32 + ret i32 %0 +} + + + +; +; CHECK-DAG: [[REG0:p([0-3])]] = dfcmp.gt(r1:0,r3:2) +; CHECK-DAG: [[REG1:p([0-3])]] = dfcmp.uo(r1:0,r3:2) +; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]]) +; CHECK: r0 = mux([[REG2]],#1,#0) +; +define i32 @compare_ogt_d(double %val, double %val2) local_unnamed_addr #0 { +entry: + %cmpinf = fcmp ogt double %val, %val2 + %0 = zext i1 %cmpinf to i32 + ret i32 %0 +} + + +; +; CHECK-DAG: [[REG0:p([0-3])]] = dfcmp.ge(r3:2,r1:0) +; CHECK-DAG: [[REG1:p([0-3])]] = dfcmp.uo(r3:2,r1:0) +; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]]) +; CHECK: r0 = mux([[REG2]],#1,#0) +; +define i32 @compare_ole_d(double %val, double %val2) local_unnamed_addr #0 { +entry: + %cmpinf = fcmp ole double %val, %val2 + %0 = zext i1 %cmpinf to i32 + ret i32 %0 +} + + +; +; 
CHECK-DAG: [[REG0:p([0-3])]] = dfcmp.ge(r1:0,r3:2) +; CHECK-DAG: [[REG1:p([0-3])]] = dfcmp.uo(r1:0,r3:2) +; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]]) +; CHECK: r0 = mux([[REG2]],#1,#0) +; +define i32 @compare_oge_d(double %val, double %val2) local_unnamed_addr #0 { +entry: + %cmpinf = fcmp oge double %val, %val2 + %0 = zext i1 %cmpinf to i32 + ret i32 %0 +} + + +; +; CHECK-DAG: [[REG0:p([0-3])]] = dfcmp.gt(r3:2,r1:0) +; CHECK-DAG: [[REG1:p([0-3])]] = dfcmp.uo(r3:2,r1:0) +; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]]) +; CHECK: r0 = mux([[REG2]],#1,#0) +; +define i32 @compare_olt_d(double %val, double %val2) local_unnamed_addr #0 { +entry: + %cmpinf = fcmp olt double %val, %val2 + %0 = zext i1 %cmpinf to i32 + ret i32 %0 +} +