python3kgae
diff --git a/‎llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
Lines changed: 46 additions & 0 deletions b/‎llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
Lines changed: 46 additions & 0 deletions
diff --git a/‎llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
Lines changed: 40 additions & 0 deletions b/‎llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
Lines changed: 40 additions & 0 deletions
diff --git a/‎llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
Lines changed: 49 additions & 0 deletions b/‎llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
Lines changed: 49 additions & 0 deletions
diff --git a/‎llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
Lines changed: 8 additions & 0 deletions b/‎llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
Lines changed: 8 additions & 0 deletions
diff --git a/‎llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
Lines changed: 190 additions & 0 deletions b/‎llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
Lines changed: 190 additions & 0 deletions
@@ -128,4 +128,50 @@ def : PatFprFpr<fmul, FMUL_S, FPR32>;
 def : PatFprFpr<fdiv, FDIV_S, FPR32>;
 def : PatFpr<fneg, FNEG_S, FPR32>;
 
+/// Setcc
+
+// Match non-signaling comparisons: a setcc with condition `cc` selects to
+// `CmpInst`, whose result is then passed through MOVCF2GR.
+// TODO: Change setcc to any_fsetcc once calls are supported, because the test
+// cases need to call llvm.experimental.constrained.fcmp.f32.
+// See RISCV float-fcmp-strict.ll for reference.
+class PatFPSetcc<CondCode cc, LAInst CmpInst, RegisterClass RegTy>
+    : Pat<(setcc RegTy:$fj, RegTy:$fk, cc),
+          (MOVCF2GR (CmpInst RegTy:$fj, RegTy:$fk))>;
+// SETOGT/SETOGE/SETUGT/SETUGE will expand into SETOLT/SETOLE/SETULT/SETULE.
+def : PatFPSetcc<SETOEQ, FCMP_CEQ_S,  FPR32>;
+def : PatFPSetcc<SETOLT, FCMP_CLT_S,  FPR32>;
+def : PatFPSetcc<SETOLE, FCMP_CLE_S,  FPR32>;
+def : PatFPSetcc<SETONE, FCMP_CNE_S,  FPR32>;
+def : PatFPSetcc<SETO,   FCMP_COR_S,  FPR32>;
+def : PatFPSetcc<SETUEQ, FCMP_CUEQ_S, FPR32>;
+def : PatFPSetcc<SETULT, FCMP_CULT_S, FPR32>;
+def : PatFPSetcc<SETULE, FCMP_CULE_S, FPR32>;
+def : PatFPSetcc<SETUNE, FCMP_CUNE_S, FPR32>;
+def : PatFPSetcc<SETUO,  FCMP_CUN_S,  FPR32>;
+
+// TODO: Match signaling comparison strict_fsetccs with FCMP_S*_S instructions.
+
+/// Select
+// Note the operand order: FSEL_S takes the false value before the true value.
+def : Pat<(select GPR:$cc, FPR32:$fk, FPR32:$fj),
+          (FSEL_S FPR32:$fj, FPR32:$fk, (MOVGR2CF GPR:$cc))>;
+
+/// Selectcc
+// Select on an FP compare: CmpInst's result is SelInst's condition operand.
+class PatFPSelectcc<CondCode cc, LAInst CmpInst, LAInst SelInst,
+                    RegisterClass RegTy>
+    : Pat<(select (GRLenVT (setcc RegTy:$a, RegTy:$b, cc)), RegTy:$t, RegTy:$f),
+          (SelInst RegTy:$f, RegTy:$t, (CmpInst RegTy:$a, RegTy:$b))>;
+def : PatFPSelectcc<SETOEQ, FCMP_CEQ_S,  FSEL_S, FPR32>;
+def : PatFPSelectcc<SETOLT, FCMP_CLT_S,  FSEL_S, FPR32>;
+def : PatFPSelectcc<SETOLE, FCMP_CLE_S,  FSEL_S, FPR32>;
+def : PatFPSelectcc<SETONE, FCMP_CNE_S,  FSEL_S, FPR32>;
+def : PatFPSelectcc<SETO,   FCMP_COR_S,  FSEL_S, FPR32>;
+def : PatFPSelectcc<SETUEQ, FCMP_CUEQ_S, FSEL_S, FPR32>;
+def : PatFPSelectcc<SETULT, FCMP_CULT_S, FSEL_S, FPR32>;
+def : PatFPSelectcc<SETULE, FCMP_CULE_S, FSEL_S, FPR32>;
+def : PatFPSelectcc<SETUNE, FCMP_CUNE_S, FSEL_S, FPR32>;
+def : PatFPSelectcc<SETUO,  FCMP_CUN_S,  FSEL_S, FPR32>;
+
 } // Predicates = [HasBasicF]
@@ -145,4 +145,44 @@ def : PatFprFpr<fmul, FMUL_D, FPR64>;
 def : PatFprFpr<fdiv, FDIV_D, FPR64>;
 def : PatFpr<fneg, FNEG_D, FPR64>;
 
+/// Setcc
+
+// Match non-signaling comparisons using the PatFPSetcc class, which is
+// presumably defined in LoongArchFloat32InstrInfo.td (not in this file).
+// TODO: Change setcc to any_fsetcc once calls are supported, because the test
+// cases need to call llvm.experimental.constrained.fcmp.f64.
+// See RISCV float-fcmp-strict.ll for reference.
+
+// SETOGT/SETOGE/SETUGT/SETUGE will expand into SETOLT/SETOLE/SETULT/SETULE.
+def : PatFPSetcc<SETOEQ, FCMP_CEQ_D,  FPR64>;
+def : PatFPSetcc<SETOLT, FCMP_CLT_D,  FPR64>;
+def : PatFPSetcc<SETOLE, FCMP_CLE_D,  FPR64>;
+def : PatFPSetcc<SETONE, FCMP_CNE_D,  FPR64>;
+def : PatFPSetcc<SETO,   FCMP_COR_D,  FPR64>;
+def : PatFPSetcc<SETUEQ, FCMP_CUEQ_D, FPR64>;
+def : PatFPSetcc<SETULT, FCMP_CULT_D, FPR64>;
+def : PatFPSetcc<SETULE, FCMP_CULE_D, FPR64>;
+def : PatFPSetcc<SETUNE, FCMP_CUNE_D, FPR64>;
+def : PatFPSetcc<SETUO,  FCMP_CUN_D,  FPR64>;
+
+// TODO: Match signaling comparison strict_fsetccs with FCMP_S*_D instructions.
+
+/// Select
+// Note the operand order: FSEL_D takes the false value before the true value.
+def : Pat<(select GPR:$cc, FPR64:$fk, FPR64:$fj),
+          (FSEL_D FPR64:$fj, FPR64:$fk, (MOVGR2CF GPR:$cc))>;
+
+/// Selectcc
+// Select on an FP compare: the FCMP result is FSEL_D's condition operand.
+def : PatFPSelectcc<SETOEQ, FCMP_CEQ_D,  FSEL_D, FPR64>;
+def : PatFPSelectcc<SETOLT, FCMP_CLT_D,  FSEL_D, FPR64>;
+def : PatFPSelectcc<SETOLE, FCMP_CLE_D,  FSEL_D, FPR64>;
+def : PatFPSelectcc<SETONE, FCMP_CNE_D,  FSEL_D, FPR64>;
+def : PatFPSelectcc<SETO,   FCMP_COR_D,  FSEL_D, FPR64>;
+def : PatFPSelectcc<SETUEQ, FCMP_CUEQ_D, FSEL_D, FPR64>;
+def : PatFPSelectcc<SETULT, FCMP_CULT_D, FSEL_D, FPR64>;
+def : PatFPSelectcc<SETULE, FCMP_CULE_D, FSEL_D, FPR64>;
+def : PatFPSelectcc<SETUNE, FCMP_CUNE_D, FSEL_D, FPR64>;
+def : PatFPSelectcc<SETUO,  FCMP_CUN_D,  FSEL_D, FPR64>;
+
 } // Predicates = [HasBasicD]
@@ -13,6 +13,7 @@
 #include "LoongArchISelDAGToDAG.h"
 #include "MCTargetDesc/LoongArchMCTargetDesc.h"
 #include "MCTargetDesc/LoongArchMatInt.h"
+#include "llvm/Support/KnownBits.h"
 
 using namespace llvm;
 
@@ -64,6 +65,54 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) {
   // Select the default instruction.
   SelectCode(Node);
 }
+
+bool LoongArchDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
+                                            SDValue &ShAmt) {
+  // Picks the node to use as the amount operand of a ShiftWidth-bit shift.
+  // Shift instructions on LoongArch only read the lower 5 or 6 bits of the
+  // amount, so work on N that cannot change those bits can be bypassed.
+  // Always returns true; when nothing can be simplified ShAmt is N itself.
+  const unsigned Opc = N.getOpcode();
+
+  if (Opc == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
+    // Since the max shift amount is a power of 2 we can subtract 1 to make a
+    // mask that covers the bits needed to represent all shift amounts.
+    assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
+    const APInt &AndMask = N->getConstantOperandAPInt(1);
+    const APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
+    SDValue Src = N.getOperand(0);
+
+    // The AND is redundant if it keeps every bit the shift reads. Bits known
+    // to be zero in Src count as kept, since SimplifyDemandedBits may have
+    // removed them from the mask; known bits are only computed if needed.
+    if (ShMask.isSubsetOf(AndMask) ||
+        ShMask.isSubsetOf(AndMask | CurDAG->computeKnownBits(Src).Zero)) {
+      ShAmt = Src;
+      return true;
+    }
+  } else if (Opc == ISD::SUB && isa<ConstantSDNode>(N.getOperand(0))) {
+    // If we are shifting by C-X where C is a non-zero multiple of ShiftWidth,
+    // then just shift by -X: generate a NEG (a SUB from R0) instead of a SUB
+    // of a constant.
+    const uint64_t Imm = N.getConstantOperandVal(0);
+    if (Imm != 0 && Imm % ShiftWidth == 0) {
+      SDLoc DL(N);
+      EVT VT = N.getValueType();
+      SDValue Zero =
+          CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, LoongArch::R0, VT);
+      MachineSDNode *Neg = CurDAG->getMachineNode(
+          VT == MVT::i64 ? LoongArch::SUB_D : LoongArch::SUB_W, DL, VT, Zero,
+          N.getOperand(1));
+      ShAmt = SDValue(Neg, 0);
+      return true;
+    }
+  }
+
+  // Nothing to fold: use the shift amount unchanged.
+  ShAmt = N;
+  return true;
+}
+
 // This pass converts a legalized DAG into a LoongArch-specific DAG, ready
 // for instruction scheduling.
 FunctionPass *llvm::createLoongArchISelDag(LoongArchTargetMachine &TM) {
 
@@ -38,6 +38,14 @@ class LoongArchDAGToDAGISel : public SelectionDAGISel {
 
   void Select(SDNode *Node) override;
 
+  bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt);
+  bool selectShiftMaskGRLen(SDValue N, SDValue &ShAmt) { // ShiftWidth = GRLen
+    return selectShiftMask(N, Subtarget->getGRLen(), ShAmt);
+  }
+  bool selectShiftMask32(SDValue N, SDValue &ShAmt) { // ShiftWidth = 32
+    return selectShiftMask(N, 32, ShAmt);
+  }
+
 // Include the pieces autogenerated from the target description.
 #include "LoongArchGenDAGISel.inc"
 };
 
@@ -18,6 +18,7 @@
 #include "LoongArchSubtarget.h"
 #include "LoongArchTargetMachine.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/Support/Debug.h"
 
 using namespace llvm;
@@ -37,6 +38,29 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
     addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
 
   // TODO: add necessary setOperationAction calls later.
+  setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom);
+  setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom);
+  setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom);
+
+  if (Subtarget.is64Bit()) {
+    setOperationAction(ISD::SHL, MVT::i32, Custom);
+    setOperationAction(ISD::SRA, MVT::i32, Custom);
+    setOperationAction(ISD::SRL, MVT::i32, Custom);
+  }
+
+  static const ISD::CondCode FPCCToExpand[] = {ISD::SETOGT, ISD::SETOGE,
+                                               ISD::SETUGT, ISD::SETUGE};
+
+  if (Subtarget.hasBasicF()) {
+    setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
+    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
+  }
+  if (Subtarget.hasBasicD()) {
+    setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
+    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
+  }
+
+  setOperationAction(ISD::SELECT_CC, GRLenVT, Expand);
 
   // Compute derived properties from the register classes.
   computeRegisterProperties(STI.getRegisterInfo());
@@ -50,6 +74,169 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
   setMinFunctionAlignment(FunctionAlignment);
 }
 
+SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
+                                                SelectionDAG &DAG) const {
+  switch (Op.getOpcode()) {
+  case ISD::SHL_PARTS:
+    return lowerShiftLeftParts(Op, DAG);
+  case ISD::SRA_PARTS:
+  case ISD::SRL_PARTS:
+    return lowerShiftRightParts(Op, DAG, Op.getOpcode() == ISD::SRA_PARTS);
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL:
+    // Reached for an i32 shift amount that needs to be promoted. An empty
+    // SDValue is returned, i.e. no custom replacement node is produced here.
+    assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
+           "Unexpected custom legalisation");
+    return SDValue();
+  default:
+    report_fatal_error("unimplemented operand");
+  }
+}
+
+SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
+                                                     SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  SDValue Lo = Op.getOperand(0);
+  SDValue Hi = Op.getOperand(1);
+  SDValue Shamt = Op.getOperand(2);
+  EVT VT = Lo.getValueType();
+  // Double-width left shift of {Hi, Lo} by Shamt, returned as {Lo', Hi'}:
+  // if Shamt-GRLen < 0: // Shamt < GRLen
+  //   Lo' = Lo << Shamt
+  //   Hi' = (Hi << Shamt) | ((Lo >>u 1) >>u ((GRLen-1) ^ Shamt))
+  // else:
+  //   Lo' = 0
+  //   Hi' = Lo << (Shamt-GRLen)
+  // Both arms are built unconditionally; SELECT nodes pick the result.
+  SDValue Zero = DAG.getConstant(0, DL, VT);
+  SDValue One = DAG.getConstant(1, DL, VT);
+  SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
+  SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
+  SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
+  SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
+  // Lo is shifted right in two steps to avoid a shift by GRLen when Shamt is 0.
+  SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
+  SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
+  SDValue ShiftRightLo =
+      DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
+  SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
+  SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
+  SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
+  // CC holds when Shamt-GRLen is negative (signed compare): Shamt < GRLen.
+  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
+
+  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
+  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
+
+  SDValue Parts[2] = {Lo, Hi};
+  return DAG.getMergeValues(Parts, DL);
+}
+
+SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
+                                                      SelectionDAG &DAG,
+                                                      bool IsSRA) const {
+  SDLoc DL(Op);
+  SDValue Lo = Op.getOperand(0);
+  SDValue Hi = Op.getOperand(1);
+  SDValue Shamt = Op.getOperand(2);
+  EVT VT = Lo.getValueType();
+  // Double-width right shift of {Hi, Lo}: arithmetic if IsSRA, else logical.
+  // SRA expansion:
+  //   if Shamt-GRLen < 0: // Shamt < GRLen
+  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (Shamt ^ (GRLen-1)))
+  //     Hi = Hi >>s Shamt
+  //   else:
+  //     Lo = Hi >>s (Shamt-GRLen);
+  //     Hi = Hi >>s (GRLen-1)
+  //
+  // SRL expansion:
+  //   if Shamt-GRLen < 0: // Shamt < GRLen
+  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (Shamt ^ (GRLen-1)))
+  //     Hi = Hi >>u Shamt
+  //   else:
+  //     Lo = Hi >>u (Shamt-GRLen);
+  //     Hi = 0;
+
+  unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
+
+  SDValue Zero = DAG.getConstant(0, DL, VT);
+  SDValue One = DAG.getConstant(1, DL, VT);
+  SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
+  SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
+  SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
+  SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
+  // Hi is shifted left in two steps to avoid a shift by GRLen when Shamt is 0.
+  SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
+  SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
+  SDValue ShiftLeftHi =
+      DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
+  SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
+  SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
+  SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
+  SDValue HiFalse =
+      IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
+  // CC holds when Shamt-GRLen is negative (signed compare): Shamt < GRLen.
+  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
+
+  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
+  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
+
+  SDValue Parts[2] = {Lo, Hi};
+  return DAG.getMergeValues(Parts, DL);
+}
+
+// Maps a generic shift opcode (SHL/SRA/SRL) to the LoongArchISD node that
+// implements its 32-bit form; any other opcode is treated as unreachable.
+static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
+  switch (Opcode) {
+  case ISD::SHL:
+    return LoongArchISD::SLL_W;
+  case ISD::SRA:
+    return LoongArchISD::SRA_W;
+  case ISD::SRL:
+    return LoongArchISD::SRL_W;
+  default:
+    llvm_unreachable("Unexpected opcode");
+  }
+}
+
+// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
+// node. Because i8/i16/i32 aren't legal types for LA64, these operations would
+// otherwise be promoted to i64, making it difficult to select SLL_W/.../*W
+// later on because the fact that the operation was originally of type
+// i8/i16/i32 is lost. Operands are widened with ExtOpc; the result is truncated.
+static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
+                                   unsigned ExtOpc = ISD::ANY_EXTEND) {
+  SDLoc DL(N);
+  const LoongArchISD::NodeType WOpc = getLoongArchWOpcode(N->getOpcode());
+  SDValue Wide0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
+  SDValue Wide1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
+  SDValue WideRes = DAG.getNode(WOpc, DL, MVT::i64, Wide0, Wide1);
+  // ReplaceNodeResults requires we maintain the same type for the return value.
+  return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), WideRes);
+}
+
+void LoongArchTargetLowering::ReplaceNodeResults(
+    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
+  // Supplies replacement results for the i32 shifts marked Custom on LA64.
+  switch (N->getOpcode()) {
+  default:
+    llvm_unreachable("Don't know how to legalize this operation");
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL:
+    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+           "Unexpected custom legalisation");
+    // Only a non-constant shift amount is rewritten to the *_W node. For a
+    // constant amount nothing is pushed to Results.
+    if (N->getOperand(1).getOpcode() != ISD::Constant)
+      Results.push_back(customLegalizeToWOp(N, DAG));
+    break;
+  }
+}
+
 const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
   switch ((LoongArchISD::NodeType)Opcode) {
   case LoongArchISD::FIRST_NUMBER:
@@ -61,6 +248,9 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
 
     // TODO: Add more target-dependent nodes later.
     NODE_NAME_CASE(RET)
+    NODE_NAME_CASE(SLL_W)
+    NODE_NAME_CASE(SRA_W)
+    NODE_NAME_CASE(SRL_W)
   }
 #undef NODE_NAME_CASE
   return nullptr;