llvm · koachan · Apr 26, 2025 · Apr 16, 2025 · Apr 16, 2025 · Apr 16, 2025
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -27,6 +27,7 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/DiagnosticInfo.h"
@@ -1704,8 +1705,10 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::FP16_TO_FP, MVT::f128, Expand);
   setOperationAction(ISD::FP_TO_FP16, MVT::f128, Expand);
 
-  setOperationAction(ISD::BITCAST, MVT::f32, Expand);
-  setOperationAction(ISD::BITCAST, MVT::i32, Expand);
+  setOperationAction(ISD::BITCAST, MVT::f32,
+                     Subtarget->isVIS3() ? Legal : Expand);
+  setOperationAction(ISD::BITCAST, MVT::i32,
+                     Subtarget->isVIS3() ? Legal : Expand);
 
   // Sparc has no select or setcc: expand to SELECT_CC.
   setOperationAction(ISD::SELECT, MVT::i32, Expand);
@@ -1743,17 +1746,17 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
   }
 
   if (Subtarget->is64Bit()) {
-    setOperationAction(ISD::BITCAST, MVT::f64, Expand);
-    setOperationAction(ISD::BITCAST, MVT::i64, Expand);
+    setOperationAction(ISD::BITCAST, MVT::f64,
+                       Subtarget->isVIS3() ? Legal : Expand);
+    setOperationAction(ISD::BITCAST, MVT::i64,
+                       Subtarget->isVIS3() ? Legal : Expand);
     setOperationAction(ISD::SELECT, MVT::i64, Expand);
     setOperationAction(ISD::SETCC, MVT::i64, Expand);
     setOperationAction(ISD::BR_CC, MVT::i64, Custom);
     setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
 
     setOperationAction(ISD::CTPOP, MVT::i64,
                        Subtarget->usePopc() ? Legal : Expand);
-    setOperationAction(ISD::CTTZ , MVT::i64, Expand);
-    setOperationAction(ISD::CTLZ , MVT::i64, Expand);
     setOperationAction(ISD::BSWAP, MVT::i64, Expand);
     setOperationAction(ISD::ROTL , MVT::i64, Expand);
     setOperationAction(ISD::ROTR , MVT::i64, Expand);
@@ -1813,9 +1816,7 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::FCOS , MVT::f32, Expand);
   setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
   setOperationAction(ISD::FREM , MVT::f32, Expand);
-  setOperationAction(ISD::FMA  , MVT::f32, Expand);
-  setOperationAction(ISD::CTTZ , MVT::i32, Expand);
-  setOperationAction(ISD::CTLZ , MVT::i32, Expand);
+  setOperationAction(ISD::FMA, MVT::f32, Expand);
   setOperationAction(ISD::ROTL , MVT::i32, Expand);
   setOperationAction(ISD::ROTR , MVT::i32, Expand);
   setOperationAction(ISD::BSWAP, MVT::i32, Expand);
@@ -1986,6 +1987,42 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
   if (Subtarget->hasLeonCycleCounter())
     setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
 
+  if (Subtarget->isVIS3()) {
+    setOperationAction(ISD::CTLZ, MVT::i32, Legal);
+    setOperationAction(ISD::CTLZ, MVT::i64, Legal);
+    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Legal);
+    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Legal);
+
+    setOperationAction(ISD::CTTZ, MVT::i32,
+                       Subtarget->is64Bit() ? Promote : Expand);
+    setOperationAction(ISD::CTTZ, MVT::i64, Expand);
+    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32,
+                       Subtarget->is64Bit() ? Promote : Expand);
+    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
+  } else if (Subtarget->usePopc()) {
+    setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+    setOperationAction(ISD::CTLZ, MVT::i64, Expand);
+    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
+    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
+
+    setOperationAction(ISD::CTTZ, MVT::i32, Expand);
+    setOperationAction(ISD::CTTZ, MVT::i64, Expand);
+    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
+  } else {
+    setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+    setOperationAction(ISD::CTLZ, MVT::i64, Expand);
+    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32,
+                       Subtarget->is64Bit() ? Promote : LibCall);
+    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, LibCall);
+
+    // FIXME make these LibCalls.
+    setOperationAction(ISD::CTTZ, MVT::i32, Expand);
+    setOperationAction(ISD::CTTZ, MVT::i64, Expand);
+    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
+  }
+
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
 
   setMinFunctionAlignment(Align(4));
@@ -3567,8 +3604,24 @@ bool SparcTargetLowering::useLoadStackGuardNode(const Module &M) const {
 
 bool SparcTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                        bool ForCodeSize) const {
-  return Subtarget->isVIS() && (VT == MVT::f32 || VT == MVT::f64) &&
-         Imm.isZero();
+  if (VT != MVT::f32 && VT != MVT::f64)
+    return false;
+  if (Imm.isZero())
+    return Subtarget->isVIS();
+  if (Imm.isExactlyValue(+0.5) || Imm.isExactlyValue(-0.5))
+    return Subtarget->isVIS3();
+  return false;
+}
+
+bool SparcTargetLowering::isCtlzFast() const { return Subtarget->isVIS3(); }
+
+bool SparcTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
+  // We lack native cttz, however,
+  // On 64-bit targets it is cheap to implement it in terms of popc.
+  if (Subtarget->is64Bit() && Subtarget->usePopc())
+    return true;
+  // Otherwise, implementing cttz in terms of ctlz is still cheap.
+  return isCheapToSpeculateCtlz(Ty);
 }
 
 // Override to disable global variable loading on Linux.

diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.h b/llvm/lib/Target/Sparc/SparcISelLowering.h
@@ -210,6 +210,14 @@ namespace llvm {
     bool isFPImmLegal(const APFloat &Imm, EVT VT,
                       bool ForCodeSize) const override;
 
+    bool isCtlzFast() const override;
+
+    bool isCheapToSpeculateCtlz(Type *Ty) const override {
+      return isCtlzFast();
+    }
+
+    bool isCheapToSpeculateCttz(Type *Ty) const override;
+
     bool shouldInsertFencesForAtomic(const Instruction *I) const override {
       // FIXME: We insert fences for each atomics and generate
       // sub-optimal code for PSO/TSO. (Approximately nobody uses any

diff --git a/llvm/lib/Target/Sparc/SparcInstrVIS.td b/llvm/lib/Target/Sparc/SparcInstrVIS.td
@@ -259,14 +259,14 @@ def LZCNT     : VISInstFormat<0b000010111, (outs I64Regs:$rd),
                    (ins I64Regs:$rs2), "lzcnt $rs2, $rd">;
 
 let rs1 = 0 in {
-def MOVSTOSW : VISInstFormat<0b100010011, (outs I64Regs:$rd),
-                   (ins DFPRegs:$rs2), "movstosw $rs2, $rd">;
-def MOVSTOUW : VISInstFormat<0b100010001, (outs I64Regs:$rd),
-                   (ins DFPRegs:$rs2), "movstouw $rs2, $rd">;
+def MOVSTOSW : VISInstFormat<0b100010011, (outs IntRegs:$rd),
+                   (ins FPRegs:$rs2), "movstosw $rs2, $rd">;
+def MOVSTOUW : VISInstFormat<0b100010001, (outs IntRegs:$rd),
+                   (ins FPRegs:$rs2), "movstouw $rs2, $rd">;
 def MOVDTOX  : VISInstFormat<0b100010000, (outs I64Regs:$rd),
                    (ins DFPRegs:$rs2), "movdtox $rs2, $rd">;
-def MOVWTOS  :  VISInstFormat<0b100011001, (outs DFPRegs:$rd),
-                   (ins I64Regs:$rs2), "movwtos $rs2, $rd">;
+def MOVWTOS  :  VISInstFormat<0b100011001, (outs FPRegs:$rd),
+                   (ins IntRegs:$rs2), "movwtos $rs2, $rd">;
 def MOVXTOD  :  VISInstFormat<0b100011000, (outs DFPRegs:$rd),
                    (ins I64Regs:$rs2), "movxtod $rs2, $rd">;
 }
@@ -281,6 +281,8 @@ def XMULXHI  : VISInst<0b100010110, "xmulxhi", I64Regs>;
 // FP immediate patterns.
 def fpimm0 : FPImmLeaf<fAny, [{return Imm.isExactlyValue(+0.0);}]>;
 def fpnegimm0 : FPImmLeaf<fAny, [{return Imm.isExactlyValue(-0.0);}]>;
+def fpimmhalf : FPImmLeaf<fAny, [{return Imm.isExactlyValue(+0.5);}]>;
+def fpnegimmhalf : FPImmLeaf<fAny, [{return Imm.isExactlyValue(-0.5);}]>;
 
 // VIS instruction patterns.
 let Predicates = [HasVIS] in {
@@ -293,5 +295,53 @@ def : Pat<(f32 fpnegimm0), (FNEGS (FZEROS))>;
 
 // VIS3 instruction patterns.
 let Predicates = [HasVIS3] in {
+// +/-0.5 immediate.
+// This is needed to enable halving instructions.
+// FIXME generalize this to arbitrary immediates.
+// SET/MOVWTOS or SETX/MOVXTOD pair should let us materialize FP constants
+// faster than constant pool loading.
+def : Pat<(f32 fpimmhalf), (MOVWTOS (SETHIi 0x0FC000))>;
+def : Pat<(f32 fpnegimmhalf), (MOVWTOS (SETHIi 0x2FC000))>;
+def : Pat<(f64 fpimmhalf), (MOVXTOD (SLLXri (SETHIi 0x0FF800), 32))>;
+def : Pat<(f64 fpnegimmhalf), (MOVXTOD (SLLXri (SETHIi 0x2FF800), 32))>;
+
 def : Pat<(i64 (adde i64:$lhs, i64:$rhs)), (ADDXCCC $lhs, $rhs)>;
+
+def : Pat<(i64 (ctlz i64:$src)), (LZCNT $src)>;
+def : Pat<(i64 (ctlz_zero_undef i64:$src)), (LZCNT $src)>;
+// 32-bit LZCNT.
+// The zero extension will leave us with 32 extra leading zeros,
+// so we need to compensate for it.
+// FIXME remove this when the codegen supports using 64-bit values directly
+// in V8+ mode.
+def : Pat<(i32 (ctlz i32:$src)), (ADDri (LZCNT (SRLri $src, 0)), (i32 -32))>;
+def : Pat<(i32 (ctlz_zero_undef i32:$src)), (ADDri (LZCNT (SRLri $src, 0)), (i32 -32))>;
+
+def : Pat<(i32 (bitconvert f32:$src)), (MOVSTOUW $src)>;
+def : Pat<(i64 (zanyext (i32 (bitconvert f32:$src)))), (MOVSTOUW $src)>;
+def : Pat<(i64 (sext (i32 (bitconvert f32:$src)))), (MOVSTOSW $src)>;
+def : Pat<(f32 (bitconvert i32:$src)), (MOVWTOS $src)>;
+def : Pat<(i64 (bitconvert f64:$src)), (MOVDTOX $src)>;
+def : Pat<(f64 (bitconvert i64:$src)), (MOVXTOD $src)>;
+
+// OP-then-neg FP operations.
+def : Pat<(f32 (fneg (fadd f32:$rs1, f32:$rs2))), (FNADDS $rs1, $rs2)>;
+def : Pat<(f64 (fneg (fadd f64:$rs1, f64:$rs2))), (FNADDD $rs1, $rs2)>;
+def : Pat<(f32 (fneg (fmul f32:$rs1, f32:$rs2))), (FNMULS $rs1, $rs2)>;
+def : Pat<(f32 (fmul (fneg f32:$rs1), f32:$rs2)), (FNMULS $rs1, $rs2)>;
+def : Pat<(f32 (fmul f32:$rs1, (fneg f32:$rs2))), (FNMULS $rs1, $rs2)>;
+def : Pat<(f64 (fneg (fmul f64:$rs1, f64:$rs2))), (FNMULD $rs1, $rs2)>;
+def : Pat<(f64 (fmul (fneg f64:$rs1), f64:$rs2)), (FNMULD $rs1, $rs2)>;
+def : Pat<(f64 (fmul f64:$rs1, (fneg f64:$rs2))), (FNMULD $rs1, $rs2)>;
+def : Pat<(f64 (fneg (fmul (fpextend f32:$rs1), (fpextend f32:$rs2)))), (FNSMULD $rs1, $rs2)>;
+def : Pat<(f64 (fmul (fneg (fpextend f32:$rs1)), (fpextend f32:$rs2))), (FNSMULD $rs1, $rs2)>;
+def : Pat<(f64 (fmul (fpextend f32:$rs1), (fneg (fpextend f32:$rs2)))), (FNSMULD $rs1, $rs2)>;
+
+// Op-then-halve FP operations.
+def : Pat<(f32 (fmul (fadd f32:$rs1, f32:$rs2), fpimmhalf)), (FHADDS $rs1, $rs2)>;
+def : Pat<(f64 (fmul (fadd f64:$rs1, f64:$rs2), fpimmhalf)), (FHADDD $rs1, $rs2)>;
+def : Pat<(f32 (fmul (fsub f32:$rs1, f32:$rs2), fpimmhalf)), (FHSUBS $rs1, $rs2)>;
+def : Pat<(f64 (fmul (fsub f64:$rs1, f64:$rs2), fpimmhalf)), (FHSUBD $rs1, $rs2)>;
+def : Pat<(f32 (fmul (fadd f32:$rs1, f32:$rs2), fpnegimmhalf)), (FNHADDS $rs1, $rs2)>;
+def : Pat<(f64 (fmul (fadd f64:$rs1, f64:$rs2), fpnegimmhalf)), (FNHADDD $rs1, $rs2)>;
 } // Predicates = [HasVIS3]
diff --git a/llvm/test/CodeGen/SPARC/bitcast.ll b/llvm/test/CodeGen/SPARC/bitcast.ll
@@ -0,0 +1,139 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=sparcv9 | FileCheck %s -check-prefix=V9
+; RUN: llc < %s -mtriple=sparcv9 -mattr=vis3 | FileCheck %s -check-prefix=VIS3
+
+define i32 @stow(float %float) nounwind {
+; V9-LABEL: stow:
+; V9:       ! %bb.0:
+; V9-NEXT:    add %sp, -144, %sp
+; V9-NEXT:    st %f1, [%sp+2187]
+; V9-NEXT:    ld [%sp+2187], %o0
+; V9-NEXT:    retl
+; V9-NEXT:    add %sp, 144, %sp
+;
+; VIS3-LABEL: stow:
+; VIS3:       ! %bb.0:
+; VIS3-NEXT:    retl
+; VIS3-NEXT:    movstouw %f1, %o0
+  %w = bitcast float %float to i32
+  ret i32 %w
+}
+
+define zeroext i32 @stouw(float %float) nounwind {
+; V9-LABEL: stouw:
+; V9:       ! %bb.0:
+; V9-NEXT:    add %sp, -144, %sp
+; V9-NEXT:    st %f1, [%sp+2187]
+; V9-NEXT:    ld [%sp+2187], %o0
+; V9-NEXT:    retl
+; V9-NEXT:    add %sp, 144, %sp
+;
+; VIS3-LABEL: stouw:
+; VIS3:       ! %bb.0:
+; VIS3-NEXT:    retl
+; VIS3-NEXT:    movstouw %f1, %o0
+  %uw = bitcast float %float to i32
+  ret i32 %uw
+}
+
+define signext i32 @stosw(float %float) nounwind {
+; V9-LABEL: stosw:
+; V9:       ! %bb.0:
+; V9-NEXT:    add %sp, -144, %sp
+; V9-NEXT:    st %f1, [%sp+2187]
+; V9-NEXT:    ldsw [%sp+2187], %o0
+; V9-NEXT:    retl
+; V9-NEXT:    add %sp, 144, %sp
+;
+; VIS3-LABEL: stosw:
+; VIS3:       ! %bb.0:
+; VIS3-NEXT:    retl
+; VIS3-NEXT:    movstosw %f1, %o0
+  %sw = bitcast float %float to i32
+  ret i32 %sw
+}
+
+define float @wtos(i32 %w) nounwind {
+; V9-LABEL: wtos:
+; V9:       ! %bb.0:
+; V9-NEXT:    add %sp, -144, %sp
+; V9-NEXT:    st %o0, [%sp+2187]
+; V9-NEXT:    ld [%sp+2187], %f0
+; V9-NEXT:    retl
+; V9-NEXT:    add %sp, 144, %sp
+;
+; VIS3-LABEL: wtos:
+; VIS3:       ! %bb.0:
+; VIS3-NEXT:    retl
+; VIS3-NEXT:    movwtos %o0, %f0
+  %float = bitcast i32 %w to float
+  ret float %float
+}
+
+define float @uwtos(i32 zeroext %uw) nounwind {
+; V9-LABEL: uwtos:
+; V9:       ! %bb.0:
+; V9-NEXT:    add %sp, -144, %sp
+; V9-NEXT:    st %o0, [%sp+2187]
+; V9-NEXT:    ld [%sp+2187], %f0
+; V9-NEXT:    retl
+; V9-NEXT:    add %sp, 144, %sp
+;
+; VIS3-LABEL: uwtos:
+; VIS3:       ! %bb.0:
+; VIS3-NEXT:    retl
+; VIS3-NEXT:    movwtos %o0, %f0
+  %float = bitcast i32 %uw to float
+  ret float %float
+}
+
+define float @swtos(i32 signext %sw) nounwind {
+; V9-LABEL: swtos:
+; V9:       ! %bb.0:
+; V9-NEXT:    add %sp, -144, %sp
+; V9-NEXT:    st %o0, [%sp+2187]
+; V9-NEXT:    ld [%sp+2187], %f0
+; V9-NEXT:    retl
+; V9-NEXT:    add %sp, 144, %sp
+;
+; VIS3-LABEL: swtos:
+; VIS3:       ! %bb.0:
+; VIS3-NEXT:    retl
+; VIS3-NEXT:    movwtos %o0, %f0
+  %float = bitcast i32 %sw to float
+  ret float %float
+}
+
+define i64 @dtox(double %double) nounwind {
+; V9-LABEL: dtox:
+; V9:       ! %bb.0:
+; V9-NEXT:    add %sp, -144, %sp
+; V9-NEXT:    std %f0, [%sp+2183]
+; V9-NEXT:    ldx [%sp+2183], %o0
+; V9-NEXT:    retl
+; V9-NEXT:    add %sp, 144, %sp
+;
+; VIS3-LABEL: dtox:
+; VIS3:       ! %bb.0:
+; VIS3-NEXT:    retl
+; VIS3-NEXT:    movdtox %f0, %o0
+  %x = bitcast double %double to i64
+  ret i64 %x
+}
+
+define double @xtod(i64 %x) nounwind {
+; V9-LABEL: xtod:
+; V9:       ! %bb.0:
+; V9-NEXT:    add %sp, -144, %sp
+; V9-NEXT:    stx %o0, [%sp+2183]
+; V9-NEXT:    ldd [%sp+2183], %f0
+; V9-NEXT:    retl
+; V9-NEXT:    add %sp, 144, %sp
+;
+; VIS3-LABEL: xtod:
+; VIS3:       ! %bb.0:
+; VIS3-NEXT:    retl
+; VIS3-NEXT:    movxtod %o0, %f0
+  %double = bitcast i64 %x to double
+  ret double %double
+}