Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 64 additions & 11 deletions llvm/lib/Target/Sparc/SparcISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
Expand Down Expand Up @@ -1704,8 +1705,10 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FP16_TO_FP, MVT::f128, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f128, Expand);

setOperationAction(ISD::BITCAST, MVT::f32, Expand);
setOperationAction(ISD::BITCAST, MVT::i32, Expand);
setOperationAction(ISD::BITCAST, MVT::f32,
Subtarget->isVIS3() ? Legal : Expand);
setOperationAction(ISD::BITCAST, MVT::i32,
Subtarget->isVIS3() ? Legal : Expand);

// Sparc has no select or setcc: expand to SELECT_CC.
setOperationAction(ISD::SELECT, MVT::i32, Expand);
Expand Down Expand Up @@ -1743,17 +1746,17 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
}

if (Subtarget->is64Bit()) {
setOperationAction(ISD::BITCAST, MVT::f64, Expand);
setOperationAction(ISD::BITCAST, MVT::i64, Expand);
setOperationAction(ISD::BITCAST, MVT::f64,
Subtarget->isVIS3() ? Legal : Expand);
setOperationAction(ISD::BITCAST, MVT::i64,
Subtarget->isVIS3() ? Legal : Expand);
setOperationAction(ISD::SELECT, MVT::i64, Expand);
setOperationAction(ISD::SETCC, MVT::i64, Expand);
setOperationAction(ISD::BR_CC, MVT::i64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);

setOperationAction(ISD::CTPOP, MVT::i64,
Subtarget->usePopc() ? Legal : Expand);
setOperationAction(ISD::CTTZ , MVT::i64, Expand);
setOperationAction(ISD::CTLZ , MVT::i64, Expand);
setOperationAction(ISD::BSWAP, MVT::i64, Expand);
setOperationAction(ISD::ROTL , MVT::i64, Expand);
setOperationAction(ISD::ROTR , MVT::i64, Expand);
Expand Down Expand Up @@ -1813,9 +1816,7 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FREM , MVT::f32, Expand);
setOperationAction(ISD::FMA , MVT::f32, Expand);
setOperationAction(ISD::CTTZ , MVT::i32, Expand);
setOperationAction(ISD::CTLZ , MVT::i32, Expand);
setOperationAction(ISD::FMA, MVT::f32, Expand);
setOperationAction(ISD::ROTL , MVT::i32, Expand);
setOperationAction(ISD::ROTR , MVT::i32, Expand);
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
Expand Down Expand Up @@ -1986,6 +1987,42 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
if (Subtarget->hasLeonCycleCounter())
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);

if (Subtarget->isVIS3()) {
setOperationAction(ISD::CTLZ, MVT::i32, Legal);
setOperationAction(ISD::CTLZ, MVT::i64, Legal);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Legal);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Legal);

setOperationAction(ISD::CTTZ, MVT::i32,
Subtarget->is64Bit() ? Promote : Expand);
setOperationAction(ISD::CTTZ, MVT::i64, Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32,
Subtarget->is64Bit() ? Promote : Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
} else if (Subtarget->usePopc()) {
setOperationAction(ISD::CTLZ, MVT::i32, Expand);
setOperationAction(ISD::CTLZ, MVT::i64, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);

setOperationAction(ISD::CTTZ, MVT::i32, Expand);
setOperationAction(ISD::CTTZ, MVT::i64, Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
} else {
setOperationAction(ISD::CTLZ, MVT::i32, Expand);
setOperationAction(ISD::CTLZ, MVT::i64, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32,
Subtarget->is64Bit() ? Promote : LibCall);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, LibCall);

// FIXME make these LibCalls.
setOperationAction(ISD::CTTZ, MVT::i32, Expand);
setOperationAction(ISD::CTTZ, MVT::i64, Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
}

setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

setMinFunctionAlignment(Align(4));
Expand Down Expand Up @@ -3567,8 +3604,24 @@ bool SparcTargetLowering::useLoadStackGuardNode(const Module &M) const {

/// Report whether the floating-point immediate \p Imm of type \p VT is legal
/// for the current subtarget.
///
/// Only f32 and f64 are considered:
///  - a zero immediate is legal when the subtarget has VIS;
///  - +0.5 and -0.5 are legal when the subtarget has VIS3.
/// Every other type or value is rejected. \p ForCodeSize is not consulted.
bool SparcTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                       bool ForCodeSize) const {
  // Nothing other than single and double precision is ever legal here.
  if (VT != MVT::f32 && VT != MVT::f64)
    return false;
  if (Imm.isZero())
    return Subtarget->isVIS();
  // The halves are accepted so the VIS3 halving patterns can match them.
  if (Imm.isExactlyValue(+0.5) || Imm.isExactlyValue(-0.5))
    return Subtarget->isVIS3();
  return false;
}

/// ctlz is reported as fast exactly when the subtarget has VIS3.
bool SparcTargetLowering::isCtlzFast() const {
  const bool HasVIS3 = Subtarget->isVIS3();
  return HasVIS3;
}

/// Decide whether a cttz on \p Ty is cheap enough to speculate.
bool SparcTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
  // There is no native cttz. On 64-bit targets with popc it is cheap to
  // implement it in terms of popc; failing that, implementing it in terms of
  // ctlz is still cheap whenever ctlz itself is cheap to speculate.
  const bool CheapViaPopc = Subtarget->is64Bit() && Subtarget->usePopc();
  return CheapViaPopc || isCheapToSpeculateCtlz(Ty);
}

// Override to disable global variable loading on Linux.
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/Sparc/SparcISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,14 @@ namespace llvm {
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;

bool isCtlzFast() const override;

/// Speculating ctlz is treated as cheap exactly when isCtlzFast() says ctlz
/// is fast; \p Ty is not consulted.
bool isCheapToSpeculateCtlz(Type *Ty) const override { return isCtlzFast(); }

bool isCheapToSpeculateCttz(Type *Ty) const override;

bool shouldInsertFencesForAtomic(const Instruction *I) const override {
// FIXME: We insert fences for each atomics and generate
// sub-optimal code for PSO/TSO. (Approximately nobody uses any
Expand Down
62 changes: 56 additions & 6 deletions llvm/lib/Target/Sparc/SparcInstrVIS.td
Original file line number Diff line number Diff line change
Expand Up @@ -259,14 +259,14 @@ def LZCNT : VISInstFormat<0b000010111, (outs I64Regs:$rd),
(ins I64Regs:$rs2), "lzcnt $rs2, $rd">;

// Moves between the integer and floating-point register files (rs1 is fixed
// to 0 for all of them).
// The single-word moves (MOVSTOSW, MOVSTOUW, MOVWTOS) use the 32-bit register
// classes IntRegs/FPRegs so they can be selected for i32 <-> f32 bitcasts;
// the doubleword moves (MOVDTOX, MOVXTOD) use I64Regs/DFPRegs.
// Each instruction is defined exactly once: a second def with the same name
// is a TableGen error.
let rs1 = 0 in {
  def MOVSTOSW : VISInstFormat<0b100010011, (outs IntRegs:$rd),
                 (ins FPRegs:$rs2), "movstosw $rs2, $rd">;
  def MOVSTOUW : VISInstFormat<0b100010001, (outs IntRegs:$rd),
                 (ins FPRegs:$rs2), "movstouw $rs2, $rd">;
  def MOVDTOX  : VISInstFormat<0b100010000, (outs I64Regs:$rd),
                 (ins DFPRegs:$rs2), "movdtox $rs2, $rd">;
  def MOVWTOS  : VISInstFormat<0b100011001, (outs FPRegs:$rd),
                 (ins IntRegs:$rs2), "movwtos $rs2, $rd">;
  def MOVXTOD  : VISInstFormat<0b100011000, (outs DFPRegs:$rd),
                 (ins I64Regs:$rs2), "movxtod $rs2, $rd">;
}
Expand All @@ -281,6 +281,8 @@ def XMULXHI : VISInst<0b100010110, "xmulxhi", I64Regs>;
// FP immediate patterns.
// Each leaf matches only an FP immediate that is exactly the named value;
// the sign is significant, so the positive and negative forms are separate
// leaves.
// +0.0
def fpimm0 : FPImmLeaf<fAny, [{return Imm.isExactlyValue(+0.0);}]>;
// -0.0
def fpnegimm0 : FPImmLeaf<fAny, [{return Imm.isExactlyValue(-0.0);}]>;
// +0.5
def fpimmhalf : FPImmLeaf<fAny, [{return Imm.isExactlyValue(+0.5);}]>;
// -0.5
def fpnegimmhalf : FPImmLeaf<fAny, [{return Imm.isExactlyValue(-0.5);}]>;

// VIS instruction patterns.
let Predicates = [HasVIS] in {
Expand All @@ -293,5 +295,53 @@ def : Pat<(f32 fpnegimm0), (FNEGS (FZEROS))>;

// VIS3 instruction patterns.
// Everything in this block is only selectable when HasVIS3 holds.
let Predicates = [HasVIS3] in {
// +/-0.5 immediates.
// These are needed so that the halving instructions below can be selected.
// SETHI places its immediate in bits 31:10 of the result, so 0x0FC000 gives
// 0x3F000000 (0.5f) and 0x2FC000 gives 0xBF000000 (-0.5f). For f64 the high
// word (0x3FE00000 / 0xBFE00000) is built the same way and then shifted into
// the upper 32 bits before being moved to the FP register.
// FIXME: generalize this to arbitrary immediates.
// A SET/MOVWTOS or SETX/MOVXTOD pair should let us materialize FP constants
// faster than loading them from the constant pool.
def : Pat<(f32 fpimmhalf), (MOVWTOS (SETHIi 0x0FC000))>;
def : Pat<(f32 fpnegimmhalf), (MOVWTOS (SETHIi 0x2FC000))>;
def : Pat<(f64 fpimmhalf), (MOVXTOD (SLLXri (SETHIi 0x0FF800), 32))>;
def : Pat<(f64 fpnegimmhalf), (MOVXTOD (SLLXri (SETHIi 0x2FF800), 32))>;

// 64-bit adde selects ADDXCCC.
def : Pat<(i64 (adde i64:$lhs, i64:$rhs)), (ADDXCCC $lhs, $rhs)>;

// 64-bit count-leading-zeros; both the plain and zero-undef forms use LZCNT.
def : Pat<(i64 (ctlz i64:$src)), (LZCNT $src)>;
def : Pat<(i64 (ctlz_zero_undef i64:$src)), (LZCNT $src)>;
// 32-bit LZCNT.
// The source is zero-extended first (SRL by 0), which leaves 32 extra
// leading zeros in the 64-bit count, so 32 is subtracted to compensate.
// FIXME: remove this when the codegen supports using 64-bit values directly
// in V8+ mode.
def : Pat<(i32 (ctlz i32:$src)), (ADDri (LZCNT (SRLri $src, 0)), (i32 -32))>;
def : Pat<(i32 (ctlz_zero_undef i32:$src)), (ADDri (LZCNT (SRLri $src, 0)), (i32 -32))>;

// Bitcasts between integer and FP registers become direct register moves.
// An extension of an f32 -> i32 bitcast is folded into the move itself:
// MOVSTOUW for zanyext, MOVSTOSW for sext.
// NOTE(review): zanyext is defined outside this view; presumably it matches
// zext or anyext -- confirm.
def : Pat<(i32 (bitconvert f32:$src)), (MOVSTOUW $src)>;
def : Pat<(i64 (zanyext (i32 (bitconvert f32:$src)))), (MOVSTOUW $src)>;
def : Pat<(i64 (sext (i32 (bitconvert f32:$src)))), (MOVSTOSW $src)>;
def : Pat<(f32 (bitconvert i32:$src)), (MOVWTOS $src)>;
def : Pat<(i64 (bitconvert f64:$src)), (MOVDTOX $src)>;
def : Pat<(f64 (bitconvert i64:$src)), (MOVXTOD $src)>;

// Op-then-negate FP operations.
// A negated add, and a multiply that is negated as a whole or has exactly one
// negated operand, each select the corresponding FN* instruction.
def : Pat<(f32 (fneg (fadd f32:$rs1, f32:$rs2))), (FNADDS $rs1, $rs2)>;
def : Pat<(f64 (fneg (fadd f64:$rs1, f64:$rs2))), (FNADDD $rs1, $rs2)>;
def : Pat<(f32 (fneg (fmul f32:$rs1, f32:$rs2))), (FNMULS $rs1, $rs2)>;
def : Pat<(f32 (fmul (fneg f32:$rs1), f32:$rs2)), (FNMULS $rs1, $rs2)>;
def : Pat<(f32 (fmul f32:$rs1, (fneg f32:$rs2))), (FNMULS $rs1, $rs2)>;
def : Pat<(f64 (fneg (fmul f64:$rs1, f64:$rs2))), (FNMULD $rs1, $rs2)>;
def : Pat<(f64 (fmul (fneg f64:$rs1), f64:$rs2)), (FNMULD $rs1, $rs2)>;
def : Pat<(f64 (fmul f64:$rs1, (fneg f64:$rs2))), (FNMULD $rs1, $rs2)>;
// Same shapes for an f32 x f32 multiply whose operands are extended to f64.
def : Pat<(f64 (fneg (fmul (fpextend f32:$rs1), (fpextend f32:$rs2)))), (FNSMULD $rs1, $rs2)>;
def : Pat<(f64 (fmul (fneg (fpextend f32:$rs1)), (fpextend f32:$rs2))), (FNSMULD $rs1, $rs2)>;
def : Pat<(f64 (fmul (fpextend f32:$rs1), (fneg (fpextend f32:$rs2)))), (FNSMULD $rs1, $rs2)>;

// Op-then-halve FP operations.
// An add or subtract multiplied by +0.5 selects FHADD*/FHSUB*; an add
// multiplied by -0.5 selects FNHADD*.
def : Pat<(f32 (fmul (fadd f32:$rs1, f32:$rs2), fpimmhalf)), (FHADDS $rs1, $rs2)>;
def : Pat<(f64 (fmul (fadd f64:$rs1, f64:$rs2), fpimmhalf)), (FHADDD $rs1, $rs2)>;
def : Pat<(f32 (fmul (fsub f32:$rs1, f32:$rs2), fpimmhalf)), (FHSUBS $rs1, $rs2)>;
def : Pat<(f64 (fmul (fsub f64:$rs1, f64:$rs2), fpimmhalf)), (FHSUBD $rs1, $rs2)>;
def : Pat<(f32 (fmul (fadd f32:$rs1, f32:$rs2), fpnegimmhalf)), (FNHADDS $rs1, $rs2)>;
def : Pat<(f64 (fmul (fadd f64:$rs1, f64:$rs2), fpnegimmhalf)), (FNHADDD $rs1, $rs2)>;
} // Predicates = [HasVIS3]
139 changes: 139 additions & 0 deletions llvm/test/CodeGen/SPARC/bitcast.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=sparcv9 | FileCheck %s -check-prefix=V9
; RUN: llc < %s -mtriple=sparcv9 -mattr=vis3 | FileCheck %s -check-prefix=VIS3

define i32 @stow(float %float) nounwind {
; V9-LABEL: stow:
; V9: ! %bb.0:
; V9-NEXT: add %sp, -144, %sp
; V9-NEXT: st %f1, [%sp+2187]
; V9-NEXT: ld [%sp+2187], %o0
; V9-NEXT: retl
; V9-NEXT: add %sp, 144, %sp
;
; VIS3-LABEL: stow:
; VIS3: ! %bb.0:
; VIS3-NEXT: retl
; VIS3-NEXT: movstouw %f1, %o0
%w = bitcast float %float to i32
ret i32 %w
}

define zeroext i32 @stouw(float %float) nounwind {
; V9-LABEL: stouw:
; V9: ! %bb.0:
; V9-NEXT: add %sp, -144, %sp
; V9-NEXT: st %f1, [%sp+2187]
; V9-NEXT: ld [%sp+2187], %o0
; V9-NEXT: retl
; V9-NEXT: add %sp, 144, %sp
;
; VIS3-LABEL: stouw:
; VIS3: ! %bb.0:
; VIS3-NEXT: retl
; VIS3-NEXT: movstouw %f1, %o0
%uw = bitcast float %float to i32
ret i32 %uw
}

define signext i32 @stosw(float %float) nounwind {
; V9-LABEL: stosw:
; V9: ! %bb.0:
; V9-NEXT: add %sp, -144, %sp
; V9-NEXT: st %f1, [%sp+2187]
; V9-NEXT: ldsw [%sp+2187], %o0
; V9-NEXT: retl
; V9-NEXT: add %sp, 144, %sp
;
; VIS3-LABEL: stosw:
; VIS3: ! %bb.0:
; VIS3-NEXT: retl
; VIS3-NEXT: movstosw %f1, %o0
%sw = bitcast float %float to i32
ret i32 %sw
}

define float @wtos(i32 %w) nounwind {
; V9-LABEL: wtos:
; V9: ! %bb.0:
; V9-NEXT: add %sp, -144, %sp
; V9-NEXT: st %o0, [%sp+2187]
; V9-NEXT: ld [%sp+2187], %f0
; V9-NEXT: retl
; V9-NEXT: add %sp, 144, %sp
;
; VIS3-LABEL: wtos:
; VIS3: ! %bb.0:
; VIS3-NEXT: retl
; VIS3-NEXT: movwtos %o0, %f0
%float = bitcast i32 %w to float
ret float %float
}

define float @uwtos(i32 zeroext %uw) nounwind {
; V9-LABEL: uwtos:
; V9: ! %bb.0:
; V9-NEXT: add %sp, -144, %sp
; V9-NEXT: st %o0, [%sp+2187]
; V9-NEXT: ld [%sp+2187], %f0
; V9-NEXT: retl
; V9-NEXT: add %sp, 144, %sp
;
; VIS3-LABEL: uwtos:
; VIS3: ! %bb.0:
; VIS3-NEXT: retl
; VIS3-NEXT: movwtos %o0, %f0
%float = bitcast i32 %uw to float
ret float %float
}

define float @swtos(i32 signext %sw) nounwind {
; V9-LABEL: swtos:
; V9: ! %bb.0:
; V9-NEXT: add %sp, -144, %sp
; V9-NEXT: st %o0, [%sp+2187]
; V9-NEXT: ld [%sp+2187], %f0
; V9-NEXT: retl
; V9-NEXT: add %sp, 144, %sp
;
; VIS3-LABEL: swtos:
; VIS3: ! %bb.0:
; VIS3-NEXT: retl
; VIS3-NEXT: movwtos %o0, %f0
%float = bitcast i32 %sw to float
ret float %float
}

define i64 @dtox(double %double) nounwind {
; V9-LABEL: dtox:
; V9: ! %bb.0:
; V9-NEXT: add %sp, -144, %sp
; V9-NEXT: std %f0, [%sp+2183]
; V9-NEXT: ldx [%sp+2183], %o0
; V9-NEXT: retl
; V9-NEXT: add %sp, 144, %sp
;
; VIS3-LABEL: dtox:
; VIS3: ! %bb.0:
; VIS3-NEXT: retl
; VIS3-NEXT: movdtox %f0, %o0
%x = bitcast double %double to i64
ret i64 %x
}

define double @xtod(i64 %x) nounwind {
; V9-LABEL: xtod:
; V9: ! %bb.0:
; V9-NEXT: add %sp, -144, %sp
; V9-NEXT: stx %o0, [%sp+2183]
; V9-NEXT: ldd [%sp+2183], %f0
; V9-NEXT: retl
; V9-NEXT: add %sp, 144, %sp
;
; VIS3-LABEL: xtod:
; VIS3: ! %bb.0:
; VIS3-NEXT: retl
; VIS3-NEXT: movxtod %o0, %f0
%double = bitcast i64 %x to double
ret double %double
}
Loading
Loading