Skip to content
Open
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -474,7 +474,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {

switch (getTypeAction(InVT)) {
case TargetLowering::TypeLegal:
// Try and use in-register bitcast
if (SDValue Res = LowerBitcastInRegister(N))
return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, Res);
// Fallback to stack load store
break;

case TargetLowering::TypePromoteInteger:
if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector() && !NInVT.isVector())
// The input promotes to the same size. Convert the promoted value.
Expand Down Expand Up @@ -2174,8 +2179,11 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N) {
}

SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) {
  // First choice: rebuild the value with in-register operations. The helper
  // hands back a null SDValue when it cannot handle this particular cast.
  SDValue InRegister = LowerBitcastInRegister(N);
  if (InRegister)
    return InRegister;

  // Otherwise (for example an unusual cast such as one to x86_fp80) spill the
  // operand to a stack slot and reload it with the destination type.
  SDValue Source = N->getOperand(0);
  EVT DestVT = N->getValueType(0);
  return CreateStackStoreLoad(Source, DestVT);
}

Expand Down
78 changes: 78 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

#include "LegalizeTypes.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
Expand Down Expand Up @@ -910,6 +911,83 @@ SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op,
return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo(), Align);
}

/// Try to lower the BITCAST node \p N between a fixed-length integer vector
/// and a scalar integer using only in-register operations (element
/// extract/zero-extend/shift/or when packing, shift/truncate/build_vector when
/// unpacking).
///
/// \param N the node to lower; must have opcode ISD::BITCAST.
/// \returns the replacement value, or a null SDValue when the cast is not a
/// fixed-length integer vector <-> scalar integer conversion, so that the
/// caller can fall back to a different lowering.
SDValue DAGTypeLegalizer::LowerBitcastInRegister(SDNode *N) const {
  assert(N->getOpcode() == ISD::BITCAST && "Unexpected opcode!");

  SDValue Src = N->getOperand(0);
  // Ask the operand SDValue for its type so that the result number it refers
  // to is honoured; going through the node would always report result #0.
  EVT FromVT = Src.getValueType();
  EVT ToVT = N->getValueType(0);

  SDLoc DL(N);

  bool IsBigEndian = DAG.getDataLayout().isBigEndian();

  // The sequences below zero-extend / truncate individual elements and walk a
  // compile-time element count, so they only make sense for fixed-length
  // vectors with integer elements. Anything else (floating-point elements,
  // scalable vectors) is rejected and left to the caller.
  auto IsPackableVector = [](EVT VT) {
    return VT.isFixedLengthVector() && VT.isInteger();
  };

  // Vector -> scalar integer: OR the zero-extended elements together, each
  // shifted to its position inside the scalar.
  if (IsPackableVector(FromVT) && ToVT.isScalarInteger()) {
    EVT ElemVT = FromVT.getVectorElementType();
    unsigned NumElems = FromVT.getVectorNumElements();
    unsigned ElemBits = ElemVT.getSizeInBits();

    // A bitcast preserves the total width, so the scalar result type is
    // exactly the packed type; accumulate directly in ToVT.
    unsigned PackedBits = ToVT.getSizeInBits();
    assert(PackedBits == ElemBits * NumElems &&
           "Bitcast must preserve the total number of bits.");
    (void)PackedBits;

    SDValue Packed = DAG.getConstant(0, DL, ToVT);

    EVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());

    for (unsigned I = 0; I < NumElems; ++I) {
      // Bits [ElemBits * I, ElemBits * (I + 1)) of the scalar come from
      // element I on little-endian targets and from the mirrored element on
      // big-endian targets.
      unsigned ElementIndex = IsBigEndian ? (NumElems - 1 - I) : I;

      SDValue Index = DAG.getConstant(ElementIndex, DL, IdxTy);

      SDValue Elem =
          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Src, Index);

      SDValue ExtElem = DAG.getNode(ISD::ZERO_EXTEND, DL, ToVT, Elem);
      SDValue ShiftAmount = DAG.getShiftAmountConstant(ElemBits * I, ToVT, DL);
      SDValue ShiftedElem =
          DAG.getNode(ISD::SHL, DL, ToVT, ExtElem, ShiftAmount);

      Packed = DAG.getNode(ISD::OR, DL, ToVT, Packed, ShiftedElem);
    }

    return Packed;
  }

  // Scalar integer -> vector: shift each element's bits down to bit 0 and
  // truncate to the element type.
  if (FromVT.isScalarInteger() && IsPackableVector(ToVT)) {
    EVT ElemVT = ToVT.getVectorElementType();
    unsigned NumElems = ToVT.getVectorNumElements();
    unsigned ElemBits = ElemVT.getSizeInBits();

    unsigned PackedBits = FromVT.getSizeInBits();
    assert(PackedBits == ElemBits * NumElems &&
           "Bitcast must preserve the total number of bits.");
    (void)PackedBits;

    SmallVector<SDValue, 8> Elements(NumElems);

    for (unsigned I = 0; I < NumElems; ++I) {
      // Element I lives at the mirrored bit position on big-endian targets.
      unsigned ElementIndex = IsBigEndian ? (NumElems - 1 - I) : I;

      // Built the same way as the shift amounts in the packing path above.
      SDValue ShiftAmount =
          DAG.getShiftAmountConstant(ElemBits * ElementIndex, FromVT, DL);
      SDValue Shifted = DAG.getNode(ISD::SRL, DL, FromVT, Src, ShiftAmount);
      Elements[I] = DAG.getNode(ISD::TRUNCATE, DL, ElemVT, Shifted);
    }

    return DAG.getBuildVector(ToVT, DL, Elements);
  }

  // Not a cast this helper knows how to express in registers.
  return SDValue();
}

/// Replace the node's results with custom code provided by the target and
/// return "true", or do nothing and return "false".
/// The last parameter is FALSE if we are dealing with a node with legal
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue BitConvertToInteger(SDValue Op);
SDValue BitConvertVectorToIntegerVector(SDValue Op);
SDValue CreateStackStoreLoad(SDValue Op, EVT DestVT);
// Lower the BITCAST node N using in-register operations rather than a stack
// store + load; yields a null SDValue when it does not handle the cast.
SDValue LowerBitcastInRegister(SDNode *N) const;
bool CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult);
bool CustomWidenLowerNode(SDNode *N, EVT VT);

Expand Down
61 changes: 0 additions & 61 deletions llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -472,13 +472,6 @@ VectorizePTXValueVTs(const SmallVectorImpl<EVT> &ValueVTs,
return VectorInfo;
}

// Return Value reinterpreted as VT: the value itself when result 0 of its
// node already has that type, otherwise a new ISD::BITCAST node.
static SDValue MaybeBitcast(SelectionDAG &DAG, SDLoc DL, EVT VT,
                            SDValue Value) {
  const bool NeedsCast = Value->getValueType(0) != VT;
  return NeedsCast ? DAG.getNode(ISD::BITCAST, DL, VT, Value) : Value;
}

// NVPTXTargetLowering Constructor.
NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
const NVPTXSubtarget &STI)
Expand Down Expand Up @@ -622,9 +615,6 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i8, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i8, Custom);

// Custom conversions to/from v2i8.
setOperationAction(ISD::BITCAST, MVT::v2i8, Custom);

// Only logical ops can be done on v4i8 directly, others must be done
// elementwise.
setOperationAction(
Expand Down Expand Up @@ -2086,30 +2076,6 @@ NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
return DAG.getBuildVector(Node->getValueType(0), dl, Ops);
}

SDValue NVPTXTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
  // Only bitcasts whose source is v2i8 are rewritten here, so that they avoid
  // the default promotion strategy that goes through stack memory. Every
  // other bitcast is handed back untouched.
  SDValue Source = Op->getOperand(0);
  if (Source->getValueType(0) != MVT::v2i8)
    return Op;

  SDLoc Loc(Op);

  // Pull both bytes out of the vector.
  SDValue Byte0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Loc, MVT::i8, Source,
                              DAG.getIntPtrConstant(0, Loc));
  SDValue Byte1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Loc, MVT::i8, Source,
                              DAG.getIntPtrConstant(1, Loc));

  // Widen them to i16 and combine: element 0 in bits 0-7, element 1 in
  // bits 8-15.
  SDValue Wide0 = DAG.getNode(ISD::ZERO_EXTEND, Loc, MVT::i16, Byte0);
  SDValue Wide1 = DAG.getNode(ISD::ZERO_EXTEND, Loc, MVT::i16, Byte1);
  SDValue EightBits = DAG.getConstant(8, Loc, MVT::i16);
  SDValue HighPart = DAG.getNode(ISD::SHL, Loc, MVT::i16, {Wide1, EightBits});
  SDValue Word = DAG.getNode(ISD::OR, Loc, MVT::i16, {Wide0, HighPart});

  // Reinterpret the packed i16 as the requested result type if it differs.
  return MaybeBitcast(DAG, Loc, Op->getValueType(0), Word);
}

// We can init constant f16x2/v2i16/v4i8 with a single .b32 move. Normally it
// would get lowered as two constant loads and vector-packing move.
// Instead we want just a constant move:
Expand Down Expand Up @@ -2618,8 +2584,6 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return Op;
case ISD::BUILD_VECTOR:
return LowerBUILD_VECTOR(Op, DAG);
case ISD::BITCAST:
return LowerBITCAST(Op, DAG);
case ISD::EXTRACT_SUBVECTOR:
return Op;
case ISD::EXTRACT_VECTOR_ELT:
Expand Down Expand Up @@ -5202,28 +5166,6 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
return SDValue();
}

// Custom result replacement for bitcasts that produce v2i8, avoiding the
// default promotion strategy that goes through stack memory. Results is left
// untouched for any other result type.
static void ReplaceBITCAST(SDNode *Node, SelectionDAG &DAG,
                           SmallVectorImpl<SDValue> &Results) {
  SDValue Cast(Node, 0);
  if (Cast->getValueType(0) != MVT::v2i8)
    return;

  SDLoc Loc(Node);

  // View the source as a 16-bit integer, then split it into its two bytes:
  // the low byte becomes element 0 and the high byte element 1.
  SDValue Word = MaybeBitcast(DAG, Loc, MVT::i16, Cast->getOperand(0));
  SDValue LowByte = DAG.getNode(ISD::TRUNCATE, Loc, MVT::i8, Word);
  SDValue EightBits = DAG.getConstant(8, Loc, MVT::i16);
  SDValue HighWord = DAG.getNode(ISD::SRL, Loc, MVT::i16, {Word, EightBits});
  SDValue HighByte = DAG.getNode(ISD::TRUNCATE, Loc, MVT::i8, HighWord);

  SDValue Vector =
      DAG.getNode(ISD::BUILD_VECTOR, Loc, MVT::v2i8, {LowByte, HighByte});
  Results.push_back(Vector);
}

/// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads.
static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &Results) {
Expand Down Expand Up @@ -5459,9 +5401,6 @@ void NVPTXTargetLowering::ReplaceNodeResults(
switch (N->getOpcode()) {
default:
report_fatal_error("Unhandled custom legalization");
case ISD::BITCAST:
ReplaceBITCAST(N, DAG, Results);
return;
case ISD::LOAD:
ReplaceLoadVector(N, DAG, Results);
return;
Expand Down
2 changes: 0 additions & 2 deletions llvm/lib/Target/NVPTX/NVPTXISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -265,8 +265,6 @@ class NVPTXTargetLowering : public TargetLowering {
const NVPTXSubtarget &STI; // cache the subtarget here
SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const;

SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;

SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
Expand Down
28 changes: 13 additions & 15 deletions llvm/test/CodeGen/AArch64/bitcast.ll
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,15 @@ define <4 x i16> @foo2(<2 x i32> %a) {
define i32 @bitcast_v4i8_i32(<4 x i8> %a, <4 x i8> %b){
; CHECK-SD-LABEL: bitcast_v4i8_i32:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: sub sp, sp, #16
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
; CHECK-SD-NEXT: add v0.4h, v0.4h, v1.4h
; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v0.8b
; CHECK-SD-NEXT: fmov w0, s0
; CHECK-SD-NEXT: add sp, sp, #16
; CHECK-SD-NEXT: add v0.4h, v0.4h, v1.4h
; CHECK-SD-NEXT: umov w8, v0.h[0]
; CHECK-SD-NEXT: umov w9, v0.h[1]
; CHECK-SD-NEXT: umov w10, v0.h[2]
; CHECK-SD-NEXT: and w8, w8, #0xff
; CHECK-SD-NEXT: bfi w8, w9, #8, #8
; CHECK-SD-NEXT: umov w9, v0.h[3]
; CHECK-SD-NEXT: bfi w8, w10, #16, #8
; CHECK-SD-NEXT: orr w0, w8, w9, lsl #24
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: bitcast_v4i8_i32:
Expand Down Expand Up @@ -99,15 +102,10 @@ define <4 x i8> @bitcast_i32_v4i8(i32 %a, i32 %b){
define i32 @bitcast_v2i16_i32(<2 x i16> %a, <2 x i16> %b){
; CHECK-SD-LABEL: bitcast_v2i16_i32:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: sub sp, sp, #16
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
; CHECK-SD-NEXT: add v0.2s, v0.2s, v1.2s
; CHECK-SD-NEXT: mov w8, v0.s[1]
; CHECK-SD-NEXT: fmov w9, s0
; CHECK-SD-NEXT: strh w9, [sp, #12]
; CHECK-SD-NEXT: strh w8, [sp, #14]
; CHECK-SD-NEXT: ldr w0, [sp, #12]
; CHECK-SD-NEXT: add sp, sp, #16
; CHECK-SD-NEXT: add v0.2s, v0.2s, v1.2s
; CHECK-SD-NEXT: mov w8, v0.s[1]
; CHECK-SD-NEXT: fmov w0, s0
; CHECK-SD-NEXT: bfi w0, w8, #16, #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: bitcast_v2i16_i32:
Expand Down
60 changes: 25 additions & 35 deletions llvm/test/CodeGen/AArch64/shufflevector.ll
Original file line number Diff line number Diff line change
Expand Up @@ -229,15 +229,17 @@ define <2 x i1> @shufflevector_v2i1(<2 x i1> %a, <2 x i1> %b){
define i32 @shufflevector_v4i8(<4 x i8> %a, <4 x i8> %b){
; CHECK-SD-LABEL: shufflevector_v4i8:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: sub sp, sp, #16
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
; CHECK-SD-NEXT: ext v0.8b, v1.8b, v0.8b, #6
; CHECK-SD-NEXT: zip1 v1.4h, v1.4h, v0.4h
; CHECK-SD-NEXT: ext v0.8b, v0.8b, v1.8b, #4
; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v0.8b
; CHECK-SD-NEXT: fmov w0, s0
; CHECK-SD-NEXT: add sp, sp, #16
; CHECK-SD-NEXT: ret
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: umov w8, v0.h[1]
; CHECK-SD-NEXT: umov w9, v0.h[2]
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT: umov w10, v1.h[0]
; CHECK-SD-NEXT: and w8, w8, #0xff
; CHECK-SD-NEXT: bfi w8, w9, #8, #8
; CHECK-SD-NEXT: umov w9, v1.h[3]
; CHECK-SD-NEXT: bfi w8, w10, #16, #8
; CHECK-SD-NEXT: orr w0, w8, w9, lsl #24
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shufflevector_v4i8:
; CHECK-GI: // %bb.0:
Expand Down Expand Up @@ -285,15 +287,11 @@ define <32 x i8> @shufflevector_v32i8(<32 x i8> %a, <32 x i8> %b){
define i32 @shufflevector_v2i16(<2 x i16> %a, <2 x i16> %b){
; CHECK-SD-LABEL: shufflevector_v2i16:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: sub sp, sp, #16
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
; CHECK-SD-NEXT: ext v0.8b, v0.8b, v1.8b, #4
; CHECK-SD-NEXT: mov w8, v0.s[1]
; CHECK-SD-NEXT: fmov w9, s0
; CHECK-SD-NEXT: strh w9, [sp, #12]
; CHECK-SD-NEXT: strh w8, [sp, #14]
; CHECK-SD-NEXT: ldr w0, [sp, #12]
; CHECK-SD-NEXT: add sp, sp, #16
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: mov w0, v0.s[1]
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT: fmov w8, s1
; CHECK-SD-NEXT: bfi w0, w8, #16, #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shufflevector_v2i16:
Expand Down Expand Up @@ -462,14 +460,13 @@ define <2 x i1> @shufflevector_v2i1_zeroes(<2 x i1> %a, <2 x i1> %b){
define i32 @shufflevector_v4i8_zeroes(<4 x i8> %a, <4 x i8> %b){
; CHECK-SD-LABEL: shufflevector_v4i8_zeroes:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: sub sp, sp, #16
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: dup v0.4h, v0.h[0]
; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v0.8b
; CHECK-SD-NEXT: fmov w0, s0
; CHECK-SD-NEXT: add sp, sp, #16
; CHECK-SD-NEXT: ret
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: umov w8, v0.h[0]
; CHECK-SD-NEXT: and w9, w8, #0xff
; CHECK-SD-NEXT: orr w9, w9, w9, lsl #8
; CHECK-SD-NEXT: bfi w9, w8, #16, #8
; CHECK-SD-NEXT: orr w0, w9, w8, lsl #24
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shufflevector_v4i8_zeroes:
; CHECK-GI: // %bb.0:
Expand All @@ -495,16 +492,9 @@ define <32 x i8> @shufflevector_v32i8_zeroes(<32 x i8> %a, <32 x i8> %b){
define i32 @shufflevector_v2i16_zeroes(<2 x i16> %a, <2 x i16> %b){
; CHECK-SD-LABEL: shufflevector_v2i16_zeroes:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: sub sp, sp, #16
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: dup v1.2s, v0.s[0]
; CHECK-SD-NEXT: fmov w9, s0
; CHECK-SD-NEXT: strh w9, [sp, #12]
; CHECK-SD-NEXT: mov w8, v1.s[1]
; CHECK-SD-NEXT: strh w8, [sp, #14]
; CHECK-SD-NEXT: ldr w0, [sp, #12]
; CHECK-SD-NEXT: add sp, sp, #16
; CHECK-SD-NEXT: fmov w0, s0
; CHECK-SD-NEXT: bfi w0, w0, #16, #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: shufflevector_v2i16_zeroes:
Expand Down
Loading
Loading