Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 43 additions & 2 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/ModRef.h"
#include "llvm/Transforms/Utils/LowerAtomic.h"
Expand Down Expand Up @@ -177,6 +178,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::v32i16, &AMDGPU::SGPR_512RegClass);
addRegisterClass(MVT::v32f16, &AMDGPU::SGPR_512RegClass);
addRegisterClass(MVT::v32bf16, &AMDGPU::SGPR_512RegClass);

// We don't want the default expansion of 16-bit ABS since we can
// sign-extend and use the 32-bit ABS operation for 16-bit ABS with SGPRs
setOperationAction(ISD::ABS, {MVT::i8,MVT::i16}, Custom);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It will be easier to ignore i8 for now and just get i16 working. i8 adds way more problems if you're having trouble with the easy case

}

addRegisterClass(MVT::v32i32, &AMDGPU::VReg_1024RegClass);
Expand Down Expand Up @@ -974,7 +979,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
Custom);
}

setTargetDAGCombine({ISD::ADD,
setTargetDAGCombine({ISD::ABS,
ISD::ADD,
ISD::PTRADD,
ISD::UADDO_CARRY,
ISD::SUB,
Expand Down Expand Up @@ -6774,6 +6780,9 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::DEBUGTRAP:
return lowerDEBUGTRAP(Op, DAG);
case ISD::ABS:
if (Op.getValueType() == MVT::i16 || Op.getValueType() == MVT::i8)
return lowerABSi16(Op, DAG);
LLVM_FALLTHROUGH;
case ISD::FABS:
case ISD::FNEG:
case ISD::FCANONICALIZE:
Expand Down Expand Up @@ -7272,7 +7281,7 @@ static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N,
void SITargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
switch (N->getOpcode()) {
switch (N->getOpcode()) {
case ISD::INSERT_VECTOR_ELT: {
if (SDValue Res = lowerINSERT_VECTOR_ELT(SDValue(N, 0), DAG))
Results.push_back(Res);
Expand Down Expand Up @@ -7450,6 +7459,15 @@ void SITargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(lowerFSQRTF16(SDValue(N, 0), DAG));
break;
}
case ISD::ABS:
if (N->getValueType(0) == MVT::i16 || N->getValueType(0) == MVT::i8) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i16 won't reach here in the relevant case. It would happen for <= gfx7

SDValue result = lowerABSi16(SDValue(N, 0), DAG);
if(result!=SDValue()) {
Results.push_back(result);
return;
}
}
LLVM_FALLTHROUGH;
default:
AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
break;
Expand Down Expand Up @@ -8139,6 +8157,25 @@ SDValue SITargetLowering::lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(AMDGPUISD::TRAP, SL, MVT::Other, Ops);
}

// sign-extend and use the 32-bit ABS operation for 16-bit ABS with SGPRs
SDValue SITargetLowering::lowerABSi16(SDValue Op, SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::ABS &&
"Tried to select abs with non-abs opcode.");
assert((Op.getValueType() == MVT::i16 || Op.getValueType() == MVT::i8) &&
"Tried to select abs i16 lowering with non-i16 type.");

// divergent means will not end up using SGPRs
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is stronger of an assertion than is true, it's just more likely

if (Op->isDivergent())
return SDValue();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the opposite of what you want:

Suggested change
return SDValue();
return Op;

To get the default expansion, you return the original node. Return SDValue() means treat as legal


//(abs i16 (i16 op1)) -> (trunc i16 (abs i32 (sext i32 (i16 op1))))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
//(abs i16 (i16 op1)) -> (trunc i16 (abs i32 (sext i32 (i16 op1))))
// (abs i16 (i16 op1)) -> (trunc i16 (abs i32 (sext i32 (i16 op1))))

SDValue Src = Op.getOperand(0);
SDLoc DL(Src);
SDValue SExtSrc = DAG.getSExtOrTrunc(Src, DL, MVT::i32);
SDValue ExtAbs = DAG.getNode(ISD::ABS, DL, MVT::i32, SExtSrc);
return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), ExtAbs);
}

SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
SelectionDAG &DAG) const {
if (Subtarget->hasApertureRegs()) {
Expand Down Expand Up @@ -16855,6 +16892,10 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
return SDValue();

switch (N->getOpcode()) {
case ISD::ABS:
if (N->getValueType(0) == MVT::i16 || N->getValueType(0) == MVT::i8)
return lowerABSi16(SDValue(N,0), DCI.DAG);
break;
case ISD::ADD:
return performAddCombine(N, DCI);
case ISD::PTRADD:
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/SIISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
SDValue lowerTrapHsaQueuePtr(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerTrapHsa(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerABSi16(SDValue Op, SelectionDAG &DAG) const;

SDNode *adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;

Expand Down
Loading
Loading