Skip to content

Commit ecfa7f9

Browse files
committed
i8
1 parent 5a264cf commit ecfa7f9

File tree

6 files changed

+38
-29
lines changed

6 files changed

+38
-29
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5580,8 +5580,8 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase {
55805580
/// \param N Node to expand
55815581
/// \param IsNegative indicate negated abs
55825582
/// \returns The expansion result or SDValue() if it fails.
5583-
SDValue expandABS(SDNode *N, SelectionDAG &DAG,
5584-
bool IsNegative = false) const;
5583+
/// \note Virtual so that a target can substitute its own expansion (the
/// AMDGPU backend overrides it). Default arguments are bound statically, so
/// an override should repeat the same default for \p IsNegative.
virtual SDValue expandABS(SDNode *N, SelectionDAG &DAG,
5584+
bool IsNegative = false) const;
55855585

55865586
/// Expand ABDS/ABDU nodes. Expands vector/scalar ABDS/ABDU nodes.
55875587
/// \param N Node to expand

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5286,6 +5286,28 @@ SDValue AMDGPUTargetLowering::performRcpCombine(SDNode *N,
52865286
return DCI.DAG.getConstantFP(One / Val, SDLoc(N), N->getValueType(0));
52875287
}
52885288

5289+
/// AMDGPU-specific expansion of ISD::ABS for uniform 8- and 16-bit scalars.
///
/// A non-divergent i8/i16 ABS is widened so the 32-bit ABS operation can be
/// used:
///   (abs iN x) -> (trunc iN (abs i32 (sext i32 x)))   for N in {8, 16}
/// Divergent nodes and every other value type are forwarded unchanged to the
/// generic TargetLowering::expandABS.
///
/// \param N Node to expand.
/// \param DAG DAG in which the replacement nodes are created.
/// \param IsNegative If true, produce the negated result (0 - abs(x)).
/// \returns The expanded value, or whatever the generic expansion returns
///          when this override does not apply.
SDValue AMDGPUTargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
                                        bool IsNegative) const {
  const EVT VT = N->getValueType(0);

  // Divergent values will not end up in SGPRs, so only the uniform i8/i16
  // case is handled here; everything else takes the default path.
  if (N->isDivergent() || (VT != MVT::i8 && VT != MVT::i16))
    return TargetLowering::expandABS(N, DAG, IsNegative);

  // Attribute the new nodes to the ABS being expanded rather than to its
  // operand, so debug location and IR order follow the original node.
  SDLoc DL(N);

  // (abs i8/i16 x) -> (trunc i8/i16 (abs i32 (sext i32 x)))
  SDValue Src = N->getOperand(0);
  SDValue SExtSrc = DAG.getSExtOrTrunc(Src, DL, MVT::i32);
  SDValue ExtAbs = DAG.getNode(ISD::ABS, DL, MVT::i32, SExtSrc);
  SDValue TruncResult = DAG.getNode(ISD::TRUNCATE, DL, VT, ExtAbs);

  if (!IsNegative)
    return TruncResult;

  // Negated form: 0 - abs(x), computed in the original narrow type.
  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
                     TruncResult);
}
}
5310+
52895311
SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
52905312
DAGCombinerInfo &DCI) const {
52915313
SelectionDAG &DAG = DCI.DAG;

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ class AMDGPUTargetLowering : public TargetLowering {
4545
/// original size will not change the value.
4646
static unsigned numBitsSigned(SDValue Op, SelectionDAG &DAG);
4747

48+
/// Target override of TargetLowering::expandABS. A uniform (non-divergent)
/// i8/i16 ABS is sign-extended to i32, expanded as a 32-bit ABS and truncated
/// back; every other case defers to the generic expansion.
/// \param IsNegative If true, the negated result (0 - abs) is produced.
SDValue expandABS(SDNode *N, SelectionDAG &DAG,
                  bool IsNegative = false) const override;
50+
4851
protected:
4952
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
5053
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -178,10 +178,6 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
178178
addRegisterClass(MVT::v32i16, &AMDGPU::SGPR_512RegClass);
179179
addRegisterClass(MVT::v32f16, &AMDGPU::SGPR_512RegClass);
180180
addRegisterClass(MVT::v32bf16, &AMDGPU::SGPR_512RegClass);
181-
182-
// We don't want the default expansion of 16-bit ABS since we can
183-
// sign-extend and use the 32-bit ABS operation for 16-bit ABS with SGPRs
184-
setOperationAction(ISD::ABS, MVT::i16, Custom);
185181
}
186182

187183
addRegisterClass(MVT::v32i32, &AMDGPU::VReg_1024RegClass);
@@ -6779,9 +6775,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
67796775
case ISD::DEBUGTRAP:
67806776
return lowerDEBUGTRAP(Op, DAG);
67816777
case ISD::ABS:
6782-
if (Op.getValueType() == MVT::i16)
6783-
return lowerABSi16(Op, DAG);
6784-
LLVM_FALLTHROUGH;
67856778
case ISD::FABS:
67866779
case ISD::FNEG:
67876780
case ISD::FCANONICALIZE:
@@ -8147,25 +8140,6 @@ SDValue SITargetLowering::lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const {
81478140
return DAG.getNode(AMDGPUISD::TRAP, SL, MVT::Other, Ops);
81488141
}
81498142

8150-
// sign-extend and use the 32-bit ABS operation for 16-bit ABS with SGPRs
8151-
SDValue SITargetLowering::lowerABSi16(SDValue Op, SelectionDAG &DAG) const {
8152-
assert(Op.getOpcode() == ISD::ABS &&
8153-
"Tried to select abs with non-abs opcode.");
8154-
assert(Op.getValueType() == MVT::i16 &&
8155-
"Tried to select abs i16 lowering with non-i16 type.");
8156-
8157-
// Divergent operands will not end up using SGPRs.
8158-
if (Op->isDivergent())
8159-
return SDValue();
8160-
8161-
//(abs i16 (i16 op1)) -> (trunc i16 (abs i32 (sext i32 (i16 op1))))
8162-
SDValue Src = Op.getOperand(0);
8163-
SDLoc DL(Src);
8164-
SDValue SExtSrc = DAG.getSExtOrTrunc(Src, DL, MVT::i32);
8165-
SDValue ExtAbs = DAG.getNode(ISD::ABS, DL, MVT::i32, SExtSrc);
8166-
return DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, ExtAbs);
8167-
}
8168-
81698143
SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
81708144
SelectionDAG &DAG) const {
81718145
if (Subtarget->hasApertureRegs()) {

llvm/lib/Target/AMDGPU/SIISelLowering.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,6 @@ class SITargetLowering final : public AMDGPUTargetLowering {
184184
SDValue lowerTrapHsaQueuePtr(SDValue Op, SelectionDAG &DAG) const;
185185
SDValue lowerTrapHsa(SDValue Op, SelectionDAG &DAG) const;
186186
SDValue lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const;
187-
SDValue lowerABSi16(SDValue Op, SelectionDAG &DAG) const;
188187

189188
SDNode *adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;
190189

llvm/test/CodeGen/AMDGPU/llvm.abs.ll

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,17 @@ declare <2 x i16> @llvm.abs.v2i16(<2 x i16>, i1)
1717
declare <3 x i16> @llvm.abs.v3i16(<3 x i16>, i1)
1818
declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1)
1919

20+
; i8 abs of an `inreg` argument (presumably uniform / SGPR, as the function
; name suggests -- confirm).
; NOTE(review): no CHECK lines accompany this function in this hunk, so the
; test currently asserts nothing; regenerate them (the neighbouring functions
; look autogenerated, e.g. via utils/update_llc_test_checks.py).
define amdgpu_cs i8 @abs_sgpr_i8(i8 inreg %arg) {
21+
%res = call i8 @llvm.abs.i8(i8 %arg, i1 false)
22+
ret i8 %res
23+
}
24+
25+
; Negated i8 abs (0 - abs(%arg)) of an `inreg` argument; presumably meant to
; exercise the negated-abs expansion path -- confirm it is actually reached.
; NOTE(review): no CHECK lines accompany this function in this hunk, so the
; test currently asserts nothing; regenerate them (the neighbouring functions
; look autogenerated, e.g. via utils/update_llc_test_checks.py).
define amdgpu_cs i8 @abs_sgpr_i8_neg(i8 inreg %arg) {
26+
%res1 = call i8 @llvm.abs.i8(i8 %arg, i1 false)
27+
%res2 = sub i8 0, %res1
28+
ret i8 %res2
29+
}
30+
2031
define amdgpu_cs i16 @abs_sgpr_i16(i16 inreg %arg) {
2132
; SDAG6-LABEL: abs_sgpr_i16:
2233
; SDAG6: ; %bb.0:

0 commit comments

Comments
 (0)