Skip to content

Commit 14d4d7c

Browse files
committed
Attempt #4, with DAG again
1 parent c9b8683 commit 14d4d7c

File tree

6 files changed

+27
-183
lines changed

6 files changed

+27
-183
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.h

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@ FunctionPass *createSIAnnotateControlFlowLegacyPass();
3939
FunctionPass *createSIFoldOperandsLegacyPass();
4040
FunctionPass *createSIPeepholeSDWALegacyPass();
4141
FunctionPass *createSILowerI1CopiesLegacyPass();
42-
FunctionPass *createSISAbs16FixupLegacyPass();
4342
FunctionPass *createSIShrinkInstructionsLegacyPass();
4443
FunctionPass *createSILoadStoreOptimizerLegacyPass();
4544
FunctionPass *createSIWholeQuadModeLegacyPass();
@@ -94,13 +93,6 @@ class SILowerI1CopiesPass : public PassInfoMixin<SILowerI1CopiesPass> {
9493
MachineFunctionAnalysisManager &MFAM);
9594
};
9695

97-
class SISAbs16FixupPass : public PassInfoMixin<SISAbs16FixupPass> {
98-
public:
99-
SISAbs16FixupPass() = default;
100-
PreservedAnalyses run(MachineFunction &MF,
101-
MachineFunctionAnalysisManager &MFAM);
102-
};
103-
10496
void initializeAMDGPUDAGToDAGISelLegacyPass(PassRegistry &);
10597

10698
void initializeAMDGPUAlwaysInlinePass(PassRegistry&);
@@ -205,9 +197,6 @@ extern char &SILowerWWMCopiesLegacyID;
205197
void initializeSILowerI1CopiesLegacyPass(PassRegistry &);
206198
extern char &SILowerI1CopiesLegacyID;
207199

208-
void initializeSISAbs16FixupLegacyPass(PassRegistry &);
209-
extern char &SISAbs16FixupLegacyID;
210-
211200
void initializeAMDGPUGlobalISelDivergenceLoweringPass(PassRegistry &);
212201
extern char &AMDGPUGlobalISelDivergenceLoweringID;
213202

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -551,7 +551,6 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
551551
initializeAMDGPUPrepareAGPRAllocLegacyPass(*PR);
552552
initializeGCNDPPCombineLegacyPass(*PR);
553553
initializeSILowerI1CopiesLegacyPass(*PR);
554-
initializeSISAbs16FixupLegacyPass(*PR);
555554
initializeAMDGPUGlobalISelDivergenceLoweringPass(*PR);
556555
initializeAMDGPURegBankSelectPass(*PR);
557556
initializeAMDGPURegBankLegalizePass(*PR);
@@ -1518,7 +1517,6 @@ bool GCNPassConfig::addInstSelector() {
15181517
AMDGPUPassConfig::addInstSelector();
15191518
addPass(&SIFixSGPRCopiesLegacyID);
15201519
addPass(createSILowerI1CopiesLegacyPass());
1521-
addPass(createSISAbs16FixupLegacyPass());
15221520
return false;
15231521
}
15241522

@@ -2211,7 +2209,6 @@ Error AMDGPUCodeGenPassBuilder::addInstSelector(AddMachinePass &addPass) const {
22112209
addPass(AMDGPUISelDAGToDAGPass(TM));
22122210
addPass(SIFixSGPRCopiesPass());
22132211
addPass(SILowerI1CopiesPass());
2214-
addPass(SISAbs16FixupPass());
22152212
return Error::success();
22162213
}
22172214

llvm/lib/Target/AMDGPU/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,6 @@ add_llvm_target(AMDGPUCodeGen
185185
SIPreEmitPeephole.cpp
186186
SIProgramInfo.cpp
187187
SIRegisterInfo.cpp
188-
SISAbs16Fixup.cpp
189188
SIShrinkInstructions.cpp
190189
SIWholeQuadMode.cpp
191190

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
177177
addRegisterClass(MVT::v32i16, &AMDGPU::SGPR_512RegClass);
178178
addRegisterClass(MVT::v32f16, &AMDGPU::SGPR_512RegClass);
179179
addRegisterClass(MVT::v32bf16, &AMDGPU::SGPR_512RegClass);
180+
181+
// Custom-lower 16-bit ABS instead of using the default expansion: with
182+
// SGPRs we can sign-extend the operand and use the 32-bit ABS operation.
183+
setOperationAction(ISD::ABS, MVT::i16, Custom);
180184
}
181185

182186
addRegisterClass(MVT::v32i32, &AMDGPU::VReg_1024RegClass);
@@ -6774,6 +6778,9 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
67746778
case ISD::DEBUGTRAP:
67756779
return lowerDEBUGTRAP(Op, DAG);
67766780
case ISD::ABS:
6781+
if (Op.getValueType() == MVT::i16)
6782+
return lowerABSi16(Op, DAG);
6783+
// fall through
67776784
case ISD::FABS:
67786785
case ISD::FNEG:
67796786
case ISD::FCANONICALIZE:
@@ -8133,6 +8140,25 @@ SDValue SITargetLowering::lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const {
81338140
return DAG.getNode(AMDGPUISD::TRAP, SL, MVT::Other, Ops);
81348141
}
81358142

8143+
// sign-extend and use the 32-bit ABS operation for 16-bit ABS with SGPRs
8144+
SDValue SITargetLowering::lowerABSi16(SDValue Op, SelectionDAG &DAG) const {
8145+
assert(Op.getOpcode() == ISD::ABS &&
8146+
"Tried to select abs with non-abs opcode.");
8147+
assert(Op.getValueType() == MVT::i16 &&
8148+
"Tried to select abs i16 lowering with non-i16 type.");
8149+
8150+
// divergent means will not end up using SGPRs
8151+
if (Op->isDivergent())
8152+
return SDValue();
8153+
8154+
//(abs i16 (i16 op1)) -> (trunc i16 (abs i32 (sext i32 (i16 op1))))
8155+
SDValue Src = Op.getOperand(0);
8156+
SDLoc DL(Src);
8157+
SDValue SExtSrc = DAG.getSExtOrTrunc(Src, DL, MVT::i32);
8158+
SDValue ExtAbs = DAG.getNode(ISD::ABS, DL, MVT::i32, SExtSrc);
8159+
return DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, ExtAbs);
8160+
}
8161+
81368162
SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
81378163
SelectionDAG &DAG) const {
81388164
if (Subtarget->hasApertureRegs()) {

llvm/lib/Target/AMDGPU/SIISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
184184
SDValue lowerTrapHsaQueuePtr(SDValue Op, SelectionDAG &DAG) const;
185185
SDValue lowerTrapHsa(SDValue Op, SelectionDAG &DAG) const;
186186
SDValue lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const;
187+
SDValue lowerABSi16(SDValue Op, SelectionDAG &DAG) const;
187188

188189
SDNode *adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;
189190

llvm/lib/Target/AMDGPU/SISAbs16Fixup.cpp

Lines changed: 0 additions & 168 deletions
This file was deleted.

0 commit comments

Comments
 (0)