-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[AMDGPU] [DO NOT MERGE] Nonsuccessful Attempt At Using SelectionDAG Hooks for abs i8/i16 #167064
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
c1ef28b
97d7dc4
4a0a6fb
9f06b2e
26ced53
da97a2d
81e4dd9
dc248cd
7c81d4b
0154c94
b25fe76
3fd4934
79f6427
ea262d1
5f29238
05bb68e
bf479e5
0d9be16
043a068
f3ecbed
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
|
|
@@ -42,6 +42,7 @@ | |||||||
| #include "llvm/IR/IntrinsicsR600.h" | ||||||||
| #include "llvm/IR/MDBuilder.h" | ||||||||
| #include "llvm/Support/CommandLine.h" | ||||||||
| #include "llvm/Support/Compiler.h" | ||||||||
| #include "llvm/Support/KnownBits.h" | ||||||||
| #include "llvm/Support/ModRef.h" | ||||||||
| #include "llvm/Transforms/Utils/LowerAtomic.h" | ||||||||
|
|
@@ -177,6 +178,11 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, | |||||||
| addRegisterClass(MVT::v32i16, &AMDGPU::SGPR_512RegClass); | ||||||||
| addRegisterClass(MVT::v32f16, &AMDGPU::SGPR_512RegClass); | ||||||||
| addRegisterClass(MVT::v32bf16, &AMDGPU::SGPR_512RegClass); | ||||||||
|
|
||||||||
| // We don't want the default expansion of 16-bit ABS since we can | ||||||||
| // sign-extend and use the 32-bit ABS operation for 16-bit ABS with SGPRs | ||||||||
| setOperationAction(ISD::ABS, {MVT::i8,MVT::i16}, Custom); | ||||||||
linuxrocks123 marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||
| setOperationAction(ISD::SUB, {MVT::i8}, Custom); | ||||||||
linuxrocks123 marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||
| } | ||||||||
|
|
||||||||
| addRegisterClass(MVT::v32i32, &AMDGPU::VReg_1024RegClass); | ||||||||
|
|
@@ -974,7 +980,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, | |||||||
| Custom); | ||||||||
| } | ||||||||
|
|
||||||||
| setTargetDAGCombine({ISD::ADD, | ||||||||
| setTargetDAGCombine({ISD::ABS, | ||||||||
| ISD::ADD, | ||||||||
| ISD::PTRADD, | ||||||||
| ISD::UADDO_CARRY, | ||||||||
| ISD::SUB, | ||||||||
|
|
@@ -6710,6 +6717,8 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { | |||||||
| switch (Op.getOpcode()) { | ||||||||
| default: | ||||||||
| return AMDGPUTargetLowering::LowerOperation(Op, DAG); | ||||||||
| case ISD::ABS: | ||||||||
| return lowerABSi16(Op, DAG); | ||||||||
| case ISD::BRCOND: | ||||||||
| return LowerBRCOND(Op, DAG); | ||||||||
| case ISD::RETURNADDR: | ||||||||
|
|
@@ -6773,7 +6782,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { | |||||||
| return lowerTRAP(Op, DAG); | ||||||||
| case ISD::DEBUGTRAP: | ||||||||
| return lowerDEBUGTRAP(Op, DAG); | ||||||||
| case ISD::ABS: | ||||||||
| case ISD::FABS: | ||||||||
| case ISD::FNEG: | ||||||||
| case ISD::FCANONICALIZE: | ||||||||
|
|
@@ -6796,11 +6804,22 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { | |||||||
| case ISD::FP_TO_SINT: | ||||||||
| case ISD::FP_TO_UINT: | ||||||||
| return LowerFP_TO_INT(Op, DAG); | ||||||||
| case ISD::SUB: | ||||||||
| if (Op.getValueType() == MVT::i8) | ||||||||
| if (isNullConstant(Op.getOperand(0)) && | ||||||||
| Op.getOperand(1).getOpcode() == ISD::ABS) | ||||||||
| return DAG.getNode( | ||||||||
| ISD::TRUNCATE, SDLoc(Op), MVT::i8, | ||||||||
| DAG.getNode(ISD::SUB, SDLoc(Op), MVT::i32, | ||||||||
| DAG.getConstant(0, SDLoc(Op), MVT::i32), | ||||||||
| lowerABSi16(Op.getOperand(1), DAG).getOperand(0))); | ||||||||
| else | ||||||||
|
Comment on lines
+6808
to
+6816
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This looks essentially correct, but it's in the wrong place. This is not mandatory lowering, so this should go in PerformDAGCombine. Doesn't this also need the divergence check?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It's also in performSubCombine. It doesn't work there, either. And yes, it's missing the divergence check, but adding inhibitions to the optimization wouldn't make it work. |
||||||||
| break; | ||||||||
| LLVM_FALLTHROUGH; | ||||||||
| case ISD::SHL: | ||||||||
| case ISD::SRA: | ||||||||
| case ISD::SRL: | ||||||||
| case ISD::ADD: | ||||||||
| case ISD::SUB: | ||||||||
| case ISD::SMIN: | ||||||||
| case ISD::SMAX: | ||||||||
| case ISD::UMIN: | ||||||||
|
|
@@ -7272,7 +7291,7 @@ static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N, | |||||||
| void SITargetLowering::ReplaceNodeResults(SDNode *N, | ||||||||
| SmallVectorImpl<SDValue> &Results, | ||||||||
| SelectionDAG &DAG) const { | ||||||||
| switch (N->getOpcode()) { | ||||||||
| switch (N->getOpcode()) { | ||||||||
| case ISD::INSERT_VECTOR_ELT: { | ||||||||
| if (SDValue Res = lowerINSERT_VECTOR_ELT(SDValue(N, 0), DAG)) | ||||||||
| Results.push_back(Res); | ||||||||
|
|
@@ -8139,6 +8158,25 @@ SDValue SITargetLowering::lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const { | |||||||
| return DAG.getNode(AMDGPUISD::TRAP, SL, MVT::Other, Ops); | ||||||||
| } | ||||||||
|
|
||||||||
| // sign-extend and use the 32-bit ABS operation for 16-bit ABS with SGPRs | ||||||||
| SDValue SITargetLowering::lowerABSi16(SDValue Op, SelectionDAG &DAG) const { | ||||||||
linuxrocks123 marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||
| assert(Op.getOpcode() == ISD::ABS && | ||||||||
| "Tried to select abs with non-abs opcode."); | ||||||||
| assert((Op.getValueType() == MVT::i16 || Op.getValueType() == MVT::i8) && | ||||||||
| "Tried to select abs i16 lowering with non-i16 type."); | ||||||||
|
|
||||||||
| // divergent means will not end up using SGPRs | ||||||||
linuxrocks123 marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||
| if (Op->isDivergent()) | ||||||||
| return SDValue(); | ||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is the opposite of what you want:
Suggested change
To get the default expansion, you return the original node. Returning SDValue() means the node is treated as legal.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @arsenm, are you sure about that? See https://github.com/linuxrocks123/llvm-project/blob/97c9dddc96f3576fed0762344ce84b2c48e16671/llvm/lib/Target/AMDGPU/SIISelLowering.cpp#L16891. I don't see how
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. That is PerformDAGCombine, which is an optional optimization. This is required lowering. The custom lowering falls back on the default expansion:
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Or it's the other way around. |
||||||||
|
|
||||||||
| //(abs i16 (i16 op1)) -> (trunc i16 (abs i32 (sext i32 (i16 op1)))) | ||||||||
linuxrocks123 marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||
| SDValue Src = Op.getOperand(0); | ||||||||
| SDLoc DL(Src); | ||||||||
| SDValue SExtSrc = DAG.getSExtOrTrunc(Src, DL, MVT::i32); | ||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You can slightly generalize this to work on vectors.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Does our ISA have a vector equivalent of
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There's |
||||||||
| SDValue ExtAbs = DAG.getNode(ISD::ABS, DL, MVT::i32, SExtSrc); | ||||||||
| return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), ExtAbs); | ||||||||
| } | ||||||||
|
|
||||||||
| SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL, | ||||||||
| SelectionDAG &DAG) const { | ||||||||
| if (Subtarget->hasApertureRegs()) { | ||||||||
|
|
@@ -16210,13 +16248,20 @@ SDValue SITargetLowering::performSubCombine(SDNode *N, | |||||||
| return Folded; | ||||||||
| } | ||||||||
|
|
||||||||
| if (VT != MVT::i32) | ||||||||
| return SDValue(); | ||||||||
|
|
||||||||
| SDLoc SL(N); | ||||||||
| SDValue LHS = N->getOperand(0); | ||||||||
| SDValue RHS = N->getOperand(1); | ||||||||
|
|
||||||||
| if (VT == MVT::i8) | ||||||||
| if (isNullConstant(LHS) && RHS->getOpcode() == ISD::ABS) | ||||||||
| return DAG.getNode(ISD::TRUNCATE, SL, MVT::i8, | ||||||||
| DAG.getNode(ISD::SUB, SL, MVT::i32, | ||||||||
| DAG.getConstant(0, SL, MVT::i32), | ||||||||
| lowerABSi16(RHS, DAG).getOperand(0))); | ||||||||
linuxrocks123 marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||
|
|
||||||||
| if (VT != MVT::i32) | ||||||||
| return SDValue(); | ||||||||
|
|
||||||||
| // sub x, zext (setcc) => usubo_carry x, 0, setcc | ||||||||
| // sub x, sext (setcc) => uaddo_carry x, 0, setcc | ||||||||
| unsigned Opc = RHS.getOpcode(); | ||||||||
|
|
@@ -16855,6 +16900,10 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N, | |||||||
| return SDValue(); | ||||||||
|
|
||||||||
| switch (N->getOpcode()) { | ||||||||
| case ISD::ABS: | ||||||||
| if (N->getValueType(0) == MVT::i16 || N->getValueType(0) == MVT::i8) | ||||||||
| return lowerABSi16(SDValue(N,0), DCI.DAG); | ||||||||
| break; | ||||||||
| case ISD::ADD: | ||||||||
| return performAddCombine(N, DCI); | ||||||||
| case ISD::PTRADD: | ||||||||
|
|
||||||||
Uh oh!
There was an error while loading. Please reload this page.