-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[AMDGPU] [DO NOT MERGE] Nonsuccessful Attempt At Using SelectionDAG Hooks for abs i8/i16 #167064
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
eb489a6
3fbeb0d
6f630bd
1136040
02e54ce
eafd0ce
d74b1f2
f02cfeb
f92fac1
43b7fa2
348729a
2f2affd
d0a4fa4
0b6e6cf
2945521
68298cf
09b3939
e134577
97c9ddd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -42,6 +42,7 @@ | |||||
| #include "llvm/IR/IntrinsicsR600.h" | ||||||
| #include "llvm/IR/MDBuilder.h" | ||||||
| #include "llvm/Support/CommandLine.h" | ||||||
| #include "llvm/Support/Compiler.h" | ||||||
| #include "llvm/Support/KnownBits.h" | ||||||
| #include "llvm/Support/ModRef.h" | ||||||
| #include "llvm/Transforms/Utils/LowerAtomic.h" | ||||||
|
|
@@ -177,6 +178,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, | |||||
| addRegisterClass(MVT::v32i16, &AMDGPU::SGPR_512RegClass); | ||||||
| addRegisterClass(MVT::v32f16, &AMDGPU::SGPR_512RegClass); | ||||||
| addRegisterClass(MVT::v32bf16, &AMDGPU::SGPR_512RegClass); | ||||||
|
|
||||||
| // We don't want the default expansion of 16-bit ABS since we can | ||||||
| // sign-extend and use the 32-bit ABS operation for 16-bit ABS with SGPRs | ||||||
| setOperationAction(ISD::ABS, {MVT::i8,MVT::i16}, Custom); | ||||||
| } | ||||||
|
|
||||||
| addRegisterClass(MVT::v32i32, &AMDGPU::VReg_1024RegClass); | ||||||
|
|
@@ -974,7 +979,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, | |||||
| Custom); | ||||||
| } | ||||||
|
|
||||||
| setTargetDAGCombine({ISD::ADD, | ||||||
| setTargetDAGCombine({ISD::ABS, | ||||||
| ISD::ADD, | ||||||
| ISD::PTRADD, | ||||||
| ISD::UADDO_CARRY, | ||||||
| ISD::SUB, | ||||||
|
|
@@ -6774,6 +6780,9 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { | |||||
| case ISD::DEBUGTRAP: | ||||||
| return lowerDEBUGTRAP(Op, DAG); | ||||||
| case ISD::ABS: | ||||||
| if (Op.getValueType() == MVT::i16 || Op.getValueType() == MVT::i8) | ||||||
| return lowerABSi16(Op, DAG); | ||||||
| LLVM_FALLTHROUGH; | ||||||
| case ISD::FABS: | ||||||
| case ISD::FNEG: | ||||||
| case ISD::FCANONICALIZE: | ||||||
|
|
@@ -7272,7 +7281,7 @@ static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N, | |||||
| void SITargetLowering::ReplaceNodeResults(SDNode *N, | ||||||
| SmallVectorImpl<SDValue> &Results, | ||||||
| SelectionDAG &DAG) const { | ||||||
| switch (N->getOpcode()) { | ||||||
| switch (N->getOpcode()) { | ||||||
| case ISD::INSERT_VECTOR_ELT: { | ||||||
| if (SDValue Res = lowerINSERT_VECTOR_ELT(SDValue(N, 0), DAG)) | ||||||
| Results.push_back(Res); | ||||||
|
|
@@ -7450,6 +7459,15 @@ void SITargetLowering::ReplaceNodeResults(SDNode *N, | |||||
| Results.push_back(lowerFSQRTF16(SDValue(N, 0), DAG)); | ||||||
| break; | ||||||
| } | ||||||
| case ISD::ABS: | ||||||
| if (N->getValueType(0) == MVT::i16 || N->getValueType(0) == MVT::i8) { | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. i16 won't reach here in the relevant case. It would happen for <= gfx7 |
||||||
| SDValue result = lowerABSi16(SDValue(N, 0), DAG); | ||||||
| if(result!=SDValue()) { | ||||||
| Results.push_back(result); | ||||||
| return; | ||||||
| } | ||||||
| } | ||||||
| LLVM_FALLTHROUGH; | ||||||
| default: | ||||||
| AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG); | ||||||
| break; | ||||||
|
|
@@ -8139,6 +8157,25 @@ SDValue SITargetLowering::lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const { | |||||
| return DAG.getNode(AMDGPUISD::TRAP, SL, MVT::Other, Ops); | ||||||
| } | ||||||
|
|
||||||
| // sign-extend and use the 32-bit ABS operation for 16-bit ABS with SGPRs | ||||||
| SDValue SITargetLowering::lowerABSi16(SDValue Op, SelectionDAG &DAG) const { | ||||||
| assert(Op.getOpcode() == ISD::ABS && | ||||||
| "Tried to select abs with non-abs opcode."); | ||||||
| assert((Op.getValueType() == MVT::i16 || Op.getValueType() == MVT::i8) && | ||||||
| "Tried to select abs i16 lowering with non-i16 type."); | ||||||
|
|
||||||
| // divergent means will not end up using SGPRs | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is a stronger assertion than is true; it's just more likely |
||||||
| if (Op->isDivergent()) | ||||||
| return SDValue(); | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is the opposite of what you want:
Suggested change
To get the default expansion, you return the original node. Returning SDValue() means treat as legal |
||||||
|
|
||||||
| //(abs i16 (i16 op1)) -> (trunc i16 (abs i32 (sext i32 (i16 op1)))) | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| SDValue Src = Op.getOperand(0); | ||||||
| SDLoc DL(Src); | ||||||
| SDValue SExtSrc = DAG.getSExtOrTrunc(Src, DL, MVT::i32); | ||||||
| SDValue ExtAbs = DAG.getNode(ISD::ABS, DL, MVT::i32, SExtSrc); | ||||||
| return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), ExtAbs); | ||||||
| } | ||||||
|
|
||||||
| SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL, | ||||||
| SelectionDAG &DAG) const { | ||||||
| if (Subtarget->hasApertureRegs()) { | ||||||
|
|
@@ -16855,6 +16892,10 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N, | |||||
| return SDValue(); | ||||||
|
|
||||||
| switch (N->getOpcode()) { | ||||||
| case ISD::ABS: | ||||||
| if (N->getValueType(0) == MVT::i16 || N->getValueType(0) == MVT::i8) | ||||||
| return lowerABSi16(SDValue(N,0), DCI.DAG); | ||||||
| break; | ||||||
| case ISD::ADD: | ||||||
| return performAddCombine(N, DCI); | ||||||
| case ISD::PTRADD: | ||||||
|
|
||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It will be easier to ignore i8 for now and just get i16 working. i8 adds way more problems if you're having trouble with the easy case