-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[AMDGPU] Use getSignedConstant() where necessary #117328
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Create signed constant using getSignedConstant(), to avoid future assertion failures when we disable implicit truncation in getConstant(). This also touches some generic legalization code, which apparently only AMDGPU tests.
Member
|
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-amdgpu Author: Nikita Popov (nikic) ChangesCreate signed constant using getSignedConstant(), to avoid future assertion failures when we disable implicit truncation in getConstant(). This also touches some generic legalization code, which apparently only AMDGPU tests. Full diff: https://github.com/llvm/llvm-project/pull/117328.diff 7 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 1480bd98c685e1..fbc96bade15f5a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2330,10 +2330,10 @@ SDValue SelectionDAGLegalize::expandLdexp(SDNode *Node) const {
const APFloat::ExponentType MinExpVal = APFloat::semanticsMinExponent(FltSem);
const int Precision = APFloat::semanticsPrecision(FltSem);
- const SDValue MaxExp = DAG.getConstant(MaxExpVal, dl, ExpVT);
- const SDValue MinExp = DAG.getConstant(MinExpVal, dl, ExpVT);
+ const SDValue MaxExp = DAG.getSignedConstant(MaxExpVal, dl, ExpVT);
+ const SDValue MinExp = DAG.getSignedConstant(MinExpVal, dl, ExpVT);
- const SDValue DoubleMaxExp = DAG.getConstant(2 * MaxExpVal, dl, ExpVT);
+ const SDValue DoubleMaxExp = DAG.getSignedConstant(2 * MaxExpVal, dl, ExpVT);
const APFloat One(FltSem, "1.0");
APFloat ScaleUpK = scalbn(One, MaxExpVal, APFloat::rmNearestTiesToEven);
@@ -2375,7 +2375,7 @@ SDValue SelectionDAGLegalize::expandLdexp(SDNode *Node) const {
SDValue IncN0 = DAG.getNode(ISD::ADD, dl, ExpVT, N, Increment0, NUW_NSW);
SDValue ClampMinVal =
- DAG.getConstant(3 * MinExpVal + 2 * Precision, dl, ExpVT);
+ DAG.getSignedConstant(3 * MinExpVal + 2 * Precision, dl, ExpVT);
SDValue ClampN_Small = DAG.getNode(ISD::SMAX, dl, ExpVT, N, ClampMinVal);
SDValue IncN1 =
DAG.getNode(ISD::ADD, dl, ExpVT, ClampN_Small, Increment1, NSW);
@@ -2385,8 +2385,8 @@ SDValue SelectionDAGLegalize::expandLdexp(SDNode *Node) const {
SDValue ScaleDown1 = DAG.getNode(ISD::FMUL, dl, VT, ScaleDown0, ScaleDownVal);
SDValue ScaleDownTwice = DAG.getSetCC(
- dl, SetCCVT, N, DAG.getConstant(2 * MinExpVal + Precision, dl, ExpVT),
- ISD::SETULT);
+ dl, SetCCVT, N,
+ DAG.getSignedConstant(2 * MinExpVal + Precision, dl, ExpVT), ISD::SETULT);
SDValue SelectN_Small =
DAG.getNode(ISD::SELECT, dl, ExpVT, ScaleDownTwice, IncN1, IncN0);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 151d56292b53d6..ad61c8195f9abf 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -408,7 +408,8 @@ SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
unsigned AS = cast<MemSDNode>(N)->getAddressSpace();
if (AS == AMDGPUAS::LOCAL_ADDRESS) {
if (Subtarget->ldsRequiresM0Init())
- return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
+ return glueCopyToM0(
+ N, CurDAG->getSignedTargetConstant(-1, SDLoc(N), MVT::i32));
} else if (AS == AMDGPUAS::REGION_ADDRESS) {
MachineFunction &MF = CurDAG->getMachineFunction();
unsigned Value = MF.getInfo<SIMachineFunctionInfo>()->getGDSSize();
@@ -1724,7 +1725,7 @@ bool AMDGPUDAGToDAGISel::SelectFlatOffsetImpl(SDNode *N, SDValue Addr,
}
VAddr = Addr;
- Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
+ Offset = CurDAG->getSignedTargetConstant(OffsetVal, SDLoc(), MVT::i32);
return true;
}
@@ -1832,7 +1833,7 @@ bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
}
if (SAddr) {
- Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32);
+ Offset = CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);
return true;
}
}
@@ -1848,7 +1849,7 @@ bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, SDLoc(Addr), MVT::i32,
CurDAG->getTargetConstant(0, SDLoc(), MVT::i32));
VOffset = SDValue(VMov, 0);
- Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32);
+ Offset = CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);
return true;
}
@@ -1903,13 +1904,13 @@ bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr,
SDValue AddOffset =
SAddr.getOpcode() == ISD::TargetFrameIndex
? getMaterializedScalarImm32(Lo_32(RemainderOffset), DL)
- : CurDAG->getTargetConstant(RemainderOffset, DL, MVT::i32);
+ : CurDAG->getSignedTargetConstant(RemainderOffset, DL, MVT::i32);
SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, DL, MVT::i32,
SAddr, AddOffset),
0);
}
- Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i32);
+ Offset = CurDAG->getSignedTargetConstant(COffsetVal, DL, MVT::i32);
return true;
}
@@ -2058,7 +2059,7 @@ bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
std::optional<int64_t> EncodedOffset = AMDGPU::getSMRDEncodedOffset(
*Subtarget, ByteOffset, IsBuffer, HasSOffset);
if (EncodedOffset && Offset && !Imm32Only) {
- *Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
+ *Offset = CurDAG->getSignedTargetConstant(*EncodedOffset, SL, MVT::i32);
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 3cc4bd92f6471a..d77508227b076b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2333,7 +2333,7 @@ SDValue AMDGPUTargetLowering::LowerSDIVREM(SDValue Op,
SDValue RHS = Op.getOperand(1);
SDValue Zero = DAG.getConstant(0, DL, VT);
- SDValue NegOne = DAG.getConstant(-1, DL, VT);
+ SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
if (VT == MVT::i32) {
if (SDValue Res = LowerDIVREM24(Op, DAG, true))
@@ -3794,7 +3794,11 @@ static SDValue constantFoldBFE(SelectionDAG &DAG, IntTy Src0, uint32_t Offset,
if (Width + Offset < 32) {
uint32_t Shl = static_cast<uint32_t>(Src0) << (32 - Offset - Width);
IntTy Result = static_cast<IntTy>(Shl) >> (32 - Width);
- return DAG.getConstant(Result, DL, MVT::i32);
+ if constexpr (std::is_signed_v<IntTy>) {
+ return DAG.getSignedConstant(Result, DL, MVT::i32);
+ } else {
+ return DAG.getConstant(Result, DL, MVT::i32);
+ }
}
return DAG.getConstant(Src0 >> Offset, DL, MVT::i32);
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index 1b88fdd3ab2e1c..c2e952418f1be2 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -919,7 +919,7 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
} else if (CompareVT == MVT::i32) {
- HWTrue = DAG.getConstant(-1, DL, CompareVT);
+ HWTrue = DAG.getAllOnesConstant(DL, CompareVT);
HWFalse = DAG.getConstant(0, DL, CompareVT);
}
else {
@@ -949,7 +949,7 @@ SDValue R600TargetLowering::lowerADDRSPACECAST(SDValue Op,
unsigned DestAS = ASC->getDestAddressSpace();
if (isNullConstant(Op.getOperand(0)) && SrcAS == AMDGPUAS::FLAT_ADDRESS)
- return DAG.getConstant(TM.getNullPointerValue(DestAS), SL, VT);
+ return DAG.getSignedConstant(TM.getNullPointerValue(DestAS), SL, VT);
return Op;
}
@@ -1750,11 +1750,11 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
}
return DAG.getNode(ISD::SELECT_CC, DL, N->getValueType(0),
- SelectCC.getOperand(0), // LHS
- SelectCC.getOperand(1), // RHS
- DAG.getConstant(-1, DL, MVT::i32), // True
- DAG.getConstant(0, DL, MVT::i32), // False
- SelectCC.getOperand(4)); // CC
+ SelectCC.getOperand(0), // LHS
+ SelectCC.getOperand(1), // RHS
+ DAG.getAllOnesConstant(DL, MVT::i32), // True
+ DAG.getConstant(0, DL, MVT::i32), // False
+ SelectCC.getOperand(4)); // CC
}
// insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index e520dfff1016b2..35a69db3295603 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4019,10 +4019,11 @@ SDValue SITargetLowering::lowerDYNAMIC_STACKALLOCImpl(SDValue Op,
Align StackAlign = TFL->getStackAlign();
Tmp1 = DAG.getNode(Opc, dl, VT, SP, ScaledSize); // Value
if (Alignment && *Alignment > StackAlign) {
- Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1,
- DAG.getConstant(-(uint64_t)Alignment->value()
- << Subtarget->getWavefrontSizeLog2(),
- dl, VT));
+ Tmp1 = DAG.getNode(
+ ISD::AND, dl, VT, Tmp1,
+ DAG.getSignedConstant(-(uint64_t)Alignment->value()
+ << Subtarget->getWavefrontSizeLog2(),
+ dl, VT));
}
Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain
@@ -6771,10 +6772,10 @@ SDValue SITargetLowering::lowerFLDEXP(SDValue Op, SelectionDAG &DAG) const {
// TODO: This should be a generic narrowing legalization, and can easily be
// for GlobalISel.
- SDValue MinExp = DAG.getConstant(minIntN(16), DL, ExpVT);
+ SDValue MinExp = DAG.getSignedConstant(minIntN(16), DL, ExpVT);
SDValue ClampMin = DAG.getNode(ISD::SMAX, DL, ExpVT, Exp, MinExp);
- SDValue MaxExp = DAG.getConstant(maxIntN(16), DL, ExpVT);
+ SDValue MaxExp = DAG.getSignedConstant(maxIntN(16), DL, ExpVT);
SDValue Clamp = DAG.getNode(ISD::SMIN, DL, ExpVT, ClampMin, MaxExp);
SDValue TruncExp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Clamp);
@@ -7542,11 +7543,11 @@ SDValue SITargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
SDValue Vec0 = SVN->getOperand(VecIdx0);
SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, Vec0,
- DAG.getConstant(EltIdx0, SL, MVT::i32));
+ DAG.getSignedConstant(EltIdx0, SL, MVT::i32));
SDValue Vec1 = SVN->getOperand(VecIdx1);
SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, Vec1,
- DAG.getConstant(EltIdx1, SL, MVT::i32));
+ DAG.getSignedConstant(EltIdx1, SL, MVT::i32));
Pieces.push_back(DAG.getBuildVector(PackVT, SL, {Elt0, Elt1}));
}
}
@@ -9618,7 +9619,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
// On GFX12 lower s_barrier into s_barrier_signal_imm and s_barrier_wait
if (ST.hasSplitBarriers()) {
SDValue K =
- DAG.getTargetConstant(AMDGPU::Barrier::WORKGROUP, DL, MVT::i32);
+ DAG.getSignedTargetConstant(AMDGPU::Barrier::WORKGROUP, DL, MVT::i32);
SDValue BarSignal =
SDValue(DAG.getMachineNode(AMDGPU::S_BARRIER_SIGNAL_IMM, DL,
MVT::Other, K, Op.getOperand(0)),
@@ -11173,8 +11174,9 @@ SDValue SITargetLowering::lowerFSQRTF32(SDValue Op, SelectionDAG &DAG) const {
SqrtS = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, SqrtID, SqrtX, Flags);
SDValue SqrtSAsInt = DAG.getNode(ISD::BITCAST, DL, MVT::i32, SqrtS);
- SDValue SqrtSNextDownInt = DAG.getNode(ISD::ADD, DL, MVT::i32, SqrtSAsInt,
- DAG.getConstant(-1, DL, MVT::i32));
+ SDValue SqrtSNextDownInt =
+ DAG.getNode(ISD::ADD, DL, MVT::i32, SqrtSAsInt,
+ DAG.getAllOnesConstant(DL, MVT::i32));
SDValue SqrtSNextDown = DAG.getNode(ISD::BITCAST, DL, VT, SqrtSNextDownInt);
SDValue NegSqrtSNextDown =
@@ -11296,7 +11298,7 @@ SDValue SITargetLowering::lowerFSQRTF64(SDValue Op, SelectionDAG &DAG) const {
SDValue SqrtRet = DAG.getNode(ISD::FMA, DL, MVT::f64, SqrtD1, SqrtH1, SqrtS2);
- SDValue ScaleDownFactor = DAG.getConstant(-128, DL, MVT::i32);
+ SDValue ScaleDownFactor = DAG.getSignedConstant(-128, DL, MVT::i32);
SDValue ScaleDown =
DAG.getNode(ISD::SELECT, DL, MVT::i32, Scaling, ScaleDownFactor, ZeroInt);
SqrtRet = DAG.getNode(ISD::FLDEXP, DL, MVT::f64, SqrtRet, ScaleDown, Flags);
@@ -14689,7 +14691,7 @@ SDValue SITargetLowering::performSetCCCombine(SDNode *N,
(CRHS->isZero() &&
(CC == ISD::SETEQ || CC == ISD::SETGE || CC == ISD::SETULE)))
return DAG.getNode(ISD::XOR, SL, MVT::i1, LHS.getOperand(0),
- DAG.getConstant(-1, SL, MVT::i1));
+ DAG.getAllOnesConstant(SL, MVT::i1));
if ((CRHS->isAllOnes() &&
(CC == ISD::SETEQ || CC == ISD::SETLE || CC == ISD::SETUGE)) ||
(CRHS->isZero() &&
@@ -14715,7 +14717,7 @@ SDValue SITargetLowering::performSetCCCombine(SDNode *N,
if ((CF == CRHSVal && CC == ISD::SETEQ) ||
(CT == CRHSVal && CC == ISD::SETNE))
return DAG.getNode(ISD::XOR, SL, MVT::i1, LHS.getOperand(0),
- DAG.getConstant(-1, SL, MVT::i1));
+ DAG.getAllOnesConstant(SL, MVT::i1));
if ((CF == CRHSVal && CC == ISD::SETNE) ||
(CT == CRHSVal && CC == ISD::SETEQ))
return LHS.getOperand(0);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index afb0b2cede045c..73a64a7552ac8b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -812,27 +812,29 @@ def as_i8imm : SDNodeXForm<imm, [{
}]>;
def as_i8timm : SDNodeXForm<timm, [{
- return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
+ return CurDAG->getSignedTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
}]>;
def as_i16imm : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
+ return CurDAG->getSignedTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
}]>;
def as_i16timm : SDNodeXForm<timm, [{
- return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
+ // Explicit cast, as this is used with both signed and unsigned immediates.
+ return CurDAG->getSignedTargetConstant(int16_t(N->getSExtValue()), SDLoc(N),
+ MVT::i16);
}]>;
def as_i32imm: SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
+ return CurDAG->getSignedTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
}]>;
def as_i32timm: SDNodeXForm<timm, [{
- return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
+ return CurDAG->getSignedTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
}]>;
def as_i64imm: SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64);
+ return CurDAG->getSignedTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64);
}]>;
def cond_as_i32imm: SDNodeXForm<cond, [{
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 3f211e7cbdde50..bc25d75131cc35 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3727,7 +3727,7 @@ def FPPow2ToExponentXForm : SDNodeXForm<fpimm, [{
const auto &APF = N->getValueAPF();
int Log2 = APF.getExactLog2Abs();
assert(Log2 != INT_MIN);
- return CurDAG->getTargetConstant(Log2, SDLoc(N), MVT::i32);
+ return CurDAG->getSignedTargetConstant(Log2, SDLoc(N), MVT::i32);
}]>;
// Check if a floating point value is a power of 2 floating-point
|
arsenm
approved these changes
Nov 22, 2024
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Create signed constant using getSignedConstant(), to avoid future assertion failures when we disable implicit truncation in getConstant().
This also touches some generic legalization code, which apparently only AMDGPU tests.