-
Notifications
You must be signed in to change notification settings - Fork 15k
[AMDGPU] Legalize 64bit elements for BUILD_VECTOR on gfx942 #145052
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -360,9 +360,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, | |
| // Most operations are naturally 32-bit vector operations. We only support | ||
| // load and store of i64 vectors, so promote v2i64 vector operations to v4i32. | ||
| for (MVT Vec64 : {MVT::v2i64, MVT::v2f64}) { | ||
| setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote); | ||
| AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v4i32); | ||
|
|
||
| if (!STI.hasMovB64()) { | ||
| setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote); | ||
| AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v4i32); | ||
| } | ||
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote); | ||
| AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v4i32); | ||
|
|
||
|
|
@@ -374,9 +375,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, | |
| } | ||
|
|
||
| for (MVT Vec64 : {MVT::v3i64, MVT::v3f64}) { | ||
| setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote); | ||
| AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v6i32); | ||
|
|
||
| if (!STI.hasMovB64()) { | ||
| setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote); | ||
| AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v6i32); | ||
| } | ||
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote); | ||
| AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v6i32); | ||
|
|
||
|
|
@@ -388,9 +390,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, | |
| } | ||
|
|
||
| for (MVT Vec64 : {MVT::v4i64, MVT::v4f64}) { | ||
| setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote); | ||
| AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v8i32); | ||
|
|
||
| if (!STI.hasMovB64()) { | ||
| setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote); | ||
| AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v8i32); | ||
| } | ||
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote); | ||
| AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v8i32); | ||
|
|
||
|
|
@@ -402,9 +405,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, | |
| } | ||
|
|
||
| for (MVT Vec64 : {MVT::v8i64, MVT::v8f64}) { | ||
| setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote); | ||
| AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v16i32); | ||
|
|
||
| if (!STI.hasMovB64()) { | ||
| setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote); | ||
| AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v16i32); | ||
| } | ||
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote); | ||
| AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v16i32); | ||
|
|
||
|
|
@@ -416,9 +420,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, | |
| } | ||
|
|
||
| for (MVT Vec64 : {MVT::v16i64, MVT::v16f64}) { | ||
| setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote); | ||
| AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v32i32); | ||
|
|
||
| if (!STI.hasMovB64()) { | ||
| setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote); | ||
| AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v32i32); | ||
| } | ||
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote); | ||
| AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v32i32); | ||
|
|
||
|
|
@@ -977,6 +982,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, | |
|
|
||
| setTargetDAGCombine({ISD::ADD, | ||
| ISD::PTRADD, | ||
| ISD::BUILD_VECTOR, | ||
| ISD::UADDO_CARRY, | ||
| ISD::SUB, | ||
| ISD::USUBO_CARRY, | ||
|
|
@@ -15103,6 +15109,18 @@ bool SITargetLowering::shouldExpandVectorDynExt(SDNode *N) const { | |
| EltSize, NumElem, Idx->isDivergent(), getSubtarget()); | ||
| } | ||
|
|
||
| static unsigned getMappedVectorIndex(unsigned Idx, EVT From, EVT To) { | ||
| assert(From.isVector() && To.isVector() && | ||
| "Expected From and To types to be vector types."); | ||
| assert(From.getSizeInBits() == To.getSizeInBits() && | ||
| "Expected From and To vector types require to have the same size."); | ||
|
|
||
| unsigned FromNumElts = From.getVectorNumElements(); | ||
| unsigned ToNumElts = To.getVectorNumElements(); | ||
|
|
||
| return (Idx * ToNumElts) / FromNumElts; | ||
| } | ||
|
|
||
| SDValue | ||
| SITargetLowering::performExtractVectorEltCombine(SDNode *N, | ||
| DAGCombinerInfo &DCI) const { | ||
|
|
@@ -15186,6 +15204,27 @@ SITargetLowering::performExtractVectorEltCombine(SDNode *N, | |
| } | ||
| } | ||
|
|
||
| // if PeekThoughBitcast(Vec)[MapIdx(CIdx)] == undef && | ||
| // VecEltSize < PeekThroughEltSize, then | ||
| // EXTRACT_VECTOR_ELT(bitcast(build_vector(..., undef, ...)), CIdx) => undef | ||
| auto *IndexC = dyn_cast<ConstantSDNode>(N->getOperand(1)); | ||
| SDValue PeekThroughVec = peekThroughBitcasts(Vec); | ||
| EVT PeekThroughVecVT = PeekThroughVec.getValueType(); | ||
| if (IndexC && PeekThroughVec.getOpcode() == ISD::BUILD_VECTOR && | ||
| PeekThroughVecVT.isFixedLengthVector()) { | ||
| EVT PeekThroughVecEltVT = PeekThroughVecVT.getVectorElementType(); | ||
| // Small elt size vectors to big elt size vectors are the cases covered for | ||
| // now (e.g., v4i32 bitcast(v2i64)) which may be conservative. | ||
| if (VecEltSize < PeekThroughVecEltVT.getSizeInBits()) { | ||
| unsigned IndexVal = IndexC->getZExtValue(); | ||
| unsigned MappedIndexVal = | ||
| getMappedVectorIndex(IndexVal, VecVT, PeekThroughVecVT); | ||
| SDValue PeekThroughElt = PeekThroughVec.getOperand(MappedIndexVal); | ||
| if (PeekThroughElt.isUndef()) | ||
| return DAG.getNode(PeekThroughElt.getOpcode(), SDLoc(), VecEltVT); | ||
| } | ||
| } | ||
|
|
||
| // EXTRACT_VECTOR_ELT (<n x e>, var-idx) => n x select (e, const-idx) | ||
| if (shouldExpandVectorDynExt(N)) { | ||
| SDLoc SL(N); | ||
|
|
@@ -16792,6 +16831,95 @@ SDValue SITargetLowering::performSelectCombine(SDNode *N, | |
| SelectLHS, SelectRHS); | ||
| } | ||
|
|
||
| SDValue | ||
| SITargetLowering::performBuildVectorCombine(SDNode *N, | ||
| DAGCombinerInfo &DCI) const { | ||
| // TODO: legalize for all targets instead of just v_mov_b64 enabled ones, | ||
| // legalizing could still enable s_mov_b64 which is supported on all targets. | ||
| const GCNSubtarget *ST = getSubtarget(); | ||
| if (DCI.Level < AfterLegalizeDAG || !ST->hasMovB64()) | ||
| return SDValue(); | ||
|
|
||
| SelectionDAG &DAG = DCI.DAG; | ||
| SDLoc SL(N); | ||
|
|
||
| EVT VT = N->getValueType(0); | ||
| EVT EltVT = VT.getVectorElementType(); | ||
| unsigned SizeBits = VT.getSizeInBits(); | ||
| unsigned EltSize = EltVT.getSizeInBits(); | ||
|
|
||
| // Skip if: | ||
| // - Value type isn't multiple of 64 bit (e.g., v3i32), or | ||
| // - Element type has already been combined into 64b elements | ||
| if ((SizeBits % 64) != 0 || EltVT == MVT::i64 || EltVT == MVT::f64) | ||
| return SDValue(); | ||
|
|
||
| // Construct the 64b values. | ||
| SmallVector<uint64_t, 8> ImmVals; | ||
| uint64_t ImmVal = 0; | ||
| uint64_t ImmSize = 0; | ||
| for (SDValue Opand : N->ops()) { | ||
| // Build_vector with constants only. | ||
| ConstantSDNode *C = dyn_cast<ConstantSDNode>(Opand); | ||
| ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Opand); | ||
| BuildVectorSDNode *BV = | ||
| dyn_cast<BuildVectorSDNode>(peekThroughBitcasts(Opand)); | ||
|
|
||
| if (!C && !FPC && !BV) | ||
| return SDValue(); | ||
|
|
||
| uint64_t Val = 0; | ||
| if (BV) { | ||
| if (!BV->isConstant()) | ||
| return SDValue(); | ||
| bool IsLE = DAG.getDataLayout().isLittleEndian(); | ||
| BitVector UndefElements; | ||
| SmallVector<APInt> RawBits; | ||
| if (!BV->getConstantRawBits(IsLE, EltSize, RawBits, UndefElements)) | ||
| return SDValue(); | ||
|
|
||
| assert(RawBits.size() == 1 && | ||
| "BuildVector constant value retrieval expected 1 element"); | ||
|
|
||
| if (UndefElements.any()) | ||
| return SDValue(); | ||
|
|
||
| Val = RawBits[0].getZExtValue(); | ||
| } else { | ||
| Val = C ? C->getZExtValue() | ||
| : FPC->getValueAPF().bitcastToAPInt().getZExtValue(); | ||
| } | ||
| ImmVal |= Val << ImmSize; | ||
JanekvO marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| ImmSize += EltSize; | ||
|
||
| if (ImmSize == 64) { | ||
| if (!isUInt<32>(ImmVal)) | ||
| return SDValue(); | ||
| ImmVals.push_back(ImmVal); | ||
| ImmVal = 0; | ||
| ImmSize = 0; | ||
| } | ||
| } | ||
|
|
||
| // Avoid emitting build_vector with 1 element and directly emit value. | ||
| if (ImmVals.size() == 1) { | ||
| SDValue Val = DAG.getConstant(ImmVals[0], SL, MVT::i64); | ||
| return DAG.getBitcast(VT, Val); | ||
| } | ||
|
|
||
| // Construct and return build_vector with 64b elements. | ||
| if (!ImmVals.empty()) { | ||
| SmallVector<SDValue, 8> VectorConsts(ImmVals.size()); | ||
| for (unsigned i = 0; i < ImmVals.size(); ++i) | ||
| VectorConsts[i] = DAG.getConstant(ImmVals[i], SL, MVT::i64); | ||
| unsigned NewNumElts = SizeBits / 64; | ||
| LLVMContext &Ctx = *DAG.getContext(); | ||
| EVT NewVT = EVT::getVectorVT(Ctx, MVT::i64, NewNumElts); | ||
| SDValue BV = DAG.getBuildVector(NewVT, SL, VectorConsts); | ||
| return DAG.getBitcast(VT, BV); | ||
| } | ||
| return SDValue(); | ||
| } | ||
|
|
||
| SDValue SITargetLowering::PerformDAGCombine(SDNode *N, | ||
| DAGCombinerInfo &DCI) const { | ||
| switch (N->getOpcode()) { | ||
|
|
@@ -16885,6 +17013,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N, | |
| return performFCanonicalizeCombine(N, DCI); | ||
| case AMDGPUISD::RCP: | ||
| return performRcpCombine(N, DCI); | ||
| case ISD::BUILD_VECTOR: | ||
| return performBuildVectorCombine(N, DCI); | ||
| case ISD::FLDEXP: | ||
| case AMDGPUISD::FRACT: | ||
| case AMDGPUISD::RSQ: | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.