
Commit c14e992

Reapply "AMDGPU: Move reg_sequence splat handling (llvm#140313)"
This reverts commit c8d27ce.
1 parent: d8a7254

File tree

3 files changed: 28 additions & 44 deletions


llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 27 additions & 42 deletions
@@ -219,7 +219,12 @@ class SIFoldOperandsImpl {
                         const FoldableDef &OpToFold) const;
   bool isUseSafeToFold(const MachineInstr &MI,
                        const MachineOperand &UseMO) const;
-  bool
+
+  const TargetRegisterClass *getRegSeqInit(
+      MachineInstr &RegSeq,
+      SmallVectorImpl<std::pair<MachineOperand *, unsigned>> &Defs) const;
+
+  const TargetRegisterClass *
   getRegSeqInit(SmallVectorImpl<std::pair<MachineOperand *, unsigned>> &Defs,
                 Register UseReg) const;
 
@@ -935,19 +940,24 @@ static MachineOperand *lookUpCopyChain(const SIInstrInfo &TII,
   return Sub;
 }
 
-// Find a def of the UseReg, check if it is a reg_sequence and find initializers
-// for each subreg, tracking it to foldable inline immediate if possible.
-// Returns true on success.
-bool SIFoldOperandsImpl::getRegSeqInit(
-    SmallVectorImpl<std::pair<MachineOperand *, unsigned>> &Defs,
-    Register UseReg, uint8_t OpTy) const {
-  MachineInstr *Def = MRI->getVRegDef(UseReg);
-  if (!Def || !Def->isRegSequence())
-    return false;
+const TargetRegisterClass *SIFoldOperandsImpl::getRegSeqInit(
+    MachineInstr &RegSeq,
+    SmallVectorImpl<std::pair<MachineOperand *, unsigned>> &Defs) const {
 
-  for (unsigned I = 1, E = Def->getNumExplicitOperands(); I != E; I += 2) {
-    MachineOperand &SrcOp = Def->getOperand(I);
-    unsigned SubRegIdx = Def->getOperand(I + 1).getImm();
+  assert(RegSeq.isRegSequence());
+
+  const TargetRegisterClass *RC = nullptr;
+
+  for (unsigned I = 1, E = RegSeq.getNumExplicitOperands(); I != E; I += 2) {
+    MachineOperand &SrcOp = RegSeq.getOperand(I);
+    unsigned SubRegIdx = RegSeq.getOperand(I + 1).getImm();
+
+    // Only accept reg_sequence with uniform reg class inputs for simplicity.
+    const TargetRegisterClass *OpRC = getRegOpRC(*MRI, *TRI, SrcOp);
+    if (!RC)
+      RC = OpRC;
+    else if (!TRI->getCommonSubClass(RC, OpRC))
+      return nullptr;
 
     if (SrcOp.getSubReg()) {
       // TODO: Handle subregister compose
@@ -956,8 +966,7 @@ bool SIFoldOperandsImpl::getRegSeqInit(
     }
 
     MachineOperand *DefSrc = lookUpCopyChain(*TII, *MRI, SrcOp.getReg());
-    if (DefSrc && (DefSrc->isReg() ||
-                   (DefSrc->isImm() && TII->isInlineConstant(*DefSrc, OpTy)))) {
+    if (DefSrc && (DefSrc->isReg() || DefSrc->isImm())) {
       Defs.emplace_back(DefSrc, SubRegIdx);
       continue;
     }
@@ -1112,7 +1121,6 @@ bool SIFoldOperandsImpl::tryToFoldACImm(
   if (!AMDGPU::isSISrcOperand(Desc, UseOpIdx))
     return false;
 
-  uint8_t OpTy = Desc.operands()[UseOpIdx].OperandType;
   MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
   if (OpToFold.isImm() && OpToFold.isOperandLegal(*TII, *UseMI, UseOpIdx)) {
     appendFoldCandidate(FoldList, UseMI, UseOpIdx, OpToFold);
@@ -1147,31 +1155,7 @@ bool SIFoldOperandsImpl::tryToFoldACImm(
     }
   }
 
-  SmallVector<std::pair<MachineOperand*, unsigned>, 32> Defs;
-  if (!getRegSeqInit(Defs, UseReg, OpTy))
-    return false;
-
-  int32_t Imm;
-  for (unsigned I = 0, E = Defs.size(); I != E; ++I) {
-    const MachineOperand *Op = Defs[I].first;
-    if (!Op->isImm())
-      return false;
-
-    auto SubImm = Op->getImm();
-    if (!I) {
-      Imm = SubImm;
-      if (!TII->isInlineConstant(*Op, OpTy) ||
-          !TII->isOperandLegal(*UseMI, UseOpIdx, Op))
-        return false;
-
-      continue;
-    }
-    if (Imm != SubImm)
-      return false; // Can only fold splat constants
-  }
-
-  appendFoldCandidate(FoldList, UseMI, UseOpIdx, Defs[0].first);
-  return true;
+  return false;
 }
 
 void SIFoldOperandsImpl::foldOperand(
@@ -1236,6 +1220,7 @@ void SIFoldOperandsImpl::foldOperand(
       foldOperand(OpToFold, RSUseMI, RSUseMI->getOperandNo(RSUse), FoldList,
                   CopiesToReplace);
     }
+
     return;
  }
 
@@ -2387,7 +2372,7 @@ bool SIFoldOperandsImpl::tryFoldRegSequence(MachineInstr &MI) {
    return false;
 
  SmallVector<std::pair<MachineOperand*, unsigned>, 32> Defs;
-  if (!getRegSeqInit(Defs, Reg, MCOI::OPERAND_REGISTER))
+  if (!getRegSeqInit(Defs, Reg))
    return false;
 
  for (auto &[Op, SubIdx] : Defs) {
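
For readers following the change: the block deleted from tryToFoldACImm above was essentially a splat test over the reg_sequence initializers collected by getRegSeqInit. Below is a minimal, hypothetical sketch of that test; the helper name getSplatImm is invented here, and the isInlineConstant/isOperandLegal legality checks from the original loop are omitted. Per the commit title, the handling itself moves to the reg_sequence folding path, and tryToFoldACImm now simply returns false at this point.

// Sketch only: detect a splat constant among reg_sequence initializers,
// mirroring the loop deleted from tryToFoldACImm (legality checks omitted).
#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/MachineOperand.h"
#include <cstdint>
#include <optional>
#include <utility>

using namespace llvm;

// Returns the common immediate if every initializer is the same immediate,
// std::nullopt otherwise.
static std::optional<int64_t>
getSplatImm(ArrayRef<std::pair<MachineOperand *, unsigned>> Defs) {
  std::optional<int64_t> Splat;
  for (const std::pair<MachineOperand *, unsigned> &Def : Defs) {
    const MachineOperand *Op = Def.first;
    if (!Op->isImm())
      return std::nullopt; // Non-immediate initializer: nothing to fold.
    int64_t Imm = Op->getImm();
    if (!Splat)
      Splat = Imm;         // First element fixes the candidate value.
    else if (*Splat != Imm)
      return std::nullopt; // Mixed values: only splat constants fold.
  }
  return Splat;
}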

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 1 addition & 1 deletion
@@ -1346,7 +1346,7 @@ unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
 
 unsigned getVGPRReductionToIncreaseWavesPerEU(const MCSubtargetInfo *STI,
                                               unsigned NumVGPRs) {
-  unsigned Granule = getVGPRAllocGranule(STI);
+  unsigned Granule = getVGPRAllocGranule(STI, 0, std::nullopt);
   unsigned MaxWaves = getMaxWavesPerEU(STI);
   unsigned TotalNumVGPRs = getTotalNumVGPRs(STI);

llvm/test/CodeGen/AMDGPU/packed-fp32.ll

Lines changed: 0 additions & 1 deletion
@@ -2769,7 +2769,6 @@ define amdgpu_kernel void @fma_v2_v_lit_splat(ptr addrspace(1) %a) {
 ; PACKED-SDAG-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; PACKED-SDAG-NEXT:    s_endpgm
 ;
-<<<<<<< HEAD
 ; GFX90A-GISEL-LABEL: fma_v2_v_lit_splat:
 ; GFX90A-GISEL:       ; %bb.0:
 ; GFX90A-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
