Skip to content

Commit a32040e

Browse files
authored
[AMDGPU] Use 64-bit literals in codegen on gfx1250 (#148727)
1 parent 56a4f8d commit a32040e

File tree

6 files changed

+707
-6
lines changed

6 files changed

+707
-6
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -447,14 +447,42 @@ void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
447447
return;
448448
}
449449

450+
bool IsGCN = CurDAG->getSubtarget().getTargetTriple().isAMDGCN();
451+
if (IsGCN && Subtarget->has64BitLiterals() && VT.getSizeInBits() == 64 &&
452+
CurDAG->isConstantValueOfAnyType(SDValue(N, 0))) {
453+
uint64_t C = 0;
454+
bool AllConst = true;
455+
unsigned EltSize = EltVT.getSizeInBits();
456+
for (unsigned I = 0; I < NumVectorElts; ++I) {
457+
SDValue Op = N->getOperand(I);
458+
if (Op.isUndef()) {
459+
AllConst = false;
460+
break;
461+
}
462+
uint64_t Val;
463+
if (ConstantFPSDNode *CF = dyn_cast<ConstantFPSDNode>(Op)) {
464+
Val = CF->getValueAPF().bitcastToAPInt().getZExtValue();
465+
} else
466+
Val = cast<ConstantSDNode>(Op)->getZExtValue();
467+
C |= Val << (EltSize * I);
468+
}
469+
if (AllConst) {
470+
SDValue CV = CurDAG->getTargetConstant(C, DL, MVT::i64);
471+
MachineSDNode *Copy =
472+
CurDAG->getMachineNode(AMDGPU::S_MOV_B64_IMM_PSEUDO, DL, VT, CV);
473+
CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, VT, SDValue(Copy, 0),
474+
RegClass);
475+
return;
476+
}
477+
}
478+
450479
assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
451480
"supported yet");
452481
// 32 = Max Num Vector Elements
453482
// 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
454483
// 1 = Vector Register Class
455484
SmallVector<SDValue, 32 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
456485

457-
bool IsGCN = CurDAG->getSubtarget().getTargetTriple().isAMDGCN();
458486
RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
459487
bool IsRegSeq = true;
460488
unsigned NOps = N->getNumOperands();
@@ -676,7 +704,8 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
676704

677705
case ISD::Constant:
678706
case ISD::ConstantFP: {
679-
if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
707+
if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N) ||
708+
Subtarget->has64BitLiterals())
680709
break;
681710

682711
uint64_t Imm;

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12155,6 +12155,11 @@ SDValue SITargetLowering::splitBinaryBitConstantOp(
1215512155
if ((bitOpWithConstantIsReducible(Opc, ValLo) ||
1215612156
bitOpWithConstantIsReducible(Opc, ValHi)) ||
1215712157
(CRHS->hasOneUse() && !TII->isInlineConstant(CRHS->getAPIntValue()))) {
12158+
// We have 64-bit scalar and/or/xor, but do not have vector forms.
12159+
if (Subtarget->has64BitLiterals() && CRHS->hasOneUse() &&
12160+
!CRHS->user_begin()->isDivergent())
12161+
return SDValue();
12162+
1215812163
// If we need to materialize a 64-bit immediate, it will be split up later
1215912164
// anyway. Avoid creating the harder to understand 64-bit immediate
1216012165
// materialization.

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2273,6 +2273,12 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
22732273
case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
22742274
const MachineOperand &SrcOp = MI.getOperand(1);
22752275
assert(!SrcOp.isFPImm());
2276+
2277+
if (ST.has64BitLiterals()) {
2278+
MI.setDesc(get(AMDGPU::S_MOV_B64));
2279+
break;
2280+
}
2281+
22762282
APInt Imm(64, SrcOp.getImm());
22772283
if (Imm.isIntN(32) || isInlineConstant(Imm)) {
22782284
MI.setDesc(get(AMDGPU::S_MOV_B64));
@@ -6099,14 +6105,18 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
60996105
OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_V2FP32;
61006106
if (Is64BitOp &&
61016107
!AMDGPU::isInlinableLiteral64(Imm, ST.hasInv2PiInlineImm())) {
6102-
if (!AMDGPU::isValid32BitLiteral(Imm, Is64BitFPOp))
6108+
if (!AMDGPU::isValid32BitLiteral(Imm, Is64BitFPOp) &&
6109+
(!ST.has64BitLiterals() || InstDesc.getSize() != 4))
61036110
return false;
61046111

61056112
// FIXME: We can use sign extended 64-bit literals, but only for signed
61066113
// operands. At the moment we do not know if an operand is signed.
61076114
// Such operand will be encoded as its low 32 bits and then either
61086115
// correctly sign extended or incorrectly zero extended by HW.
6109-
if (!Is64BitFPOp && (int32_t)Imm < 0)
6116+
// If 64-bit literals are supported and the literal will be encoded
6117+
// as a full 64-bit value, we can still use it.
6118+
if (!Is64BitFPOp && (int32_t)Imm < 0 &&
6119+
(!ST.has64BitLiterals() || AMDGPU::isValid32BitLiteral(Imm, false)))
61106120
return false;
61116121
}
61126122
}
@@ -9178,15 +9188,30 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
91789188
if (isDPP(MI))
91799189
return DescSize;
91809190
bool HasLiteral = false;
9191+
unsigned LiteralSize = 4;
91819192
for (int I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
91829193
const MachineOperand &Op = MI.getOperand(I);
91839194
const MCOperandInfo &OpInfo = Desc.operands()[I];
91849195
if (!Op.isReg() && !isInlineConstant(Op, OpInfo)) {
91859196
HasLiteral = true;
9197+
if (ST.has64BitLiterals()) {
9198+
switch (OpInfo.OperandType) {
9199+
default:
9200+
break;
9201+
case AMDGPU::OPERAND_REG_IMM_FP64:
9202+
if (!AMDGPU::isValid32BitLiteral(Op.getImm(), true))
9203+
LiteralSize = 8;
9204+
break;
9205+
case AMDGPU::OPERAND_REG_IMM_INT64:
9206+
if (!Op.isImm() || !AMDGPU::isValid32BitLiteral(Op.getImm(), false))
9207+
LiteralSize = 8;
9208+
break;
9209+
}
9210+
}
91869211
break;
91879212
}
91889213
}
9189-
return HasLiteral ? DescSize + 4 : DescSize;
9214+
return HasLiteral ? DescSize + LiteralSize : DescSize;
91909215
}
91919216

91929217
// Check whether we have extra NSA words.

llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1058,7 +1058,11 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
10581058
// fold an immediate into the shrunk instruction as a literal operand. In
10591059
// GFX10 VOP3 instructions can take a literal operand anyway, so there is
10601060
// no advantage to doing this.
1061-
if (ST->hasVOP3Literal() && !IsPostRA)
1061+
// However, if 64-bit literals are allowed we still need to shrink it
1062+
// so that such a literal can be folded.
1063+
if (ST->hasVOP3Literal() &&
1064+
(!ST->has64BitLiterals() || AMDGPU::isTrue16Inst(MI.getOpcode())) &&
1065+
!IsPostRA)
10621066
continue;
10631067

10641068
if (ST->hasTrue16BitInsts() && AMDGPU::isTrue16Inst(MI.getOpcode()) &&

0 commit comments

Comments
 (0)