
Commit 694a488

AMDGPU: Add pseudoinstruction for 64-bit agpr or vgpr constants (#154499)
64-bit version of 7425af4. We still need to lower to 32-bit v_accvgpr_write_b32s, so this has a unique value restriction that requires both halves of the constant to be 32-bit inline immediates. This only introduces the new pseudo definitions, but doesn't try to use them yet.
1 parent f649605 commit 694a488
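
The restriction above can be stated directly as a predicate on the two halves. Below is a minimal, self-contained sketch of that rule, assuming the usual 32-bit inline-immediate set (the integers -16..64 plus a handful of float bit patterns); the actual check added by this patch is `SIInstrInfo::isLegalAV64PseudoImm`, which defers to `AMDGPU::isInlinableLiteral32`.

```cpp
#include <cstdint>

// Hedged sketch, not the in-tree helper: approximate the 32-bit inline
// immediate set as the integers -16..64 plus the common float bit patterns
// (0.5, 1.0, 2.0, 4.0 and their negations; the subtarget-dependent 1/2pi
// value is omitted here).
static bool isInlineImm32Sketch(uint32_t Bits) {
  int32_t S = static_cast<int32_t>(Bits);
  if (S >= -16 && S <= 64)
    return true;
  switch (Bits) {
  case 0x3F000000u: case 0xBF000000u: // 0.5f, -0.5f
  case 0x3F800000u: case 0xBF800000u: // 1.0f, -1.0f
  case 0x40000000u: case 0xC0000000u: // 2.0f, -2.0f
  case 0x40800000u: case 0xC0800000u: // 4.0f, -4.0f
    return true;
  default:
    return false;
  }
}

// A 64-bit constant is usable by AV_MOV_B64_IMM_PSEUDO only if each 32-bit
// half is independently an inline immediate, because the pseudo may have to
// be lowered as two 32-bit writes.
static bool isLegalAV64ImmSketch(uint64_t Imm) {
  return isInlineImm32Sketch(static_cast<uint32_t>(Imm)) &&
         isInlineImm32Sketch(static_cast<uint32_t>(Imm >> 32));
}
```

Under this rule, 64 (halves 0x40 and 0x0) or the double 2.0 (0x4000000000000000, whose high half is the bit pattern of 2.0f) qualify, while the double 1.0 (high half 0x3FF00000) does not.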

File tree

12 files changed: +589 -3 lines changed

llvm/lib/Target/AMDGPU/SIDefines.h

Lines changed: 4 additions & 0 deletions
@@ -243,6 +243,10 @@ enum OperandType : unsigned {
   // Operand for SDWA instructions
   OPERAND_SDWA_VOPC_DST,
 
+  // Operand for AV_MOV_B64_IMM_PSEUDO, which is a pair of 32-bit inline
+  // constants.
+  OPERAND_INLINE_C_AV64_PSEUDO,
+
   OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32,
   OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_V2FP32,

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 34 additions & 0 deletions
@@ -1349,6 +1349,7 @@ bool SIInstrInfo::getConstValDefinedInReg(const MachineInstr &MI,
   case AMDGPU::V_MOV_B64_e32:
   case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
   case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
+  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
   case AMDGPU::S_MOV_B64_IMM_PSEUDO:
   case AMDGPU::V_MOV_B64_PSEUDO: {
     const MachineOperand &Src0 = MI.getOperand(1);
@@ -2133,6 +2134,25 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
         get(IsAGPR ? AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::V_MOV_B32_e32));
     break;
   }
+  case AMDGPU::AV_MOV_B64_IMM_PSEUDO: {
+    Register Dst = MI.getOperand(0).getReg();
+    if (SIRegisterInfo::isAGPRClass(RI.getPhysRegBaseClass(Dst))) {
+      uint64_t Imm = static_cast<uint64_t>(MI.getOperand(1).getImm());
+
+      Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
+      Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
+      BuildMI(MBB, MI, DL, get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), DstLo)
+          .addImm(SignExtend64<32>(Lo_32(Imm)))
+          .addReg(Dst, RegState::Implicit | RegState::Define);
+      BuildMI(MBB, MI, DL, get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), DstHi)
+          .addImm(SignExtend64<32>(Hi_32(Imm)))
+          .addReg(Dst, RegState::Implicit | RegState::Define);
+      MI.eraseFromParent();
+      break;
+    }
+
+    [[fallthrough]];
+  }
   case AMDGPU::V_MOV_B64_PSEUDO: {
     Register Dst = MI.getOperand(0).getReg();
     Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
@@ -3425,6 +3445,11 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
   case AMDGPU::V_ACCVGPR_MOV_B32:
   case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
     return true;
+  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
+    // TODO: We could fold this, but it's a strange case. The immediate value
+    // can't be directly folded into any real use. We would have to spread new
+    // immediate legality checks around and only accept subregister extracts for
+    // profitability.
   default:
     return false;
   }
@@ -4471,6 +4496,8 @@ bool SIInstrInfo::isInlineConstant(int64_t Imm, uint8_t OperandType) const {
   case AMDGPU::OPERAND_KIMM16:
   case AMDGPU::OPERAND_KIMM64:
     return false;
+  case AMDGPU::OPERAND_INLINE_C_AV64_PSEUDO:
+    return isLegalAV64PseudoImm(Imm);
   case AMDGPU::OPERAND_INPUT_MODS:
   case MCOI::OPERAND_IMMEDIATE:
     // Always embedded in the instruction for free.
@@ -4536,6 +4563,12 @@ bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
     return ST.hasVOP3Literal();
   }
 
+bool SIInstrInfo::isLegalAV64PseudoImm(uint64_t Imm) const {
+  // 2 32-bit inline constants packed into one.
+  return AMDGPU::isInlinableLiteral32(Lo_32(Imm), ST.hasInv2PiInlineImm()) &&
+         AMDGPU::isInlinableLiteral32(Hi_32(Imm), ST.hasInv2PiInlineImm());
+}
+
 bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const {
   // GFX90A does not have V_MUL_LEGACY_F32_e32.
   if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.hasGFX90AInsts())
@@ -4896,6 +4929,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
   case MCOI::OPERAND_IMMEDIATE:
   case AMDGPU::OPERAND_KIMM32:
   case AMDGPU::OPERAND_KIMM64:
+  case AMDGPU::OPERAND_INLINE_C_AV64_PSEUDO:
     // Check if this operand is an immediate.
     // FrameIndex operands will be replaced by immediates, so they are
     // allowed.
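
For the AGPR case, the expansion above splits the 64-bit immediate into two halves and sign-extends each one before attaching it to a V_ACCVGPR_WRITE_B32_e64, since MachineOperand stores immediates as int64_t and 32-bit immediates are conventionally kept sign-extended; each half-write also carries an implicit def of the full 64-bit register so the pair is treated as fully defined. A small standalone sketch of the split, with plain casts standing in for LLVM's Lo_32, Hi_32 and SignExtend64<32> helpers:

```cpp
#include <cstdint>
#include <utility>

// Hedged illustration of the Lo_32 / Hi_32 / SignExtend64<32> combination
// used by the AV_MOV_B64_IMM_PSEUDO expansion; not the in-tree helpers.
static std::pair<int64_t, int64_t> splitForSub0Sub1(uint64_t Imm) {
  uint32_t Lo = static_cast<uint32_t>(Imm);       // Lo_32(Imm)
  uint32_t Hi = static_cast<uint32_t>(Imm >> 32); // Hi_32(Imm)
  // SignExtend64<32>: view each half as a signed 32-bit value and widen it,
  // so 0xFFFFFFFF becomes -1 rather than 4294967295.
  return {static_cast<int32_t>(Lo), static_cast<int32_t>(Hi)};
}

// Example: 0xFFFFFFFF00000040 splits into sub0 = 64 and sub1 = -1, both of
// which are 32-bit inline immediates, so the pseudo can be expanded into two
// V_ACCVGPR_WRITE_B32_e64 instructions.
```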

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 3 additions & 0 deletions
@@ -1183,6 +1183,9 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
   bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
                          const MachineOperand &MO) const;
 
+  /// Check if this immediate value can be used for AV_MOV_B64_IMM_PSEUDO.
+  bool isLegalAV64PseudoImm(uint64_t Imm) const;
+
   /// Return true if this 64-bit VALU instruction has a 32-bit encoding.
   /// This function will return false if you pass it a 32-bit instruction.
   bool hasVALU32BitEncoding(unsigned Opcode) const;

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 7 additions & 0 deletions
@@ -1068,6 +1068,13 @@ def SplitBarrier : ImmOperand<i32> {
   let PrintMethod = "printOperand";
 }
 
+// Pseudo-operand type. This is a pair of 32-bit inline constants
+// packed into a single 64-bit value.
+def AV_64_PSEUDO_IMM : Operand<i64> {
+  let OperandNamespace = "AMDGPU";
+  let OperandType = "OPERAND_INLINE_C_AV64_PSEUDO";
+}
+
 def VReg32OrOffClass : AsmOperandClass {
   let Name = "VReg32OrOff";
   let ParserMethod = "parseVReg32OrOff";

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 22 additions & 1 deletion
@@ -150,7 +150,7 @@ def AV_MOV_B32_IMM_PSEUDO
   let isReMaterializable = 1;
   let isAsCheapAsAMove = 1;
 
-  // Imprecise, technically if AGPR it's VOP3 and VOP1 for AGPR. But
+  // Imprecise, technically if AGPR it's VOP3 and VOP1 for VGPR. But
   // this tricks the rematerialize logic into working for it.
   let VOP3 = 1;
   let isMoveImm = 1;
@@ -160,6 +160,27 @@ def AV_MOV_B32_IMM_PSEUDO
   let UseNamedOperandTable = 1;
 }
 
+// 64-bit materialize immediate which supports AGPR or VGPR. This has
+// an unusual operand restriction which requires the two halves of the
+// immediate to each be 32-bit inline immediate values.
+//
+// FIXME: This unnecessarily has the even aligned vector register
+// requirement applied.
+def AV_MOV_B64_IMM_PSEUDO
+    : VPseudoInstSI<(outs AV_64:$vdst), (ins AV_64_PSEUDO_IMM:$src0)> {
+  let isReMaterializable = 1;
+  let isAsCheapAsAMove = 1;
+
+  // Imprecise, technically if AGPR it's 2 x VOP3 and 2 x VOP1 for
+  // VGPR. But this tricks the rematerialize logic into working for
+  // it.
+  let VOP3 = 1;
+  let isMoveImm = 1;
+  let SchedRW = [Write32Bit, Write32Bit];
+  let Size = 16; // 2 x v_accvgpr_write_b32 in the worst case
+  let UseNamedOperandTable = 1;
+}
+
 // 64-bit vector move with dpp. Expanded post-RA.
 def V_MOV_B64_DPP_PSEUDO : VOP_DPP_Pseudo <"v_mov_b64_dpp", VOP_I64_I64> {
   let Size = 16; // Requires two 8-byte v_mov_b32_dpp to complete.
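
Nothing creates the new pseudo yet, matching the commit message. As a hedged sketch only (the surrounding pass, DstReg, and InsertPt are assumptions, not part of this patch), a later change could materialize a qualifying constant like so:

```cpp
// Hypothetical call site in a machine pass: emit the pseudo only when the
// immediate passes the paired-inline-immediate check added by this commit.
if (TII->isLegalAV64PseudoImm(Imm)) {
  BuildMI(MBB, InsertPt, DL, TII->get(AMDGPU::AV_MOV_B64_IMM_PSEUDO), DstReg)
      .addImm(Imm);
}
```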

llvm/test/CodeGen/AMDGPU/amdgpu-prepare-agpr-alloc.mir

Lines changed: 112 additions & 0 deletions
@@ -13,6 +13,14 @@
     ret void
   }
 
+  define void @func64() {
+    ret void
+  }
+
+  define void @func64_no_agprs() "amdgpu-agpr-alloc"="0,0" {
+    ret void
+  }
+
 ...
 ---
 name: func
@@ -93,3 +101,107 @@ body: |
     %1:agpr_32 = V_ACCVGPR_WRITE_B32_e64 2, implicit $exec
 
 ...
+
+---
+name: func64
+tracksRegLiveness: true
+stack:
+  - { id: 0, size: 4 }
+body: |
+  ; HAS-AGPR-LABEL: name: func64
+  ; HAS-AGPR: bb.0:
+  ; HAS-AGPR-NEXT: successors: %bb.1(0x80000000)
+  ; HAS-AGPR-NEXT: liveins: $vgpr0_vgpr1
+  ; HAS-AGPR-NEXT: {{ $}}
+  ; HAS-AGPR-NEXT: [[V_MOV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_e64 $vgpr0_vgpr1, implicit $exec
+  ; HAS-AGPR-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 54, implicit $exec
+  ; HAS-AGPR-NEXT: [[V_MOV_B64_e64_1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_e64 1, implicit $exec
+  ; HAS-AGPR-NEXT: [[V_MOV_B64_e64_2:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_e64 64, implicit $exec
+  ; HAS-AGPR-NEXT: [[V_MOV_B64_e64_3:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_e64 %stack.0, implicit $exec
+  ; HAS-AGPR-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 65, implicit $exec
+  ; HAS-AGPR-NEXT: [[V_MOV_B2:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 279172874240, implicit $exec
+  ; HAS-AGPR-NEXT: [[V_MOV_B3:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 279172874305, implicit $exec
+  ; HAS-AGPR-NEXT: [[V_MOV_B4:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 4290672329938, implicit $exec
+  ; HAS-AGPR-NEXT: [[V_MOV_B5:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO -9223372036854775808, implicit $exec
+  ; HAS-AGPR-NEXT: [[V_MOV_B6:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 1042479491, implicit $exec
+  ; HAS-AGPR-NEXT: [[V_MOV_B7:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 4477415320595726336, implicit $exec
+  ; HAS-AGPR-NEXT: [[V_MOV_B8:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO $vgpr0_vgpr1, implicit $exec
+  ; HAS-AGPR-NEXT: [[V_MOV_B9:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO %stack.0, implicit $exec
+  ; HAS-AGPR-NEXT: {{ $}}
+  ; HAS-AGPR-NEXT: bb.1:
+  ; HAS-AGPR-NEXT: [[V_MOV_B64_e64_4:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_e64 3, implicit $exec
+  ;
+  ; NO-AGPR-LABEL: name: func64
+  ; NO-AGPR: bb.0:
+  ; NO-AGPR-NEXT: successors: %bb.1(0x80000000)
+  ; NO-AGPR-NEXT: liveins: $vgpr0_vgpr1
+  ; NO-AGPR-NEXT: {{ $}}
+  ; NO-AGPR-NEXT: [[V_MOV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_e64 $vgpr0_vgpr1, implicit $exec
+  ; NO-AGPR-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 54, implicit $exec
+  ; NO-AGPR-NEXT: [[V_MOV_B64_e64_1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_e64 1, implicit $exec
+  ; NO-AGPR-NEXT: [[V_MOV_B64_e64_2:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_e64 64, implicit $exec
+  ; NO-AGPR-NEXT: [[V_MOV_B64_e64_3:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_e64 %stack.0, implicit $exec
+  ; NO-AGPR-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 65, implicit $exec
+  ; NO-AGPR-NEXT: [[V_MOV_B2:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 279172874240, implicit $exec
+  ; NO-AGPR-NEXT: [[V_MOV_B3:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 279172874305, implicit $exec
+  ; NO-AGPR-NEXT: [[V_MOV_B4:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 4290672329938, implicit $exec
+  ; NO-AGPR-NEXT: [[V_MOV_B5:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO -9223372036854775808, implicit $exec
+  ; NO-AGPR-NEXT: [[V_MOV_B6:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 1042479491, implicit $exec
+  ; NO-AGPR-NEXT: [[V_MOV_B7:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 4477415320595726336, implicit $exec
+  ; NO-AGPR-NEXT: [[V_MOV_B8:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO $vgpr0_vgpr1, implicit $exec
+  ; NO-AGPR-NEXT: [[V_MOV_B9:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO %stack.0, implicit $exec
+  ; NO-AGPR-NEXT: {{ $}}
+  ; NO-AGPR-NEXT: bb.1:
+  ; NO-AGPR-NEXT: [[V_MOV_B64_e64_4:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_e64 3, implicit $exec
+  bb.0:
+    liveins: $vgpr0_vgpr1
+    %0:vreg_64_align2 = V_MOV_B64_e64 $vgpr0_vgpr1, implicit $exec
+    %1:vreg_64_align2 = V_MOV_B64_PSEUDO 54, implicit $exec
+    %2:vreg_64_align2 = V_MOV_B64_e64 1, implicit $exec
+    %3:vreg_64_align2 = V_MOV_B64_e64 64, implicit $exec
+    %4:vreg_64_align2 = V_MOV_B64_e64 %stack.0, implicit $exec
+    %5:vreg_64_align2 = V_MOV_B64_PSEUDO 65, implicit $exec
+    %6:vreg_64_align2 = V_MOV_B64_PSEUDO 279172874240, implicit $exec
+    %7:vreg_64_align2 = V_MOV_B64_PSEUDO 279172874305, implicit $exec
+    %8:vreg_64_align2 = V_MOV_B64_PSEUDO 4290672329938, implicit $exec
+    %9:vreg_64_align2 = V_MOV_B64_PSEUDO 9223372036854775808, implicit $exec
+    %10:vreg_64_align2 = V_MOV_B64_PSEUDO 1042479491, implicit $exec
+    %11:vreg_64_align2 = V_MOV_B64_PSEUDO 4477415320595726336, implicit $exec
+    %12:vreg_64_align2 = V_MOV_B64_PSEUDO $vgpr0_vgpr1, implicit $exec
+    %13:vreg_64_align2 = V_MOV_B64_PSEUDO %stack.0, implicit $exec
+
+  bb.1:
+    %14:vreg_64_align2 = V_MOV_B64_e64 3, implicit $exec
+
+...
+
+---
+name: func64_no_agprs
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; HAS-AGPR-LABEL: name: func64_no_agprs
+    ; HAS-AGPR: liveins: $vgpr0
+    ; HAS-AGPR-NEXT: {{ $}}
+    ; HAS-AGPR-NEXT: [[V_MOV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_e64 1, implicit $exec
+    ; HAS-AGPR-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 4290672329938, implicit $exec
+    ; HAS-AGPR-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO -9223372036854775808, implicit $exec
+    ; HAS-AGPR-NEXT: [[V_MOV_B2:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 1042479491, implicit $exec
+    ; HAS-AGPR-NEXT: [[V_MOV_B3:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 4477415320595726336, implicit $exec
+    ;
+    ; NO-AGPR-LABEL: name: func64_no_agprs
+    ; NO-AGPR: liveins: $vgpr0
+    ; NO-AGPR-NEXT: {{ $}}
+    ; NO-AGPR-NEXT: [[V_MOV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_e64 1, implicit $exec
+    ; NO-AGPR-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 4290672329938, implicit $exec
+    ; NO-AGPR-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO -9223372036854775808, implicit $exec
+    ; NO-AGPR-NEXT: [[V_MOV_B2:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 1042479491, implicit $exec
+    ; NO-AGPR-NEXT: [[V_MOV_B3:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 4477415320595726336, implicit $exec
+    %0:vreg_64_align2 = V_MOV_B64_e64 1, implicit $exec
+    %1:vreg_64_align2 = V_MOV_B64_PSEUDO 4290672329938, implicit $exec
+    %2:vreg_64_align2 = V_MOV_B64_PSEUDO 9223372036854775808, implicit $exec
+    %3:vreg_64_align2 = V_MOV_B64_PSEUDO 1042479491, implicit $exec
+    %4:vreg_64_align2 = V_MOV_B64_PSEUDO 4477415320595726336, implicit $exec
+
+...
