Skip to content

Commit 19c9f9c

Browse files
[AMDGPU] GFX12: Add s_prefetch_inst/data instructions (#74448)
Co-authored-by: Stanislav Mekhanoshin <[email protected]>
1 parent 08e63dd commit 19c9f9c

File tree

7 files changed

+145
-4
lines changed

7 files changed

+145
-4
lines changed

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4199,8 +4199,9 @@ bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
41994199
return true;
42004200

42014201
Error(getSMEMOffsetLoc(Operands),
4202-
(isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
4203-
"expected a 21-bit signed offset");
4202+
isGFX12Plus() ? "expected a 24-bit signed offset"
4203+
: (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4204+
: "expected a 21-bit signed offset");
42044205

42054206
return false;
42064207
}

llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,9 +91,11 @@ static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
9191
const MCDisassembler *Decoder) {
9292
auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
9393
int64_t Offset;
94-
if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
94+
if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
95+
Offset = SignExtend64<24>(Imm);
96+
} else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
9597
Offset = Imm & 0xFFFFF;
96-
} else { // GFX9+ supports 21-bit signed offsets.
98+
} else { // GFX9+ supports 21-bit signed offsets.
9799
Offset = SignExtend64<21>(Imm);
98100
}
99101
return addOperand(Inst, MCOperand::createImm(Offset));

llvm/lib/Target/AMDGPU/SMInstructions.td

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,23 @@ class SM_WaveId_Pseudo<string opName, SDPatternOperator node> : SM_Pseudo<
211211
let has_sbase = 0;
212212
}
213213

214+
// Pseudo for the scalar prefetch instructions. It defines no outputs; the
// inputs are an optional $sbase of class baseClass (present only when hasSBase
// is set) followed by $offset, $soffset and $sdata. The asm string likewise
// omits " $sbase," when hasSBase is 0.
class SM_Prefetch_Pseudo <string opName, RegisterClass baseClass, bit hasSBase>
215+
: SM_Pseudo<opName, (outs), !con(!if(hasSBase, (ins baseClass:$sbase), (ins)),
216+
(ins smem_offset:$offset, SReg_32:$soffset, i8imm:$sdata)),
217+
!if(hasSBase, " $sbase,", "") # " $offset, $soffset, $sdata"> {
218+
// Mark prefetches as both load and store to prevent reordering with loads
219+
// and stores. This is also needed for the pattern to match the prefetch intrinsic.
220+
let mayLoad = 1;
221+
let mayStore = 1;
222+
let has_glc = 0;
223+
let LGKM_CNT = 0;
224+
let has_sbase = hasSBase; // Mirrors the template argument so the flag agrees with the operand list.
225+
let ScalarStore = 0; // mayStore is set above, but ScalarStore is explicitly cleared.
226+
let has_offset = 1;
227+
let has_soffset = 1;
228+
let PseudoInstr = opName;
229+
}
230+
214231
//===----------------------------------------------------------------------===//
215232
// Scalar Atomic Memory Classes
216233
//===----------------------------------------------------------------------===//
@@ -415,6 +432,16 @@ defm S_DCACHE_DISCARD : SM_Pseudo_Discards;
415432
defm S_DCACHE_DISCARD_X2 : SM_Pseudo_Discards;
416433
}
417434

435+
// Scalar prefetch pseudos, all defined under the isGFX12Plus predicate. The
// *_PC_REL forms pass hasSBase = 0 and therefore take no $sbase operand; the
// buffer form uses an SReg_128 base and sets is_buffer.
let SubtargetPredicate = isGFX12Plus in {
436+
def S_PREFETCH_INST : SM_Prefetch_Pseudo <"s_prefetch_inst", SReg_64, 1>;
437+
def S_PREFETCH_INST_PC_REL : SM_Prefetch_Pseudo <"s_prefetch_inst_pc_rel", SReg_64, 0>;
438+
def S_PREFETCH_DATA : SM_Prefetch_Pseudo <"s_prefetch_data", SReg_64, 1>;
439+
def S_PREFETCH_DATA_PC_REL : SM_Prefetch_Pseudo <"s_prefetch_data_pc_rel", SReg_64, 0>;
440+
def S_BUFFER_PREFETCH_DATA : SM_Prefetch_Pseudo <"s_buffer_prefetch_data", SReg_128, 1> {
441+
let is_buffer = 1;
442+
}
443+
} // end let SubtargetPredicate = isGFX12Plus
444+
418445
//===----------------------------------------------------------------------===//
419446
// Targets
420447
//===----------------------------------------------------------------------===//
@@ -1203,3 +1230,33 @@ multiclass SM_Real_Probe_gfx11<bits<8> op> {
12031230

12041231
defm S_ATC_PROBE : SM_Real_Probe_gfx11 <0x22>;
12051232
defm S_ATC_PROBE_BUFFER : SM_Real_Probe_gfx11 <0x23>;
1233+
1234+
//===----------------------------------------------------------------------===//
1235+
// GFX12.
1236+
//===----------------------------------------------------------------------===//
1237+
1238+
// Real (encoded) form of an SMEM pseudo for GFX12. It is selected by the
// isGFX12Plus assembler predicate and decoded from the "GFX12" namespace.
class SMEM_Real_gfx12<bits<8> op, SM_Pseudo ps, string opName = ps.Mnemonic> :
1239+
SMEM_Real_10Plus_common<op, ps, opName, SIEncodingFamily.GFX12,
1240+
SGPR_NULL_gfx11plus> {
1241+
let AssemblerPredicate = isGFX12Plus;
1242+
let DecoderNamespace = "GFX12";
1243+
let Inst{18-13} = op{5-0}; // The low 6 bits of op are placed here.
1244+
let Inst{19} = !if(ps.has_dlc, cpol{CPolBit.DLC}, 0);
1245+
let Inst{24-20} = ?; // TODO-GFX12: Add new bits {24-20}: TH, Scope, NV
1246+
let Inst{25} = !if(ps.has_glc, cpol{CPolBit.GLC}, 0);
1247+
let Inst{55-32} = offset{23-0}; // 24-bit immediate offset field.
1248+
}
1249+
1250+
// GFX12 encoding for the prefetch pseudos: sdst is left unset and the sdata
// immediate is encoded in the instruction word instead.
class SMEM_Real_Prefetch_gfx12 <bits<8> op, SM_Pseudo ps> : SMEM_Real_gfx12<op, ps> {
1251+
bits<7> sdata; // Only 5 bits of sdata are supported.
1252+
1253+
let sdst = ?;
1254+
let Inst{12-11} = 0; // Unused sdata bits.
1255+
let Inst{10-6} = !if(ps.has_sdst, sdata{4-0}, ?); // Low 5 bits of sdata; left unset when ps.has_sdst is 0.
1256+
}
1257+
1258+
// GFX12 real encodings for the prefetch pseudos, opcodes 0x24 through 0x28.
// Note the opcode order: S_BUFFER_PREFETCH_DATA (0x27) sits between
// S_PREFETCH_DATA (0x26) and S_PREFETCH_DATA_PC_REL (0x28).
def S_PREFETCH_INST_gfx12 : SMEM_Real_Prefetch_gfx12<0x24, S_PREFETCH_INST>;
1259+
def S_PREFETCH_INST_PC_REL_gfx12 : SMEM_Real_Prefetch_gfx12<0x25, S_PREFETCH_INST_PC_REL>;
1260+
def S_PREFETCH_DATA_gfx12 : SMEM_Real_Prefetch_gfx12<0x26, S_PREFETCH_DATA>;
1261+
def S_BUFFER_PREFETCH_DATA_gfx12 : SMEM_Real_Prefetch_gfx12<0x27, S_BUFFER_PREFETCH_DATA>;
1262+
def S_PREFETCH_DATA_PC_REL_gfx12 : SMEM_Real_Prefetch_gfx12<0x28, S_PREFETCH_DATA_PC_REL>;

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2604,13 +2604,19 @@ static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
26042604

26052605
/// Returns true if \p EncodedOffset fits the unsigned immediate offset range
/// checked for \p ST: 23 bits on GFX12+, otherwise 20 bits when
/// hasSMEMByteOffset(ST) holds and 8 bits when it does not.
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
26062606
int64_t EncodedOffset) {
2607+
if (isGFX12Plus(ST))
2608+
return isUInt<23>(EncodedOffset); // Non-negative half of a 24-bit signed range.
2609+
26072610
return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
26082611
: isUInt<8>(EncodedOffset);
26092612
}
26102613

26112614
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
26122615
int64_t EncodedOffset,
26132616
bool IsBuffer) {
2617+
if (isGFX12Plus(ST))
2618+
return isInt<24>(EncodedOffset);
2619+
26142620
return !IsBuffer &&
26152621
hasSMRDSignedImmOffset(ST) &&
26162622
isInt<21>(EncodedOffset);
@@ -2631,6 +2637,10 @@ uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
26312637

26322638
std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
26332639
int64_t ByteOffset, bool IsBuffer) {
2640+
if (isGFX12Plus(ST)) // 24 bit signed offsets
2641+
return isInt<24>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
2642+
: std::nullopt;
2643+
26342644
// The signed version is always a byte offset.
26352645
if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
26362646
assert(hasSMEMByteOffset(ST));
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
// RUN: llvm-mc -arch=amdgcn -show-encoding -mcpu=gfx1200 %s | FileCheck --check-prefix=GFX12 %s
2+
3+
//===----------------------------------------------------------------------===//
4+
// ENC_SMEM.
5+
//===----------------------------------------------------------------------===//
6+
7+
s_prefetch_inst s[12:13], 16, s4, 2
8+
// GFX12: s_prefetch_inst s[12:13], 0x10, s4, 2 ; encoding: [0x86,0x80,0x04,0xf4,0x10,0x00,0x00,0x08]
9+
10+
s_prefetch_inst s[14:15], 0, m0, 7
11+
// GFX12: s_prefetch_inst s[14:15], 0x0, m0, 7 ; encoding: [0xc7,0x81,0x04,0xf4,0x00,0x00,0x00,0xfa]
12+
13+
s_prefetch_inst s[14:15], 0x7fffff, m0, 7
14+
// GFX12: s_prefetch_inst s[14:15], 0x7fffff, m0, 7 ; encoding: [0xc7,0x81,0x04,0xf4,0xff,0xff,0x7f,0xfa]
15+
16+
s_prefetch_inst s[14:15], -1, m0, 7
17+
// GFX12: s_prefetch_inst s[14:15], -0x1, m0, 7 ; encoding: [0xc7,0x81,0x04,0xf4,0xff,0xff,0xff,0xfa]
18+
19+
s_prefetch_inst s[14:15], 100, m0, 31
20+
// GFX12: s_prefetch_inst s[14:15], 0x64, m0, 31 ; encoding: [0xc7,0x87,0x04,0xf4,0x64,0x00,0x00,0xfa]
21+
22+
s_prefetch_inst_pc_rel 100, s10, 7
23+
// GFX12: s_prefetch_inst_pc_rel 0x64, s10, 7 ; encoding: [0xc0,0xa1,0x04,0xf4,0x64,0x00,0x00,0x14]
24+
25+
s_prefetch_data s[18:19], 100, s10, 7
26+
// GFX12: s_prefetch_data s[18:19], 0x64, s10, 7 ; encoding: [0xc9,0xc1,0x04,0xf4,0x64,0x00,0x00,0x14]
27+
28+
s_prefetch_data_pc_rel 100, s10, 7
29+
// GFX12: s_prefetch_data_pc_rel 0x64, s10, 7 ; encoding: [0xc0,0x01,0x05,0xf4,0x64,0x00,0x00,0x14]
30+
31+
s_buffer_prefetch_data s[20:23], 100, s10, 7
32+
// GFX12: s_buffer_prefetch_data s[20:23], 0x64, s10, 7 ; encoding: [0xca,0xe1,0x04,0xf4,0x64,0x00,0x00,0x14]
33+
34+
s_buffer_prefetch_data s[20:23], 100, null, 7
35+
// GFX12: s_buffer_prefetch_data s[20:23], 0x64, null, 7 ; encoding: [0xca,0xe1,0x04,0xf4,0x64,0x00,0x00,0xf8]

llvm/test/MC/AMDGPU/gfx12_err.s

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,3 +41,8 @@ image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_LOAD_HT scope:SCOPE
4141

4242
image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D scope:SCOPE_SE th:TH_LOAD_HT scope:SCOPE_SE
4343
// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand
44+
45+
s_prefetch_inst s[14:15], 0xffffff, m0, 7
46+
// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: expected a 24-bit signed offset
47+
// GFX12-ERR: s_prefetch_inst s[14:15], 0xffffff, m0, 7
48+
// GFX12-ERR: ^
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12 %s
2+
3+
# GFX12: s_prefetch_inst s[12:13], 0x10, s4, 2 ; encoding: [0x86,0x80,0x04,0xf4,0x10,0x00,0x00,0x08]
4+
0x86,0x80,0x04,0xf4,0x10,0x00,0x00,0x08
5+
6+
# GFX12: s_prefetch_inst s[14:15], 0x0, m0, 7 ; encoding: [0xc7,0x81,0x04,0xf4,0x00,0x00,0x00,0xfa]
7+
0xc7,0x81,0x04,0xf4,0x00,0x00,0x00,0xfa
8+
9+
# GFX12: s_prefetch_inst s[14:15], 0x7fffff, m0, 7 ; encoding: [0xc7,0x81,0x04,0xf4,0xff,0xff,0x7f,0xfa]
10+
0xc7,0x81,0x04,0xf4,0xff,0xff,0x7f,0xfa
11+
12+
# GFX12: s_prefetch_inst s[14:15], -0x1, m0, 7 ; encoding: [0xc7,0x81,0x04,0xf4,0xff,0xff,0xff,0xfa]
13+
0xc7,0x81,0x04,0xf4,0xff,0xff,0xff,0xfa
14+
15+
# GFX12: s_prefetch_inst s[14:15], 0x64, m0, 31 ; encoding: [0xc7,0x87,0x04,0xf4,0x64,0x00,0x00,0xfa]
16+
0xc7,0x87,0x04,0xf4,0x64,0x00,0x00,0xfa
17+
18+
# GFX12: s_prefetch_inst_pc_rel 0x64, s10, 7 ; encoding: [0xc0,0xa1,0x04,0xf4,0x64,0x00,0x00,0x14]
19+
0xc0,0xa1,0x04,0xf4,0x64,0x00,0x00,0x14
20+
21+
# GFX12: s_prefetch_data s[18:19], 0x64, s10, 7 ; encoding: [0xc9,0xc1,0x04,0xf4,0x64,0x00,0x00,0x14]
22+
0xc9,0xc1,0x04,0xf4,0x64,0x00,0x00,0x14
23+
24+
# GFX12: s_prefetch_data_pc_rel 0x64, s10, 7 ; encoding: [0xc0,0x01,0x05,0xf4,0x64,0x00,0x00,0x14]
25+
0xc0,0x01,0x05,0xf4,0x64,0x00,0x00,0x14
26+
27+
# GFX12: s_buffer_prefetch_data s[20:23], 0x64, s10, 7 ; encoding: [0xca,0xe1,0x04,0xf4,0x64,0x00,0x00,0x14]
28+
0xca,0xe1,0x04,0xf4,0x64,0x00,0x00,0x14
29+
30+
# GFX12: s_buffer_prefetch_data s[20:23], 0x64, null, 7 ; encoding: [0xca,0xe1,0x04,0xf4,0x64,0x00,0x00,0xf8]
31+
0xca,0xe1,0x04,0xf4,0x64,0x00,0x00,0xf8

0 commit comments

Comments
 (0)