Skip to content

Commit a70f7da

Browse files
authored
[AMDGPU] gfx1250 flat and global prefetch MC support (#150455)
1 parent d96579b commit a70f7da

File tree

5 files changed

+147
-0
lines changed

5 files changed

+147
-0
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,12 @@ def FeatureInstFwdPrefetchBug : SubtargetFeature<"inst-fwd-prefetch-bug",
268268
"S_INST_PREFETCH instruction causes shader to hang"
269269
>;
270270

271+
def FeatureVmemPrefInsts : SubtargetFeature<"vmem-pref-insts",
272+
"HasVmemPrefInsts",
273+
"true",
274+
"Has flat_prefect_b8 and global_prefetch_b8 instructions"
275+
>;
276+
271277
def FeatureSafeSmemPrefetch : SubtargetFeature<"safe-smem-prefetch",
272278
"HasSafeSmemPrefetch",
273279
"true",
@@ -2027,6 +2033,7 @@ def FeatureISAVersion12_50 : FeatureSet<
20272033
FeatureFlatBufferGlobalAtomicFaddF64Inst,
20282034
FeatureMemoryAtomicFAddF32DenormalSupport,
20292035
FeatureKernargPreload,
2036+
FeatureVmemPrefInsts,
20302037
FeatureLshlAddU64Inst,
20312038
FeatureAddSubU64Insts,
20322039
FeatureLdsBarrierArriveAtomic,
@@ -2807,6 +2814,9 @@ def HasScalarDwordx3Loads : Predicate<"Subtarget->hasScalarDwordx3Loads()">;
28072814
def HasXF32Insts : Predicate<"Subtarget->hasXF32Insts()">,
28082815
AssemblerPredicate<(all_of FeatureXF32Insts)>;
28092816

2817+
def HasVmemPrefInsts : Predicate<"Subtarget->hasVmemPrefInsts()">,
2818+
AssemblerPredicate<(all_of FeatureVmemPrefInsts)>;
2819+
28102820
def HasAshrPkInsts : Predicate<"Subtarget->hasAshrPkInsts()">,
28112821
AssemblerPredicate<(all_of FeatureAshrPkInsts)>;
28122822

llvm/lib/Target/AMDGPU/FLATInstructions.td

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -464,6 +464,37 @@ class FLAT_Global_Invalidate_Writeback<string opName, SDPatternOperator node = n
464464
let sve = 0;
465465
}
466466

467+
class FLAT_Prefetch_Pseudo<string opName, dag addr = (ins VReg_64:$vaddr), string asm = " $vaddr"> :
468+
FLAT_Pseudo<opName, (outs), !con(addr, (ins flat_offset:$offset, CPol_0:$cpol)), asm#"$offset$cpol"> {
469+
let has_vdst = 0;
470+
let has_data = 0;
471+
let mayLoad = 1;
472+
let mayStore = 1;
473+
let VM_CNT = 0;
474+
let LGKM_CNT = 0;
475+
}
476+
477+
multiclass FLAT_Flat_Prefetch_Pseudo<string opName> {
478+
def "" : FLAT_Prefetch_Pseudo<opName>,
479+
GlobalSaddrTable<0, opName>;
480+
def _SADDR : FLAT_Prefetch_Pseudo<opName, (ins SReg_64:$saddr, VGPR_32:$vaddr), " $vaddr, $saddr">,
481+
GlobalSaddrTable<1, opName> {
482+
let OtherPredicates = [HasFlatGVSMode];
483+
let enabled_saddr = 1;
484+
}
485+
}
486+
487+
multiclass FLAT_Global_Prefetch_Pseudo<string opName> {
488+
let is_flat_global = 1, has_saddr = 1 in {
489+
def "" : FLAT_Prefetch_Pseudo<opName, (ins VReg_64:$vaddr), " $vaddr, off">,
490+
GlobalSaddrTable<0, opName>;
491+
def _SADDR : FLAT_Prefetch_Pseudo<opName, (ins SReg_64:$saddr, VGPR_32:$vaddr), " $vaddr, $saddr">,
492+
GlobalSaddrTable<1, opName> {
493+
let enabled_saddr = 1;
494+
}
495+
}
496+
}
497+
467498
class FlatScratchInst <string sv_op, string mode> {
468499
string SVOp = sv_op;
469500
string Mode = mode;
@@ -1218,6 +1249,11 @@ let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in
12181249
"global_atomic_pk_add_f16", VGPR_32, v2f16
12191250
>;
12201251

1252+
let SubtargetPredicate = HasVmemPrefInsts in {
1253+
defm FLAT_PREFETCH_B8 : FLAT_Flat_Prefetch_Pseudo<"flat_prefetch_b8">;
1254+
defm GLOBAL_PREFETCH_B8 : FLAT_Global_Prefetch_Pseudo<"global_prefetch_b8">;
1255+
}
1256+
12211257
//===----------------------------------------------------------------------===//
12221258
// Flat Patterns
12231259
//===----------------------------------------------------------------------===//
@@ -3210,6 +3246,9 @@ multiclass VFLAT_Real_Atomics_gfx1250<bits<8> op, string name = get_FLAT_ps<NAME
32103246
defm TENSOR_SAVE : VFLAT_Real_gfx1250<0x06e>;
32113247
defm TENSOR_STOP : VFLAT_Real_gfx1250<0x06f>;
32123248

3249+
defm FLAT_PREFETCH_B8 : VFLAT_Real_AllAddr_gfx1250<0x05d>;
3250+
defm GLOBAL_PREFETCH_B8 : VFLAT_Real_AllAddr_gfx1250<0x05d>;
3251+
32133252
defm GLOBAL_LOAD_TR_B128_w32 : VFLAT_Real_AllAddr_gfx1250<0x057, "global_load_tr16_b128">;
32143253
defm GLOBAL_LOAD_TR_B64_w32 : VFLAT_Real_AllAddr_gfx1250<0x058, "global_load_tr8_b64">;
32153254

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
245245
bool HasVMEMtoScalarWriteHazard = false;
246246
bool HasSMEMtoVectorWriteHazard = false;
247247
bool HasInstFwdPrefetchBug = false;
248+
bool HasVmemPrefInsts = false;
248249
bool HasSafeSmemPrefetch = false;
249250
bool HasVcmpxExecWARHazard = false;
250251
bool HasLdsBranchVmemWARHazard = false;
@@ -990,6 +991,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
990991

991992
bool hasPrefetch() const { return GFX12Insts; }
992993

994+
bool hasVmemPrefInsts() const { return HasVmemPrefInsts; }
995+
993996
bool hasSafeSmemPrefetch() const { return HasSafeSmemPrefetch; }
994997

995998
// Has s_cmpk_* instructions.

llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,58 @@ scratch_store_b32 v2, v5, s1 scale_offset
109109
// GFX12-ERR-NEXT:{{^}}scratch_store_b32 v2, v5, s1 scale_offset
110110
// GFX12-ERR-NEXT:{{^}} ^
111111

112+
flat_prefetch_b8 v[2:3]
113+
// GFX1250: flat_prefetch_b8 v[2:3] ; encoding: [0x7c,0x40,0x17,0xec,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
114+
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
115+
116+
flat_prefetch_b8 v[2:3] offset:1024 ; encoding: [0x7c,0x40,0x17,0xec,0x00,0x00,0x00,0x00,0x02,0x00,0xfc,0xff]
117+
// GFX1250: flat_prefetch_b8 v[2:3] offset:1024 ; encoding: [0x7c,0x40,0x17,0xec,0x00,0x00,0x00,0x00,0x02,0x00,0x04,0x00]
118+
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
119+
120+
flat_prefetch_b8 v[2:3] offset:-1024 ; encoding: [0x7c,0x40,0x17,0xec,0x00,0x00,0x00,0x00,0x02,0x00,0xfc,0xff]
121+
// GFX1250: flat_prefetch_b8 v[2:3] offset:-1024 ; encoding: [0x7c,0x40,0x17,0xec,0x00,0x00,0x00,0x00,0x02,0x00,0xfc,0xff]
122+
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
123+
124+
flat_prefetch_b8 v[2:3] offset:-1024 th:TH_LOAD_NT scope:SCOPE_SE ; encoding: [0x7c,0x40,0x17,0xec,0x00,0x00,0x14,0x00,0x02,0x00,0xfc,0xff]
125+
// GFX1250: flat_prefetch_b8 v[2:3] offset:-1024 th:TH_LOAD_NT scope:SCOPE_SE ; encoding: [0x7c,0x40,0x17,0xec,0x00,0x00,0x14,0x00,0x02,0x00,0xfc,0xff]
126+
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
127+
128+
flat_prefetch_b8 v[2:3] th:TH_LOAD_HT scope:SCOPE_CU ; encoding: [0x7c,0x40,0x17,0xec,0x00,0x00,0x24,0x00,0x02,0x00,0xfc,0xff]
129+
// GFX1250: flat_prefetch_b8 v[2:3] th:TH_LOAD_HT ; encoding: [0x7c,0x40,0x17,0xec,0x00,0x00,0x20,0x00,0x02,0x00,0x00,0x00]
130+
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
131+
132+
flat_prefetch_b8 v[2:3] offset:64 th:TH_LOAD_NT_RT scope:SCOPE_DEV ; encoding: [0x7c,0x40,0x17,0xec,0x00,0x00,0x24,0x00,0x02,0x00,0xfc,0xff]
133+
// GFX1250: flat_prefetch_b8 v[2:3] offset:64 th:TH_LOAD_NT_RT scope:SCOPE_DEV ; encoding: [0x7c,0x40,0x17,0xec,0x00,0x00,0x48,0x00,0x02,0x40,0x00,0x00]
134+
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
135+
136+
flat_prefetch_b8 v[2:3] th:TH_LOAD_HT
137+
// GFX1250: flat_prefetch_b8 v[2:3] th:TH_LOAD_HT ; encoding: [0x7c,0x40,0x17,0xec,0x00,0x00,0x20,0x00,0x02,0x00,0x00,0x00]
138+
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
139+
140+
flat_prefetch_b8 v[2:3] th:TH_LOAD_BYPASS scope:SCOPE_SYS
141+
// GFX1250: flat_prefetch_b8 v[2:3] th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0x40,0x17,0xec,0x00,0x00,0x3c,0x00,0x02,0x00,0x00,0x00]
142+
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
143+
144+
global_prefetch_b8 v[2:3], off offset:-1024 th:TH_LOAD_HT scope:SCOPE_SE
145+
// GFX1250: global_prefetch_b8 v[2:3], off offset:-1024 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x7c,0x40,0x17,0xee,0x00,0x00,0x24,0x00,0x02,0x00,0xfc,0xff]
146+
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
147+
148+
global_prefetch_b8 v4, s[2:3] offset:-1024 th:TH_LOAD_NT scope:SCOPE_DEV
149+
// GFX1250: global_prefetch_b8 v4, s[2:3] offset:-1024 th:TH_LOAD_NT scope:SCOPE_DEV ; encoding: [0x02,0x40,0x17,0xee,0x00,0x00,0x18,0x00,0x04,0x00,0xfc,0xff]
150+
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
151+
152+
global_prefetch_b8 v4, s[2:3] th:TH_LOAD_RT_NT scope:SCOPE_CU
153+
// GFX1250: global_prefetch_b8 v4, s[2:3] th:TH_LOAD_RT_NT ; encoding: [0x02,0x40,0x17,0xee,0x00,0x00,0x50,0x00,0x04,0x00,0x00,0x00]
154+
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
155+
156+
global_prefetch_b8 v[2:3], off th:TH_LOAD_BYPASS scope:SCOPE_SYS
157+
// GFX1250: global_prefetch_b8 v[2:3], off th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0x40,0x17,0xee,0x00,0x00,0x3c,0x00,0x02,0x00,0x00,0x00]
158+
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
159+
160+
global_prefetch_b8 v[2:3], off offset:64 th:TH_LOAD_NT_RT scope:SCOPE_DEV
161+
// GFX1250: global_prefetch_b8 v[2:3], off offset:64 th:TH_LOAD_NT_RT scope:SCOPE_DEV ; encoding: [0x7c,0x40,0x17,0xee,0x00,0x00,0x48,0x00,0x02,0x40,0x00,0x00]
162+
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
163+
112164
tensor_save s[0:1]
113165
// GFX1250: tensor_save s[0:1] ; encoding: [0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
114166
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
@@ -484,3 +536,7 @@ flat_store_d16_hi_b16 v2, v2, s[2:3] offset:64 scale_offset
484536
flat_store_d16_hi_b8 v2, v2, s[2:3] offset:64 scale_offset
485537
// GFX1250: flat_store_d16_hi_b8 v2, v2, s[2:3] offset:64 scale_offset ; encoding: [0x02,0x00,0x09,0xec,0x00,0x00,0x01,0x01,0x02,0x40,0x00,0x00]
486538
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: not a valid operand.
539+
540+
flat_prefetch_b8 v3, s[2:3]
541+
// GFX1250: flat_prefetch_b8 v3, s[2:3] ; encoding: [0x02,0x40,0x17,0xec,0x00,0x00,0x00,0x00,0x03,0x00,0x00,0x00]
542+
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,9 @@
240240
# GFX1250: flat_load_u8 v1, v2, s[2:3] offset:64 scale_offset ; encoding: [0x02,0x00,0x04,0xec,0x01,0x00,0x01,0x00,0x02,0x40,0x00,0x00]
241241
0x02,0x00,0x04,0xec,0x01,0x00,0x01,0x00,0x02,0x40,0x00,0x00
242242

243+
# GFX1250: flat_prefetch_b8 v3, s[2:3] ; encoding: [0x02,0x40,0x17,0xec,0x00,0x00,0x00,0x00,0x03,0x00,0x00,0x00]
244+
0x02,0x40,0x17,0xec,0x00,0x00,0x00,0x00,0x03,0x00,0x00,0x00
245+
243246
# GFX1250: flat_store_b128 v2, v[2:5], s[2:3] offset:64 scale_offset ; encoding: [0x02,0x40,0x07,0xec,0x00,0x00,0x01,0x01,0x02,0x40,0x00,0x00]
244247
0x02,0x40,0x07,0xec,0x00,0x00,0x01,0x01,0x02,0x40,0x00,0x00
245248

@@ -3045,6 +3048,42 @@
30453048
# GFX1250: scratch_store_b32 v2, v5, s1 scale_offset ; encoding: [0x01,0x80,0x06,0xed,0x00,0x00,0x83,0x02,0x02,0x00,0x00,0x00]
30463049
0x01,0x80,0x06,0xed,0x00,0x00,0x83,0x02,0x02,0x00,0x00,0x00
30473050

3051+
# GFX1250: flat_prefetch_b8 v[2:3] ; encoding: [0x7c,0x40,0x17,0xec,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
3052+
0x7c,0x40,0x17,0xec,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00
3053+
3054+
# GFX1250: flat_prefetch_b8 v[2:3] offset:-1024 ; encoding: [0x7c,0x40,0x17,0xec,0x00,0x00,0x00,0x00,0x02,0x00,0xfc,0xff]
3055+
0x7c,0x40,0x17,0xec,0x00,0x00,0x00,0x00,0x02,0x00,0xfc,0xff
3056+
3057+
# GFX1250: flat_prefetch_b8 v[2:3] offset:-1024 th:TH_LOAD_NT scope:SCOPE_SE ; encoding: [0x7c,0x40,0x17,0xec,0x00,0x00,0x14,0x00,0x02,0x00,0xfc,0xff]
3058+
0x7c,0x40,0x17,0xec,0x00,0x00,0x14,0x00,0x02,0x00,0xfc,0xff
3059+
3060+
# GFX1250: flat_prefetch_b8 v[2:3] offset:-64 th:TH_LOAD_RT_NT scope:SCOPE_SYS ; encoding: [0x7c,0x40,0x17,0xec,0x00,0x00,0x5c,0x00,0x02,0xc0,0xff,0xff]
3061+
0x7c,0x40,0x17,0xec,0x00,0x00,0x5c,0x00,0x02,0xc0,0xff,0xff
3062+
3063+
# GFX1250: flat_prefetch_b8 v[2:3] offset:1024 ; encoding: [0x7c,0x40,0x17,0xec,0x00,0x00,0x00,0x00,0x02,0x00,0x04,0x00]
3064+
0x7c,0x40,0x17,0xec,0x00,0x00,0x00,0x00,0x02,0x00,0x04,0x00
3065+
3066+
# GFX1250: flat_prefetch_b8 v[2:3] th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0x40,0x17,0xec,0x00,0x00,0x3c,0x00,0x02,0x00,0x00,0x00]
3067+
0x7c,0x40,0x17,0xec,0x00,0x00,0x3c,0x00,0x02,0x00,0x00,0x00
3068+
3069+
# GFX1250: flat_prefetch_b8 v[2:3] th:TH_LOAD_HT ; encoding: [0x7c,0x40,0x17,0xec,0x00,0x00,0x20,0x00,0x02,0x00,0x00,0x00]
3070+
0x7c,0x40,0x17,0xec,0x00,0x00,0x20,0x00,0x02,0x00,0x00,0x00
3071+
3072+
# GFX1250: global_prefetch_b8 v4, s[2:3] offset:-1024 th:TH_LOAD_NT scope:SCOPE_DEV ; encoding: [0x02,0x40,0x17,0xee,0x00,0x00,0x18,0x00,0x04,0x00,0xfc,0xff]
3073+
0x02,0x40,0x17,0xee,0x00,0x00,0x18,0x00,0x04,0x00,0xfc,0xff
3074+
3075+
# GFX1250: global_prefetch_b8 v4, s[2:3] th:TH_LOAD_RT_NT ; encoding: [0x02,0x40,0x17,0xee,0x00,0x00,0x50,0x00,0x04,0x00,0x00,0x00]
3076+
0x02,0x40,0x17,0xee,0x00,0x00,0x50,0x00,0x04,0x00,0x00,0x00
3077+
3078+
# GFX1250: global_prefetch_b8 v[2:3], off offset:-1024 th:TH_LOAD_HT scope:SCOPE_SE ; encoding: [0x7c,0x40,0x17,0xee,0x00,0x00,0x24,0x00,0x02,0x00,0xfc,0xff]
3079+
0x7c,0x40,0x17,0xee,0x00,0x00,0x24,0x00,0x02,0x00,0xfc,0xff
3080+
3081+
# GFX1250: global_prefetch_b8 v[2:3], off offset:64 th:TH_LOAD_NT_RT scope:SCOPE_DEV ; encoding: [0x7c,0x40,0x17,0xee,0x00,0x00,0x48,0x00,0x02,0x40,0x00,0x00]
3082+
0x7c,0x40,0x17,0xee,0x00,0x00,0x48,0x00,0x02,0x40,0x00,0x00
3083+
3084+
# GFX1250: global_prefetch_b8 v[2:3], off th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0x40,0x17,0xee,0x00,0x00,0x3c,0x00,0x02,0x00,0x00,0x00]
3085+
0x7c,0x40,0x17,0xee,0x00,0x00,0x3c,0x00,0x02,0x00,0x00,0x00
3086+
30483087
# GFX1250: tensor_save s[0:1] ; encoding: [0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
30493088
0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
30503089

0 commit comments

Comments
 (0)