-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[AMDGPU] MC support for async load and store on gfx1250 #151030
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Member
|
@llvm/pr-subscribers-mc @llvm/pr-subscribers-backend-amdgpu Author: Changpeng Fang (changpeng) ChangesPatch is 39.99 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/151030.diff 5 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 7207c251994ad..0f172e0ddee56 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -369,31 +369,68 @@ multiclass FLAT_Global_Store_Pseudo_t16<string opName> {
}
}
-class FLAT_Global_Load_LDS_Pseudo <string opName, bit EnableSaddr = 0> : FLAT_Pseudo<
+// Async loads, introduced in gfx1250, will store directly
+// to a DS address in vdst (they will not use M0 for DS addess).
+class FLAT_Global_Load_LDS_Pseudo <string opName, bit EnableSaddr = 0, bit IsAsync = 0> : FLAT_Pseudo<
opName,
(outs ),
!con(
- !if(EnableSaddr, (ins SReg_64:$saddr, VGPR_32:$vaddr), (ins VReg_64:$vaddr)),
- (ins flat_offset:$offset, CPol_0:$cpol)),
- " $vaddr"#!if(EnableSaddr, ", $saddr", ", off")#"$offset$cpol"> {
- let LGKM_CNT = 1;
+ !if(IsAsync, (ins VGPR_32:$vdst), (ins)),
+ !if(EnableSaddr, (ins SReg_64:$saddr, VGPR_32:$vaddr), (ins VReg_64:$vaddr)),
+ (ins flat_offset:$offset, CPol_0:$cpol)),
+ !if(IsAsync, " $vdst,", "")#" $vaddr"#!if(EnableSaddr, ", $saddr", ", off")#"$offset$cpol"> {
+ let LGKM_CNT = !not(IsAsync);
+ let VM_CNT = !not(IsAsync);
+ let ASYNC_CNT = IsAsync;
let is_flat_global = 1;
let lds = 1;
let has_data = 0;
+ let has_vdst = IsAsync; // vdst for ds address with IsAsync
+ let mayLoad = 1;
+ let mayStore = 1;
+ let has_saddr = 1;
+ let enabled_saddr = EnableSaddr;
+ let VALU = 1;
+ let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
+ let Uses = !if(IsAsync, [EXEC, ASYNCcnt], [M0, EXEC]);
+ let Defs = !if(IsAsync, [ASYNCcnt], []);
+ let SchedRW = [WriteVMEM, WriteLDS];
+}
+
+multiclass FLAT_Global_Load_LDS_Pseudo<string opName, bit IsAsync = 0> {
+ def "" : FLAT_Global_Load_LDS_Pseudo<opName, 0, IsAsync>,
+ GlobalSaddrTable<0, opName>;
+ def _SADDR : FLAT_Global_Load_LDS_Pseudo<opName, 1, IsAsync>,
+ GlobalSaddrTable<1, opName>;
+}
+
+class FLAT_Global_STORE_LDS_Pseudo <string opName, bit EnableSaddr = 0> : FLAT_Pseudo<
+ opName,
+ (outs ),
+ !con(
+ !if(EnableSaddr, (ins SReg_64:$saddr, VGPR_32:$vaddr), (ins VReg_64:$vaddr)), (ins VGPR_32:$vdata),
+ (ins flat_offset:$offset, CPol_0:$cpol)),
+ " $vaddr, $vdata"#!if(EnableSaddr, ", $saddr", ", off")#"$offset$cpol"> {
+ let VM_CNT = 0;
+ let ASYNC_CNT = 1;
+ let is_flat_global = 1;
+ let lds = 1;
+ let has_data = 1; // vdata for ds address
let has_vdst = 0;
let mayLoad = 1;
let mayStore = 1;
let has_saddr = 1;
let enabled_saddr = EnableSaddr;
let VALU = 1;
- let Uses = [M0, EXEC];
+ let Uses = [EXEC, ASYNCcnt];
+ let Defs = [ASYNCcnt];
let SchedRW = [WriteVMEM, WriteLDS];
}
-multiclass FLAT_Global_Load_LDS_Pseudo<string opName> {
- def "" : FLAT_Global_Load_LDS_Pseudo<opName>,
+multiclass FLAT_Global_STORE_LDS_Pseudo<string opName> {
+ def "" : FLAT_Global_STORE_LDS_Pseudo<opName>,
GlobalSaddrTable<0, opName>;
- def _SADDR : FLAT_Global_Load_LDS_Pseudo<opName, 1>,
+ def _SADDR : FLAT_Global_STORE_LDS_Pseudo<opName, 1>,
GlobalSaddrTable<1, opName>;
}
@@ -1156,6 +1193,15 @@ let SubtargetPredicate = isGFX12Plus in {
let SubtargetPredicate = isGFX1250Plus in {
+defm GLOBAL_LOAD_ASYNC_TO_LDS_B8 : FLAT_Global_Load_LDS_Pseudo<"global_load_async_to_lds_b8", 1>;
+defm GLOBAL_LOAD_ASYNC_TO_LDS_B32 : FLAT_Global_Load_LDS_Pseudo<"global_load_async_to_lds_b32", 1>;
+defm GLOBAL_LOAD_ASYNC_TO_LDS_B64 : FLAT_Global_Load_LDS_Pseudo<"global_load_async_to_lds_b64", 1>;
+defm GLOBAL_LOAD_ASYNC_TO_LDS_B128 : FLAT_Global_Load_LDS_Pseudo<"global_load_async_to_lds_b128", 1>;
+defm GLOBAL_STORE_ASYNC_FROM_LDS_B8 : FLAT_Global_STORE_LDS_Pseudo<"global_store_async_from_lds_b8">;
+defm GLOBAL_STORE_ASYNC_FROM_LDS_B32 : FLAT_Global_STORE_LDS_Pseudo<"global_store_async_from_lds_b32">;
+defm GLOBAL_STORE_ASYNC_FROM_LDS_B64 : FLAT_Global_STORE_LDS_Pseudo<"global_store_async_from_lds_b64">;
+defm GLOBAL_STORE_ASYNC_FROM_LDS_B128 : FLAT_Global_STORE_LDS_Pseudo<"global_store_async_from_lds_b128">;
+
def TENSOR_SAVE : FLAT_Global_Tensor_Pseudo<"tensor_save", 1>;
def TENSOR_STOP : FLAT_Global_Tensor_Pseudo<"tensor_stop">;
} // End SubtargetPredicate = isGFX1250Plus
@@ -3374,6 +3420,15 @@ defm GLOBAL_LOAD_MONITOR_B32 : VFLAT_Real_AllAddr_gfx1250<0x070>;
defm GLOBAL_LOAD_MONITOR_B64 : VFLAT_Real_AllAddr_gfx1250<0x071>;
defm GLOBAL_LOAD_MONITOR_B128 : VFLAT_Real_AllAddr_gfx1250<0x072>;
+defm GLOBAL_LOAD_ASYNC_TO_LDS_B8 : VFLAT_Real_AllAddr_gfx1250<0x5f>;
+defm GLOBAL_LOAD_ASYNC_TO_LDS_B32 : VFLAT_Real_AllAddr_gfx1250<0x60>;
+defm GLOBAL_LOAD_ASYNC_TO_LDS_B64 : VFLAT_Real_AllAddr_gfx1250<0x61>;
+defm GLOBAL_LOAD_ASYNC_TO_LDS_B128 : VFLAT_Real_AllAddr_gfx1250<0x62>;
+defm GLOBAL_STORE_ASYNC_FROM_LDS_B8 : VFLAT_Real_AllAddr_gfx1250<0x63>;
+defm GLOBAL_STORE_ASYNC_FROM_LDS_B32 : VFLAT_Real_AllAddr_gfx1250<0x64>;
+defm GLOBAL_STORE_ASYNC_FROM_LDS_B64 : VFLAT_Real_AllAddr_gfx1250<0x65>;
+defm GLOBAL_STORE_ASYNC_FROM_LDS_B128 : VFLAT_Real_AllAddr_gfx1250<0x66>;
+
defm GLOBAL_LOAD_TR_B128_w32 : VFLAT_Real_AllAddr_gfx1250<0x057, "global_load_tr16_b128">;
defm GLOBAL_LOAD_TR_B64_w32 : VFLAT_Real_AllAddr_gfx1250<0x058, "global_load_tr8_b64">;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index b5b3cc97569ed..83e63ac4e9777 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -732,7 +732,14 @@ bool isGenericAtomic(unsigned Opc) {
}
bool isAsyncStore(unsigned Opc) {
- return false; // placeholder before async store implementation.
+ return Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B8_gfx1250 ||
+ Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B32_gfx1250 ||
+ Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B64_gfx1250 ||
+ Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B128_gfx1250 ||
+ Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B8_SADDR_gfx1250 ||
+ Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B32_SADDR_gfx1250 ||
+ Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B64_SADDR_gfx1250 ||
+ Opc == GLOBAL_STORE_ASYNC_FROM_LDS_B128_SADDR_gfx1250;
}
bool isTensorStore(unsigned Opc) {
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s
index b9eb2d2dc8c70..c5288a76e5721 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s
@@ -249,6 +249,250 @@ flat_load_monitor_b32 v1, v[2:3] offset:64
// GFX1250: flat_load_monitor_b32 v1, v[2:3] offset:64 ; encoding: [0x7c,0x00,0x1c,0xec,0x01,0x00,0x00,0x00,0x02,0x40,0x00,0x00]
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+flat_load_monitor_b32 v1, v[2:3] offset:-64 th:TH_LOAD_BYPASS scope:SCOPE_SYS
+// GFX1250: flat_load_monitor_b32 v1, v[2:3] offset:-64 th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0x00,0x1c,0xec,0x01,0x00,0x3c,0x00,0x02,0xc0,0xff,0xff]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+flat_load_monitor_b64 v[0:1], v[2:3]
+// GFX1250: flat_load_monitor_b64 v[0:1], v[2:3] ; encoding: [0x7c,0x40,0x1c,0xec,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+flat_load_monitor_b64 v[0:1], v[2:3] offset:64
+// GFX1250: flat_load_monitor_b64 v[0:1], v[2:3] offset:64 ; encoding: [0x7c,0x40,0x1c,0xec,0x00,0x00,0x00,0x00,0x02,0x40,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+flat_load_monitor_b64 v[0:1], v[2:3] offset:-64 th:TH_LOAD_BYPASS scope:SCOPE_SYS
+// GFX1250: flat_load_monitor_b64 v[0:1], v[2:3] offset:-64 th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0x40,0x1c,0xec,0x00,0x00,0x3c,0x00,0x02,0xc0,0xff,0xff]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+flat_load_monitor_b128 v[0:3], v[4:5]
+// GFX1250: flat_load_monitor_b128 v[0:3], v[4:5] ; encoding: [0x7c,0x80,0x1c,0xec,0x00,0x00,0x00,0x00,0x04,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+flat_load_monitor_b128 v[0:3], v[4:5] offset:64
+// GFX1250: flat_load_monitor_b128 v[0:3], v[4:5] offset:64 ; encoding: [0x7c,0x80,0x1c,0xec,0x00,0x00,0x00,0x00,0x04,0x40,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+flat_load_monitor_b128 v[0:3], v[4:5] offset:-64 th:TH_LOAD_BYPASS scope:SCOPE_SYS
+// GFX1250: flat_load_monitor_b128 v[0:3], v[4:5] offset:-64 th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0x80,0x1c,0xec,0x00,0x00,0x3c,0x00,0x04,0xc0,0xff,0xff]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+flat_load_monitor_b32 v1, v2, s[4:5] offset:64 scale_offset
+// GFX1250: flat_load_monitor_b32 v1, v2, s[4:5] offset:64 scale_offset ; encoding: [0x04,0x00,0x1c,0xec,0x01,0x00,0x01,0x00,0x02,0x40,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+flat_load_monitor_b64 v[2:3], v2, s[4:5] offset:64 scale_offset
+// GFX1250: flat_load_monitor_b64 v[2:3], v2, s[4:5] offset:64 scale_offset ; encoding: [0x04,0x40,0x1c,0xec,0x02,0x00,0x01,0x00,0x02,0x40,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+global_store_async_from_lds_b8 v[2:3], v1, off th:TH_STORE_BYPASS scope:SCOPE_SYS
+// GFX1250: global_store_async_from_lds_b8 v[2:3], v1, off th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0xc0,0x18,0xee,0x00,0x00,0xbc,0x00,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+global_store_async_from_lds_b8 v[2:3], v1, off offset:64
+// GFX1250: global_store_async_from_lds_b8 v[2:3], v1, off offset:64 ; encoding: [0x7c,0xc0,0x18,0xee,0x00,0x00,0x80,0x00,0x02,0x40,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+global_store_async_from_lds_b8 v[2:3], v1, off offset:-64
+// GFX1250: global_store_async_from_lds_b8 v[2:3], v1, off offset:-64 ; encoding: [0x7c,0xc0,0x18,0xee,0x00,0x00,0x80,0x00,0x02,0xc0,0xff,0xff]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+global_store_async_from_lds_b8 v2, v1, s[2:3] th:TH_STORE_NT_HT scope:SCOPE_DEV
+// GFX1250: global_store_async_from_lds_b8 v2, v1, s[2:3] th:TH_STORE_NT_HT scope:SCOPE_DEV ; encoding: [0x02,0xc0,0x18,0xee,0x00,0x00,0xe8,0x00,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+global_store_async_from_lds_b8 v2, v1, s[2:3] offset:64
+// GFX1250: global_store_async_from_lds_b8 v2, v1, s[2:3] offset:64 ; encoding: [0x02,0xc0,0x18,0xee,0x00,0x00,0x80,0x00,0x02,0x40,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+global_store_async_from_lds_b8 v2, v1, s[2:3] offset:-64
+// GFX1250: global_store_async_from_lds_b8 v2, v1, s[2:3] offset:-64 ; encoding: [0x02,0xc0,0x18,0xee,0x00,0x00,0x80,0x00,0x02,0xc0,0xff,0xff]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+global_store_async_from_lds_b32 v[2:3], v1, off th:TH_STORE_BYPASS scope:SCOPE_SYS
+// GFX1250: global_store_async_from_lds_b32 v[2:3], v1, off th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0x00,0x19,0xee,0x00,0x00,0xbc,0x00,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+global_store_async_from_lds_b32 v[2:3], v1, off offset:64
+// GFX1250: global_store_async_from_lds_b32 v[2:3], v1, off offset:64 ; encoding: [0x7c,0x00,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0x40,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+global_store_async_from_lds_b32 v[2:3], v1, off offset:-64
+// GFX1250: global_store_async_from_lds_b32 v[2:3], v1, off offset:-64 ; encoding: [0x7c,0x00,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0xc0,0xff,0xff]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+global_store_async_from_lds_b32 v2, v1, s[2:3] th:TH_STORE_NT_HT scope:SCOPE_DEV
+// GFX1250: global_store_async_from_lds_b32 v2, v1, s[2:3] th:TH_STORE_NT_HT scope:SCOPE_DEV ; encoding: [0x02,0x00,0x19,0xee,0x00,0x00,0xe8,0x00,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+global_store_async_from_lds_b32 v2, v1, s[2:3] offset:64
+// GFX1250: global_store_async_from_lds_b32 v2, v1, s[2:3] offset:64 ; encoding: [0x02,0x00,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0x40,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+global_store_async_from_lds_b32 v2, v1, s[2:3] offset:-64
+// GFX1250: global_store_async_from_lds_b32 v2, v1, s[2:3] offset:-64 ; encoding: [0x02,0x00,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0xc0,0xff,0xff]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+global_store_async_from_lds_b64 v[2:3], v1, off th:TH_STORE_BYPASS scope:SCOPE_SYS
+// GFX1250: global_store_async_from_lds_b64 v[2:3], v1, off th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0x40,0x19,0xee,0x00,0x00,0xbc,0x00,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+global_store_async_from_lds_b64 v[2:3], v1, off offset:64
+// GFX1250: global_store_async_from_lds_b64 v[2:3], v1, off offset:64 ; encoding: [0x7c,0x40,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0x40,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+global_store_async_from_lds_b64 v[2:3], v1, off offset:-64
+// GFX1250: global_store_async_from_lds_b64 v[2:3], v1, off offset:-64 ; encoding: [0x7c,0x40,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0xc0,0xff,0xff]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+global_store_async_from_lds_b64 v2, v1, s[2:3] th:TH_STORE_NT_HT scope:SCOPE_DEV
+// GFX1250: global_store_async_from_lds_b64 v2, v1, s[2:3] th:TH_STORE_NT_HT scope:SCOPE_DEV ; encoding: [0x02,0x40,0x19,0xee,0x00,0x00,0xe8,0x00,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+global_store_async_from_lds_b64 v2, v1, s[2:3] offset:64
+// GFX1250: global_store_async_from_lds_b64 v2, v1, s[2:3] offset:64 ; encoding: [0x02,0x40,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0x40,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+global_store_async_from_lds_b64 v2, v1, s[2:3] offset:-64
+// GFX1250: global_store_async_from_lds_b64 v2, v1, s[2:3] offset:-64 ; encoding: [0x02,0x40,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0xc0,0xff,0xff]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+global_store_async_from_lds_b128 v[2:3], v1, off th:TH_STORE_BYPASS scope:SCOPE_SYS
+// GFX1250: global_store_async_from_lds_b128 v[2:3], v1, off th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0x80,0x19,0xee,0x00,0x00,0xbc,0x00,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+global_store_async_from_lds_b128 v[2:3], v1, off offset:64
+// GFX1250: global_store_async_from_lds_b128 v[2:3], v1, off offset:64 ; encoding: [0x7c,0x80,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0x40,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+global_store_async_from_lds_b128 v[2:3], v1, off offset:-64
+// GFX1250: global_store_async_from_lds_b128 v[2:3], v1, off offset:-64 ; encoding: [0x7c,0x80,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0xc0,0xff,0xff]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+global_store_async_from_lds_b128 v2, v1, s[2:3] th:TH_STORE_NT_HT scope:SCOPE_DEV
+// GFX1250: global_store_async_from_lds_b128 v2, v1, s[2:3] th:TH_STORE_NT_HT scope:SCOPE_DEV ; encoding: [0x02,0x80,0x19,0xee,0x00,0x00,0xe8,0x00,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+global_store_async_from_lds_b128 v2, v1, s[2:3] offset:64
+// GFX1250: global_store_async_from_lds_b128 v2, v1, s[2:3] offset:64 ; encoding: [0x02,0x80,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0x40,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+global_store_async_from_lds_b128 v2, v1, s[2:3] offset:-64
+// GFX1250: global_store_async_from_lds_b128 v2, v1, s[2:3] offset:-64 ; encoding: [0x02,0x80,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0xc0,0xff,0xff]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+global_store_async_from_lds_b32 v2, v1, s[4:5] scale_offset th:TH_STORE_BYPASS scope:SCOPE_SYS
+// GFX1250: global_store_async_from_lds_b32 v2, v1, s[4:5] scale_offset th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x04,0x00,0x19,0xee,0x00,0x00,0xbd,0x00,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+global_store_async_from_lds_b64 v2, v1, s[4:5] scale_offset th:TH_STORE_BYPASS scope:SCOPE_SYS
+// GFX1250: global_store_async_from_lds_b64 v2, v1, s[4:5] scale_offset th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x04,0x40,0x19,0xee,0x00,0x00,0xbd,0x00,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+global_load_async_to_lds_b8 v1, v[2:3], off th:TH_LOAD_BYPASS scope:SCOPE_SYS
+// GFX1250: global_load_async_to_lds_b8 v1, v[2:3], off th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0xc0,0x17,0xee,0x01,0x00,0x3c,0x00,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+global_load_async_to_lds_b8 v1, v[2:3], off offset:64
+// GFX1250: global_load_async_to_lds_b8 v1, v[2:3], off offset:64 ; encoding: [0x7c,0xc0,0x17,0xee,0x01,0x00,0x00,0x00,0x02,0x40,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+global_load_async_to_lds_b8 v1, v[2:3], off offset:-64
+// GFX1250: global_load_async_to_lds_b8 v1, v[2:3], off offset:-64 ; encoding: [0x7c,0xc0,0x17,0xee,0x01,0x00,0x00,0x00,0x02,0xc0,0xff,0xff]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+global_load_async_to_lds_b8 v1, v2, s[2:3] th:TH_LOAD_NT_HT scope:SCOPE_DEV
+// GFX1250: global_load_async_to_lds_b8 v1, v2, s[2:3] th:TH_LOAD_NT_HT scope:SCOPE_DEV ; encoding: [0x02,0xc0,0x17,0xee,0x01,0x00,0x68,0x00,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+global_load_async_to_lds_b8 v1, v2, s[2:3] offset:64
+// GFX1250: global_load_async_to_lds_b8 v1, v2, s[2:3] offset:64 ; encoding: [0x02,0xc0,0x17,0xee,0x01,0x00,0x00,0x00,0x02,0x40,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+global_load_async_to_lds_b8 v1, v2, s[2:3] offset:-64
+// GFX1250: global_load_async_to_lds_b8 v1, v2, s[2:3] offset:-64 ; encoding: [0x02,0xc0,0x17,0xee,0x01,0x00,0x00,0x00,0x02,0xc0,0xff,0xff]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+global_load_async_to_lds_b32 v1, v[2:3], off th:TH_LOAD_BYPASS scope:SCOPE_SYS
+// GFX1250: global_load_async_to_lds_b32 v1, v[2:3], off th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0x00,0x18,0xee,0x01,0x00,0x3c,0x00,0x02,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+global_load_async_to_lds_b32 v1, v[2:3], off offset:64
+// GFX1250: global_load_async_to_lds_b32 v1, v[2:3], off offset:64 ; encoding: [0x7c,0x00,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0x40,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+global_load_async_to_lds_b32 v1, v[2:3], off offset:-64
+// GFX1250: global_load_async_to_lds_b32 v1, v[2:3], off offset:-64 ; encoding: [0x7c,0x00,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0xc0,0xff,0xff]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+global_load_async_to_lds_b32 v1, v2, s[2:3] th:TH_LOAD_NT_HT scope:SCOPE_DEV
+// GFX1250: global_load_async_to_lds_b32 v1, v2, s[2:3] th:TH_LOAD_NT_HT scope:SCOPE_DEV ; encoding: [0x02,0...
[truncated]
|
shiltian
approved these changes
Jul 28, 2025
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
No description provided.