@@ -13,9 +13,8 @@ define amdgpu_kernel void @global_load_tr_b64(ptr addrspace(1) %addr, ptr addrsp
1313; GFX12-SDAG-W32-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
1414; GFX12-SDAG-W32-NEXT: v_mov_b32_e32 v2, 0
1515; GFX12-SDAG-W32-NEXT: s_wait_kmcnt 0x0
16- ; GFX12-SDAG-W32-NEXT: global_load_tr_b64 v[0:1], v2, s[0:1] offset:32 th:TH_LOAD_NT
16+ ; GFX12-SDAG-W32-NEXT: global_load_tr_b64 v[0:1], v2, s[0:1] offset:32
1717; GFX12-SDAG-W32-NEXT: s_wait_loadcnt 0x0
18- ; GFX12-SDAG-W32-NEXT: global_inv scope:SCOPE_SYS
1918; GFX12-SDAG-W32-NEXT: global_store_b64 v2, v[0:1], s[2:3]
2019; GFX12-SDAG-W32-NEXT: s_nop 0
2120; GFX12-SDAG-W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -26,9 +25,8 @@ define amdgpu_kernel void @global_load_tr_b64(ptr addrspace(1) %addr, ptr addrsp
2625; GFX12-GISEL-W32-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
2726; GFX12-GISEL-W32-NEXT: v_mov_b32_e32 v2, 0
2827; GFX12-GISEL-W32-NEXT: s_wait_kmcnt 0x0
29- ; GFX12-GISEL-W32-NEXT: global_load_tr_b64 v[0:1], v2, s[0:1] offset:32 th:TH_LOAD_NT
28+ ; GFX12-GISEL-W32-NEXT: global_load_tr_b64 v[0:1], v2, s[0:1] offset:32
3029; GFX12-GISEL-W32-NEXT: s_wait_loadcnt 0x0
31- ; GFX12-GISEL-W32-NEXT: global_inv scope:SCOPE_SYS
3230; GFX12-GISEL-W32-NEXT: global_store_b64 v2, v[0:1], s[2:3]
3331; GFX12-GISEL-W32-NEXT: s_nop 0
3432; GFX12-GISEL-W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -46,9 +44,8 @@ define amdgpu_kernel void @global_load_tr_b128_i16(ptr addrspace(1) %addr, ptr a
4644; GFX12-SDAG-W32-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
4745; GFX12-SDAG-W32-NEXT: v_mov_b32_e32 v4, 0
4846; GFX12-SDAG-W32-NEXT: s_wait_kmcnt 0x0
49- ; GFX12-SDAG-W32-NEXT: global_load_tr_b128 v[0:3], v4, s[0:1] offset:32 th:TH_LOAD_NT
47+ ; GFX12-SDAG-W32-NEXT: global_load_tr_b128 v[0:3], v4, s[0:1] offset:32
5048; GFX12-SDAG-W32-NEXT: s_wait_loadcnt 0x0
51- ; GFX12-SDAG-W32-NEXT: global_inv scope:SCOPE_SYS
5249; GFX12-SDAG-W32-NEXT: global_store_b128 v4, v[0:3], s[2:3]
5350; GFX12-SDAG-W32-NEXT: s_nop 0
5451; GFX12-SDAG-W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -59,9 +56,8 @@ define amdgpu_kernel void @global_load_tr_b128_i16(ptr addrspace(1) %addr, ptr a
5956; GFX12-GISEL-W32-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
6057; GFX12-GISEL-W32-NEXT: v_mov_b32_e32 v4, 0
6158; GFX12-GISEL-W32-NEXT: s_wait_kmcnt 0x0
62- ; GFX12-GISEL-W32-NEXT: global_load_tr_b128 v[0:3], v4, s[0:1] offset:32 th:TH_LOAD_NT
59+ ; GFX12-GISEL-W32-NEXT: global_load_tr_b128 v[0:3], v4, s[0:1] offset:32
6360; GFX12-GISEL-W32-NEXT: s_wait_loadcnt 0x0
64- ; GFX12-GISEL-W32-NEXT: global_inv scope:SCOPE_SYS
6561; GFX12-GISEL-W32-NEXT: global_store_b128 v4, v[0:3], s[2:3]
6662; GFX12-GISEL-W32-NEXT: s_nop 0
6763; GFX12-GISEL-W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -79,9 +75,8 @@ define amdgpu_kernel void @global_load_tr_b128_half(ptr addrspace(1) %addr, ptr
7975; GFX12-SDAG-W32-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
8076; GFX12-SDAG-W32-NEXT: v_mov_b32_e32 v4, 0
8177; GFX12-SDAG-W32-NEXT: s_wait_kmcnt 0x0
82- ; GFX12-SDAG-W32-NEXT: global_load_tr_b128 v[0:3], v4, s[0:1] offset:32 th:TH_LOAD_NT
78+ ; GFX12-SDAG-W32-NEXT: global_load_tr_b128 v[0:3], v4, s[0:1] offset:32
8379; GFX12-SDAG-W32-NEXT: s_wait_loadcnt 0x0
84- ; GFX12-SDAG-W32-NEXT: global_inv scope:SCOPE_SYS
8580; GFX12-SDAG-W32-NEXT: global_store_b128 v4, v[0:3], s[2:3]
8681; GFX12-SDAG-W32-NEXT: s_nop 0
8782; GFX12-SDAG-W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -92,9 +87,8 @@ define amdgpu_kernel void @global_load_tr_b128_half(ptr addrspace(1) %addr, ptr
9287; GFX12-GISEL-W32-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
9388; GFX12-GISEL-W32-NEXT: v_mov_b32_e32 v4, 0
9489; GFX12-GISEL-W32-NEXT: s_wait_kmcnt 0x0
95- ; GFX12-GISEL-W32-NEXT: global_load_tr_b128 v[0:3], v4, s[0:1] offset:32 th:TH_LOAD_NT
90+ ; GFX12-GISEL-W32-NEXT: global_load_tr_b128 v[0:3], v4, s[0:1] offset:32
9691; GFX12-GISEL-W32-NEXT: s_wait_loadcnt 0x0
97- ; GFX12-GISEL-W32-NEXT: global_inv scope:SCOPE_SYS
9892; GFX12-GISEL-W32-NEXT: global_store_b128 v4, v[0:3], s[2:3]
9993; GFX12-GISEL-W32-NEXT: s_nop 0
10094; GFX12-GISEL-W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -112,9 +106,8 @@ define amdgpu_kernel void @global_load_tr_b128_bfloat(ptr addrspace(1) %addr, pt
112106; GFX12-SDAG-W32-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
113107; GFX12-SDAG-W32-NEXT: v_mov_b32_e32 v4, 0
114108; GFX12-SDAG-W32-NEXT: s_wait_kmcnt 0x0
115- ; GFX12-SDAG-W32-NEXT: global_load_tr_b128 v[0:3], v4, s[0:1] offset:32 th:TH_LOAD_NT
109+ ; GFX12-SDAG-W32-NEXT: global_load_tr_b128 v[0:3], v4, s[0:1] offset:32
116110; GFX12-SDAG-W32-NEXT: s_wait_loadcnt 0x0
117- ; GFX12-SDAG-W32-NEXT: global_inv scope:SCOPE_SYS
118111; GFX12-SDAG-W32-NEXT: global_store_b128 v4, v[0:3], s[2:3]
119112; GFX12-SDAG-W32-NEXT: s_nop 0
120113; GFX12-SDAG-W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -125,9 +118,8 @@ define amdgpu_kernel void @global_load_tr_b128_bfloat(ptr addrspace(1) %addr, pt
125118; GFX12-GISEL-W32-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
126119; GFX12-GISEL-W32-NEXT: v_mov_b32_e32 v4, 0
127120; GFX12-GISEL-W32-NEXT: s_wait_kmcnt 0x0
128- ; GFX12-GISEL-W32-NEXT: global_load_tr_b128 v[0:3], v4, s[0:1] offset:32 th:TH_LOAD_NT
121+ ; GFX12-GISEL-W32-NEXT: global_load_tr_b128 v[0:3], v4, s[0:1] offset:32
129122; GFX12-GISEL-W32-NEXT: s_wait_loadcnt 0x0
130- ; GFX12-GISEL-W32-NEXT: global_inv scope:SCOPE_SYS
131123; GFX12-GISEL-W32-NEXT: global_store_b128 v4, v[0:3], s[2:3]
132124; GFX12-GISEL-W32-NEXT: s_nop 0
133125; GFX12-GISEL-W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
0 commit comments