Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -913,6 +913,8 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,

addRulesForGOpcs({G_ABS}, Standard).Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt}});

addRulesForGOpcs({G_FENCE}).Any({{{}}, {{}, {}}});

addRulesForGOpcs({G_READSTEADYCOUNTER, G_READCYCLECOUNTER}, Standard)
.Uni(S64, {{Sgpr64}, {}});

Expand Down
388 changes: 388 additions & 0 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/fence.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,388 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s

@global_var = addrspace(1) global i32 0, align 4

define amdgpu_kernel void @fence_release(ptr addrspace(1) %ptr, i32 %val1, i32 %val2) {
; GFX9-LABEL: fence_release:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, global_var@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, global_var@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v1, s2
; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-NEXT: v_mov_b32_e32 v1, s3
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v0, v1, s[4:5]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
;
; GFX11-LABEL: fence_release:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
; GFX11-NEXT: s_getpc_b64 s[4:5]
; GFX11-NEXT: s_add_u32 s4, s4, global_var@gotpcrel32@lo+4
; GFX11-NEXT: s_addc_u32 s5, s5, global_var@gotpcrel32@hi+12
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v1, s2 :: v_dual_mov_b32 v2, s3
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: global_store_b32 v0, v2, s[4:5] dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: fence_release:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
; GFX12-NEXT: s_getpc_b64 s[4:5]
; GFX12-NEXT: s_sext_i32_i16 s5, s5
; GFX12-NEXT: s_add_co_u32 s4, s4, global_var@gotpcrel32@lo+8
; GFX12-NEXT: s_add_co_ci_u32 s5, s5, global_var@gotpcrel32@hi+16
; GFX12-NEXT: v_mov_b32_e32 v0, 0
; GFX12-NEXT: s_load_b64 s[4:5], s[4:5], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_dual_mov_b32 v1, s2 :: v_dual_mov_b32 v2, s3
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-NEXT: global_wb scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: global_store_b32 v0, v2, s[4:5] scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: s_endpgm
store i32 %val1, ptr addrspace(1) %ptr
fence release
store volatile i32 %val2, ptr addrspace(1) @global_var
ret void
}

define amdgpu_kernel void @fence_acquire(ptr addrspace(1) %ptr) {
; GFX9-LABEL: fence_acquire:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: buffer_wbinvl1_vol
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_load_dword s2, s[0:1], 0x0
; GFX9-NEXT: s_getpc_b64 s[0:1]
; GFX9-NEXT: s_add_u32 s0, s0, global_var@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, global_var@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s2
; GFX9-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
;
; GFX11-LABEL: fence_acquire:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x0
; GFX11-NEXT: s_getpc_b64 s[0:1]
; GFX11-NEXT: s_add_u32 s0, s0, global_var@gotpcrel32@lo+4
; GFX11-NEXT: s_addc_u32 s1, s1, global_var@gotpcrel32@hi+12
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1] dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: fence_acquire:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: v_mov_b32_e32 v1, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_b32 s2, s[0:1], 0x0
; GFX12-NEXT: s_getpc_b64 s[0:1]
; GFX12-NEXT: s_sext_i32_i16 s1, s1
; GFX12-NEXT: s_add_co_u32 s0, s0, global_var@gotpcrel32@lo+8
; GFX12-NEXT: s_add_co_ci_u32 s1, s1, global_var@gotpcrel32@hi+16
; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_mov_b32_e32 v0, s2
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_store_b32 v1, v0, s[0:1] scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: s_endpgm
fence acquire
%val = load i32, ptr addrspace(1) %ptr
store volatile i32 %val, ptr addrspace(1) @global_var
ret void
}

define amdgpu_kernel void @fence_acq_rel(ptr addrspace(1) %ptr, i32 %val) {
; GFX9-LABEL: fence_acq_rel:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dword s2, s[8:9], 0x8
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v1, s2
; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_wbinvl1_vol
; GFX9-NEXT: global_load_dword v1, v0, s[0:1]
; GFX9-NEXT: s_getpc_b64 s[0:1]
; GFX9-NEXT: s_add_u32 s0, s0, global_var@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, global_var@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
;
; GFX11-LABEL: fence_acq_rel:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-NEXT: s_getpc_b64 s[0:1]
; GFX11-NEXT: s_add_u32 s0, s0, global_var@gotpcrel32@lo+4
; GFX11-NEXT: s_addc_u32 s1, s1, global_var@gotpcrel32@hi+12
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: fence_acq_rel:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b96 s[0:2], s[4:5], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-NEXT: global_wb scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX12-NEXT: s_getpc_b64 s[0:1]
; GFX12-NEXT: s_sext_i32_i16 s1, s1
; GFX12-NEXT: s_add_co_u32 s0, s0, global_var@gotpcrel32@lo+8
; GFX12-NEXT: s_add_co_ci_u32 s1, s1, global_var@gotpcrel32@hi+16
; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: s_endpgm
store i32 %val, ptr addrspace(1) %ptr
fence acq_rel
%load = load i32, ptr addrspace(1) %ptr
store volatile i32 %load, ptr addrspace(1) @global_var
ret void
}

define amdgpu_kernel void @fence_seq_cst(ptr addrspace(1) %ptr, i32 %val) {
; GFX9-LABEL: fence_seq_cst:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dword s2, s[8:9], 0x8
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v1, s2
; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_wbinvl1_vol
; GFX9-NEXT: global_load_dword v1, v0, s[0:1]
; GFX9-NEXT: s_getpc_b64 s[0:1]
; GFX9-NEXT: s_add_u32 s0, s0, global_var@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, global_var@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
;
; GFX11-LABEL: fence_seq_cst:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-NEXT: s_getpc_b64 s[0:1]
; GFX11-NEXT: s_add_u32 s0, s0, global_var@gotpcrel32@lo+4
; GFX11-NEXT: s_addc_u32 s1, s1, global_var@gotpcrel32@hi+12
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: fence_seq_cst:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b96 s[0:2], s[4:5], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-NEXT: global_wb scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_SYS
; GFX12-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX12-NEXT: s_getpc_b64 s[0:1]
; GFX12-NEXT: s_sext_i32_i16 s1, s1
; GFX12-NEXT: s_add_co_u32 s0, s0, global_var@gotpcrel32@lo+8
; GFX12-NEXT: s_add_co_ci_u32 s1, s1, global_var@gotpcrel32@hi+16
; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: s_endpgm
store i32 %val, ptr addrspace(1) %ptr
fence seq_cst
%load = load i32, ptr addrspace(1) %ptr
store volatile i32 %load, ptr addrspace(1) @global_var
ret void
}

define amdgpu_kernel void @fence_workgroup_release(ptr addrspace(3) %ptr, i32 %val1, i32 %val2) {
; GFX9-LABEL: fence_workgroup_release:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, global_var@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, global_var@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s1
; GFX9-NEXT: v_mov_b32_e32 v1, s0
; GFX9-NEXT: ds_write_b32 v1, v0
; GFX9-NEXT: v_mov_b32_e32 v0, s2
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_store_dword v1, v0, s[4:5]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
;
; GFX11-LABEL: fence_workgroup_release:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
; GFX11-NEXT: s_getpc_b64 s[4:5]
; GFX11-NEXT: s_add_u32 s4, s4, global_var@gotpcrel32@lo+4
; GFX11-NEXT: s_addc_u32 s5, s5, global_var@gotpcrel32@hi+12
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0
; GFX11-NEXT: v_mov_b32_e32 v3, s2
; GFX11-NEXT: ds_store_b32 v1, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v2, v3, s[4:5] dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: fence_workgroup_release:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b96 s[0:2], s[4:5], 0x0
; GFX12-NEXT: s_getpc_b64 s[4:5]
; GFX12-NEXT: s_sext_i32_i16 s5, s5
; GFX12-NEXT: s_add_co_u32 s4, s4, global_var@gotpcrel32@lo+8
; GFX12-NEXT: s_add_co_ci_u32 s5, s5, global_var@gotpcrel32@hi+16
; GFX12-NEXT: v_mov_b32_e32 v2, 0
; GFX12-NEXT: s_load_b64 s[4:5], s[4:5], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0
; GFX12-NEXT: v_mov_b32_e32 v3, s2
; GFX12-NEXT: ds_store_b32 v1, v0
; GFX12-NEXT: s_wait_dscnt 0x0
; GFX12-NEXT: global_store_b32 v2, v3, s[4:5] scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: s_endpgm
store i32 %val1, ptr addrspace(3) %ptr
fence syncscope("workgroup") release
store volatile i32 %val2, ptr addrspace(1) @global_var
ret void
}

define amdgpu_kernel void @fence_agent_seq_cst(ptr addrspace(1) %ptr, i32 %val) {
; GFX9-LABEL: fence_agent_seq_cst:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dword s2, s[8:9], 0x8
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v1, s2
; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_wbinvl1_vol
; GFX9-NEXT: global_load_dword v1, v0, s[0:1]
; GFX9-NEXT: s_getpc_b64 s[0:1]
; GFX9-NEXT: s_add_u32 s0, s0, global_var@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s1, s1, global_var@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
;
; GFX11-LABEL: fence_agent_seq_cst:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: buffer_gl1_inv
; GFX11-NEXT: buffer_gl0_inv
; GFX11-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX11-NEXT: s_getpc_b64 s[0:1]
; GFX11-NEXT: s_add_u32 s0, s0, global_var@gotpcrel32@lo+4
; GFX11-NEXT: s_addc_u32 s1, s1, global_var@gotpcrel32@hi+12
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: fence_agent_seq_cst:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b96 s[0:2], s[4:5], 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: global_inv scope:SCOPE_DEV
; GFX12-NEXT: global_load_b32 v1, v0, s[0:1]
; GFX12-NEXT: s_getpc_b64 s[0:1]
; GFX12-NEXT: s_sext_i32_i16 s1, s1
; GFX12-NEXT: s_add_co_u32 s0, s0, global_var@gotpcrel32@lo+8
; GFX12-NEXT: s_add_co_ci_u32 s1, s1, global_var@gotpcrel32@hi+16
; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
; GFX12-NEXT: s_wait_storecnt 0x0
; GFX12-NEXT: s_endpgm
store i32 %val, ptr addrspace(1) %ptr
fence syncscope("agent") seq_cst
%load = load i32, ptr addrspace(1) %ptr
store volatile i32 %load, ptr addrspace(1) @global_var
ret void
}
Loading