@@ -768,17 +768,19 @@ define void @test5_s_barrier_init_m0(i32 %arg1 ,i32 %arg2) {
768768}
769769
770770define amdgpu_kernel void @test1_s_barrier_join (ptr addrspace (1 ) %out ) #0 {
771+ ;
771772; GFX12-SDAG-LABEL: test1_s_barrier_join:
772773; GFX12-SDAG: ; %bb.0: ; %entry
773774; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
774775; GFX12-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
775- ; GFX12-SDAG-NEXT: s_barrier_join -1
776- ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
776+ ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
777+ ; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v3, 2, v0
777778; GFX12-SDAG-NEXT: v_mul_u32_u24_e32 v1, v0, v0
778- ; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0
779779; GFX12-SDAG-NEXT: v_sub_nc_u32_e32 v0, v1, v0
780780; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
781- ; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
781+ ; GFX12-SDAG-NEXT: global_store_b32 v3, v2, s[0:1]
782+ ; GFX12-SDAG-NEXT: s_barrier_join -1
783+ ; GFX12-SDAG-NEXT: global_store_b32 v3, v0, s[0:1]
782784; GFX12-SDAG-NEXT: s_nop 0
783785; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
784786; GFX12-SDAG-NEXT: s_endpgm
@@ -810,17 +812,19 @@ entry:
810812}
811813
812814define amdgpu_kernel void @test2_s_barrier_join (ptr addrspace (1 ) %out ) #0 {
815+ ;
813816; GFX12-SDAG-LABEL: test2_s_barrier_join:
814817; GFX12-SDAG: ; %bb.0: ; %entry
815818; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
816819; GFX12-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
817- ; GFX12-SDAG-NEXT: s_barrier_join 1
818- ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
820+ ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
821+ ; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v3, 2, v0
819822; GFX12-SDAG-NEXT: v_mul_u32_u24_e32 v1, v0, v0
820- ; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0
821823; GFX12-SDAG-NEXT: v_sub_nc_u32_e32 v0, v1, v0
822824; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
823- ; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
825+ ; GFX12-SDAG-NEXT: global_store_b32 v3, v2, s[0:1]
826+ ; GFX12-SDAG-NEXT: s_barrier_join 1
827+ ; GFX12-SDAG-NEXT: global_store_b32 v3, v0, s[0:1]
824828; GFX12-SDAG-NEXT: s_nop 0
825829; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
826830; GFX12-SDAG-NEXT: s_endpgm
@@ -852,17 +856,19 @@ entry:
852856}
853857
854858define amdgpu_kernel void @test3_s_barrier_join (ptr addrspace (1 ) %out ) #0 {
859+ ;
855860; GFX12-SDAG-LABEL: test3_s_barrier_join:
856861; GFX12-SDAG: ; %bb.0: ; %entry
857862; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
858863; GFX12-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
859- ; GFX12-SDAG-NEXT: s_barrier_join 0
860- ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
864+ ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
865+ ; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v3, 2, v0
861866; GFX12-SDAG-NEXT: v_mul_u32_u24_e32 v1, v0, v0
862- ; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0
863867; GFX12-SDAG-NEXT: v_sub_nc_u32_e32 v0, v1, v0
864868; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
865- ; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
869+ ; GFX12-SDAG-NEXT: global_store_b32 v3, v2, s[0:1]
870+ ; GFX12-SDAG-NEXT: s_barrier_join 0
871+ ; GFX12-SDAG-NEXT: global_store_b32 v3, v0, s[0:1]
866872; GFX12-SDAG-NEXT: s_nop 0
867873; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
868874; GFX12-SDAG-NEXT: s_endpgm
@@ -967,6 +973,20 @@ define void @test5_s_barrier_join_m0(i32 %arg) {
967973 ret void
968974}
969975
; test6_s_barrier_join_0: a plain (non-amdgpu_kernel) function whose only work
; is a call to llvm.amdgcn.s.barrier.join with the constant operand 0.
; The checks below expect that call to be emitted as a single
; `s_barrier_join 0`, placed after the function-entry s_wait_* sequence and
; directly before the `s_setpc_b64 s[30:31]` return.
; NOTE(review): the check lines use the bare GFX12 prefix, presumably shared by
; every RUN configuration of this file (RUN lines are not visible here) - confirm.
; NOTE(review): the check lines look tool-generated; prefer regenerating them
; over hand-editing - confirm against the file header.
976+ define void @test6_s_barrier_join_0 () {
977+ ; GFX12-LABEL: test6_s_barrier_join_0:
978+ ; GFX12: ; %bb.0:
979+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
980+ ; GFX12-NEXT: s_wait_expcnt 0x0
981+ ; GFX12-NEXT: s_wait_samplecnt 0x0
982+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
983+ ; GFX12-NEXT: s_wait_kmcnt 0x0
984+ ; GFX12-NEXT: s_barrier_join 0
985+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
986+ call void @llvm.amdgcn.s.barrier.join (i32 0 )
987+ ret void
988+ }
989+
970990define amdgpu_kernel void @test1_s_barrier_leave (ptr addrspace (1 ) %a , ptr addrspace (1 ) %b , ptr addrspace (1 ) %c , ptr addrspace (1 ) %out ) #0 {
971991; GFX12-SDAG-LABEL: test1_s_barrier_leave:
972992; GFX12-SDAG: ; %bb.0: ; %entry
@@ -1026,17 +1046,19 @@ entry:
10261046}
10271047
10281048define amdgpu_kernel void @test1_s_wakeup_barrier (ptr addrspace (1 ) %out ) #0 {
1049+ ;
10291050; GFX12-SDAG-LABEL: test1_s_wakeup_barrier:
10301051; GFX12-SDAG: ; %bb.0: ; %entry
10311052; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
10321053; GFX12-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1033- ; GFX12-SDAG-NEXT: s_wakeup_barrier -1
1034- ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1054+ ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
1055+ ; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v3, 2, v0
10351056; GFX12-SDAG-NEXT: v_mul_u32_u24_e32 v1, v0, v0
1036- ; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0
10371057; GFX12-SDAG-NEXT: v_sub_nc_u32_e32 v0, v1, v0
10381058; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
1039- ; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
1059+ ; GFX12-SDAG-NEXT: global_store_b32 v3, v2, s[0:1]
1060+ ; GFX12-SDAG-NEXT: s_wakeup_barrier -1
1061+ ; GFX12-SDAG-NEXT: global_store_b32 v3, v0, s[0:1]
10401062; GFX12-SDAG-NEXT: s_nop 0
10411063; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
10421064; GFX12-SDAG-NEXT: s_endpgm
@@ -1068,17 +1090,19 @@ entry:
10681090}
10691091
10701092define amdgpu_kernel void @test2_s_wakeup_barrier (ptr addrspace (1 ) %out ) #0 {
1093+ ;
10711094; GFX12-SDAG-LABEL: test2_s_wakeup_barrier:
10721095; GFX12-SDAG: ; %bb.0: ; %entry
10731096; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
10741097; GFX12-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1075- ; GFX12-SDAG-NEXT: s_wakeup_barrier 1
1076- ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1098+ ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
1099+ ; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v3, 2, v0
10771100; GFX12-SDAG-NEXT: v_mul_u32_u24_e32 v1, v0, v0
1078- ; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0
10791101; GFX12-SDAG-NEXT: v_sub_nc_u32_e32 v0, v1, v0
10801102; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
1081- ; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
1103+ ; GFX12-SDAG-NEXT: global_store_b32 v3, v2, s[0:1]
1104+ ; GFX12-SDAG-NEXT: s_wakeup_barrier 1
1105+ ; GFX12-SDAG-NEXT: global_store_b32 v3, v0, s[0:1]
10821106; GFX12-SDAG-NEXT: s_nop 0
10831107; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
10841108; GFX12-SDAG-NEXT: s_endpgm
@@ -1110,17 +1134,19 @@ entry:
11101134}
11111135
11121136define amdgpu_kernel void @test3_s_wakeup_barrier (ptr addrspace (1 ) %out ) #0 {
1137+ ;
11131138; GFX12-SDAG-LABEL: test3_s_wakeup_barrier:
11141139; GFX12-SDAG: ; %bb.0: ; %entry
11151140; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
11161141; GFX12-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1117- ; GFX12-SDAG-NEXT: s_wakeup_barrier 0
1118- ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1142+ ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
1143+ ; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v3, 2, v0
11191144; GFX12-SDAG-NEXT: v_mul_u32_u24_e32 v1, v0, v0
1120- ; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0
11211145; GFX12-SDAG-NEXT: v_sub_nc_u32_e32 v0, v1, v0
11221146; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
1123- ; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
1147+ ; GFX12-SDAG-NEXT: global_store_b32 v3, v2, s[0:1]
1148+ ; GFX12-SDAG-NEXT: s_wakeup_barrier 0
1149+ ; GFX12-SDAG-NEXT: global_store_b32 v3, v0, s[0:1]
11241150; GFX12-SDAG-NEXT: s_nop 0
11251151; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
11261152; GFX12-SDAG-NEXT: s_endpgm
@@ -1226,34 +1252,21 @@ define void @test5_s_wakeup_barrier_m0(i32 %arg) {
12261252}
12271253
12281254define amdgpu_kernel void @test1_s_get_barrier_state (ptr addrspace (1 ) %out ) #0 {
1229- ; GFX12-SDAG-LABEL: test1_s_get_barrier_state:
1230- ; GFX12-SDAG: ; %bb.0: ; %entry
1231- ; GFX12-SDAG-NEXT: s_get_barrier_state s4, -1
1232- ; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
1233- ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
1234- ; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1235- ; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, s4 :: v_dual_and_b32 v0, 0x3ff, v0
1236- ; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1237- ; GFX12-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
1238- ; GFX12-SDAG-NEXT: s_nop 0
1239- ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1240- ; GFX12-SDAG-NEXT: s_endpgm
1241- ;
1242- ; GFX12-GISEL-LABEL: test1_s_get_barrier_state:
1243- ; GFX12-GISEL: ; %bb.0: ; %entry
1244- ; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
1245- ; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
1246- ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_2)
1247- ; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1248- ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
1249- ; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
1250- ; GFX12-GISEL-NEXT: s_get_barrier_state s2, -1
1251- ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
1252- ; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, s2
1253- ; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
1254- ; GFX12-GISEL-NEXT: s_nop 0
1255- ; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1256- ; GFX12-GISEL-NEXT: s_endpgm
1255+ ; GFX12-LABEL: test1_s_get_barrier_state:
1256+ ; GFX12: ; %bb.0: ; %entry
1257+ ; GFX12-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
1258+ ; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
1259+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_2)
1260+ ; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1261+ ; GFX12-NEXT: s_wait_kmcnt 0x0
1262+ ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
1263+ ; GFX12-NEXT: s_get_barrier_state s2, -1
1264+ ; GFX12-NEXT: s_wait_kmcnt 0x0
1265+ ; GFX12-NEXT: v_mov_b32_e32 v1, s2
1266+ ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
1267+ ; GFX12-NEXT: s_nop 0
1268+ ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1269+ ; GFX12-NEXT: s_endpgm
12571270entry:
12581271 %tmp = call i32 @llvm.amdgcn.workitem.id.x ()
12591272 %tmp1 = getelementptr i32 , ptr addrspace (1 ) %out , i32 %tmp
@@ -1264,34 +1277,21 @@ entry:
12641277}
12651278
12661279define amdgpu_kernel void @test2_s_get_barrier_state (ptr addrspace (1 ) %out ) #0 {
1267- ; GFX12-SDAG-LABEL: test2_s_get_barrier_state:
1268- ; GFX12-SDAG: ; %bb.0: ; %entry
1269- ; GFX12-SDAG-NEXT: s_get_barrier_state s4, 1
1270- ; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
1271- ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
1272- ; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1273- ; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, s4 :: v_dual_and_b32 v0, 0x3ff, v0
1274- ; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1275- ; GFX12-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
1276- ; GFX12-SDAG-NEXT: s_nop 0
1277- ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1278- ; GFX12-SDAG-NEXT: s_endpgm
1279- ;
1280- ; GFX12-GISEL-LABEL: test2_s_get_barrier_state:
1281- ; GFX12-GISEL: ; %bb.0: ; %entry
1282- ; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
1283- ; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
1284- ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_2)
1285- ; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1286- ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
1287- ; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
1288- ; GFX12-GISEL-NEXT: s_get_barrier_state s2, 1
1289- ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
1290- ; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, s2
1291- ; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
1292- ; GFX12-GISEL-NEXT: s_nop 0
1293- ; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1294- ; GFX12-GISEL-NEXT: s_endpgm
1280+ ; GFX12-LABEL: test2_s_get_barrier_state:
1281+ ; GFX12: ; %bb.0: ; %entry
1282+ ; GFX12-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
1283+ ; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
1284+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_2)
1285+ ; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1286+ ; GFX12-NEXT: s_wait_kmcnt 0x0
1287+ ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
1288+ ; GFX12-NEXT: s_get_barrier_state s2, 1
1289+ ; GFX12-NEXT: s_wait_kmcnt 0x0
1290+ ; GFX12-NEXT: v_mov_b32_e32 v1, s2
1291+ ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
1292+ ; GFX12-NEXT: s_nop 0
1293+ ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1294+ ; GFX12-NEXT: s_endpgm
12951295entry:
12961296 %tmp = call i32 @llvm.amdgcn.workitem.id.x ()
12971297 %tmp1 = getelementptr i32 , ptr addrspace (1 ) %out , i32 %tmp
@@ -1302,34 +1302,21 @@ entry:
13021302}
13031303
13041304define amdgpu_kernel void @test3_s_get_barrier_state (ptr addrspace (1 ) %out ) #0 {
1305- ; GFX12-SDAG-LABEL: test3_s_get_barrier_state:
1306- ; GFX12-SDAG: ; %bb.0: ; %entry
1307- ; GFX12-SDAG-NEXT: s_get_barrier_state s4, 0
1308- ; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
1309- ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
1310- ; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1311- ; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, s4 :: v_dual_and_b32 v0, 0x3ff, v0
1312- ; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1313- ; GFX12-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
1314- ; GFX12-SDAG-NEXT: s_nop 0
1315- ; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1316- ; GFX12-SDAG-NEXT: s_endpgm
1317- ;
1318- ; GFX12-GISEL-LABEL: test3_s_get_barrier_state:
1319- ; GFX12-GISEL: ; %bb.0: ; %entry
1320- ; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
1321- ; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
1322- ; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_2)
1323- ; GFX12-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1324- ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
1325- ; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
1326- ; GFX12-GISEL-NEXT: s_get_barrier_state s2, 0
1327- ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
1328- ; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, s2
1329- ; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
1330- ; GFX12-GISEL-NEXT: s_nop 0
1331- ; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1332- ; GFX12-GISEL-NEXT: s_endpgm
1305+ ; GFX12-LABEL: test3_s_get_barrier_state:
1306+ ; GFX12: ; %bb.0: ; %entry
1307+ ; GFX12-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
1308+ ; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
1309+ ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_2)
1310+ ; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1311+ ; GFX12-NEXT: s_wait_kmcnt 0x0
1312+ ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
1313+ ; GFX12-NEXT: s_get_barrier_state s2, 0
1314+ ; GFX12-NEXT: s_wait_kmcnt 0x0
1315+ ; GFX12-NEXT: v_mov_b32_e32 v1, s2
1316+ ; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
1317+ ; GFX12-NEXT: s_nop 0
1318+ ; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1319+ ; GFX12-NEXT: s_endpgm
13331320entry:
13341321 %tmp = call i32 @llvm.amdgcn.workitem.id.x ()
13351322 %tmp1 = getelementptr i32 , ptr addrspace (1 ) %out , i32 %tmp
@@ -1401,6 +1388,24 @@ define i32 @test5_s_get_barrier_state_m0(i32 %arg) {
14011388 ret i32 %state
14021389}
14031390
; test6_s_get_barrier_state_0: a plain (non-amdgpu_kernel) function that calls
; llvm.amdgcn.s.get.barrier.state with the constant operand 0 and returns the
; resulting i32.
; The checks below expect `s_get_barrier_state s0, 0` after the function-entry
; s_wait_* sequence, then an `s_wait_kmcnt 0x0` before s0 is copied into v0
; and the function returns through `s_setpc_b64 s[30:31]`.
; NOTE(review): the check lines use the bare GFX12 prefix, presumably shared by
; every RUN configuration of this file (RUN lines are not visible here) - confirm.
; NOTE(review): the check lines look tool-generated; prefer regenerating them
; over hand-editing - confirm against the file header.
1391+ define i32 @test6_s_get_barrier_state_0 () {
1392+ ; GFX12-LABEL: test6_s_get_barrier_state_0:
1393+ ; GFX12: ; %bb.0:
1394+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1395+ ; GFX12-NEXT: s_wait_expcnt 0x0
1396+ ; GFX12-NEXT: s_wait_samplecnt 0x0
1397+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
1398+ ; GFX12-NEXT: s_wait_kmcnt 0x0
1399+ ; GFX12-NEXT: s_get_barrier_state s0, 0
1400+ ; GFX12-NEXT: s_wait_kmcnt 0x0
1401+ ; GFX12-NEXT: s_wait_alu 0xfffe
1402+ ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1403+ ; GFX12-NEXT: v_mov_b32_e32 v0, s0
1404+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
1405+ %state = call i32 @llvm.amdgcn.s.get.barrier.state (i32 0 )
1406+ ret i32 %state
1407+ }
1408+
14041409define amdgpu_kernel void @test_barrier_convert (ptr addrspace (1 ) %out ) #0 {
14051410; GFX12-SDAG-LABEL: test_barrier_convert:
14061411; GFX12-SDAG: ; %bb.0: ; %entry
0 commit comments