@@ -1312,5 +1312,247 @@ define amdgpu_kernel void @global_atomic_usub_cond_sgpr_base_offset_nortn(ptr ad
13121312 ret void
13131313}
13141314
; Codegen test: agent-scope seq_cst `atomicrmw usub_cond` on an i32 in
; addrspace(1), returning the old value, with only the
; !amdgpu.no.remote.memory annotation attached to the instruction.
; Per the assertions below, the GFX9-* check prefixes expect a
; global_atomic_cmpswap retry loop (the new value is formed with
; v_sub / v_cmp_ge / v_cndmask), while the GFX12-* check prefixes expect a
; single returning global_atomic_cond_sub_u32.
1315+ define i32 @global_atomic_usub_cond__amdgpu_no_remote_memory (ptr addrspace (1 ) %ptr , i32 %data ) {
1316+ ; GFX9-SDAG-LABEL: global_atomic_usub_cond__amdgpu_no_remote_memory:
1317+ ; GFX9-SDAG: ; %bb.0:
1318+ ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1319+ ; GFX9-SDAG-NEXT: global_load_dword v3, v[0:1], off
1320+ ; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], 0
1321+ ; GFX9-SDAG-NEXT: .LBB15_1: ; %atomicrmw.start
1322+ ; GFX9-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
1323+ ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
1324+ ; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, v3
1325+ ; GFX9-SDAG-NEXT: v_sub_u32_e32 v3, v4, v2
1326+ ; GFX9-SDAG-NEXT: v_cmp_ge_u32_e32 vcc, v4, v2
1327+ ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
1328+ ; GFX9-SDAG-NEXT: global_atomic_cmpswap v3, v[0:1], v[3:4], off glc
1329+ ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
1330+ ; GFX9-SDAG-NEXT: buffer_wbinvl1_vol
1331+ ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4
1332+ ; GFX9-SDAG-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
1333+ ; GFX9-SDAG-NEXT: s_andn2_b64 exec, exec, s[4:5]
1334+ ; GFX9-SDAG-NEXT: s_cbranch_execnz .LBB15_1
1335+ ; GFX9-SDAG-NEXT: ; %bb.2: ; %atomicrmw.end
1336+ ; GFX9-SDAG-NEXT: s_or_b64 exec, exec, s[4:5]
1337+ ; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, v3
1338+ ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1339+ ;
1340+ ; GFX12-SDAG-LABEL: global_atomic_usub_cond__amdgpu_no_remote_memory:
1341+ ; GFX12-SDAG: ; %bb.0:
1342+ ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
1343+ ; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
1344+ ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
1345+ ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
1346+ ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
1347+ ; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
1348+ ; GFX12-SDAG-NEXT: global_atomic_cond_sub_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
1349+ ; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
1350+ ; GFX12-SDAG-NEXT: global_inv scope:SCOPE_DEV
1351+ ; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
1352+ ; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
1353+ ;
1354+ ; GFX9-GISEL-LABEL: global_atomic_usub_cond__amdgpu_no_remote_memory:
1355+ ; GFX9-GISEL: ; %bb.0:
1356+ ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1357+ ; GFX9-GISEL-NEXT: global_load_dword v3, v[0:1], off
1358+ ; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], 0
1359+ ; GFX9-GISEL-NEXT: .LBB15_1: ; %atomicrmw.start
1360+ ; GFX9-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
1361+ ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
1362+ ; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, v3
1363+ ; GFX9-GISEL-NEXT: v_sub_u32_e32 v3, v4, v2
1364+ ; GFX9-GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v4, v2
1365+ ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
1366+ ; GFX9-GISEL-NEXT: global_atomic_cmpswap v3, v[0:1], v[3:4], off glc
1367+ ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
1368+ ; GFX9-GISEL-NEXT: buffer_wbinvl1_vol
1369+ ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4
1370+ ; GFX9-GISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
1371+ ; GFX9-GISEL-NEXT: s_andn2_b64 exec, exec, s[4:5]
1372+ ; GFX9-GISEL-NEXT: s_cbranch_execnz .LBB15_1
1373+ ; GFX9-GISEL-NEXT: ; %bb.2: ; %atomicrmw.end
1374+ ; GFX9-GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
1375+ ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, v3
1376+ ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1377+ ;
1378+ ; GFX12-GISEL-LABEL: global_atomic_usub_cond__amdgpu_no_remote_memory:
1379+ ; GFX12-GISEL: ; %bb.0:
1380+ ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
1381+ ; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
1382+ ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
1383+ ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
1384+ ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
1385+ ; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
1386+ ; GFX12-GISEL-NEXT: global_atomic_cond_sub_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
1387+ ; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
1388+ ; GFX12-GISEL-NEXT: global_inv scope:SCOPE_DEV
1389+ ; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
1390+ ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
1391+ %ret = atomicrmw usub_cond ptr addrspace (1 ) %ptr , i32 %data syncscope("agent" ) seq_cst , align 4 , !amdgpu.no.remote.memory !0
1392+ ret i32 %ret
1393+ }
1394+
; Codegen test: agent-scope seq_cst `atomicrmw usub_cond` on an i32 in
; addrspace(1), returning the old value, with only the
; !amdgpu.no.fine.grained.memory annotation attached to the instruction.
; Per the assertions below, the GFX9-* check prefixes expect a
; global_atomic_cmpswap retry loop (the new value is formed with
; v_sub / v_cmp_ge / v_cndmask), while the GFX12-* check prefixes expect a
; single returning global_atomic_cond_sub_u32.
1395+ define i32 @global_atomic_usub_cond__amdgpu_no_fine_grained_memory (ptr addrspace (1 ) %ptr , i32 %data ) {
1396+ ; GFX9-SDAG-LABEL: global_atomic_usub_cond__amdgpu_no_fine_grained_memory:
1397+ ; GFX9-SDAG: ; %bb.0:
1398+ ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1399+ ; GFX9-SDAG-NEXT: global_load_dword v3, v[0:1], off
1400+ ; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], 0
1401+ ; GFX9-SDAG-NEXT: .LBB16_1: ; %atomicrmw.start
1402+ ; GFX9-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
1403+ ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
1404+ ; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, v3
1405+ ; GFX9-SDAG-NEXT: v_sub_u32_e32 v3, v4, v2
1406+ ; GFX9-SDAG-NEXT: v_cmp_ge_u32_e32 vcc, v4, v2
1407+ ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
1408+ ; GFX9-SDAG-NEXT: global_atomic_cmpswap v3, v[0:1], v[3:4], off glc
1409+ ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
1410+ ; GFX9-SDAG-NEXT: buffer_wbinvl1_vol
1411+ ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4
1412+ ; GFX9-SDAG-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
1413+ ; GFX9-SDAG-NEXT: s_andn2_b64 exec, exec, s[4:5]
1414+ ; GFX9-SDAG-NEXT: s_cbranch_execnz .LBB16_1
1415+ ; GFX9-SDAG-NEXT: ; %bb.2: ; %atomicrmw.end
1416+ ; GFX9-SDAG-NEXT: s_or_b64 exec, exec, s[4:5]
1417+ ; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, v3
1418+ ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1419+ ;
1420+ ; GFX12-SDAG-LABEL: global_atomic_usub_cond__amdgpu_no_fine_grained_memory:
1421+ ; GFX12-SDAG: ; %bb.0:
1422+ ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
1423+ ; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
1424+ ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
1425+ ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
1426+ ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
1427+ ; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
1428+ ; GFX12-SDAG-NEXT: global_atomic_cond_sub_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
1429+ ; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
1430+ ; GFX12-SDAG-NEXT: global_inv scope:SCOPE_DEV
1431+ ; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
1432+ ; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
1433+ ;
1434+ ; GFX9-GISEL-LABEL: global_atomic_usub_cond__amdgpu_no_fine_grained_memory:
1435+ ; GFX9-GISEL: ; %bb.0:
1436+ ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1437+ ; GFX9-GISEL-NEXT: global_load_dword v3, v[0:1], off
1438+ ; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], 0
1439+ ; GFX9-GISEL-NEXT: .LBB16_1: ; %atomicrmw.start
1440+ ; GFX9-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
1441+ ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
1442+ ; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, v3
1443+ ; GFX9-GISEL-NEXT: v_sub_u32_e32 v3, v4, v2
1444+ ; GFX9-GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v4, v2
1445+ ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
1446+ ; GFX9-GISEL-NEXT: global_atomic_cmpswap v3, v[0:1], v[3:4], off glc
1447+ ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
1448+ ; GFX9-GISEL-NEXT: buffer_wbinvl1_vol
1449+ ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4
1450+ ; GFX9-GISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
1451+ ; GFX9-GISEL-NEXT: s_andn2_b64 exec, exec, s[4:5]
1452+ ; GFX9-GISEL-NEXT: s_cbranch_execnz .LBB16_1
1453+ ; GFX9-GISEL-NEXT: ; %bb.2: ; %atomicrmw.end
1454+ ; GFX9-GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
1455+ ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, v3
1456+ ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1457+ ;
1458+ ; GFX12-GISEL-LABEL: global_atomic_usub_cond__amdgpu_no_fine_grained_memory:
1459+ ; GFX12-GISEL: ; %bb.0:
1460+ ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
1461+ ; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
1462+ ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
1463+ ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
1464+ ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
1465+ ; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
1466+ ; GFX12-GISEL-NEXT: global_atomic_cond_sub_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
1467+ ; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
1468+ ; GFX12-GISEL-NEXT: global_inv scope:SCOPE_DEV
1469+ ; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
1470+ ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
1471+ %ret = atomicrmw usub_cond ptr addrspace (1 ) %ptr , i32 %data syncscope("agent" ) seq_cst , align 4 , !amdgpu.no.fine.grained.memory !0
1472+ ret i32 %ret
1473+ }
1474+
; Codegen test: agent-scope seq_cst `atomicrmw usub_cond` on an i32 in
; addrspace(1), returning the old value, with both the
; !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory annotations
; attached to the instruction.
; Per the assertions below, the GFX9-* check prefixes expect a
; global_atomic_cmpswap retry loop (the new value is formed with
; v_sub / v_cmp_ge / v_cndmask), while the GFX12-* check prefixes expect a
; single returning global_atomic_cond_sub_u32.
1475+ define i32 @global_atomic_usub_cond__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory (ptr addrspace (1 ) %ptr , i32 %data ) {
1476+ ; GFX9-SDAG-LABEL: global_atomic_usub_cond__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
1477+ ; GFX9-SDAG: ; %bb.0:
1478+ ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1479+ ; GFX9-SDAG-NEXT: global_load_dword v3, v[0:1], off
1480+ ; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], 0
1481+ ; GFX9-SDAG-NEXT: .LBB17_1: ; %atomicrmw.start
1482+ ; GFX9-SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
1483+ ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
1484+ ; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, v3
1485+ ; GFX9-SDAG-NEXT: v_sub_u32_e32 v3, v4, v2
1486+ ; GFX9-SDAG-NEXT: v_cmp_ge_u32_e32 vcc, v4, v2
1487+ ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
1488+ ; GFX9-SDAG-NEXT: global_atomic_cmpswap v3, v[0:1], v[3:4], off glc
1489+ ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
1490+ ; GFX9-SDAG-NEXT: buffer_wbinvl1_vol
1491+ ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4
1492+ ; GFX9-SDAG-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
1493+ ; GFX9-SDAG-NEXT: s_andn2_b64 exec, exec, s[4:5]
1494+ ; GFX9-SDAG-NEXT: s_cbranch_execnz .LBB17_1
1495+ ; GFX9-SDAG-NEXT: ; %bb.2: ; %atomicrmw.end
1496+ ; GFX9-SDAG-NEXT: s_or_b64 exec, exec, s[4:5]
1497+ ; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, v3
1498+ ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1499+ ;
1500+ ; GFX12-SDAG-LABEL: global_atomic_usub_cond__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
1501+ ; GFX12-SDAG: ; %bb.0:
1502+ ; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
1503+ ; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
1504+ ; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
1505+ ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
1506+ ; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
1507+ ; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
1508+ ; GFX12-SDAG-NEXT: global_atomic_cond_sub_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
1509+ ; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
1510+ ; GFX12-SDAG-NEXT: global_inv scope:SCOPE_DEV
1511+ ; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
1512+ ; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
1513+ ;
1514+ ; GFX9-GISEL-LABEL: global_atomic_usub_cond__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
1515+ ; GFX9-GISEL: ; %bb.0:
1516+ ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1517+ ; GFX9-GISEL-NEXT: global_load_dword v3, v[0:1], off
1518+ ; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], 0
1519+ ; GFX9-GISEL-NEXT: .LBB17_1: ; %atomicrmw.start
1520+ ; GFX9-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
1521+ ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
1522+ ; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, v3
1523+ ; GFX9-GISEL-NEXT: v_sub_u32_e32 v3, v4, v2
1524+ ; GFX9-GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v4, v2
1525+ ; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
1526+ ; GFX9-GISEL-NEXT: global_atomic_cmpswap v3, v[0:1], v[3:4], off glc
1527+ ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
1528+ ; GFX9-GISEL-NEXT: buffer_wbinvl1_vol
1529+ ; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4
1530+ ; GFX9-GISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
1531+ ; GFX9-GISEL-NEXT: s_andn2_b64 exec, exec, s[4:5]
1532+ ; GFX9-GISEL-NEXT: s_cbranch_execnz .LBB17_1
1533+ ; GFX9-GISEL-NEXT: ; %bb.2: ; %atomicrmw.end
1534+ ; GFX9-GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
1535+ ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, v3
1536+ ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1537+ ;
1538+ ; GFX12-GISEL-LABEL: global_atomic_usub_cond__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
1539+ ; GFX12-GISEL: ; %bb.0:
1540+ ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
1541+ ; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
1542+ ; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
1543+ ; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
1544+ ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
1545+ ; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
1546+ ; GFX12-GISEL-NEXT: global_atomic_cond_sub_u32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
1547+ ; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
1548+ ; GFX12-GISEL-NEXT: global_inv scope:SCOPE_DEV
1549+ ; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
1550+ ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
1551+ %ret = atomicrmw usub_cond ptr addrspace (1 ) %ptr , i32 %data syncscope("agent" ) seq_cst , align 4 , !amdgpu.no.fine.grained.memory !0 , !amdgpu.no.remote.memory !0
1552+ ret i32 %ret
1553+ }
1554+
13151555attributes #0 = { nounwind willreturn }
13161556attributes #1 = { argmemonly nounwind }
1557+
; Empty metadata node used as the operand of the !amdgpu.no.remote.memory and
; !amdgpu.no.fine.grained.memory attachments on the atomicrmw instructions
; above.
1558+ !0 = !{}
0 commit comments