Skip to content

Commit 1ce7914

Browse files
committed
review comments
1 parent 7072b74 commit 1ce7914

File tree

1 file changed

+92
-49
lines changed

1 file changed

+92
-49
lines changed

llvm/test/CodeGen/AMDGPU/mad_64_32.ll

Lines changed: 92 additions & 49 deletions
Original file line number | Diff line number | Diff line change
@@ -1395,7 +1395,7 @@ define i64 @lshr_mad_i64_2(i64 %arg0) #0 {
13951395
; CI-LABEL: lshr_mad_i64_2:
13961396
; CI: ; %bb.0:
13971397
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1398-
; CI-NEXT: s_movk_i32 s4, 0xfc88
1398+
; CI-NEXT: s_movk_i32 s4, 0xd1
13991399
; CI-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v1, s4, v[0:1]
14001400
; CI-NEXT: v_sub_i32_e32 v1, vcc, v3, v1
14011401
; CI-NEXT: v_mov_b32_e32 v0, v2
@@ -1404,7 +1404,7 @@ define i64 @lshr_mad_i64_2(i64 %arg0) #0 {
14041404
; SI-LABEL: lshr_mad_i64_2:
14051405
; SI: ; %bb.0:
14061406
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1407-
; SI-NEXT: s_movk_i32 s4, 0xfc88
1407+
; SI-NEXT: s_movk_i32 s4, 0xd1
14081408
; SI-NEXT: v_mul_hi_u32 v2, v1, s4
14091409
; SI-NEXT: v_mul_lo_u32 v3, v1, s4
14101410
; SI-NEXT: v_sub_i32_e32 v2, vcc, v2, v1
@@ -1415,7 +1415,7 @@ define i64 @lshr_mad_i64_2(i64 %arg0) #0 {
14151415
; GFX9-LABEL: lshr_mad_i64_2:
14161416
; GFX9: ; %bb.0:
14171417
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1418-
; GFX9-NEXT: s_movk_i32 s4, 0xfc88
1418+
; GFX9-NEXT: s_movk_i32 s4, 0xd1
14191419
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v1, s4, v[0:1]
14201420
; GFX9-NEXT: v_sub_u32_e32 v1, v3, v1
14211421
; GFX9-NEXT: v_mov_b32_e32 v0, v2
@@ -1424,7 +1424,7 @@ define i64 @lshr_mad_i64_2(i64 %arg0) #0 {
14241424
; GFX11-LABEL: lshr_mad_i64_2:
14251425
; GFX11: ; %bb.0:
14261426
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1427-
; GFX11-NEXT: v_mad_u64_u32 v[2:3], null, 0xfffffc88, v1, v[0:1]
1427+
; GFX11-NEXT: v_mad_u64_u32 v[2:3], null, 0xd1, v1, v[0:1]
14281428
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
14291429
; GFX11-NEXT: v_sub_nc_u32_e32 v1, v3, v1
14301430
; GFX11-NEXT: v_mov_b32_e32 v0, v2
@@ -1437,6 +1437,64 @@ define i64 @lshr_mad_i64_2(i64 %arg0) #0 {
14371437
; GFX12-NEXT: s_wait_samplecnt 0x0
14381438
; GFX12-NEXT: s_wait_bvhcnt 0x0
14391439
; GFX12-NEXT: s_wait_kmcnt 0x0
1440+
; GFX12-NEXT: v_mad_co_u64_u32 v[2:3], null, 0xd1, v1, v[0:1]
1441+
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
1442+
; GFX12-NEXT: v_sub_nc_u32_e32 v1, v3, v1
1443+
; GFX12-NEXT: v_mov_b32_e32 v0, v2
1444+
; GFX12-NEXT: s_setpc_b64 s[30:31]
1445+
%lsh = lshr i64 %arg0, 32
1446+
%mul = mul i64 %lsh, s0xffffffff000000d1
1447+
%mad = add i64 %mul, %arg0
1448+
1449+
ret i64 %mad
1450+
}
1451+
1452+
define i64 @lshr_mad_i64_3(i64 %arg0) #0 {
1453+
; CI-LABEL: lshr_mad_i64_3:
1454+
; CI: ; %bb.0:
1455+
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1456+
; CI-NEXT: s_movk_i32 s4, 0xfc88
1457+
; CI-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v1, s4, v[0:1]
1458+
; CI-NEXT: v_sub_i32_e32 v1, vcc, v3, v1
1459+
; CI-NEXT: v_mov_b32_e32 v0, v2
1460+
; CI-NEXT: s_setpc_b64 s[30:31]
1461+
;
1462+
; SI-LABEL: lshr_mad_i64_3:
1463+
; SI: ; %bb.0:
1464+
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1465+
; SI-NEXT: s_movk_i32 s4, 0xfc88
1466+
; SI-NEXT: v_mul_hi_u32 v2, v1, s4
1467+
; SI-NEXT: v_mul_lo_u32 v3, v1, s4
1468+
; SI-NEXT: v_sub_i32_e32 v2, vcc, v2, v1
1469+
; SI-NEXT: v_add_i32_e32 v0, vcc, v3, v0
1470+
; SI-NEXT: v_addc_u32_e32 v1, vcc, v2, v1, vcc
1471+
; SI-NEXT: s_setpc_b64 s[30:31]
1472+
;
1473+
; GFX9-LABEL: lshr_mad_i64_3:
1474+
; GFX9: ; %bb.0:
1475+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1476+
; GFX9-NEXT: s_movk_i32 s4, 0xfc88
1477+
; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v1, s4, v[0:1]
1478+
; GFX9-NEXT: v_sub_u32_e32 v1, v3, v1
1479+
; GFX9-NEXT: v_mov_b32_e32 v0, v2
1480+
; GFX9-NEXT: s_setpc_b64 s[30:31]
1481+
;
1482+
; GFX11-LABEL: lshr_mad_i64_3:
1483+
; GFX11: ; %bb.0:
1484+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1485+
; GFX11-NEXT: v_mad_u64_u32 v[2:3], null, 0xfffffc88, v1, v[0:1]
1486+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
1487+
; GFX11-NEXT: v_sub_nc_u32_e32 v1, v3, v1
1488+
; GFX11-NEXT: v_mov_b32_e32 v0, v2
1489+
; GFX11-NEXT: s_setpc_b64 s[30:31]
1490+
;
1491+
; GFX12-LABEL: lshr_mad_i64_3:
1492+
; GFX12: ; %bb.0:
1493+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1494+
; GFX12-NEXT: s_wait_expcnt 0x0
1495+
; GFX12-NEXT: s_wait_samplecnt 0x0
1496+
; GFX12-NEXT: s_wait_bvhcnt 0x0
1497+
; GFX12-NEXT: s_wait_kmcnt 0x0
14401498
; GFX12-NEXT: v_mad_co_u64_u32 v[2:3], null, 0xfffffc88, v1, v[0:1]
14411499
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
14421500
; GFX12-NEXT: v_sub_nc_u32_e32 v1, v3, v1
@@ -1449,8 +1507,8 @@ define i64 @lshr_mad_i64_2(i64 %arg0) #0 {
14491507
ret i64 %mad
14501508
}
14511509

1452-
define i64 @lshr_mad_i64_3(i32 %arg0, i64 %arg1) #0 {
1453-
; CI-LABEL: lshr_mad_i64_3:
1510+
define i64 @lshr_mad_i64_4(i32 %arg0, i64 %arg1) #0 {
1511+
; CI-LABEL: lshr_mad_i64_4:
14541512
; CI: ; %bb.0:
14551513
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14561514
; CI-NEXT: v_mul_lo_u32 v3, v2, v0
@@ -1461,7 +1519,7 @@ define i64 @lshr_mad_i64_3(i32 %arg0, i64 %arg1) #0 {
14611519
; CI-NEXT: v_sub_i32_e32 v1, vcc, v1, v2
14621520
; CI-NEXT: s_setpc_b64 s[30:31]
14631521
;
1464-
; SI-LABEL: lshr_mad_i64_3:
1522+
; SI-LABEL: lshr_mad_i64_4:
14651523
; SI: ; %bb.0:
14661524
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14671525
; SI-NEXT: v_mul_lo_u32 v2, v2, v0
@@ -1476,7 +1534,7 @@ define i64 @lshr_mad_i64_3(i32 %arg0, i64 %arg1) #0 {
14761534
; SI-NEXT: v_addc_u32_e32 v1, vcc, v3, v2, vcc
14771535
; SI-NEXT: s_setpc_b64 s[30:31]
14781536
;
1479-
; GFX9-LABEL: lshr_mad_i64_3:
1537+
; GFX9-LABEL: lshr_mad_i64_4:
14801538
; GFX9: ; %bb.0:
14811539
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14821540
; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, v0, 0
@@ -1488,7 +1546,7 @@ define i64 @lshr_mad_i64_3(i32 %arg0, i64 %arg1) #0 {
14881546
; GFX9-NEXT: v_sub_u32_e32 v1, v1, v2
14891547
; GFX9-NEXT: s_setpc_b64 s[30:31]
14901548
;
1491-
; GFX11-LABEL: lshr_mad_i64_3:
1549+
; GFX11-LABEL: lshr_mad_i64_4:
14921550
; GFX11: ; %bb.0:
14931551
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14941552
; GFX11-NEXT: v_mad_u64_u32 v[3:4], null, v1, v0, 0
@@ -1502,7 +1560,7 @@ define i64 @lshr_mad_i64_3(i32 %arg0, i64 %arg1) #0 {
15021560
; GFX11-NEXT: v_sub_nc_u32_e32 v1, v1, v5
15031561
; GFX11-NEXT: s_setpc_b64 s[30:31]
15041562
;
1505-
; GFX12-LABEL: lshr_mad_i64_3:
1563+
; GFX12-LABEL: lshr_mad_i64_4:
15061564
; GFX12: ; %bb.0:
15071565
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
15081566
; GFX12-NEXT: s_wait_expcnt 0x0
@@ -1800,72 +1858,57 @@ define i64 @lshr_mad_i64_negative_4(i64 %arg0) #0 {
18001858
ret i64 %mad
18011859
}
18021860

1803-
define i64 @lshr_mad_i64_sgpr(i64 inreg %arg0) #0 {
1861+
define amdgpu_ps i64 @lshr_mad_i64_sgpr(i64 inreg %arg0) #0 {
18041862
; CI-LABEL: lshr_mad_i64_sgpr:
18051863
; CI: ; %bb.0:
1806-
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1807-
; CI-NEXT: v_mov_b32_e32 v0, s16
1864+
; CI-NEXT: v_mov_b32_e32 v0, s0
18081865
; CI-NEXT: v_mov_b32_e32 v2, 0xffff1c18
1809-
; CI-NEXT: v_mov_b32_e32 v1, s17
1810-
; CI-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s17, v2, v[0:1]
1811-
; CI-NEXT: v_subrev_i32_e32 v1, vcc, s17, v1
1812-
; CI-NEXT: s_setpc_b64 s[30:31]
1866+
; CI-NEXT: v_mov_b32_e32 v1, s1
1867+
; CI-NEXT: v_mad_u64_u32 v[0:1], s[2:3], s1, v2, v[0:1]
1868+
; CI-NEXT: v_subrev_i32_e32 v1, vcc, s1, v1
1869+
; CI-NEXT: v_readfirstlane_b32 s0, v0
1870+
; CI-NEXT: v_readfirstlane_b32 s1, v1
1871+
; CI-NEXT: ; return to shader part epilog
18131872
;
18141873
; SI-LABEL: lshr_mad_i64_sgpr:
18151874
; SI: ; %bb.0:
1816-
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18171875
; SI-NEXT: v_mov_b32_e32 v0, 0xffff1c18
1818-
; SI-NEXT: v_mul_hi_u32 v0, s17, v0
1819-
; SI-NEXT: s_mul_i32 s4, s17, 0xffff1c18
1820-
; SI-NEXT: v_mov_b32_e32 v2, s17
1821-
; SI-NEXT: v_subrev_i32_e32 v1, vcc, s17, v0
1822-
; SI-NEXT: v_mov_b32_e32 v0, s4
1823-
; SI-NEXT: v_add_i32_e32 v0, vcc, s16, v0
1824-
; SI-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
1825-
; SI-NEXT: s_setpc_b64 s[30:31]
1876+
; SI-NEXT: v_mul_hi_u32 v0, s1, v0
1877+
; SI-NEXT: s_mul_i32 s2, s1, 0xffff1c18
1878+
; SI-NEXT: v_readfirstlane_b32 s3, v0
1879+
; SI-NEXT: s_sub_i32 s3, s3, s1
1880+
; SI-NEXT: s_add_u32 s0, s2, s0
1881+
; SI-NEXT: s_addc_u32 s1, s3, s1
1882+
; SI-NEXT: ; return to shader part epilog
18261883
;
18271884
; GFX9-LABEL: lshr_mad_i64_sgpr:
18281885
; GFX9: ; %bb.0:
1829-
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1830-
; GFX9-NEXT: s_mul_hi_u32 s4, s17, 0xffff1c18
1831-
; GFX9-NEXT: s_sub_i32 s4, s4, s17
1832-
; GFX9-NEXT: s_mul_i32 s5, s17, 0xffff1c18
1833-
; GFX9-NEXT: s_add_u32 s5, s5, s16
1834-
; GFX9-NEXT: s_addc_u32 s4, s4, s17
1835-
; GFX9-NEXT: v_mov_b32_e32 v0, s5
1836-
; GFX9-NEXT: v_mov_b32_e32 v1, s4
1837-
; GFX9-NEXT: s_setpc_b64 s[30:31]
1886+
; GFX9-NEXT: s_mul_hi_u32 s2, s1, 0xffff1c18
1887+
; GFX9-NEXT: s_sub_i32 s2, s2, s1
1888+
; GFX9-NEXT: s_mul_i32 s3, s1, 0xffff1c18
1889+
; GFX9-NEXT: s_add_u32 s0, s3, s0
1890+
; GFX9-NEXT: s_addc_u32 s1, s2, s1
1891+
; GFX9-NEXT: ; return to shader part epilog
18381892
;
18391893
; GFX11-LABEL: lshr_mad_i64_sgpr:
18401894
; GFX11: ; %bb.0:
1841-
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18421895
; GFX11-NEXT: s_mul_hi_u32 s2, s1, 0xffff1c18
18431896
; GFX11-NEXT: s_mul_i32 s3, s1, 0xffff1c18
18441897
; GFX11-NEXT: s_sub_i32 s2, s2, s1
18451898
; GFX11-NEXT: s_add_u32 s0, s3, s0
18461899
; GFX11-NEXT: s_addc_u32 s1, s2, s1
1847-
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1848-
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
1849-
; GFX11-NEXT: s_setpc_b64 s[30:31]
1900+
; GFX11-NEXT: ; return to shader part epilog
18501901
;
18511902
; GFX12-LABEL: lshr_mad_i64_sgpr:
18521903
; GFX12: ; %bb.0:
1853-
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1854-
; GFX12-NEXT: s_wait_expcnt 0x0
1855-
; GFX12-NEXT: s_wait_samplecnt 0x0
1856-
; GFX12-NEXT: s_wait_bvhcnt 0x0
1857-
; GFX12-NEXT: s_wait_kmcnt 0x0
18581904
; GFX12-NEXT: s_mov_b32 s4, 0xffff1c18
18591905
; GFX12-NEXT: s_mov_b32 s3, 0
18601906
; GFX12-NEXT: s_mov_b32 s2, s1
18611907
; GFX12-NEXT: s_mov_b32 s5, -1
1862-
; GFX12-NEXT: s_wait_alu 0xfffe
1908+
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
18631909
; GFX12-NEXT: s_mul_u64 s[2:3], s[2:3], s[4:5]
1864-
; GFX12-NEXT: s_wait_alu 0xfffe
18651910
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[2:3], s[0:1]
1866-
; GFX12-NEXT: s_wait_alu 0xfffe
1867-
; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
1868-
; GFX12-NEXT: s_setpc_b64 s[30:31]
1911+
; GFX12-NEXT: ; return to shader part epilog
18691912
%lsh = lshr i64 %arg0, 32
18701913
%mul = mul i64 %lsh, s0xffffffffffff1c18
18711914
%mad = add i64 %mul, %arg0

0 commit comments

Comments
 (0)