@@ -1574,8 +1574,8 @@ define i65 @v_lshr_i65(i65 %value, i65 %amount) {
15741574; GFX6-LABEL: v_lshr_i65:
15751575; GFX6: ; %bb.0:
15761576; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1577- ; GFX6-NEXT: v_mov_b32_e32 v5, 0
15781577; GFX6-NEXT: v_and_b32_e32 v4, 1, v2
1578+ ; GFX6-NEXT: v_mov_b32_e32 v5, 0
15791579; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 64, v3
15801580; GFX6-NEXT: v_add_i32_e32 v2, vcc, 0xffffffc0, v3
15811581; GFX6-NEXT: v_lshr_b64 v[6:7], v[0:1], v3
@@ -1596,8 +1596,8 @@ define i65 @v_lshr_i65(i65 %value, i65 %amount) {
15961596; GFX8-LABEL: v_lshr_i65:
15971597; GFX8: ; %bb.0:
15981598; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1599- ; GFX8-NEXT: v_mov_b32_e32 v5, 0
16001599; GFX8-NEXT: v_and_b32_e32 v4, 1, v2
1600+ ; GFX8-NEXT: v_mov_b32_e32 v5, 0
16011601; GFX8-NEXT: v_sub_u32_e32 v8, vcc, 64, v3
16021602; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0xffffffc0, v3
16031603; GFX8-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
@@ -1618,8 +1618,8 @@ define i65 @v_lshr_i65(i65 %value, i65 %amount) {
16181618; GFX9-LABEL: v_lshr_i65:
16191619; GFX9: ; %bb.0:
16201620; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1621- ; GFX9-NEXT: v_mov_b32_e32 v5, 0
16221621; GFX9-NEXT: v_and_b32_e32 v4, 1, v2
1622+ ; GFX9-NEXT: v_mov_b32_e32 v5, 0
16231623; GFX9-NEXT: v_sub_u32_e32 v8, 64, v3
16241624; GFX9-NEXT: v_add_u32_e32 v2, 0xffffffc0, v3
16251625; GFX9-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
@@ -1688,8 +1688,8 @@ define i65 @v_lshr_i65_33(i65 %value) {
16881688; GFX6: ; %bb.0:
16891689; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16901690; GFX6-NEXT: v_mov_b32_e32 v3, v1
1691- ; GFX6-NEXT: v_mov_b32_e32 v1, 0
16921691; GFX6-NEXT: v_and_b32_e32 v0, 1, v2
1692+ ; GFX6-NEXT: v_mov_b32_e32 v1, 0
16931693; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 31
16941694; GFX6-NEXT: v_lshrrev_b32_e32 v2, 1, v3
16951695; GFX6-NEXT: v_or_b32_e32 v0, v2, v0
@@ -1700,8 +1700,8 @@ define i65 @v_lshr_i65_33(i65 %value) {
17001700; GFX8: ; %bb.0:
17011701; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17021702; GFX8-NEXT: v_mov_b32_e32 v3, v1
1703- ; GFX8-NEXT: v_mov_b32_e32 v1, 0
17041703; GFX8-NEXT: v_and_b32_e32 v0, 1, v2
1704+ ; GFX8-NEXT: v_mov_b32_e32 v1, 0
17051705; GFX8-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
17061706; GFX8-NEXT: v_lshrrev_b32_e32 v2, 1, v3
17071707; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
@@ -1712,8 +1712,8 @@ define i65 @v_lshr_i65_33(i65 %value) {
17121712; GFX9: ; %bb.0:
17131713; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17141714; GFX9-NEXT: v_mov_b32_e32 v3, v1
1715- ; GFX9-NEXT: v_mov_b32_e32 v1, 0
17161715; GFX9-NEXT: v_and_b32_e32 v0, 1, v2
1716+ ; GFX9-NEXT: v_mov_b32_e32 v1, 0
17171717; GFX9-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
17181718; GFX9-NEXT: v_lshrrev_b32_e32 v2, 1, v3
17191719; GFX9-NEXT: v_or_b32_e32 v0, v2, v0
@@ -1749,22 +1749,20 @@ define i65 @v_lshr_i65_33(i65 %value) {
17491749define amdgpu_ps i65 @s_lshr_i65 (i65 inreg %value , i65 inreg %amount ) {
17501750; GCN-LABEL: s_lshr_i65:
17511751; GCN: ; %bb.0:
1752- ; GCN-NEXT: s_mov_b32 s4, s3
1753- ; GCN-NEXT: s_mov_b32 s3, 0
1754- ; GCN-NEXT: s_and_b64 s[2:3], s[2:3], 1
1755- ; GCN-NEXT: s_sub_i32 s10, s4, 64
1756- ; GCN-NEXT: s_sub_i32 s8, 64, s4
1757- ; GCN-NEXT: s_cmp_lt_u32 s4, 64
1752+ ; GCN-NEXT: s_and_b64 s[4:5], s[2:3], 1
1753+ ; GCN-NEXT: s_sub_i32 s10, s3, 64
1754+ ; GCN-NEXT: s_sub_i32 s8, 64, s3
1755+ ; GCN-NEXT: s_cmp_lt_u32 s3, 64
17581756; GCN-NEXT: s_cselect_b32 s11, 1, 0
1759- ; GCN-NEXT: s_cmp_eq_u32 s4 , 0
1757+ ; GCN-NEXT: s_cmp_eq_u32 s3 , 0
17601758; GCN-NEXT: s_cselect_b32 s12, 1, 0
1761- ; GCN-NEXT: s_lshr_b64 s[6:7], s[2:3 ], s4
1762- ; GCN-NEXT: s_lshr_b64 s[4:5 ], s[0:1], s4
1763- ; GCN-NEXT: s_lshl_b64 s[8:9], s[2:3 ], s8
1764- ; GCN-NEXT: s_or_b64 s[4:5 ], s[4:5 ], s[8:9]
1765- ; GCN-NEXT: s_lshr_b64 s[2:3 ], s[2:3 ], s10
1759+ ; GCN-NEXT: s_lshr_b64 s[6:7], s[4:5 ], s3
1760+ ; GCN-NEXT: s_lshr_b64 s[2:3 ], s[0:1], s3
1761+ ; GCN-NEXT: s_lshl_b64 s[8:9], s[4:5 ], s8
1762+ ; GCN-NEXT: s_or_b64 s[2:3 ], s[2:3 ], s[8:9]
1763+ ; GCN-NEXT: s_lshr_b64 s[4:5 ], s[4:5 ], s10
17661764; GCN-NEXT: s_cmp_lg_u32 s11, 0
1767- ; GCN-NEXT: s_cselect_b64 s[2:3], s[4:5 ], s[2:3 ]
1765+ ; GCN-NEXT: s_cselect_b64 s[2:3], s[2:3 ], s[4:5 ]
17681766; GCN-NEXT: s_cmp_lg_u32 s12, 0
17691767; GCN-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
17701768; GCN-NEXT: s_cmp_lg_u32 s11, 0
@@ -1773,26 +1771,24 @@ define amdgpu_ps i65 @s_lshr_i65(i65 inreg %value, i65 inreg %amount) {
17731771;
17741772; GFX10PLUS-LABEL: s_lshr_i65:
17751773; GFX10PLUS: ; %bb.0:
1776- ; GFX10PLUS-NEXT: s_mov_b32 s4, s3
1777- ; GFX10PLUS-NEXT: s_mov_b32 s3, 0
1778- ; GFX10PLUS-NEXT: s_sub_i32 s10, s4, 64
1779- ; GFX10PLUS-NEXT: s_and_b64 s[2:3], s[2:3], 1
1780- ; GFX10PLUS-NEXT: s_sub_i32 s5, 64, s4
1781- ; GFX10PLUS-NEXT: s_cmp_lt_u32 s4, 64
1774+ ; GFX10PLUS-NEXT: s_and_b64 s[4:5], s[2:3], 1
1775+ ; GFX10PLUS-NEXT: s_sub_i32 s10, s3, 64
1776+ ; GFX10PLUS-NEXT: s_sub_i32 s2, 64, s3
1777+ ; GFX10PLUS-NEXT: s_cmp_lt_u32 s3, 64
17821778; GFX10PLUS-NEXT: s_cselect_b32 s11, 1, 0
1783- ; GFX10PLUS-NEXT: s_cmp_eq_u32 s4 , 0
1779+ ; GFX10PLUS-NEXT: s_cmp_eq_u32 s3 , 0
17841780; GFX10PLUS-NEXT: s_cselect_b32 s12, 1, 0
1785- ; GFX10PLUS-NEXT: s_lshr_b64 s[6:7], s[0:1], s4
1786- ; GFX10PLUS-NEXT: s_lshl_b64 s[8:9], s[2:3 ], s5
1787- ; GFX10PLUS-NEXT: s_lshr_b64 s[4:5 ], s[2:3 ], s4
1781+ ; GFX10PLUS-NEXT: s_lshr_b64 s[6:7], s[0:1], s3
1782+ ; GFX10PLUS-NEXT: s_lshl_b64 s[8:9], s[4:5 ], s2
1783+ ; GFX10PLUS-NEXT: s_lshr_b64 s[2:3 ], s[4:5 ], s3
17881784; GFX10PLUS-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9]
1789- ; GFX10PLUS-NEXT: s_lshr_b64 s[2:3 ], s[2:3 ], s10
1785+ ; GFX10PLUS-NEXT: s_lshr_b64 s[4:5 ], s[4:5 ], s10
17901786; GFX10PLUS-NEXT: s_cmp_lg_u32 s11, 0
1791- ; GFX10PLUS-NEXT: s_cselect_b64 s[2:3 ], s[6:7], s[2:3 ]
1787+ ; GFX10PLUS-NEXT: s_cselect_b64 s[4:5 ], s[6:7], s[4:5 ]
17921788; GFX10PLUS-NEXT: s_cmp_lg_u32 s12, 0
1793- ; GFX10PLUS-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3 ]
1789+ ; GFX10PLUS-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5 ]
17941790; GFX10PLUS-NEXT: s_cmp_lg_u32 s11, 0
1795- ; GFX10PLUS-NEXT: s_cselect_b32 s2, s4 , 0
1791+ ; GFX10PLUS-NEXT: s_cselect_b32 s2, s2 , 0
17961792; GFX10PLUS-NEXT: ; return to shader part epilog
17971793 %result = lshr i65 %value , %amount
17981794 ret i65 %result
@@ -1801,22 +1797,22 @@ define amdgpu_ps i65 @s_lshr_i65(i65 inreg %value, i65 inreg %amount) {
18011797define amdgpu_ps i65 @s_lshr_i65_33 (i65 inreg %value ) {
18021798; GCN-LABEL: s_lshr_i65_33:
18031799; GCN: ; %bb.0:
1804- ; GCN-NEXT: s_mov_b32 s3, 0
1805- ; GCN-NEXT: s_and_b64 s[4:5], s[2:3] , 1
1806- ; GCN-NEXT: s_lshr_b32 s2, s1, 1
1807- ; GCN-NEXT: s_lshl_b64 s[0:1 ], s[4:5 ], 31
1808- ; GCN-NEXT: s_or_b64 s[0:1], s[2:3 ], s[0:1 ]
1809- ; GCN-NEXT: s_lshr_b32 s2, s5 , 1
1800+ ; GCN-NEXT: s_and_b64 s[2:3], s[2:3], 1
1801+ ; GCN-NEXT: s_lshr_b32 s0, s1 , 1
1802+ ; GCN-NEXT: s_mov_b32 s1, 0
1803+ ; GCN-NEXT: s_lshl_b64 s[4:5 ], s[2:3 ], 31
1804+ ; GCN-NEXT: s_or_b64 s[0:1], s[0:1 ], s[4:5 ]
1805+ ; GCN-NEXT: s_lshr_b32 s2, s3 , 1
18101806; GCN-NEXT: ; return to shader part epilog
18111807;
18121808; GFX10PLUS-LABEL: s_lshr_i65_33:
18131809; GFX10PLUS: ; %bb.0:
1814- ; GFX10PLUS-NEXT: s_mov_b32 s3, 0
1815- ; GFX10PLUS-NEXT: s_and_b64 s[4:5], s[2:3] , 1
1816- ; GFX10PLUS-NEXT: s_lshr_b32 s2, s1, 1
1817- ; GFX10PLUS-NEXT: s_lshl_b64 s[0:1 ], s[4:5 ], 31
1818- ; GFX10PLUS-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
1819- ; GFX10PLUS-NEXT: s_lshr_b32 s2, s5, 1
1810+ ; GFX10PLUS-NEXT: s_and_b64 s[2:3], s[2:3], 1
1811+ ; GFX10PLUS-NEXT: s_lshr_b32 s0, s1 , 1
1812+ ; GFX10PLUS-NEXT: s_mov_b32 s1, 0
1813+ ; GFX10PLUS-NEXT: s_lshl_b64 s[4:5 ], s[2:3 ], 31
1814+ ; GFX10PLUS-NEXT: s_lshr_b32 s2, s3, 1
1815+ ; GFX10PLUS-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
18201816; GFX10PLUS-NEXT: ; return to shader part epilog
18211817 %result = lshr i65 %value , 33
18221818 ret i65 %result
0 commit comments