22
; Computes (%v << 1) + %a on 64-bit values in a callable function.
; The checks expect the shift and the add to be emitted as a single
; v_lshl_add_u64 that carries the shift amount 1 as its immediate operand.
define i64 @lshl_add_u64_v1v(i64 %v, i64 %a) {
; GCN-LABEL: lshl_add_u64_v1v:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 1, v[2:3]
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %shl = shl i64 %v, 1
  %add = add i64 %shl, %a
  ret i64 %add
}
1013
; Computes (%v << 4) + %a on 64-bit values in a callable function.
; The checks expect one v_lshl_add_u64 with the shift amount 4 encoded
; as the immediate operand, and no separate shift instruction.
define i64 @lshl_add_u64_v4v(i64 %v, i64 %a) {
; GCN-LABEL: lshl_add_u64_v4v:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 4, v[2:3]
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %shl = shl i64 %v, 4
  %add = add i64 %shl, %a
  ret i64 %add
}
1824
; Computes (%v << 5) + %a on 64-bit values in a callable function.
; The expected output is a single v_lshl_add_u64 with shift amount 5;
; the exact-sequence checks below leave no room for a separate
; v_lshlrev_b64 ahead of the add.
define i64 @lshl_add_u64_v5v(i64 %v, i64 %a) {
; GCN-LABEL: lshl_add_u64_v5v:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 5, v[2:3]
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %shl = shl i64 %v, 5
  %add = add i64 %shl, %a
  ret i64 %add
}
2735
; Computes (%v << %s) + %a where the shift amount is a runtime value.
; The expected output is a single v_lshl_add_u64 whose shift operand is
; the register v2 and whose addend is v[4:5]; the exact-sequence checks
; leave no room for a separate v_lshlrev_b64.
define i64 @lshl_add_u64_vvv(i64 %v, i64 %s, i64 %a) {
; GCN-LABEL: lshl_add_u64_vvv:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], v2, v[4:5]
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %shl = shl i64 %v, %s
  %add = add i64 %shl, %a
  ret i64 %add
}
3646
3747define amdgpu_kernel void @lshl_add_u64_s2v (i64 %v ) {
3848; GCN-LABEL: lshl_add_u64_s2v:
39- ; GCN: v_lshl_add_u64 v[{{[0-9:]+}}], s[{{[0-9:]+}}], 2, v[{{[0-9:]+}}]
49+ ; GCN: ; %bb.0:
50+ ; GCN-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
51+ ; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
52+ ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
53+ ; GCN-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 2, v[0:1]
54+ ; GCN-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
55+ ; GCN-NEXT: s_endpgm
4056 %a = load i64 , ptr undef
4157 %shl = shl i64 %v , 2
4258 %add = add i64 %shl , %a
@@ -46,7 +62,13 @@ define amdgpu_kernel void @lshl_add_u64_s2v(i64 %v) {
4662
4763define amdgpu_kernel void @lshl_add_u64_v2s (i64 %a ) {
4864; GCN-LABEL: lshl_add_u64_v2s:
49- ; GCN: v_lshl_add_u64 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 2, s[{{[0-9:]+}}]
65+ ; GCN: ; %bb.0:
66+ ; GCN-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
67+ ; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
68+ ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
69+ ; GCN-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
70+ ; GCN-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
71+ ; GCN-NEXT: s_endpgm
5072 %v = load i64 , ptr undef
5173 %shl = shl i64 %v , 2
5274 %add = add i64 %shl , %a
@@ -56,9 +78,14 @@ define amdgpu_kernel void @lshl_add_u64_v2s(i64 %a) {
5678
5779define amdgpu_kernel void @lshl_add_u64_s2s (i64 %v , i64 %a ) {
5880; GCN-LABEL: lshl_add_u64_s2s:
59- ; GCN: s_lshl_b64
60- ; GCN: s_add_u32
61- ; GCN: s_addc_u32
81+ ; GCN: ; %bb.0:
82+ ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
83+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
84+ ; GCN-NEXT: v_mov_b32_e32 v0, s2
85+ ; GCN-NEXT: v_mov_b32_e32 v1, s3
86+ ; GCN-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 2, v[0:1]
87+ ; GCN-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
88+ ; GCN-NEXT: s_endpgm
6289 %shl = shl i64 %v , 2
6390 %add = add i64 %shl , %a
6491 store i64 %add , ptr undef
@@ -67,14 +94,23 @@ define amdgpu_kernel void @lshl_add_u64_s2s(i64 %v, i64 %a) {
6794
; Plain 64-bit add with no shift, in a callable function.
; The checks expect it to be emitted as v_lshl_add_u64 with a shift
; amount of 0.
define i64 @add_u64_vv(i64 %v, i64 %a) {
; GCN-LABEL: add_u64_vv:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %add = add i64 %v, %a
  ret i64 %add
}
74104
75105define amdgpu_kernel void @add_u64_sv (i64 %v ) {
76106; GCN-LABEL: add_u64_sv:
77- ; GCN: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
107+ ; GCN: ; %bb.0:
108+ ; GCN-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
109+ ; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
110+ ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
111+ ; GCN-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
112+ ; GCN-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
113+ ; GCN-NEXT: s_endpgm
78114 %a = load i64 , ptr undef
79115 %add = add i64 %v , %a
80116 store i64 %add , ptr undef
@@ -83,7 +119,13 @@ define amdgpu_kernel void @add_u64_sv(i64 %v) {
83119
84120define amdgpu_kernel void @add_u64_vs (i64 %a ) {
85121; GCN-LABEL: add_u64_vs:
86- ; GCN: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
122+ ; GCN: ; %bb.0:
123+ ; GCN-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
124+ ; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
125+ ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
126+ ; GCN-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
127+ ; GCN-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
128+ ; GCN-NEXT: s_endpgm
87129 %v = load i64 , ptr undef
88130 %add = add i64 %v , %a
89131 store i64 %add , ptr undef
@@ -92,16 +134,27 @@ define amdgpu_kernel void @add_u64_vs(i64 %a) {
92134
; Kernel adding two 64-bit kernel arguments and storing the sum.
; Both operands are fetched with one s_load_dwordx4, so the checks expect
; the add to stay on the scalar unit (s_add_u32 + s_addc_u32) and the
; result to be copied to v[0:1] only for the flat store.
; The store address is undef; only the instruction sequence matters here.
define amdgpu_kernel void @add_u64_ss(i64 %v, i64 %a) {
; GCN-LABEL: add_u64_ss:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_add_u32 s0, s0, s2
; GCN-NEXT:    s_addc_u32 s1, s1, s3
; GCN-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
; GCN-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
; GCN-NEXT:    s_endpgm
  %add = add i64 %v, %a
  store i64 %add, ptr undef
  ret void
}
101149
102150define i32 @lshl_add_u64_gep (ptr %p , i64 %a ) {
103151; GCN-LABEL: lshl_add_u64_gep:
104- ; GCN: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
152+ ; GCN: ; %bb.0:
153+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
154+ ; GCN-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
155+ ; GCN-NEXT: flat_load_dword v0, v[0:1]
156+ ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
157+ ; GCN-NEXT: s_setpc_b64 s[30:31]
105158 %gep = getelementptr inbounds i32 , ptr %p , i64 %a
106159 %v = load i32 , ptr %gep
107160 ret i32 %v
0 commit comments