1+ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
12; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
3+ ; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -global-isel=1 < %s | FileCheck -check-prefix=GI %s
24
35define i64 @lshl_add_u64_v1v (i64 %v , i64 %a ) {
46; GCN-LABEL: lshl_add_u64_v1v:
57; GCN: ; %bb.0:
68; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
79; GCN-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 1, v[2:3]
810; GCN-NEXT: s_setpc_b64 s[30:31]
11+ ;
12+ ; GI-LABEL: lshl_add_u64_v1v:
13+ ; GI: ; %bb.0:
14+ ; GI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15+ ; GI-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 1, v[2:3]
16+ ; GI-NEXT: s_setpc_b64 s[30:31]
917 %shl = shl i64 %v , 1
1018 %add = add i64 %shl , %a
1119 ret i64 %add
@@ -17,6 +25,12 @@ define i64 @lshl_add_u64_v4v(i64 %v, i64 %a) {
1725; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1826; GCN-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 4, v[2:3]
1927; GCN-NEXT: s_setpc_b64 s[30:31]
28+ ;
29+ ; GI-LABEL: lshl_add_u64_v4v:
30+ ; GI: ; %bb.0:
31+ ; GI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
32+ ; GI-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 4, v[2:3]
33+ ; GI-NEXT: s_setpc_b64 s[30:31]
2034 %shl = shl i64 %v , 4
2135 %add = add i64 %shl , %a
2236 ret i64 %add
@@ -28,6 +42,13 @@ define i64 @lshl_add_u64_v5v(i64 %v, i64 %a) {
2842; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2943; GCN-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 5, v[2:3]
3044; GCN-NEXT: s_setpc_b64 s[30:31]
45+ ;
46+ ; GI-LABEL: lshl_add_u64_v5v:
47+ ; GI: ; %bb.0:
48+ ; GI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
49+ ; GI-NEXT: v_lshlrev_b64 v[0:1], 5, v[0:1]
50+ ; GI-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
51+ ; GI-NEXT: s_setpc_b64 s[30:31]
3152 %shl = shl i64 %v , 5
3253 %add = add i64 %shl , %a
3354 ret i64 %add
@@ -39,57 +60,67 @@ define i64 @lshl_add_u64_vvv(i64 %v, i64 %s, i64 %a) {
3960; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4061; GCN-NEXT: v_lshl_add_u64 v[0:1], v[0:1], v2, v[4:5]
4162; GCN-NEXT: s_setpc_b64 s[30:31]
63+ ;
64+ ; GI-LABEL: lshl_add_u64_vvv:
65+ ; GI: ; %bb.0:
66+ ; GI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
67+ ; GI-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
68+ ; GI-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[4:5]
69+ ; GI-NEXT: s_setpc_b64 s[30:31]
4270 %shl = shl i64 %v , %s
4371 %add = add i64 %shl , %a
4472 ret i64 %add
4573}
4674
47- define amdgpu_kernel void @lshl_add_u64_s2v (i64 %v ) {
75+ define i64 @lshl_add_u64_s2v (i64 %v , i64 %a ) {
4876; GCN-LABEL: lshl_add_u64_s2v:
4977; GCN: ; %bb.0:
50- ; GCN-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
51- ; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
52- ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
53- ; GCN-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 2, v[0:1]
54- ; GCN-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
55- ; GCN-NEXT: s_endpgm
56- %a = load i64 , ptr undef
78+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
79+ ; GCN-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, v[2:3]
80+ ; GCN-NEXT: s_setpc_b64 s[30:31]
81+ ;
82+ ; GI-LABEL: lshl_add_u64_s2v:
83+ ; GI: ; %bb.0:
84+ ; GI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
85+ ; GI-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, v[2:3]
86+ ; GI-NEXT: s_setpc_b64 s[30:31]
5787 %shl = shl i64 %v , 2
5888 %add = add i64 %shl , %a
59- store i64 %add , ptr undef
60- ret void
89+ ret i64 %add
6190}
6291
63- define amdgpu_kernel void @lshl_add_u64_v2s (i64 %a ) {
92+ define i64 @lshl_add_u64_v2s (i64 %a , i64 %v ) {
6493; GCN-LABEL: lshl_add_u64_v2s:
6594; GCN: ; %bb.0:
66- ; GCN-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
67- ; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
68- ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
69- ; GCN-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
70- ; GCN-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
71- ; GCN-NEXT: s_endpgm
72- %v = load i64 , ptr undef
95+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
96+ ; GCN-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
97+ ; GCN-NEXT: s_setpc_b64 s[30:31]
98+ ;
99+ ; GI-LABEL: lshl_add_u64_v2s:
100+ ; GI: ; %bb.0:
101+ ; GI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
102+ ; GI-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
103+ ; GI-NEXT: s_setpc_b64 s[30:31]
73104 %shl = shl i64 %v , 2
74105 %add = add i64 %shl , %a
75- store i64 %add , ptr undef
76- ret void
106+ ret i64 %add
77107}
78108
79- define amdgpu_kernel void @lshl_add_u64_s2s (i64 %v , i64 %a ) {
109+ define i64 @lshl_add_u64_s2s (i64 %v , i64 %a ) {
80110; GCN-LABEL: lshl_add_u64_s2s:
81111; GCN: ; %bb.0:
82- ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
83- ; GCN-NEXT: s_waitcnt lgkmcnt(0)
84- ; GCN-NEXT: v_mov_b32_e32 v0, s2
85- ; GCN-NEXT: v_mov_b32_e32 v1, s3
86- ; GCN-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 2, v[0:1]
87- ; GCN-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
88- ; GCN-NEXT: s_endpgm
112+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
113+ ; GCN-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, v[2:3]
114+ ; GCN-NEXT: s_setpc_b64 s[30:31]
115+ ;
116+ ; GI-LABEL: lshl_add_u64_s2s:
117+ ; GI: ; %bb.0:
118+ ; GI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
119+ ; GI-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, v[2:3]
120+ ; GI-NEXT: s_setpc_b64 s[30:31]
89121 %shl = shl i64 %v , 2
90122 %add = add i64 %shl , %a
91- store i64 %add , ptr undef
92- ret void
123+ ret i64 %add
93124}
94125
95126define i64 @add_u64_vv (i64 %v , i64 %a ) {
@@ -98,53 +129,62 @@ define i64 @add_u64_vv(i64 %v, i64 %a) {
98129; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
99130; GCN-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
100131; GCN-NEXT: s_setpc_b64 s[30:31]
132+ ;
133+ ; GI-LABEL: add_u64_vv:
134+ ; GI: ; %bb.0:
135+ ; GI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
136+ ; GI-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
137+ ; GI-NEXT: s_setpc_b64 s[30:31]
101138 %add = add i64 %v , %a
102139 ret i64 %add
103140}
104141
105- define amdgpu_kernel void @add_u64_sv (i64 %v ) {
142+ define i64 @add_u64_sv (i64 %v , i64 %a ) {
106143; GCN-LABEL: add_u64_sv:
107144; GCN: ; %bb.0:
108- ; GCN-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
109- ; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
110- ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
111- ; GCN-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
112- ; GCN-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
113- ; GCN-NEXT: s_endpgm
114- %a = load i64 , ptr undef
145+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
146+ ; GCN-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
147+ ; GCN-NEXT: s_setpc_b64 s[30:31]
148+ ;
149+ ; GI-LABEL: add_u64_sv:
150+ ; GI: ; %bb.0:
151+ ; GI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
152+ ; GI-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
153+ ; GI-NEXT: s_setpc_b64 s[30:31]
115154 %add = add i64 %v , %a
116- store i64 %add , ptr undef
117- ret void
155+ ret i64 %add
118156}
119157
120- define amdgpu_kernel void @add_u64_vs (i64 %a ) {
158+ define i64 @add_u64_vs (i64 %a , i64 %v ) {
121159; GCN-LABEL: add_u64_vs:
122160; GCN: ; %bb.0:
123- ; GCN-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
124- ; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
125- ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
126- ; GCN-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
127- ; GCN-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
128- ; GCN-NEXT: s_endpgm
129- %v = load i64 , ptr undef
161+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
162+ ; GCN-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 0, v[0:1]
163+ ; GCN-NEXT: s_setpc_b64 s[30:31]
164+ ;
165+ ; GI-LABEL: add_u64_vs:
166+ ; GI: ; %bb.0:
167+ ; GI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
168+ ; GI-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 0, v[0:1]
169+ ; GI-NEXT: s_setpc_b64 s[30:31]
130170 %add = add i64 %v , %a
131- store i64 %add , ptr undef
132- ret void
171+ ret i64 %add
133172}
134173
135- define amdgpu_kernel void @add_u64_ss (i64 %v , i64 %a ) {
174+ define i64 @add_u64_ss (i64 %v , i64 %a ) {
136175; GCN-LABEL: add_u64_ss:
137176; GCN: ; %bb.0:
138- ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
139- ; GCN-NEXT: s_waitcnt lgkmcnt(0)
140- ; GCN-NEXT: s_add_u32 s0, s0, s2
141- ; GCN-NEXT: s_addc_u32 s1, s1, s3
142- ; GCN-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
143- ; GCN-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
144- ; GCN-NEXT: s_endpgm
177+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
178+ ; GCN-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
179+ ; GCN-NEXT: s_setpc_b64 s[30:31]
180+ ;
181+ ; GI-LABEL: add_u64_ss:
182+ ; GI: ; %bb.0:
183+ ; GI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
184+ ; GI-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
185+ ; GI-NEXT: s_setpc_b64 s[30:31]
145186 %add = add i64 %v , %a
146- store i64 %add , ptr undef
147- ret void
187+ ret i64 %add
148188}
149189
150190define i32 @lshl_add_u64_gep (ptr %p , i64 %a ) {
@@ -155,6 +195,14 @@ define i32 @lshl_add_u64_gep(ptr %p, i64 %a) {
155195; GCN-NEXT: flat_load_dword v0, v[0:1]
156196; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
157197; GCN-NEXT: s_setpc_b64 s[30:31]
198+ ;
199+ ; GI-LABEL: lshl_add_u64_gep:
200+ ; GI: ; %bb.0:
201+ ; GI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
202+ ; GI-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
203+ ; GI-NEXT: flat_load_dword v0, v[0:1]
204+ ; GI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
205+ ; GI-NEXT: s_setpc_b64 s[30:31]
158206 %gep = getelementptr inbounds i32 , ptr %p , i64 %a
159207 %v = load i32 , ptr %gep
160208 ret i32 %v
0 commit comments