; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
23
; (%v << 1) + %a on two i64 arguments.
; The checks pin the whole function body and expect the shift and the add to
; be emitted as one v_lshl_add_u64 with an immediate shift operand of 1.
define i64 @lshl_add_u64_v1v(i64 %v, i64 %a) {
; GCN-LABEL: lshl_add_u64_v1v:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 1, v[2:3]
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %shl = shl i64 %v, 1
  %add = add i64 %shl, %a
  ret i64 %add
}
1014
; (%v << 4) + %a on two i64 arguments.
; The checks pin the whole function body and expect the shift and the add to
; be emitted as one v_lshl_add_u64 with an immediate shift operand of 4.
define i64 @lshl_add_u64_v4v(i64 %v, i64 %a) {
; GCN-LABEL: lshl_add_u64_v4v:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 4, v[2:3]
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %shl = shl i64 %v, 4
  %add = add i64 %shl, %a
  ret i64 %add
}
1825
; (%v << 5) + %a on two i64 arguments.
; The regenerated checks record a single v_lshl_add_u64 whose shift operand
; is the immediate 5 (no separate shift instruction is expected).
define i64 @lshl_add_u64_v5v(i64 %v, i64 %a) {
; GCN-LABEL: lshl_add_u64_v5v:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 5, v[2:3]
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %shl = shl i64 %v, 5
  %add = add i64 %shl, %a
  ret i64 %add
}
2736
; (%v << %s) + %a where the shift amount is itself a function argument.
; The regenerated checks record a single v_lshl_add_u64 that takes the shift
; amount from a register (v2) rather than from an immediate.
define i64 @lshl_add_u64_vvv(i64 %v, i64 %s, i64 %a) {
; GCN-LABEL: lshl_add_u64_vvv:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], v2, v[4:5]
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %shl = shl i64 %v, %s
  %add = add i64 %shl, %a
  ret i64 %add
}
3647
3748define amdgpu_kernel void @lshl_add_u64_s2v (i64 %v ) {
3849; GCN-LABEL: lshl_add_u64_s2v:
39- ; GCN: v_lshl_add_u64 v[{{[0-9:]+}}], s[{{[0-9:]+}}], 2, v[{{[0-9:]+}}]
50+ ; GCN: ; %bb.0:
51+ ; GCN-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
52+ ; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
53+ ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
54+ ; GCN-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 2, v[0:1]
55+ ; GCN-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
56+ ; GCN-NEXT: s_endpgm
4057 %a = load i64 , ptr undef
4158 %shl = shl i64 %v , 2
4259 %add = add i64 %shl , %a
@@ -46,7 +63,13 @@ define amdgpu_kernel void @lshl_add_u64_s2v(i64 %v) {
4663
4764define amdgpu_kernel void @lshl_add_u64_v2s (i64 %a ) {
4865; GCN-LABEL: lshl_add_u64_v2s:
49- ; GCN: v_lshl_add_u64 v[{{[0-9:]+}}], v[{{[0-9:]+}}], 2, s[{{[0-9:]+}}]
66+ ; GCN: ; %bb.0:
67+ ; GCN-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
68+ ; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
69+ ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
70+ ; GCN-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 2, s[0:1]
71+ ; GCN-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
72+ ; GCN-NEXT: s_endpgm
5073 %v = load i64 , ptr undef
5174 %shl = shl i64 %v , 2
5275 %add = add i64 %shl , %a
@@ -56,9 +79,14 @@ define amdgpu_kernel void @lshl_add_u64_v2s(i64 %a) {
5679
5780define amdgpu_kernel void @lshl_add_u64_s2s (i64 %v , i64 %a ) {
5881; GCN-LABEL: lshl_add_u64_s2s:
59- ; GCN: s_lshl_b64
60- ; GCN: s_add_u32
61- ; GCN: s_addc_u32
82+ ; GCN: ; %bb.0:
83+ ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
84+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
85+ ; GCN-NEXT: v_mov_b32_e32 v0, s2
86+ ; GCN-NEXT: v_mov_b32_e32 v1, s3
87+ ; GCN-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 2, v[0:1]
88+ ; GCN-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
89+ ; GCN-NEXT: s_endpgm
6290 %shl = shl i64 %v , 2
6391 %add = add i64 %shl , %a
6492 store i64 %add , ptr undef
@@ -67,14 +95,23 @@ define amdgpu_kernel void @lshl_add_u64_s2s(i64 %v, i64 %a) {
6795
; Plain i64 add of two arguments, with no shift in the IR.
; The checks expect it to be emitted as v_lshl_add_u64 with a shift operand
; of 0.
define i64 @add_u64_vv(i64 %v, i64 %a) {
; GCN-LABEL: add_u64_vv:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %add = add i64 %v, %a
  ret i64 %add
}
74105
75106define amdgpu_kernel void @add_u64_sv (i64 %v ) {
76107; GCN-LABEL: add_u64_sv:
77- ; GCN: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
108+ ; GCN: ; %bb.0:
109+ ; GCN-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
110+ ; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
111+ ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
112+ ; GCN-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
113+ ; GCN-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
114+ ; GCN-NEXT: s_endpgm
78115 %a = load i64 , ptr undef
79116 %add = add i64 %v , %a
80117 store i64 %add , ptr undef
@@ -83,7 +120,13 @@ define amdgpu_kernel void @add_u64_sv(i64 %v) {
83120
84121define amdgpu_kernel void @add_u64_vs (i64 %a ) {
85122; GCN-LABEL: add_u64_vs:
86- ; GCN: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
123+ ; GCN: ; %bb.0:
124+ ; GCN-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
125+ ; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
126+ ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
127+ ; GCN-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1]
128+ ; GCN-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
129+ ; GCN-NEXT: s_endpgm
87130 %v = load i64 , ptr undef
88131 %add = add i64 %v , %a
89132 store i64 %add , ptr undef
@@ -92,16 +135,27 @@ define amdgpu_kernel void @add_u64_vs(i64 %a) {
92135
; i64 add of two kernel arguments; the sum is stored through an undef pointer
; so that the add is not dead.
; The checks expect a scalar s_add_u32 / s_addc_u32 pair, after which the
; result is copied into v[0:1] for the flat store.
define amdgpu_kernel void @add_u64_ss(i64 %v, i64 %a) {
; GCN-LABEL: add_u64_ss:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_add_u32 s0, s0, s2
; GCN-NEXT:    s_addc_u32 s1, s1, s3
; GCN-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
; GCN-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
; GCN-NEXT:    s_endpgm
  %add = add i64 %v, %a
  store i64 %add, ptr undef
  ret void
}
101150
102151define i32 @lshl_add_u64_gep (ptr %p , i64 %a ) {
103152; GCN-LABEL: lshl_add_u64_gep:
104- ; GCN: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
153+ ; GCN: ; %bb.0:
154+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
155+ ; GCN-NEXT: v_lshl_add_u64 v[0:1], v[2:3], 2, v[0:1]
156+ ; GCN-NEXT: flat_load_dword v0, v[0:1]
157+ ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
158+ ; GCN-NEXT: s_setpc_b64 s[30:31]
105159 %gep = getelementptr inbounds i32 , ptr %p , i64 %a
106160 %v = load i32 , ptr %gep
107161 ret i32 %v
0 commit comments