Skip to content

Commit 72ae938

Browse files
committed
Fully use inreg to generate scalar shl
Signed-off-by: John Lu <[email protected]>
1 parent f936383 commit 72ae938

File tree

1 file changed: 75 additions (+75) and 48 deletions (-48).

1 file changed

+75
-48
lines changed

llvm/test/CodeGen/AMDGPU/shl64_reduce.ll

Lines changed: 75 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -147,62 +147,80 @@ define <4 x i64> @shl_v4_or16(<4 x i64> %arg0, <4 x i64> %shift_amt) {
147147
ret <4 x i64> %shl
148148
}
149149

150-
; test inreg
150+
; test SGPR
151151

152-
define i64 @shl_or16_inreg(i64 %arg0, i64 inreg %shift_amt) {
153-
; CHECK-LABEL: shl_or16_inreg:
152+
define i64 @shl_or16_sgpr(i64 inreg %arg0, i64 inreg %shift_amt) {
153+
; CHECK-LABEL: shl_or16_sgpr:
154154
; CHECK: ; %bb.0:
155155
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
156-
; CHECK-NEXT: s_or_b32 s4, s16, 16
157-
; CHECK-NEXT: v_lshlrev_b64 v[0:1], s4, v[0:1]
156+
; CHECK-NEXT: s_or_b32 s4, s18, 16
157+
; CHECK-NEXT: s_lshl_b64 s[4:5], s[16:17], s4
158+
; CHECK-NEXT: v_mov_b32_e32 v0, s4
159+
; CHECK-NEXT: v_mov_b32_e32 v1, s5
158160
; CHECK-NEXT: s_setpc_b64 s[30:31]
159161
%or = or i64 %shift_amt, 16
160162
%shl = shl i64 %arg0, %or
161163
ret i64 %shl
162164
}
163165

164-
define <2 x i64> @shl_v2_or16_inreg(<2 x i64> %arg0, <2 x i64> inreg %shift_amt) {
165-
; CHECK-LABEL: shl_v2_or16_inreg:
166+
define <2 x i64> @shl_v2_or16_sgpr(<2 x i64> inreg %arg0, <2 x i64> inreg %shift_amt) {
167+
; CHECK-LABEL: shl_v2_or16_sgpr:
166168
; CHECK: ; %bb.0:
167169
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
168-
; CHECK-NEXT: s_or_b32 s4, s18, 16
169-
; CHECK-NEXT: s_or_b32 s5, s16, 16
170-
; CHECK-NEXT: v_lshlrev_b64 v[0:1], s5, v[0:1]
171-
; CHECK-NEXT: v_lshlrev_b64 v[2:3], s4, v[2:3]
170+
; CHECK-NEXT: s_or_b32 s6, s22, 16
171+
; CHECK-NEXT: s_or_b32 s4, s20, 16
172+
; CHECK-NEXT: s_lshl_b64 s[4:5], s[16:17], s4
173+
; CHECK-NEXT: s_lshl_b64 s[6:7], s[18:19], s6
174+
; CHECK-NEXT: v_mov_b32_e32 v0, s4
175+
; CHECK-NEXT: v_mov_b32_e32 v1, s5
176+
; CHECK-NEXT: v_mov_b32_e32 v2, s6
177+
; CHECK-NEXT: v_mov_b32_e32 v3, s7
172178
; CHECK-NEXT: s_setpc_b64 s[30:31]
173179
%or = or <2 x i64> %shift_amt, splat (i64 16)
174180
%shl = shl <2 x i64> %arg0, %or
175181
ret <2 x i64> %shl
176182
}
177183

178-
define <3 x i64> @shl_v3_or16_inreg(<3 x i64> %arg0, <3 x i64> inreg %shift_amt) {
179-
; CHECK-LABEL: shl_v3_or16_inreg:
184+
define <3 x i64> @shl_v3_or16_sgpr(<3 x i64> inreg %arg0, <3 x i64> inreg %shift_amt) {
185+
; CHECK-LABEL: shl_v3_or16_sgpr:
180186
; CHECK: ; %bb.0:
181187
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
182-
; CHECK-NEXT: s_or_b32 s4, s20, 16
183-
; CHECK-NEXT: s_or_b32 s5, s18, 16
184-
; CHECK-NEXT: s_or_b32 s6, s16, 16
185-
; CHECK-NEXT: v_lshlrev_b64 v[0:1], s6, v[0:1]
186-
; CHECK-NEXT: v_lshlrev_b64 v[2:3], s5, v[2:3]
187-
; CHECK-NEXT: v_lshlrev_b64 v[4:5], s4, v[4:5]
188+
; CHECK-NEXT: s_or_b32 s8, s26, 16
189+
; CHECK-NEXT: s_or_b32 s6, s24, 16
190+
; CHECK-NEXT: s_or_b32 s4, s22, 16
191+
; CHECK-NEXT: s_lshl_b64 s[4:5], s[16:17], s4
192+
; CHECK-NEXT: s_lshl_b64 s[6:7], s[18:19], s6
193+
; CHECK-NEXT: s_lshl_b64 s[8:9], s[20:21], s8
194+
; CHECK-NEXT: v_mov_b32_e32 v0, s4
195+
; CHECK-NEXT: v_mov_b32_e32 v1, s5
196+
; CHECK-NEXT: v_mov_b32_e32 v2, s6
197+
; CHECK-NEXT: v_mov_b32_e32 v3, s7
198+
; CHECK-NEXT: v_mov_b32_e32 v4, s8
199+
; CHECK-NEXT: v_mov_b32_e32 v5, s9
188200
; CHECK-NEXT: s_setpc_b64 s[30:31]
189201
%or = or <3 x i64> %shift_amt, splat (i64 16)
190202
%shl = shl <3 x i64> %arg0, %or
191203
ret <3 x i64> %shl
192204
}
193205

194-
define <4 x i64> @shl_v4_or16_inreg(<4 x i64> %arg0, <4 x i64> inreg %shift_amt) {
195-
; CHECK-LABEL: shl_v4_or16_inreg:
206+
define <4 x i64> @shl_v4_or16_sgpr(<4 x i64> inreg %arg0, <4 x i64> inreg %shift_amt) {
207+
; CHECK-LABEL: shl_v4_or16_sgpr:
196208
; CHECK: ; %bb.0:
197209
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
198-
; CHECK-NEXT: s_or_b32 s4, s22, 16
199-
; CHECK-NEXT: s_or_b32 s5, s20, 16
200-
; CHECK-NEXT: s_or_b32 s6, s18, 16
201-
; CHECK-NEXT: s_or_b32 s7, s16, 16
202-
; CHECK-NEXT: v_lshlrev_b64 v[0:1], s7, v[0:1]
203-
; CHECK-NEXT: v_lshlrev_b64 v[2:3], s6, v[2:3]
204-
; CHECK-NEXT: v_lshlrev_b64 v[4:5], s5, v[4:5]
205-
; CHECK-NEXT: v_lshlrev_b64 v[6:7], s4, v[6:7]
210+
; CHECK-NEXT: v_or_b32_e32 v0, 16, v0
211+
; CHECK-NEXT: s_or_b32 s8, s28, 16
212+
; CHECK-NEXT: s_or_b32 s6, s26, 16
213+
; CHECK-NEXT: s_or_b32 s4, s24, 16
214+
; CHECK-NEXT: s_lshl_b64 s[4:5], s[16:17], s4
215+
; CHECK-NEXT: s_lshl_b64 s[6:7], s[18:19], s6
216+
; CHECK-NEXT: s_lshl_b64 s[8:9], s[20:21], s8
217+
; CHECK-NEXT: v_lshlrev_b64 v[6:7], v0, s[22:23]
218+
; CHECK-NEXT: v_mov_b32_e32 v0, s4
219+
; CHECK-NEXT: v_mov_b32_e32 v1, s5
220+
; CHECK-NEXT: v_mov_b32_e32 v2, s6
221+
; CHECK-NEXT: v_mov_b32_e32 v3, s7
222+
; CHECK-NEXT: v_mov_b32_e32 v4, s8
223+
; CHECK-NEXT: v_mov_b32_e32 v5, s9
206224
; CHECK-NEXT: s_setpc_b64 s[30:31]
207225
%or = or <4 x i64> %shift_amt, splat (i64 16)
208226
%shl = shl <4 x i64> %arg0, %or
@@ -276,61 +294,70 @@ define <4 x i64> @shl_v4_or32(<4 x i64> %arg0, <4 x i64> %shift_amt) {
276294
ret <4 x i64> %shl
277295
}
278296

279-
; test inreg
297+
; test SGPR
280298

281-
define i64 @shl_or32_inreg(i64 %arg0, i64 inreg %shift_amt) {
282-
; CHECK-LABEL: shl_or32_inreg:
299+
define i64 @shl_or32_sgpr(i64 inreg %arg0, i64 inreg %shift_amt) {
300+
; CHECK-LABEL: shl_or32_sgpr:
283301
; CHECK: ; %bb.0:
284302
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
285-
; CHECK-NEXT: v_lshlrev_b32_e32 v1, s16, v0
303+
; CHECK-NEXT: s_lshl_b32 s4, s16, s18
286304
; CHECK-NEXT: v_mov_b32_e32 v0, 0
305+
; CHECK-NEXT: v_mov_b32_e32 v1, s4
287306
; CHECK-NEXT: s_setpc_b64 s[30:31]
288307
%or = or i64 %shift_amt, 32
289308
%shl = shl i64 %arg0, %or
290309
ret i64 %shl
291310
}
292311

293-
define <2 x i64> @shl_v2_or32_inreg(<2 x i64> %arg0, <2 x i64> inreg %shift_amt) {
294-
; CHECK-LABEL: shl_v2_or32_inreg:
312+
define <2 x i64> @shl_v2_or32_sgpr(<2 x i64> inreg %arg0, <2 x i64> inreg %shift_amt) {
313+
; CHECK-LABEL: shl_v2_or32_sgpr:
295314
; CHECK: ; %bb.0:
296315
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
297-
; CHECK-NEXT: v_lshlrev_b32_e32 v1, s16, v0
298-
; CHECK-NEXT: v_lshlrev_b32_e32 v3, s18, v2
316+
; CHECK-NEXT: s_lshl_b32 s4, s16, s20
317+
; CHECK-NEXT: s_lshl_b32 s5, s18, s22
299318
; CHECK-NEXT: v_mov_b32_e32 v0, 0
319+
; CHECK-NEXT: v_mov_b32_e32 v1, s4
300320
; CHECK-NEXT: v_mov_b32_e32 v2, 0
321+
; CHECK-NEXT: v_mov_b32_e32 v3, s5
301322
; CHECK-NEXT: s_setpc_b64 s[30:31]
302323
%or = or <2 x i64> %shift_amt, splat (i64 32)
303324
%shl = shl <2 x i64> %arg0, %or
304325
ret <2 x i64> %shl
305326
}
306327

307-
define <3 x i64> @shl_v3_or32_inreg(<3 x i64> %arg0, <3 x i64> inreg %shift_amt) {
308-
; CHECK-LABEL: shl_v3_or32_inreg:
328+
define <3 x i64> @shl_v3_or32_sgpr(<3 x i64> inreg %arg0, <3 x i64> inreg %shift_amt) {
329+
; CHECK-LABEL: shl_v3_or32_sgpr:
309330
; CHECK: ; %bb.0:
310331
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
311-
; CHECK-NEXT: v_lshlrev_b32_e32 v1, s16, v0
312-
; CHECK-NEXT: v_lshlrev_b32_e32 v3, s18, v2
313-
; CHECK-NEXT: v_lshlrev_b32_e32 v5, s20, v4
332+
; CHECK-NEXT: s_lshl_b32 s4, s16, s22
333+
; CHECK-NEXT: s_lshl_b32 s5, s18, s24
334+
; CHECK-NEXT: s_lshl_b32 s6, s20, s26
314335
; CHECK-NEXT: v_mov_b32_e32 v0, 0
336+
; CHECK-NEXT: v_mov_b32_e32 v1, s4
315337
; CHECK-NEXT: v_mov_b32_e32 v2, 0
338+
; CHECK-NEXT: v_mov_b32_e32 v3, s5
316339
; CHECK-NEXT: v_mov_b32_e32 v4, 0
340+
; CHECK-NEXT: v_mov_b32_e32 v5, s6
317341
; CHECK-NEXT: s_setpc_b64 s[30:31]
318342
%or = or <3 x i64> %shift_amt, splat (i64 32)
319343
%shl = shl <3 x i64> %arg0, %or
320344
ret <3 x i64> %shl
321345
}
322346

323-
define <4 x i64> @shl_v4_or32_inreg(<4 x i64> %arg0, <4 x i64> inreg %shift_amt) {
324-
; CHECK-LABEL: shl_v4_or32_inreg:
347+
define <4 x i64> @shl_v4_or32_sgpr(<4 x i64> inreg %arg0, <4 x i64> inreg %shift_amt) {
348+
; CHECK-LABEL: shl_v4_or32_sgpr:
325349
; CHECK: ; %bb.0:
326350
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
327-
; CHECK-NEXT: v_lshlrev_b32_e32 v1, s16, v0
328-
; CHECK-NEXT: v_lshlrev_b32_e32 v3, s18, v2
329-
; CHECK-NEXT: v_lshlrev_b32_e32 v5, s20, v4
330-
; CHECK-NEXT: v_lshlrev_b32_e32 v7, s22, v6
351+
; CHECK-NEXT: s_lshl_b32 s4, s16, s24
352+
; CHECK-NEXT: s_lshl_b32 s5, s18, s26
353+
; CHECK-NEXT: s_lshl_b32 s6, s20, s28
354+
; CHECK-NEXT: v_lshlrev_b32_e64 v7, v0, s22
331355
; CHECK-NEXT: v_mov_b32_e32 v0, 0
356+
; CHECK-NEXT: v_mov_b32_e32 v1, s4
332357
; CHECK-NEXT: v_mov_b32_e32 v2, 0
358+
; CHECK-NEXT: v_mov_b32_e32 v3, s5
333359
; CHECK-NEXT: v_mov_b32_e32 v4, 0
360+
; CHECK-NEXT: v_mov_b32_e32 v5, s6
334361
; CHECK-NEXT: v_mov_b32_e32 v6, 0
335362
; CHECK-NEXT: s_setpc_b64 s[30:31]
336363
%or = or <4 x i64> %shift_amt, splat (i64 32)

0 commit comments

Comments
 (0)