Skip to content

Commit 8a04235

Browse files
committed
Unify test files. Correct the comment about the exact flag.
Signed-off-by: John Lu <[email protected]>
1 parent 9cbf099 commit 8a04235

File tree

2 files changed

+39
-53
lines changed

2 files changed

+39
-53
lines changed

llvm/test/CodeGen/AMDGPU/srl64_reduce.ll

Lines changed: 39 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
;;
88
;; DST = [srl i32 X, (Y & 0x1F), 0]
99

10-
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
10+
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck %s
1111

1212
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1313
; Test range with metadata
@@ -27,7 +27,21 @@ define i64 @srl_metadata(i64 %arg0, ptr %arg1.ptr) {
2727
ret i64 %srl
2828
}
2929

30-
; Shifted bits matter for exact shift. Reduction must not be done.
30+
define amdgpu_ps i64 @srl_metadata_sgpr_return(i64 inreg %arg0, ptr %arg1.ptr) {
31+
; CHECK-LABEL: srl_metadata_sgpr_return:
32+
; CHECK: ; %bb.0:
33+
; CHECK-NEXT: flat_load_dword v0, v[0:1]
34+
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
35+
; CHECK-NEXT: v_lshrrev_b64 v[0:1], v0, s[0:1]
36+
; CHECK-NEXT: s_mov_b32 s1, 0
37+
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
38+
; CHECK-NEXT: ; return to shader part epilog
39+
%shift.amt = load i64, ptr %arg1.ptr, !range !0, !noundef !{}
40+
%srl = lshr i64 %arg0, %shift.amt
41+
ret i64 %srl
42+
}
43+
44+
; The exact flag does not inhibit the reduction.
3145
define i64 @srl_exact_metadata(i64 %arg0, ptr %arg1.ptr) {
3246
; CHECK-LABEL: srl_exact_metadata:
3347
; CHECK: ; %bb.0:
@@ -238,6 +252,17 @@ define i64 @srl_or16_sgpr(i64 inreg %arg0, i64 inreg %shift_amt) {
238252
ret i64 %srl
239253
}
240254

255+
define amdgpu_ps i64 @srl_or16_sgpr_return(i64 inreg %arg0, i64 inreg %shift_amt) {
256+
; CHECK-LABEL: srl_or16_sgpr_return:
257+
; CHECK: ; %bb.0:
258+
; CHECK-NEXT: s_or_b32 s2, s2, 16
259+
; CHECK-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
260+
; CHECK-NEXT: ; return to shader part epilog
261+
%or = or i64 %shift_amt, 16
262+
%srl = lshr i64 %arg0, %or
263+
ret i64 %srl
264+
}
265+
241266
define <2 x i64> @srl_v2_or16_sgpr(<2 x i64> inreg %arg0, <2 x i64> inreg %shift_amt) {
242267
; CHECK-LABEL: srl_v2_or16_sgpr:
243268
; CHECK: ; %bb.0:
@@ -386,6 +411,18 @@ define i64 @srl_or32_sgpr(i64 inreg %arg0, i64 inreg %shift_amt) {
386411
ret i64 %srl
387412
}
388413

414+
define amdgpu_ps i64 @srl_or32_sgpr_return(i64 inreg %arg0, i64 inreg %shift_amt) {
415+
; CHECK-LABEL: srl_or32_sgpr_return:
416+
; CHECK: ; %bb.0:
417+
; CHECK-NEXT: s_or_b32 s2, s2, 32
418+
; CHECK-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
419+
; CHECK-NEXT: s_mov_b32 s1, 0
420+
; CHECK-NEXT: ; return to shader part epilog
421+
%or = or i64 %shift_amt, 32
422+
%srl = lshr i64 %arg0, %or
423+
ret i64 %srl
424+
}
425+
389426
define <2 x i64> @srl_v2_or32_sgpr(<2 x i64> inreg %arg0, <2 x i64> inreg %shift_amt) {
390427
; CHECK-LABEL: srl_v2_or32_sgpr:
391428
; CHECK: ; %bb.0:

llvm/test/CodeGen/AMDGPU/srl64_reduce_sgpr_return.ll

Lines changed: 0 additions & 51 deletions
This file was deleted.

0 commit comments

Comments
 (0)