77;;
88;; DST = [srl i32 X, (Y & 0x1F), 0]
99
10- ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
10+ ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck %s
1111
1212;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1313; Test range with metadata
@@ -27,7 +27,21 @@ define i64 @srl_metadata(i64 %arg0, ptr %arg1.ptr) {
2727 ret i64 %srl
2828}
2929
30- ; Shifted bits matter for exact shift. Reduction must not be done.
; amdgpu_ps variant of the range-metadata test: %arg0 is passed inreg, the i64
; shift amount is loaded through %arg1.ptr with !range !0 and !noundef
; metadata, and the lshr result is returned directly (the checks end in
; "return to shader part epilog").
; The checks expect a 64-bit v_lshrrev_b64, the high result register s1 set to
; 0, and the low half copied into s0 with v_readfirstlane_b32.
; NOTE(review): !0 is defined outside this excerpt; the zeroed high half
; presumably follows from that range -- confirm against the metadata node.
30+ define amdgpu_ps i64 @srl_metadata_sgpr_return (i64 inreg %arg0 , ptr %arg1.ptr ) {
31+ ; CHECK-LABEL: srl_metadata_sgpr_return:
32+ ; CHECK: ; %bb.0:
33+ ; CHECK-NEXT: flat_load_dword v0, v[0:1]
34+ ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
35+ ; CHECK-NEXT: v_lshrrev_b64 v[0:1], v0, s[0:1]
36+ ; CHECK-NEXT: s_mov_b32 s1, 0
37+ ; CHECK-NEXT: v_readfirstlane_b32 s0, v0
38+ ; CHECK-NEXT: ; return to shader part epilog
39+ %shift.amt = load i64 , ptr %arg1.ptr , !range !0 , !noundef !{}
40+ %srl = lshr i64 %arg0 , %shift.amt
41+ ret i64 %srl
42+ }
43+
44+ ; The exact flag does not inhibit the reduction.
3145define i64 @srl_exact_metadata (i64 %arg0 , ptr %arg1.ptr ) {
3246; CHECK-LABEL: srl_exact_metadata:
3347; CHECK: ; %bb.0:
@@ -238,6 +252,17 @@ define i64 @srl_or16_sgpr(i64 inreg %arg0, i64 inreg %shift_amt) {
238252 ret i64 %srl
239253}
240254
; amdgpu_ps variant with both operands passed inreg: the shift amount is OR'ed
; with 16 before the i64 lshr, and the result is returned directly.
; OR with 16 only forces bit 4 of the amount, so the amount is not known to be
; >= 32. The checks expect s_or_b32 on s2 followed by a full 64-bit s_lshr_b64
; on s[0:1], with no separate write to the high register.
255+ define amdgpu_ps i64 @srl_or16_sgpr_return (i64 inreg %arg0 , i64 inreg %shift_amt ) {
256+ ; CHECK-LABEL: srl_or16_sgpr_return:
257+ ; CHECK: ; %bb.0:
258+ ; CHECK-NEXT: s_or_b32 s2, s2, 16
259+ ; CHECK-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
260+ ; CHECK-NEXT: ; return to shader part epilog
261+ %or = or i64 %shift_amt , 16
262+ %srl = lshr i64 %arg0 , %or
263+ ret i64 %srl
264+ }
265+
241266define <2 x i64 > @srl_v2_or16_sgpr (<2 x i64 > inreg %arg0 , <2 x i64 > inreg %shift_amt ) {
242267; CHECK-LABEL: srl_v2_or16_sgpr:
243268; CHECK: ; %bb.0:
@@ -386,6 +411,18 @@ define i64 @srl_or32_sgpr(i64 inreg %arg0, i64 inreg %shift_amt) {
386411 ret i64 %srl
387412}
388413
; amdgpu_ps variant with both operands passed inreg: the shift amount is OR'ed
; with 32 (forcing bit 5) before the i64 lshr, and the result is returned
; directly.
; The checks expect s_or_b32 on s2, a 64-bit s_lshr_b64 on s[0:1], and then
; the high result register s1 overwritten with 0.
; NOTE(review): the file header describes a 32-bit srl form, yet these checks
; keep s_lshr_b64 alongside the zeroed high half -- confirm by regenerating the
; assertions with llc rather than editing them by hand.
414+ define amdgpu_ps i64 @srl_or32_sgpr_return (i64 inreg %arg0 , i64 inreg %shift_amt ) {
415+ ; CHECK-LABEL: srl_or32_sgpr_return:
416+ ; CHECK: ; %bb.0:
417+ ; CHECK-NEXT: s_or_b32 s2, s2, 32
418+ ; CHECK-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
419+ ; CHECK-NEXT: s_mov_b32 s1, 0
420+ ; CHECK-NEXT: ; return to shader part epilog
421+ %or = or i64 %shift_amt , 32
422+ %srl = lshr i64 %arg0 , %or
423+ ret i64 %srl
424+ }
425+
389426define <2 x i64 > @srl_v2_or32_sgpr (<2 x i64 > inreg %arg0 , <2 x i64 > inreg %shift_amt ) {
390427; CHECK-LABEL: srl_v2_or32_sgpr:
391428; CHECK: ; %bb.0:
0 commit comments