; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -stop-after=si-fix-sgpr-copies < %s | FileCheck %s
; i16 gep (shift amount 1): the sign-extended VGPR index should fold with the
; SGPR base into a single VALU V_LSHL_ADD_U32 after si-fix-sgpr-copies.
; NOTE(review): intra-line spacing in CHECK lines may have been collapsed by
; extraction — regenerate with update_mir_test_checks.py to confirm.
define amdgpu_kernel void @lshl1_add(ptr addrspace(5) %alloca) {
; CHECK-LABEL: name: lshl1_add
; CHECK: bb.0 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5
; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s32) from %ir.alloca.kernarg.offset, addrspace 4)
; CHECK-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD killed [[V_MOV_B]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s32) from `ptr addrspace(1) null`, addrspace 1)
; CHECK-NEXT: [[V_LSHL_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_LSHL_ADD_U32_e64 [[GLOBAL_LOAD_DWORD]], 1, killed [[S_LOAD_DWORD_IMM]], implicit $exec
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: SCRATCH_STORE_SHORT killed [[V_MOV_B32_e32_]], killed [[V_LSHL_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %ir.gep, addrspace 5)
; CHECK-NEXT: S_ENDPGM 0
  %vaddr = load volatile i32, ptr addrspace(1) null, align 4
  %1 = sext i32 %vaddr to i64
  %gep = getelementptr i16, ptr addrspace(5) %alloca, i64 %1
  store i16 0, ptr addrspace(5) %gep, align 2
  ret void
}
; i32 gep (shift amount 2): index/base combine should land on the VALU as a
; single V_LSHL_ADD_U32, feeding a dword scratch store.
define amdgpu_kernel void @lshl2_add(ptr addrspace(5) %alloca) {
; CHECK-LABEL: name: lshl2_add
; CHECK: bb.0 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5
; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s32) from %ir.alloca.kernarg.offset, addrspace 4)
; CHECK-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD killed [[V_MOV_B]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s32) from `ptr addrspace(1) null`, addrspace 1)
; CHECK-NEXT: [[V_LSHL_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_LSHL_ADD_U32_e64 [[GLOBAL_LOAD_DWORD]], 2, killed [[S_LOAD_DWORD_IMM]], implicit $exec
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: SCRATCH_STORE_DWORD killed [[V_MOV_B32_e32_]], killed [[V_LSHL_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.gep, addrspace 5)
; CHECK-NEXT: S_ENDPGM 0
  %vaddr = load volatile i32, ptr addrspace(1) null, align 4
  %1 = sext i32 %vaddr to i64
  %gep = getelementptr i32, ptr addrspace(5) %alloca, i64 %1
  store i32 0, ptr addrspace(5) %gep, align 4
  ret void
}
; i64 gep (shift amount 3): address math is a single VALU V_LSHL_ADD_U32; the
; zero store value is built from S_MOV_B32 copies into a vreg_64 REG_SEQUENCE.
define amdgpu_kernel void @lshl3_add(ptr addrspace(5) %alloca) {
; CHECK-LABEL: name: lshl3_add
; CHECK: bb.0 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5
; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s32) from %ir.alloca.kernarg.offset, addrspace 4)
; CHECK-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD killed [[V_MOV_B]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s32) from `ptr addrspace(1) null`, addrspace 1)
; CHECK-NEXT: [[V_LSHL_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_LSHL_ADD_U32_e64 [[GLOBAL_LOAD_DWORD]], 3, killed [[S_LOAD_DWORD_IMM]], implicit $exec
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; CHECK-NEXT: SCRATCH_STORE_DWORDX2 killed [[REG_SEQUENCE]], killed [[V_LSHL_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into %ir.gep, addrspace 5)
; CHECK-NEXT: S_ENDPGM 0
  %vaddr = load volatile i32, ptr addrspace(1) null, align 4
  %1 = sext i32 %vaddr to i64
  %gep = getelementptr i64, ptr addrspace(5) %alloca, i64 %1
  store i64 0, ptr addrspace(5) %gep, align 8
  ret void
}
; i128 gep (shift amount 4): address math is a single VALU V_LSHL_ADD_U32; the
; 128-bit zero is assembled into a vreg_128 REG_SEQUENCE for a dwordx4 store.
define amdgpu_kernel void @lshl4_add(ptr addrspace(5) %alloca) {
; CHECK-LABEL: name: lshl4_add
; CHECK: bb.0 (%ir-block.0):
; CHECK-NEXT: liveins: $sgpr4_sgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5
; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s32) from %ir.alloca.kernarg.offset, addrspace 4)
; CHECK-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD killed [[V_MOV_B]], 0, 0, implicit $exec :: (volatile "amdgpu-noclobber" load (s32) from `ptr addrspace(1) null`, addrspace 1)
; CHECK-NEXT: [[V_LSHL_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_LSHL_ADD_U32_e64 [[GLOBAL_LOAD_DWORD]], 4, killed [[S_LOAD_DWORD_IMM]], implicit $exec
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK-NEXT: SCRATCH_STORE_DWORDX4 killed [[REG_SEQUENCE]], killed [[V_LSHL_ADD_U32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %ir.gep, addrspace 5)
; CHECK-NEXT: S_ENDPGM 0
  %vaddr = load volatile i32, ptr addrspace(1) null, align 4
  %1 = sext i32 %vaddr to i64
  %gep = getelementptr i128, ptr addrspace(5) %alloca, i64 %1
  store i128 0, ptr addrspace(5) %gep, align 16
  ret void
}

0 commit comments

Comments
 (0)