; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=SI %s
3-
4- declare i32 @llvm.amdgcn.workitem.id.x () readnone
5-
6- ; SI-LABEL: {{^}}test_i64_vreg:
7- ; SI: v_add_i32
8- ; SI: v_addc_u32
94define amdgpu_kernel void @test_i64_vreg (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %inA , ptr addrspace (1 ) noalias %inB ) {
5+ ; SI-LABEL: test_i64_vreg:
6+ ; SI: ; %bb.0:
7+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
8+ ; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd
9+ ; SI-NEXT: s_mov_b32 s11, 0xf000
10+ ; SI-NEXT: s_mov_b32 s14, 0
11+ ; SI-NEXT: v_lshlrev_b32_e32 v0, 3, v0
12+ ; SI-NEXT: v_mov_b32_e32 v1, 0
13+ ; SI-NEXT: s_mov_b32 s15, s11
14+ ; SI-NEXT: s_mov_b64 s[6:7], s[14:15]
15+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
16+ ; SI-NEXT: s_mov_b64 s[12:13], s[2:3]
17+ ; SI-NEXT: buffer_load_dwordx2 v[2:3], v[0:1], s[12:15], 0 addr64
18+ ; SI-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
19+ ; SI-NEXT: s_mov_b32 s10, -1
20+ ; SI-NEXT: s_mov_b32 s8, s0
21+ ; SI-NEXT: s_mov_b32 s9, s1
22+ ; SI-NEXT: s_waitcnt vmcnt(0)
23+ ; SI-NEXT: v_add_i32_e32 v0, vcc, v2, v0
24+ ; SI-NEXT: v_addc_u32_e32 v1, vcc, v3, v1, vcc
25+ ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0
26+ ; SI-NEXT: s_endpgm
1027 %tid = call i32 @llvm.amdgcn.workitem.id.x () readnone
1128 %a_ptr = getelementptr i64 , ptr addrspace (1 ) %inA , i32 %tid
1229 %b_ptr = getelementptr i64 , ptr addrspace (1 ) %inB , i32 %tid
@@ -18,10 +35,22 @@ define amdgpu_kernel void @test_i64_vreg(ptr addrspace(1) noalias %out, ptr addr
1835}
1936
; Check that the SGPR add operand is correctly moved to a VGPR.
2438define amdgpu_kernel void @sgpr_operand (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %in , ptr addrspace (1 ) noalias %in_bar , i64 %a ) {
39+ ; SI-LABEL: sgpr_operand:
40+ ; SI: ; %bb.0:
41+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
42+ ; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xf
43+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
44+ ; SI-NEXT: s_load_dwordx2 s[6:7], s[2:3], 0x0
45+ ; SI-NEXT: s_mov_b32 s3, 0xf000
46+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
47+ ; SI-NEXT: s_add_u32 s4, s6, s4
48+ ; SI-NEXT: s_addc_u32 s5, s7, s5
49+ ; SI-NEXT: s_mov_b32 s2, -1
50+ ; SI-NEXT: v_mov_b32_e32 v0, s4
51+ ; SI-NEXT: v_mov_b32_e32 v1, s5
52+ ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
53+ ; SI-NEXT: s_endpgm
2554 %foo = load i64 , ptr addrspace (1 ) %in , align 8
2655 %result = add i64 %foo , %a
2756 store i64 %result , ptr addrspace (1 ) %out
@@ -30,35 +59,76 @@ define amdgpu_kernel void @sgpr_operand(ptr addrspace(1) noalias %out, ptr addrs
3059
; Swap the arguments. Check that the SGPR -> VGPR copy works with the
; SGPR as other operand.
; i64 add with the scalar (SGPR) operand first: the result is still computed
; entirely on the scalar unit as an s_add_u32 / s_addc_u32 pair.
define amdgpu_kernel void @sgpr_operand_reversed(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in, i64 %a) {
; SI-LABEL: sgpr_operand_reversed:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0xd
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_load_dwordx2 s[6:7], s[2:3], 0x0
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_add_u32 s4, s4, s6
; SI-NEXT:    s_addc_u32 s5, s5, s7
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, s4
; SI-NEXT:    v_mov_b32_e32 v1, s5
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
  %foo = load i64, ptr addrspace(1) %in, align 8
  %result = add i64 %a, %foo
  store i64 %result, ptr addrspace(1) %out
  ret void
}
4383
; <2 x i64> add of two uniform (SGPR) operands: expect two scalar 64-bit adds,
; each an s_add_u32 / s_addc_u32 carry pair, before the vector store.
define amdgpu_kernel void @test_v2i64_sreg(ptr addrspace(1) noalias %out, <2 x i64> %a, <2 x i64> %b) {
; SI-LABEL: test_v2i64_sreg:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0xd
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_add_u32 s4, s10, s14
; SI-NEXT:    s_addc_u32 s5, s11, s15
; SI-NEXT:    s_add_u32 s6, s8, s12
; SI-NEXT:    s_addc_u32 s7, s9, s13
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v2, s4
; SI-NEXT:    v_mov_b32_e32 v3, s5
; SI-NEXT:    v_mov_b32_e32 v0, s6
; SI-NEXT:    v_mov_b32_e32 v1, s7
; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; SI-NEXT:    s_endpgm
  %result = add <2 x i64> %a, %b
  store <2 x i64> %result, ptr addrspace(1) %out
  ret void
}
55106
56- ; SI-LABEL: {{^}}test_v2i64_vreg:
57- ; SI: v_add_i32
58- ; SI: v_addc_u32
59- ; SI: v_add_i32
60- ; SI: v_addc_u32
61107define amdgpu_kernel void @test_v2i64_vreg (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %inA , ptr addrspace (1 ) noalias %inB ) {
108+ ; SI-LABEL: test_v2i64_vreg:
109+ ; SI: ; %bb.0:
110+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
111+ ; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd
112+ ; SI-NEXT: s_mov_b32 s11, 0xf000
113+ ; SI-NEXT: s_mov_b32 s14, 0
114+ ; SI-NEXT: v_lshlrev_b32_e32 v4, 4, v0
115+ ; SI-NEXT: v_mov_b32_e32 v5, 0
116+ ; SI-NEXT: s_mov_b32 s15, s11
117+ ; SI-NEXT: s_mov_b64 s[6:7], s[14:15]
118+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
119+ ; SI-NEXT: s_mov_b64 s[12:13], s[2:3]
120+ ; SI-NEXT: buffer_load_dwordx4 v[0:3], v[4:5], s[12:15], 0 addr64
121+ ; SI-NEXT: buffer_load_dwordx4 v[4:7], v[4:5], s[4:7], 0 addr64
122+ ; SI-NEXT: s_mov_b32 s10, -1
123+ ; SI-NEXT: s_mov_b32 s8, s0
124+ ; SI-NEXT: s_mov_b32 s9, s1
125+ ; SI-NEXT: s_waitcnt vmcnt(0)
126+ ; SI-NEXT: v_add_i32_e32 v2, vcc, v2, v6
127+ ; SI-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc
128+ ; SI-NEXT: v_add_i32_e32 v0, vcc, v0, v4
129+ ; SI-NEXT: v_addc_u32_e32 v1, vcc, v1, v5, vcc
130+ ; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0
131+ ; SI-NEXT: s_endpgm
62132 %tid = call i32 @llvm.amdgcn.workitem.id.x () readnone
63133 %a_ptr = getelementptr <2 x i64 >, ptr addrspace (1 ) %inA , i32 %tid
64134 %b_ptr = getelementptr <2 x i64 >, ptr addrspace (1 ) %inB , i32 %tid
@@ -69,14 +139,19 @@ define amdgpu_kernel void @test_v2i64_vreg(ptr addrspace(1) noalias %out, ptr ad
69139 ret void
70140}
71141
72- ; SI-LABEL: {{^}}trunc_i64_add_to_i32:
73- ; SI: s_load_dword s[[SREG0:[0-9]+]]
74- ; SI: s_load_dword s[[SREG1:[0-9]+]]
75- ; SI: s_add_i32 [[SRESULT:s[0-9]+]], s[[SREG1]], s[[SREG0]]
76- ; SI-NOT: addc
77- ; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
78- ; SI: buffer_store_dword [[VRESULT]],
79142define amdgpu_kernel void @trunc_i64_add_to_i32 (ptr addrspace (1 ) %out , i32 , i64 %a , i32 , i64 %b ) {
143+ ; SI-LABEL: trunc_i64_add_to_i32:
144+ ; SI: ; %bb.0:
145+ ; SI-NEXT: s_load_dword s2, s[4:5], 0xd
146+ ; SI-NEXT: s_load_dword s6, s[4:5], 0x11
147+ ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
148+ ; SI-NEXT: s_mov_b32 s3, 0xf000
149+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
150+ ; SI-NEXT: s_add_i32 s4, s6, s2
151+ ; SI-NEXT: s_mov_b32 s2, -1
152+ ; SI-NEXT: v_mov_b32_e32 v0, s4
153+ ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
154+ ; SI-NEXT: s_endpgm
80155 %add = add i64 %b , %a
81156 %trunc = trunc i64 %add to i32
82157 store i32 %trunc , ptr addrspace (1 ) %out , align 8
0 commit comments