1
+ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
1
2
; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=SI %s
2
3
3
-
4
- declare i32 @llvm.amdgcn.workitem.id.x () readnone
5
-
6
- ; SI-LABEL: {{^}}test_i64_vreg:
7
- ; SI: v_add_i32
8
- ; SI: v_addc_u32
9
4
define amdgpu_kernel void @test_i64_vreg (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %inA , ptr addrspace (1 ) noalias %inB ) {
5
+ ; SI-LABEL: test_i64_vreg:
6
+ ; SI: ; %bb.0:
7
+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
8
+ ; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd
9
+ ; SI-NEXT: s_mov_b32 s11, 0xf000
10
+ ; SI-NEXT: s_mov_b32 s14, 0
11
+ ; SI-NEXT: v_lshlrev_b32_e32 v0, 3, v0
12
+ ; SI-NEXT: v_mov_b32_e32 v1, 0
13
+ ; SI-NEXT: s_mov_b32 s15, s11
14
+ ; SI-NEXT: s_mov_b64 s[6:7], s[14:15]
15
+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
16
+ ; SI-NEXT: s_mov_b64 s[12:13], s[2:3]
17
+ ; SI-NEXT: buffer_load_dwordx2 v[2:3], v[0:1], s[12:15], 0 addr64
18
+ ; SI-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64
19
+ ; SI-NEXT: s_mov_b32 s10, -1
20
+ ; SI-NEXT: s_mov_b32 s8, s0
21
+ ; SI-NEXT: s_mov_b32 s9, s1
22
+ ; SI-NEXT: s_waitcnt vmcnt(0)
23
+ ; SI-NEXT: v_add_i32_e32 v0, vcc, v2, v0
24
+ ; SI-NEXT: v_addc_u32_e32 v1, vcc, v3, v1, vcc
25
+ ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0
26
+ ; SI-NEXT: s_endpgm
10
27
%tid = call i32 @llvm.amdgcn.workitem.id.x () readnone
11
28
%a_ptr = getelementptr i64 , ptr addrspace (1 ) %inA , i32 %tid
12
29
%b_ptr = getelementptr i64 , ptr addrspace (1 ) %inB , i32 %tid
@@ -18,10 +35,22 @@ define amdgpu_kernel void @test_i64_vreg(ptr addrspace(1) noalias %out, ptr addr
18
35
}
19
36
20
37
; Check an i64 add that has an SGPR kernel-argument operand: the checks expect a scalar s_add_u32/s_addc_u32, with only the result moved to VGPRs for the store.
21
- ; SI-LABEL: {{^}}sgpr_operand:
22
- ; SI: s_add_u32
23
- ; SI: s_addc_u32
24
38
define amdgpu_kernel void @sgpr_operand (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %in , ptr addrspace (1 ) noalias %in_bar , i64 %a ) {
39
+ ; SI-LABEL: sgpr_operand:
40
+ ; SI: ; %bb.0:
41
+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
42
+ ; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xf
43
+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
44
+ ; SI-NEXT: s_load_dwordx2 s[6:7], s[2:3], 0x0
45
+ ; SI-NEXT: s_mov_b32 s3, 0xf000
46
+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
47
+ ; SI-NEXT: s_add_u32 s4, s6, s4
48
+ ; SI-NEXT: s_addc_u32 s5, s7, s5
49
+ ; SI-NEXT: s_mov_b32 s2, -1
50
+ ; SI-NEXT: v_mov_b32_e32 v0, s4
51
+ ; SI-NEXT: v_mov_b32_e32 v1, s5
52
+ ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
53
+ ; SI-NEXT: s_endpgm
25
54
%foo = load i64 , ptr addrspace (1 ) %in , align 8
26
55
%result = add i64 %foo , %a
27
56
store i64 %result , ptr addrspace (1 ) %out
@@ -30,35 +59,76 @@ define amdgpu_kernel void @sgpr_operand(ptr addrspace(1) noalias %out, ptr addrs
30
59
31
60
; Swap the arguments. Check that the SGPR -> VGPR copy works with the
32
61
; SGPR as the other operand.
33
- ;
34
- ; SI-LABEL: {{^}}sgpr_operand_reversed:
35
- ; SI: s_add_u32
36
- ; SI: s_addc_u32
37
62
define amdgpu_kernel void @sgpr_operand_reversed (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %in , i64 %a ) {
63
+ ; SI-LABEL: sgpr_operand_reversed:
64
+ ; SI: ; %bb.0:
65
+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
66
+ ; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd
67
+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
68
+ ; SI-NEXT: s_load_dwordx2 s[6:7], s[2:3], 0x0
69
+ ; SI-NEXT: s_mov_b32 s3, 0xf000
70
+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
71
+ ; SI-NEXT: s_add_u32 s4, s4, s6
72
+ ; SI-NEXT: s_addc_u32 s5, s5, s7
73
+ ; SI-NEXT: s_mov_b32 s2, -1
74
+ ; SI-NEXT: v_mov_b32_e32 v0, s4
75
+ ; SI-NEXT: v_mov_b32_e32 v1, s5
76
+ ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
77
+ ; SI-NEXT: s_endpgm
; The IR below loads one i64 addend from %in and adds it to the kernel
; argument %a, with %a as the left-hand operand of the add (sgpr_operand
; has it on the right). The i64 sum is stored to %out.
38
78
%foo = load i64 , ptr addrspace (1 ) %in , align 8
39
79
%result = add i64 %a , %foo
40
80
store i64 %result , ptr addrspace (1 ) %out
41
81
ret void
42
82
}
43
83
44
-
45
- ; SI-LABEL: {{^}}test_v2i64_sreg:
46
- ; SI: s_add_u32
47
- ; SI: s_addc_u32
48
- ; SI: s_add_u32
49
- ; SI: s_addc_u32
50
84
define amdgpu_kernel void @test_v2i64_sreg (ptr addrspace (1 ) noalias %out , <2 x i64 > %a , <2 x i64 > %b ) {
85
+ ; SI-LABEL: test_v2i64_sreg:
86
+ ; SI: ; %bb.0:
87
+ ; SI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0xd
88
+ ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
89
+ ; SI-NEXT: s_mov_b32 s3, 0xf000
90
+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
91
+ ; SI-NEXT: s_add_u32 s4, s10, s14
92
+ ; SI-NEXT: s_addc_u32 s5, s11, s15
93
+ ; SI-NEXT: s_add_u32 s6, s8, s12
94
+ ; SI-NEXT: s_addc_u32 s7, s9, s13
95
+ ; SI-NEXT: s_mov_b32 s2, -1
96
+ ; SI-NEXT: v_mov_b32_e32 v2, s4
97
+ ; SI-NEXT: v_mov_b32_e32 v3, s5
98
+ ; SI-NEXT: v_mov_b32_e32 v0, s6
99
+ ; SI-NEXT: v_mov_b32_e32 v1, s7
100
+ ; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
101
+ ; SI-NEXT: s_endpgm
; Both <2 x i64> operands are kernel arguments. The checks above expect the
; vector add to become two s_add_u32/s_addc_u32 pairs, with the four result
; dwords copied to v[0:3] for a single buffer_store_dwordx4 to %out.
51
102
%result = add <2 x i64 > %a , %b
52
103
store <2 x i64 > %result , ptr addrspace (1 ) %out
53
104
ret void
54
105
}
55
106
56
- ; SI-LABEL: {{^}}test_v2i64_vreg:
57
- ; SI: v_add_i32
58
- ; SI: v_addc_u32
59
- ; SI: v_add_i32
60
- ; SI: v_addc_u32
61
107
define amdgpu_kernel void @test_v2i64_vreg (ptr addrspace (1 ) noalias %out , ptr addrspace (1 ) noalias %inA , ptr addrspace (1 ) noalias %inB ) {
108
+ ; SI-LABEL: test_v2i64_vreg:
109
+ ; SI: ; %bb.0:
110
+ ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
111
+ ; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd
112
+ ; SI-NEXT: s_mov_b32 s11, 0xf000
113
+ ; SI-NEXT: s_mov_b32 s14, 0
114
+ ; SI-NEXT: v_lshlrev_b32_e32 v4, 4, v0
115
+ ; SI-NEXT: v_mov_b32_e32 v5, 0
116
+ ; SI-NEXT: s_mov_b32 s15, s11
117
+ ; SI-NEXT: s_mov_b64 s[6:7], s[14:15]
118
+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
119
+ ; SI-NEXT: s_mov_b64 s[12:13], s[2:3]
120
+ ; SI-NEXT: buffer_load_dwordx4 v[0:3], v[4:5], s[12:15], 0 addr64
121
+ ; SI-NEXT: buffer_load_dwordx4 v[4:7], v[4:5], s[4:7], 0 addr64
122
+ ; SI-NEXT: s_mov_b32 s10, -1
123
+ ; SI-NEXT: s_mov_b32 s8, s0
124
+ ; SI-NEXT: s_mov_b32 s9, s1
125
+ ; SI-NEXT: s_waitcnt vmcnt(0)
126
+ ; SI-NEXT: v_add_i32_e32 v2, vcc, v2, v6
127
+ ; SI-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc
128
+ ; SI-NEXT: v_add_i32_e32 v0, vcc, v0, v4
129
+ ; SI-NEXT: v_addc_u32_e32 v1, vcc, v1, v5, vcc
130
+ ; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0
131
+ ; SI-NEXT: s_endpgm
62
132
%tid = call i32 @llvm.amdgcn.workitem.id.x () readnone
63
133
%a_ptr = getelementptr <2 x i64 >, ptr addrspace (1 ) %inA , i32 %tid
64
134
%b_ptr = getelementptr <2 x i64 >, ptr addrspace (1 ) %inB , i32 %tid
@@ -69,14 +139,19 @@ define amdgpu_kernel void @test_v2i64_vreg(ptr addrspace(1) noalias %out, ptr ad
69
139
ret void
70
140
}
71
141
72
- ; SI-LABEL: {{^}}trunc_i64_add_to_i32:
73
- ; SI: s_load_dword s[[SREG0:[0-9]+]]
74
- ; SI: s_load_dword s[[SREG1:[0-9]+]]
75
- ; SI: s_add_i32 [[SRESULT:s[0-9]+]], s[[SREG1]], s[[SREG0]]
76
- ; SI-NOT: addc
77
- ; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
78
- ; SI: buffer_store_dword [[VRESULT]],
79
142
define amdgpu_kernel void @trunc_i64_add_to_i32 (ptr addrspace (1 ) %out , i32 , i64 %a , i32 , i64 %b ) {
143
+ ; SI-LABEL: trunc_i64_add_to_i32:
144
+ ; SI: ; %bb.0:
145
+ ; SI-NEXT: s_load_dword s2, s[4:5], 0xd
146
+ ; SI-NEXT: s_load_dword s6, s[4:5], 0x11
147
+ ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
148
+ ; SI-NEXT: s_mov_b32 s3, 0xf000
149
+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
150
+ ; SI-NEXT: s_add_i32 s4, s6, s2
151
+ ; SI-NEXT: s_mov_b32 s2, -1
152
+ ; SI-NEXT: v_mov_b32_e32 v0, s4
153
+ ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
154
+ ; SI-NEXT: s_endpgm
80
155
%add = add i64 %b , %a
81
156
%trunc = trunc i64 %add to i32
82
157
store i32 %trunc , ptr addrspace (1 ) %out , align 8
0 commit comments