@@ -12,123 +12,198 @@ declare void @llvm.nvvm.tcgen05.alloc.cg2(ptr %addr, i32 %ncols)
12
12
declare void @llvm.nvvm.tcgen05.alloc.shared.cg1 (ptr addrspace (3 ) %addr , i32 %ncols )
13
13
declare void @llvm.nvvm.tcgen05.alloc.shared.cg2 (ptr addrspace (3 ) %addr , i32 %ncols )
14
14
15
- ; CHECK-LABEL: test_tcgen05_alloc
16
- define void @test_tcgen05_alloc (ptr %addr , i32 %ncols ) {
17
- ; CHECK_PTX64-LABEL: test_tcgen05_alloc(
15
+ define void @test_tcgen05_alloc_cg1 (ptr %addr , i32 %ncols ) {
16
+ ; CHECK_PTX64-LABEL: test_tcgen05_alloc_cg1(
18
17
; CHECK_PTX64: {
19
18
; CHECK_PTX64-NEXT: .reg .b32 %r<2>;
20
19
; CHECK_PTX64-NEXT: .reg .b64 %rd<2>;
21
20
; CHECK_PTX64-EMPTY:
22
21
; CHECK_PTX64-NEXT: // %bb.0:
23
- ; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_param_0 ];
24
- ; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_param_1 ];
22
+ ; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_cg1_param_0];
23
+ ; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_cg1_param_1];
25
24
; CHECK_PTX64-NEXT: tcgen05.alloc.cta_group::1.sync.aligned.b32 [%rd1], %r1;
26
- ; CHECK_PTX64-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.b32 [%rd1], %r1;
27
25
; CHECK_PTX64-NEXT: ret;
28
26
;
29
- ; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_alloc (
27
+ ; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_alloc_cg1(
30
28
; CHECK_PTX64_SHARED32: {
31
29
; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<2>;
32
30
; CHECK_PTX64_SHARED32-NEXT: .reg .b64 %rd<2>;
33
31
; CHECK_PTX64_SHARED32-EMPTY:
34
32
; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
35
- ; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_param_0 ];
36
- ; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_param_1 ];
33
+ ; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_cg1_param_0];
34
+ ; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_cg1_param_1];
37
35
; CHECK_PTX64_SHARED32-NEXT: tcgen05.alloc.cta_group::1.sync.aligned.b32 [%rd1], %r1;
38
- ; CHECK_PTX64_SHARED32-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.b32 [%rd1], %r1;
39
36
; CHECK_PTX64_SHARED32-NEXT: ret;
40
37
call void @llvm.nvvm.tcgen05.alloc.cg1 (ptr %addr , i32 %ncols )
41
- call void @llvm.nvvm.tcgen05.alloc.cg2 (ptr %addr , i32 %ncols )
38
+ ret void
39
+ }
42
40
41
+ define void @test_tcgen05_alloc_cg2 (ptr %addr , i32 %ncols ) {
42
+ ; CHECK_PTX64-LABEL: test_tcgen05_alloc_cg2(
43
+ ; CHECK_PTX64: {
44
+ ; CHECK_PTX64-NEXT: .reg .b32 %r<2>;
45
+ ; CHECK_PTX64-NEXT: .reg .b64 %rd<2>;
46
+ ; CHECK_PTX64-EMPTY:
47
+ ; CHECK_PTX64-NEXT: // %bb.0:
48
+ ; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_cg2_param_0];
49
+ ; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_cg2_param_1];
50
+ ; CHECK_PTX64-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.b32 [%rd1], %r1;
51
+ ; CHECK_PTX64-NEXT: ret;
52
+ ;
53
+ ; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_alloc_cg2(
54
+ ; CHECK_PTX64_SHARED32: {
55
+ ; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<2>;
56
+ ; CHECK_PTX64_SHARED32-NEXT: .reg .b64 %rd<2>;
57
+ ; CHECK_PTX64_SHARED32-EMPTY:
58
+ ; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
59
+ ; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_cg2_param_0];
60
+ ; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_cg2_param_1];
61
+ ; CHECK_PTX64_SHARED32-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.b32 [%rd1], %r1;
62
+ ; CHECK_PTX64_SHARED32-NEXT: ret;
63
+ call void @llvm.nvvm.tcgen05.alloc.cg2 (ptr %addr , i32 %ncols )
43
64
ret void
44
65
}
45
66
46
- ; CHECK-LABEL: test_tcgen05_alloc_shared
47
- define void @test_tcgen05_alloc_shared (ptr addrspace (3 ) %addr , i32 %ncols ) {
48
- ; CHECK_PTX64-LABEL: test_tcgen05_alloc_shared(
67
+ define void @test_tcgen05_alloc_shared_cg1 (ptr addrspace (3 ) %addr , i32 %ncols ) {
68
+ ; CHECK_PTX64-LABEL: test_tcgen05_alloc_shared_cg1(
49
69
; CHECK_PTX64: {
50
70
; CHECK_PTX64-NEXT: .reg .b32 %r<2>;
51
71
; CHECK_PTX64-NEXT: .reg .b64 %rd<2>;
52
72
; CHECK_PTX64-EMPTY:
53
73
; CHECK_PTX64-NEXT: // %bb.0:
54
- ; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_shared_param_0 ];
55
- ; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_shared_param_1 ];
74
+ ; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_shared_cg1_param_0];
75
+ ; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_shared_cg1_param_1];
56
76
; CHECK_PTX64-NEXT: tcgen05.alloc.cta_group::1.sync.aligned.shared::cta.b32 [%rd1], %r1;
57
- ; CHECK_PTX64-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.shared::cta.b32 [%rd1], %r1;
58
77
; CHECK_PTX64-NEXT: ret;
59
78
;
60
- ; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_alloc_shared (
79
+ ; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_alloc_shared_cg1(
61
80
; CHECK_PTX64_SHARED32: {
62
81
; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<3>;
63
82
; CHECK_PTX64_SHARED32-EMPTY:
64
83
; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
65
- ; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_shared_param_0 ];
66
- ; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r2, [test_tcgen05_alloc_shared_param_1 ];
84
+ ; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_shared_cg1_param_0];
85
+ ; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r2, [test_tcgen05_alloc_shared_cg1_param_1];
67
86
; CHECK_PTX64_SHARED32-NEXT: tcgen05.alloc.cta_group::1.sync.aligned.shared::cta.b32 [%r1], %r2;
68
- ; CHECK_PTX64_SHARED32-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.shared::cta.b32 [%r1], %r2;
69
87
; CHECK_PTX64_SHARED32-NEXT: ret;
70
88
call void @llvm.nvvm.tcgen05.alloc.shared.cg1 (ptr addrspace (3 ) %addr , i32 %ncols )
89
+ ret void
90
+ }
71
91
92
+ define void @test_tcgen05_alloc_shared_cg2 (ptr addrspace (3 ) %addr , i32 %ncols ) {
93
+ ; CHECK_PTX64-LABEL: test_tcgen05_alloc_shared_cg2(
94
+ ; CHECK_PTX64: {
95
+ ; CHECK_PTX64-NEXT: .reg .b32 %r<2>;
96
+ ; CHECK_PTX64-NEXT: .reg .b64 %rd<2>;
97
+ ; CHECK_PTX64-EMPTY:
98
+ ; CHECK_PTX64-NEXT: // %bb.0:
99
+ ; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_shared_cg2_param_0];
100
+ ; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_shared_cg2_param_1];
101
+ ; CHECK_PTX64-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.shared::cta.b32 [%rd1], %r1;
102
+ ; CHECK_PTX64-NEXT: ret;
103
+ ;
104
+ ; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_alloc_shared_cg2(
105
+ ; CHECK_PTX64_SHARED32: {
106
+ ; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<3>;
107
+ ; CHECK_PTX64_SHARED32-EMPTY:
108
+ ; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
109
+ ; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_shared_cg2_param_0];
110
+ ; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r2, [test_tcgen05_alloc_shared_cg2_param_1];
111
+ ; CHECK_PTX64_SHARED32-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.shared::cta.b32 [%r1], %r2;
112
+ ; CHECK_PTX64_SHARED32-NEXT: ret;
72
113
call void @llvm.nvvm.tcgen05.alloc.shared.cg2 (ptr addrspace (3 ) %addr , i32 %ncols )
73
114
ret void
74
115
}
75
116
76
117
declare void @llvm.nvvm.tcgen05.dealloc.cg1 (ptr addrspace (6 ) %tmem_addr , i32 %ncols )
77
118
declare void @llvm.nvvm.tcgen05.dealloc.cg2 (ptr addrspace (6 ) %tmem_addr , i32 %ncols )
78
119
79
- ; CHECK-LABEL: test_tcgen05_dealloc
80
- define void @test_tcgen05_dealloc (ptr addrspace (6 ) %tmem_addr , i32 %ncols ) {
81
- ; CHECK_PTX64-LABEL: test_tcgen05_dealloc(
120
+ define void @test_tcgen05_dealloc_cg1 (ptr addrspace (6 ) %tmem_addr , i32 %ncols ) {
121
+ ; CHECK_PTX64-LABEL: test_tcgen05_dealloc_cg1(
82
122
; CHECK_PTX64: {
83
123
; CHECK_PTX64-NEXT: .reg .b32 %r<3>;
84
124
; CHECK_PTX64-EMPTY:
85
125
; CHECK_PTX64-NEXT: // %bb.0:
86
- ; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_dealloc_param_0 ];
87
- ; CHECK_PTX64-NEXT: ld.param.b32 %r2, [test_tcgen05_dealloc_param_1 ];
126
+ ; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_dealloc_cg1_param_0];
127
+ ; CHECK_PTX64-NEXT: ld.param.b32 %r2, [test_tcgen05_dealloc_cg1_param_1];
88
128
; CHECK_PTX64-NEXT: tcgen05.dealloc.cta_group::1.sync.aligned.b32 %r1, %r2;
89
- ; CHECK_PTX64-NEXT: tcgen05.dealloc.cta_group::2.sync.aligned.b32 %r1, %r2;
90
129
; CHECK_PTX64-NEXT: ret;
91
130
;
92
- ; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_dealloc (
131
+ ; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_dealloc_cg1(
93
132
; CHECK_PTX64_SHARED32: {
94
133
; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<3>;
95
134
; CHECK_PTX64_SHARED32-EMPTY:
96
135
; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
97
- ; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_dealloc_param_0 ];
98
- ; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r2, [test_tcgen05_dealloc_param_1 ];
136
+ ; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_dealloc_cg1_param_0];
137
+ ; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r2, [test_tcgen05_dealloc_cg1_param_1];
99
138
; CHECK_PTX64_SHARED32-NEXT: tcgen05.dealloc.cta_group::1.sync.aligned.b32 %r1, %r2;
100
- ; CHECK_PTX64_SHARED32-NEXT: tcgen05.dealloc.cta_group::2.sync.aligned.b32 %r1, %r2;
101
139
; CHECK_PTX64_SHARED32-NEXT: ret;
102
140
call void @llvm.nvvm.tcgen05.dealloc.cg1 (ptr addrspace (6 ) %tmem_addr , i32 %ncols )
141
+ ret void
142
+ }
103
143
144
+ define void @test_tcgen05_dealloc_cg2 (ptr addrspace (6 ) %tmem_addr , i32 %ncols ) {
145
+ ; CHECK_PTX64-LABEL: test_tcgen05_dealloc_cg2(
146
+ ; CHECK_PTX64: {
147
+ ; CHECK_PTX64-NEXT: .reg .b32 %r<3>;
148
+ ; CHECK_PTX64-EMPTY:
149
+ ; CHECK_PTX64-NEXT: // %bb.0:
150
+ ; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_dealloc_cg2_param_0];
151
+ ; CHECK_PTX64-NEXT: ld.param.b32 %r2, [test_tcgen05_dealloc_cg2_param_1];
152
+ ; CHECK_PTX64-NEXT: tcgen05.dealloc.cta_group::2.sync.aligned.b32 %r1, %r2;
153
+ ; CHECK_PTX64-NEXT: ret;
154
+ ;
155
+ ; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_dealloc_cg2(
156
+ ; CHECK_PTX64_SHARED32: {
157
+ ; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<3>;
158
+ ; CHECK_PTX64_SHARED32-EMPTY:
159
+ ; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
160
+ ; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_dealloc_cg2_param_0];
161
+ ; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r2, [test_tcgen05_dealloc_cg2_param_1];
162
+ ; CHECK_PTX64_SHARED32-NEXT: tcgen05.dealloc.cta_group::2.sync.aligned.b32 %r1, %r2;
163
+ ; CHECK_PTX64_SHARED32-NEXT: ret;
104
164
call void @llvm.nvvm.tcgen05.dealloc.cg2 (ptr addrspace (6 ) %tmem_addr , i32 %ncols )
105
165
ret void
106
166
}
107
167
108
168
declare void @llvm.nvvm.tcgen05.relinq.alloc.permit.cg1 ()
109
169
declare void @llvm.nvvm.tcgen05.relinq.alloc.permit.cg2 ()
110
170
111
- ; CHECK-LABEL: test_tcgen05_relinquish_alloc_permit
112
- define void @test_tcgen05_relinquish_alloc_permit () {
113
- ; CHECK_PTX64-LABEL: test_tcgen05_relinquish_alloc_permit(
171
+ define void @test_tcgen05_relinquish_alloc_permit_cg1 () {
172
+ ; CHECK_PTX64-LABEL: test_tcgen05_relinquish_alloc_permit_cg1(
114
173
; CHECK_PTX64: {
115
174
; CHECK_PTX64-EMPTY:
116
175
; CHECK_PTX64-EMPTY:
117
176
; CHECK_PTX64-NEXT: // %bb.0:
118
177
; CHECK_PTX64-NEXT: tcgen05.relinquish_alloc_permit.cta_group::1.sync.aligned;
119
- ; CHECK_PTX64-NEXT: tcgen05.relinquish_alloc_permit.cta_group::2.sync.aligned;
120
178
; CHECK_PTX64-NEXT: ret;
121
179
;
122
- ; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_relinquish_alloc_permit (
180
+ ; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_relinquish_alloc_permit_cg1(
123
181
; CHECK_PTX64_SHARED32: {
124
182
; CHECK_PTX64_SHARED32-EMPTY:
125
183
; CHECK_PTX64_SHARED32-EMPTY:
126
184
; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
127
185
; CHECK_PTX64_SHARED32-NEXT: tcgen05.relinquish_alloc_permit.cta_group::1.sync.aligned;
128
- ; CHECK_PTX64_SHARED32-NEXT: tcgen05.relinquish_alloc_permit.cta_group::2.sync.aligned;
129
186
; CHECK_PTX64_SHARED32-NEXT: ret;
130
187
call void @llvm.nvvm.tcgen05.relinq.alloc.permit.cg1 ()
188
+ ret void
189
+ }
131
190
191
+ define void @test_tcgen05_relinquish_alloc_permit_cg2 () {
192
+ ; CHECK_PTX64-LABEL: test_tcgen05_relinquish_alloc_permit_cg2(
193
+ ; CHECK_PTX64: {
194
+ ; CHECK_PTX64-EMPTY:
195
+ ; CHECK_PTX64-EMPTY:
196
+ ; CHECK_PTX64-NEXT: // %bb.0:
197
+ ; CHECK_PTX64-NEXT: tcgen05.relinquish_alloc_permit.cta_group::2.sync.aligned;
198
+ ; CHECK_PTX64-NEXT: ret;
199
+ ;
200
+ ; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_relinquish_alloc_permit_cg2(
201
+ ; CHECK_PTX64_SHARED32: {
202
+ ; CHECK_PTX64_SHARED32-EMPTY:
203
+ ; CHECK_PTX64_SHARED32-EMPTY:
204
+ ; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
205
+ ; CHECK_PTX64_SHARED32-NEXT: tcgen05.relinquish_alloc_permit.cta_group::2.sync.aligned;
206
+ ; CHECK_PTX64_SHARED32-NEXT: ret;
132
207
call void @llvm.nvvm.tcgen05.relinq.alloc.permit.cg2 ()
133
208
ret void
134
209
}
0 commit comments