Skip to content

Commit 443d027

Browse files
authored
[NFC][NVPTX-Tests] Split tcgen05 cta group tests (#162597)
This change splits cta_group::1 and cta_group::2 into two separate functions because they are not supported inside a single function. ptxas from 13.0 release onwards emits an error message for this case.
1 parent 2997611 commit 443d027

File tree

4 files changed

+584
-188
lines changed

4 files changed

+584
-188
lines changed

llvm/test/CodeGen/NVPTX/tcgen05-alloc.ll

Lines changed: 112 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -12,123 +12,198 @@ declare void @llvm.nvvm.tcgen05.alloc.cg2(ptr %addr, i32 %ncols)
1212
declare void @llvm.nvvm.tcgen05.alloc.shared.cg1(ptr addrspace(3) %addr, i32 %ncols)
1313
declare void @llvm.nvvm.tcgen05.alloc.shared.cg2(ptr addrspace(3) %addr, i32 %ncols)
1414

15-
; CHECK-LABEL: test_tcgen05_alloc
16-
define void @test_tcgen05_alloc(ptr %addr, i32 %ncols) {
17-
; CHECK_PTX64-LABEL: test_tcgen05_alloc(
15+
define void @test_tcgen05_alloc_cg1(ptr %addr, i32 %ncols) {
16+
; CHECK_PTX64-LABEL: test_tcgen05_alloc_cg1(
1817
; CHECK_PTX64: {
1918
; CHECK_PTX64-NEXT: .reg .b32 %r<2>;
2019
; CHECK_PTX64-NEXT: .reg .b64 %rd<2>;
2120
; CHECK_PTX64-EMPTY:
2221
; CHECK_PTX64-NEXT: // %bb.0:
23-
; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_param_0];
24-
; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_param_1];
22+
; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_cg1_param_0];
23+
; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_cg1_param_1];
2524
; CHECK_PTX64-NEXT: tcgen05.alloc.cta_group::1.sync.aligned.b32 [%rd1], %r1;
26-
; CHECK_PTX64-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.b32 [%rd1], %r1;
2725
; CHECK_PTX64-NEXT: ret;
2826
;
29-
; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_alloc(
27+
; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_alloc_cg1(
3028
; CHECK_PTX64_SHARED32: {
3129
; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<2>;
3230
; CHECK_PTX64_SHARED32-NEXT: .reg .b64 %rd<2>;
3331
; CHECK_PTX64_SHARED32-EMPTY:
3432
; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
35-
; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_param_0];
36-
; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_param_1];
33+
; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_cg1_param_0];
34+
; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_cg1_param_1];
3735
; CHECK_PTX64_SHARED32-NEXT: tcgen05.alloc.cta_group::1.sync.aligned.b32 [%rd1], %r1;
38-
; CHECK_PTX64_SHARED32-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.b32 [%rd1], %r1;
3936
; CHECK_PTX64_SHARED32-NEXT: ret;
4037
call void @llvm.nvvm.tcgen05.alloc.cg1(ptr %addr, i32 %ncols)
41-
call void @llvm.nvvm.tcgen05.alloc.cg2(ptr %addr, i32 %ncols)
38+
ret void
39+
}
4240

41+
define void @test_tcgen05_alloc_cg2(ptr %addr, i32 %ncols) {
42+
; CHECK_PTX64-LABEL: test_tcgen05_alloc_cg2(
43+
; CHECK_PTX64: {
44+
; CHECK_PTX64-NEXT: .reg .b32 %r<2>;
45+
; CHECK_PTX64-NEXT: .reg .b64 %rd<2>;
46+
; CHECK_PTX64-EMPTY:
47+
; CHECK_PTX64-NEXT: // %bb.0:
48+
; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_cg2_param_0];
49+
; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_cg2_param_1];
50+
; CHECK_PTX64-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.b32 [%rd1], %r1;
51+
; CHECK_PTX64-NEXT: ret;
52+
;
53+
; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_alloc_cg2(
54+
; CHECK_PTX64_SHARED32: {
55+
; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<2>;
56+
; CHECK_PTX64_SHARED32-NEXT: .reg .b64 %rd<2>;
57+
; CHECK_PTX64_SHARED32-EMPTY:
58+
; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
59+
; CHECK_PTX64_SHARED32-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_cg2_param_0];
60+
; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_cg2_param_1];
61+
; CHECK_PTX64_SHARED32-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.b32 [%rd1], %r1;
62+
; CHECK_PTX64_SHARED32-NEXT: ret;
63+
call void @llvm.nvvm.tcgen05.alloc.cg2(ptr %addr, i32 %ncols)
4364
ret void
4465
}
4566

46-
; CHECK-LABEL: test_tcgen05_alloc_shared
47-
define void @test_tcgen05_alloc_shared(ptr addrspace(3) %addr, i32 %ncols) {
48-
; CHECK_PTX64-LABEL: test_tcgen05_alloc_shared(
67+
define void @test_tcgen05_alloc_shared_cg1(ptr addrspace(3) %addr, i32 %ncols) {
68+
; CHECK_PTX64-LABEL: test_tcgen05_alloc_shared_cg1(
4969
; CHECK_PTX64: {
5070
; CHECK_PTX64-NEXT: .reg .b32 %r<2>;
5171
; CHECK_PTX64-NEXT: .reg .b64 %rd<2>;
5272
; CHECK_PTX64-EMPTY:
5373
; CHECK_PTX64-NEXT: // %bb.0:
54-
; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_shared_param_0];
55-
; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_shared_param_1];
74+
; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_shared_cg1_param_0];
75+
; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_shared_cg1_param_1];
5676
; CHECK_PTX64-NEXT: tcgen05.alloc.cta_group::1.sync.aligned.shared::cta.b32 [%rd1], %r1;
57-
; CHECK_PTX64-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.shared::cta.b32 [%rd1], %r1;
5877
; CHECK_PTX64-NEXT: ret;
5978
;
60-
; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_alloc_shared(
79+
; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_alloc_shared_cg1(
6180
; CHECK_PTX64_SHARED32: {
6281
; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<3>;
6382
; CHECK_PTX64_SHARED32-EMPTY:
6483
; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
65-
; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_shared_param_0];
66-
; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r2, [test_tcgen05_alloc_shared_param_1];
84+
; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_shared_cg1_param_0];
85+
; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r2, [test_tcgen05_alloc_shared_cg1_param_1];
6786
; CHECK_PTX64_SHARED32-NEXT: tcgen05.alloc.cta_group::1.sync.aligned.shared::cta.b32 [%r1], %r2;
68-
; CHECK_PTX64_SHARED32-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.shared::cta.b32 [%r1], %r2;
6987
; CHECK_PTX64_SHARED32-NEXT: ret;
7088
call void @llvm.nvvm.tcgen05.alloc.shared.cg1(ptr addrspace(3) %addr, i32 %ncols)
89+
ret void
90+
}
7191

92+
define void @test_tcgen05_alloc_shared_cg2(ptr addrspace(3) %addr, i32 %ncols) {
93+
; CHECK_PTX64-LABEL: test_tcgen05_alloc_shared_cg2(
94+
; CHECK_PTX64: {
95+
; CHECK_PTX64-NEXT: .reg .b32 %r<2>;
96+
; CHECK_PTX64-NEXT: .reg .b64 %rd<2>;
97+
; CHECK_PTX64-EMPTY:
98+
; CHECK_PTX64-NEXT: // %bb.0:
99+
; CHECK_PTX64-NEXT: ld.param.b64 %rd1, [test_tcgen05_alloc_shared_cg2_param_0];
100+
; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_shared_cg2_param_1];
101+
; CHECK_PTX64-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.shared::cta.b32 [%rd1], %r1;
102+
; CHECK_PTX64-NEXT: ret;
103+
;
104+
; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_alloc_shared_cg2(
105+
; CHECK_PTX64_SHARED32: {
106+
; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<3>;
107+
; CHECK_PTX64_SHARED32-EMPTY:
108+
; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
109+
; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_alloc_shared_cg2_param_0];
110+
; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r2, [test_tcgen05_alloc_shared_cg2_param_1];
111+
; CHECK_PTX64_SHARED32-NEXT: tcgen05.alloc.cta_group::2.sync.aligned.shared::cta.b32 [%r1], %r2;
112+
; CHECK_PTX64_SHARED32-NEXT: ret;
72113
call void @llvm.nvvm.tcgen05.alloc.shared.cg2(ptr addrspace(3) %addr, i32 %ncols)
73114
ret void
74115
}
75116

76117
declare void @llvm.nvvm.tcgen05.dealloc.cg1(ptr addrspace(6) %tmem_addr, i32 %ncols)
77118
declare void @llvm.nvvm.tcgen05.dealloc.cg2(ptr addrspace(6) %tmem_addr, i32 %ncols)
78119

79-
; CHECK-LABEL: test_tcgen05_dealloc
80-
define void @test_tcgen05_dealloc(ptr addrspace(6) %tmem_addr, i32 %ncols) {
81-
; CHECK_PTX64-LABEL: test_tcgen05_dealloc(
120+
define void @test_tcgen05_dealloc_cg1(ptr addrspace(6) %tmem_addr, i32 %ncols) {
121+
; CHECK_PTX64-LABEL: test_tcgen05_dealloc_cg1(
82122
; CHECK_PTX64: {
83123
; CHECK_PTX64-NEXT: .reg .b32 %r<3>;
84124
; CHECK_PTX64-EMPTY:
85125
; CHECK_PTX64-NEXT: // %bb.0:
86-
; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_dealloc_param_0];
87-
; CHECK_PTX64-NEXT: ld.param.b32 %r2, [test_tcgen05_dealloc_param_1];
126+
; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_dealloc_cg1_param_0];
127+
; CHECK_PTX64-NEXT: ld.param.b32 %r2, [test_tcgen05_dealloc_cg1_param_1];
88128
; CHECK_PTX64-NEXT: tcgen05.dealloc.cta_group::1.sync.aligned.b32 %r1, %r2;
89-
; CHECK_PTX64-NEXT: tcgen05.dealloc.cta_group::2.sync.aligned.b32 %r1, %r2;
90129
; CHECK_PTX64-NEXT: ret;
91130
;
92-
; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_dealloc(
131+
; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_dealloc_cg1(
93132
; CHECK_PTX64_SHARED32: {
94133
; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<3>;
95134
; CHECK_PTX64_SHARED32-EMPTY:
96135
; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
97-
; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_dealloc_param_0];
98-
; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r2, [test_tcgen05_dealloc_param_1];
136+
; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_dealloc_cg1_param_0];
137+
; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r2, [test_tcgen05_dealloc_cg1_param_1];
99138
; CHECK_PTX64_SHARED32-NEXT: tcgen05.dealloc.cta_group::1.sync.aligned.b32 %r1, %r2;
100-
; CHECK_PTX64_SHARED32-NEXT: tcgen05.dealloc.cta_group::2.sync.aligned.b32 %r1, %r2;
101139
; CHECK_PTX64_SHARED32-NEXT: ret;
102140
call void @llvm.nvvm.tcgen05.dealloc.cg1(ptr addrspace(6) %tmem_addr, i32 %ncols)
141+
ret void
142+
}
103143

144+
define void @test_tcgen05_dealloc_cg2(ptr addrspace(6) %tmem_addr, i32 %ncols) {
145+
; CHECK_PTX64-LABEL: test_tcgen05_dealloc_cg2(
146+
; CHECK_PTX64: {
147+
; CHECK_PTX64-NEXT: .reg .b32 %r<3>;
148+
; CHECK_PTX64-EMPTY:
149+
; CHECK_PTX64-NEXT: // %bb.0:
150+
; CHECK_PTX64-NEXT: ld.param.b32 %r1, [test_tcgen05_dealloc_cg2_param_0];
151+
; CHECK_PTX64-NEXT: ld.param.b32 %r2, [test_tcgen05_dealloc_cg2_param_1];
152+
; CHECK_PTX64-NEXT: tcgen05.dealloc.cta_group::2.sync.aligned.b32 %r1, %r2;
153+
; CHECK_PTX64-NEXT: ret;
154+
;
155+
; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_dealloc_cg2(
156+
; CHECK_PTX64_SHARED32: {
157+
; CHECK_PTX64_SHARED32-NEXT: .reg .b32 %r<3>;
158+
; CHECK_PTX64_SHARED32-EMPTY:
159+
; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
160+
; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r1, [test_tcgen05_dealloc_cg2_param_0];
161+
; CHECK_PTX64_SHARED32-NEXT: ld.param.b32 %r2, [test_tcgen05_dealloc_cg2_param_1];
162+
; CHECK_PTX64_SHARED32-NEXT: tcgen05.dealloc.cta_group::2.sync.aligned.b32 %r1, %r2;
163+
; CHECK_PTX64_SHARED32-NEXT: ret;
104164
call void @llvm.nvvm.tcgen05.dealloc.cg2(ptr addrspace(6) %tmem_addr, i32 %ncols)
105165
ret void
106166
}
107167

108168
declare void @llvm.nvvm.tcgen05.relinq.alloc.permit.cg1()
109169
declare void @llvm.nvvm.tcgen05.relinq.alloc.permit.cg2()
110170

111-
; CHECK-LABEL: test_tcgen05_relinquish_alloc_permit
112-
define void @test_tcgen05_relinquish_alloc_permit() {
113-
; CHECK_PTX64-LABEL: test_tcgen05_relinquish_alloc_permit(
171+
define void @test_tcgen05_relinquish_alloc_permit_cg1() {
172+
; CHECK_PTX64-LABEL: test_tcgen05_relinquish_alloc_permit_cg1(
114173
; CHECK_PTX64: {
115174
; CHECK_PTX64-EMPTY:
116175
; CHECK_PTX64-EMPTY:
117176
; CHECK_PTX64-NEXT: // %bb.0:
118177
; CHECK_PTX64-NEXT: tcgen05.relinquish_alloc_permit.cta_group::1.sync.aligned;
119-
; CHECK_PTX64-NEXT: tcgen05.relinquish_alloc_permit.cta_group::2.sync.aligned;
120178
; CHECK_PTX64-NEXT: ret;
121179
;
122-
; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_relinquish_alloc_permit(
180+
; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_relinquish_alloc_permit_cg1(
123181
; CHECK_PTX64_SHARED32: {
124182
; CHECK_PTX64_SHARED32-EMPTY:
125183
; CHECK_PTX64_SHARED32-EMPTY:
126184
; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
127185
; CHECK_PTX64_SHARED32-NEXT: tcgen05.relinquish_alloc_permit.cta_group::1.sync.aligned;
128-
; CHECK_PTX64_SHARED32-NEXT: tcgen05.relinquish_alloc_permit.cta_group::2.sync.aligned;
129186
; CHECK_PTX64_SHARED32-NEXT: ret;
130187
call void @llvm.nvvm.tcgen05.relinq.alloc.permit.cg1()
188+
ret void
189+
}
131190

191+
define void @test_tcgen05_relinquish_alloc_permit_cg2() {
192+
; CHECK_PTX64-LABEL: test_tcgen05_relinquish_alloc_permit_cg2(
193+
; CHECK_PTX64: {
194+
; CHECK_PTX64-EMPTY:
195+
; CHECK_PTX64-EMPTY:
196+
; CHECK_PTX64-NEXT: // %bb.0:
197+
; CHECK_PTX64-NEXT: tcgen05.relinquish_alloc_permit.cta_group::2.sync.aligned;
198+
; CHECK_PTX64-NEXT: ret;
199+
;
200+
; CHECK_PTX64_SHARED32-LABEL: test_tcgen05_relinquish_alloc_permit_cg2(
201+
; CHECK_PTX64_SHARED32: {
202+
; CHECK_PTX64_SHARED32-EMPTY:
203+
; CHECK_PTX64_SHARED32-EMPTY:
204+
; CHECK_PTX64_SHARED32-NEXT: // %bb.0:
205+
; CHECK_PTX64_SHARED32-NEXT: tcgen05.relinquish_alloc_permit.cta_group::2.sync.aligned;
206+
; CHECK_PTX64_SHARED32-NEXT: ret;
132207
call void @llvm.nvvm.tcgen05.relinq.alloc.permit.cg2()
133208
ret void
134209
}

0 commit comments

Comments
 (0)