1010#endif
1111
1212// CHECK: @Kernel1() #[[ATTR0:[0-9]+]]
13+ // CHECK: @Kernel2() #[[ATTR1:[0-9]+]]
14+ // CHECK: @{{.*}}Kernel3{{.*}}() #[[ATTR1]]
1315// CHECK: @{{.*}}Kernel4{{.*}}() #[[ATTR0]]
14- // CHECK: @{{.*}}Kernel5{{.*}}() #[[ATTR1:[0-9]+]]
15- // CHECK: @{{.*}}Kernel6{{.*}}() #[[ATTR0]]
16- // CHECK: @{{.*}}Kernel8{{.*}}() #[[ATTR3:[0-9]+]]
17-
18- // CHECK: attributes #[[ATTR0]] = {{{.*}} "nvvm.minctasm"="2" {{.*}}}
19- // CHECK: attributes #[[ATTR1]] = {{{.*}} "nvvm.minctasm"="258" {{.*}}}
20- // CHECK: attributes #[[ATTR3]] = {{{.*}} "nvvm.minctasm"="12" {{.*}}}
21-
22- // CHECK_MAX_BLOCKS: @Kernel1_sm_90() #[[ATTR4:[0-9]+]]
23- // CHECK_MAX_BLOCKS: @{{.*}}Kernel4_sm_90{{.*}} #[[ATTR4]]
24- // CHECK_MAX_BLOCKS: @{{.*}}Kernel5_sm_90{{.*}} #[[ATTR5:[0-9]+]]
25- // CHECK_MAX_BLOCKS: @{{.*}}Kernel8_sm_90{{.*}} #[[ATTR6:[0-9]+]]
26-
27- // CHECK_MAX_BLOCKS: attributes #[[ATTR4]] = {{{.*}} "nvvm.maxclusterrank"="4" "nvvm.minctasm"="2" {{.*}}}
28- // CHECK_MAX_BLOCKS: attributes #[[ATTR5]] = {{{.*}} "nvvm.maxclusterrank"="260" "nvvm.minctasm"="258" {{.*}}}
29- // CHECK_MAX_BLOCKS: attributes #[[ATTR6]] = {{{.*}} "nvvm.maxclusterrank"="14" "nvvm.minctasm"="12" {{.*}}}
16+ // CHECK: @{{.*}}Kernel5{{.*}}() #[[ATTR2:[0-9]+]]
17+ // CHECK: @{{.*}}Kernel6{{.*}}() #[[ATTR3:[0-9]+]]
18+ // CHECK: @{{.*}}Kernel7{{.*}}() #[[ATTR1]]
19+ // CHECK: @{{.*}}Kernel8{{.*}}() #[[ATTR4:[0-9]+]]
20+
21+ // CHECK-DAG: attributes #[[ATTR0]] = {{{.*}} "nvvm.maxntid"="256" "nvvm.minctasm"="2" {{.*}}}
22+ // CHECK-DAG: attributes #[[ATTR1]] = {{{.*}} "nvvm.maxntid"="256" {{.*}}}
23+ // CHECK-DAG: attributes #[[ATTR2]] = {{{.*}} "nvvm.maxntid"="356" "nvvm.minctasm"="258" {{.*}}}
24+ // CHECK-DAG: attributes #[[ATTR3]] = {{{.*}} "nvvm.minctasm"="2" {{.*}}}
25+ // CHECK-DAG: attributes #[[ATTR4]] = {{{.*}} "nvvm.maxntid"="100" "nvvm.minctasm"="12" {{.*}}}
26+
27+ // CHECK_MAX_BLOCKS: @Kernel1_sm_90() #[[ATTR0:[0-9]+]]
28+ // CHECK_MAX_BLOCKS: @{{.*}}Kernel4_sm_90{{.*}} #[[ATTR0]]
29+ // CHECK_MAX_BLOCKS: @{{.*}}Kernel5_sm_90{{.*}} #[[ATTR1:[0-9]+]]
30+ // CHECK_MAX_BLOCKS: @{{.*}}Kernel7_sm_90{{.*}} #[[ATTR2:[0-9]+]]
31+ // CHECK_MAX_BLOCKS: @{{.*}}Kernel8_sm_90{{.*}} #[[ATTR3:[0-9]+]]
32+
33+ // CHECK_MAX_BLOCKS-DAG: attributes #[[ATTR0]] = {{{.*}} "nvvm.maxclusterrank"="4" "nvvm.maxntid"="256" "nvvm.minctasm"="2" {{.*}}}
34+ // CHECK_MAX_BLOCKS-DAG: attributes #[[ATTR1]] = {{{.*}} "nvvm.maxclusterrank"="260" "nvvm.maxntid"="356" "nvvm.minctasm"="258" {{.*}}}
35+ // CHECK_MAX_BLOCKS-DAG: attributes #[[ATTR2]] = {{{.*}} "nvvm.maxntid"="256" {{.*}}}
36+ // CHECK_MAX_BLOCKS-DAG: attributes #[[ATTR3]] = {{{.*}} "nvvm.maxclusterrank"="14" "nvvm.maxntid"="100" "nvvm.minctasm"="12" {{.*}}}
3037
3138// Test both max threads per block and Min cta per sm.
3239extern " C" {
@@ -37,8 +44,6 @@ Kernel1()
3744}
3845}
3946
40- // CHECK: !{{[0-9]+}} = !{ptr @Kernel1, !"maxntidx", i32 256}
41-
4247#ifdef USE_MAX_BLOCKS
4348// Test max threads per block and min/max cta per sm.
4449extern " C" {
@@ -48,8 +53,6 @@ Kernel1_sm_90()
4853{
4954}
5055}
51-
52- // CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @Kernel1_sm_90, !"maxntidx", i32 256}
5356#endif // USE_MAX_BLOCKS
5457
5558// Test only max threads per block. Min cta per sm defaults to 0, and
@@ -62,8 +65,6 @@ Kernel2()
6265}
6366}
6467
65- // CHECK: !{{[0-9]+}} = !{ptr @Kernel2, !"maxntidx", i32 256}
66-
6768template <int max_threads_per_block>
6869__global__ void
6970__launch_bounds__ (max_threads_per_block)
@@ -72,7 +73,6 @@ Kernel3()
7273}
7374
7475template __global__ void Kernel3<MAX_THREADS_PER_BLOCK>();
75- // CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel3{{.*}}, !"maxntidx", i32 256}
7676
7777template <int max_threads_per_block, int min_blocks_per_mp>
7878__global__ void
@@ -82,7 +82,6 @@ Kernel4()
8282}
8383template __global__ void Kernel4<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP>();
8484
85- // CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4{{.*}}, !"maxntidx", i32 256}
8685
8786#ifdef USE_MAX_BLOCKS
8887template <int max_threads_per_block, int min_blocks_per_mp, int max_blocks_per_mp>
@@ -93,7 +92,6 @@ Kernel4_sm_90()
9392}
9493template __global__ void Kernel4_sm_90<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP, MAX_BLOCKS_PER_MP>();
9594
96- // CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4_sm_90{{.*}}, !"maxntidx", i32 256}
9795#endif // USE_MAX_BLOCKS
9896
9997const int constint = 100 ;
@@ -106,8 +104,6 @@ Kernel5()
106104}
107105template __global__ void Kernel5<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP>();
108106
109- // CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5{{.*}}, !"maxntidx", i32 356}
110-
111107#ifdef USE_MAX_BLOCKS
112108
113109template <int max_threads_per_block, int min_blocks_per_mp, int max_blocks_per_mp>
@@ -120,7 +116,6 @@ Kernel5_sm_90()
120116}
121117template __global__ void Kernel5_sm_90<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP, MAX_BLOCKS_PER_MP>();
122118
123- // CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5_sm_90{{.*}}, !"maxntidx", i32 356}
124119#endif // USE_MAX_BLOCKS
125120
126121// Make sure we don't emit negative launch bounds values.
@@ -129,33 +124,25 @@ __launch_bounds__( -MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP )
129124Kernel6()
130125{
131126}
132- // CHECK-NOT: !{{[0-9]+}} = !{ptr @{{.*}}Kernel6{{.*}}, !"maxntidx",
133127
134128__global__ void
135129__launch_bounds__ ( MAX_THREADS_PER_BLOCK, -MIN_BLOCKS_PER_MP )
136130Kernel7()
137131{
138132}
139- // CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel7{{.*}}, !"maxntidx",
140- // CHECK-NOT: !{{[0-9]+}} = !{ptr @{{.*}}Kernel7{{.*}}, !"minctasm",
141133
142134#ifdef USE_MAX_BLOCKS
143135__global__ void
144136__launch_bounds__ ( MAX_THREADS_PER_BLOCK, -MIN_BLOCKS_PER_MP, -MAX_BLOCKS_PER_MP )
145137Kernel7_sm_90()
146138{
147139}
148- // CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel7_sm_90{{.*}}, !"maxntidx",
149- // CHECK_MAX_BLOCKS-NOT: !{{[0-9]+}} = !{ptr @{{.*}}Kernel7_sm_90{{.*}}, !"minctasm",
150- // CHECK_MAX_BLOCKS-NOT: !{{[0-9]+}} = !{ptr @{{.*}}Kernel7_sm_90{{.*}}, !"maxclusterrank",
151140#endif // USE_MAX_BLOCKS
152141
153142const char constchar = 12 ;
154143__global__ void __launch_bounds__ (constint, constchar) Kernel8() {}
155- // CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8{{.*}}, !"maxntidx", i32 100
156144
157145#ifdef USE_MAX_BLOCKS
158146const char constchar_2 = 14 ;
159147__global__ void __launch_bounds__ (constint, constchar, constchar_2) Kernel8_sm_90() {}
160- // CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8_sm_90{{.*}}, !"maxntidx", i32 100
161148#endif // USE_MAX_BLOCKS
0 commit comments