99#define MAX_BLOCKS_PER_MP 4
1010#endif
1111
12+ // CHECK: @Kernel1() #[[ATTR0:[0-9]+]]
13+ // CHECK: @{{.*}}Kernel4{{.*}}() #[[ATTR0]]
14+ // CHECK: @{{.*}}Kernel5{{.*}}() #[[ATTR1:[0-9]+]]
15+ // CHECK: @{{.*}}Kernel6{{.*}}() #[[ATTR0]]
16+ // CHECK: @{{.*}}Kernel8{{.*}}() #[[ATTR3:[0-9]+]]
17+
18+ // CHECK: attributes #[[ATTR0]] = {{{.*}} "nvvm.minctasm"="2" {{.*}}}
19+ // CHECK: attributes #[[ATTR1]] = {{{.*}} "nvvm.minctasm"="258" {{.*}}}
20+ // CHECK: attributes #[[ATTR3]] = {{{.*}} "nvvm.minctasm"="12" {{.*}}}
21+
22+ // CHECK_MAX_BLOCKS: @Kernel1_sm_90() #[[ATTR4:[0-9]+]]
23+ // CHECK_MAX_BLOCKS: @{{.*}}Kernel4_sm_90{{.*}} #[[ATTR4]]
24+ // CHECK_MAX_BLOCKS: @{{.*}}Kernel5_sm_90{{.*}} #[[ATTR5:[0-9]+]]
25+ // CHECK_MAX_BLOCKS: @{{.*}}Kernel8_sm_90{{.*}} #[[ATTR6:[0-9]+]]
26+
27+ // CHECK_MAX_BLOCKS: attributes #[[ATTR4]] = {{{.*}} "nvvm.maxclusterrank"="4" "nvvm.minctasm"="2" {{.*}}}
28+ // CHECK_MAX_BLOCKS: attributes #[[ATTR5]] = {{{.*}} "nvvm.maxclusterrank"="260" "nvvm.minctasm"="258" {{.*}}}
29+ // CHECK_MAX_BLOCKS: attributes #[[ATTR6]] = {{{.*}} "nvvm.maxclusterrank"="14" "nvvm.minctasm"="12" {{.*}}}
30+
1231// Test both max threads per block and Min cta per sm.
1332extern " C" {
1433__global__ void
@@ -19,7 +38,6 @@ Kernel1()
1938}
2039
2140// CHECK: !{{[0-9]+}} = !{ptr @Kernel1, !"maxntidx", i32 256}
22- // CHECK: !{{[0-9]+}} = !{ptr @Kernel1, !"minctasm", i32 2}
2341
2442#ifdef USE_MAX_BLOCKS
2543// Test max threads per block and min/max cta per sm.
@@ -32,8 +50,6 @@ Kernel1_sm_90()
3250}
3351
3452// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @Kernel1_sm_90, !"maxntidx", i32 256}
35- // CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @Kernel1_sm_90, !"minctasm", i32 2}
36- // CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @Kernel1_sm_90, !"maxclusterrank", i32 4}
3753#endif // USE_MAX_BLOCKS
3854
3955// Test only max threads per block. Min cta per sm defaults to 0, and
@@ -67,7 +83,6 @@ Kernel4()
6783template __global__ void Kernel4<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP>();
6884
6985// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4{{.*}}, !"maxntidx", i32 256}
70- // CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4{{.*}}, !"minctasm", i32 2}
7186
7287#ifdef USE_MAX_BLOCKS
7388template <int max_threads_per_block, int min_blocks_per_mp, int max_blocks_per_mp>
@@ -79,8 +94,6 @@ Kernel4_sm_90()
7994template __global__ void Kernel4_sm_90<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP, MAX_BLOCKS_PER_MP>();
8095
8196// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4_sm_90{{.*}}, !"maxntidx", i32 256}
82- // CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4_sm_90{{.*}}, !"minctasm", i32 2}
83- // CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel4_sm_90{{.*}}, !"maxclusterrank", i32 4}
8497#endif // USE_MAX_BLOCKS
8598
8699const int constint = 100 ;
@@ -94,7 +107,6 @@ Kernel5()
94107template __global__ void Kernel5<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP>();
95108
96109// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5{{.*}}, !"maxntidx", i32 356}
97- // CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5{{.*}}, !"minctasm", i32 258}
98110
99111#ifdef USE_MAX_BLOCKS
100112
@@ -109,8 +121,6 @@ Kernel5_sm_90()
109121template __global__ void Kernel5_sm_90<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP, MAX_BLOCKS_PER_MP>();
110122
111123// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5_sm_90{{.*}}, !"maxntidx", i32 356}
112- // CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5_sm_90{{.*}}, !"minctasm", i32 258}
113- // CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel5_sm_90{{.*}}, !"maxclusterrank", i32 260}
114124#endif // USE_MAX_BLOCKS
115125
116126// Make sure we don't emit negative launch bounds values.
@@ -120,7 +130,6 @@ Kernel6()
120130{
121131}
122132// CHECK-NOT: !{{[0-9]+}} = !{ptr @{{.*}}Kernel6{{.*}}, !"maxntidx",
123- // CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel6{{.*}}, !"minctasm",
124133
125134__global__ void
126135__launch_bounds__ ( MAX_THREADS_PER_BLOCK, -MIN_BLOCKS_PER_MP )
@@ -144,12 +153,9 @@ Kernel7_sm_90()
144153const char constchar = 12 ;
145154__global__ void __launch_bounds__ (constint, constchar) Kernel8() {}
146155// CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8{{.*}}, !"maxntidx", i32 100
147- // CHECK: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8{{.*}}, !"minctasm", i32 12
148156
149157#ifdef USE_MAX_BLOCKS
150158const char constchar_2 = 14 ;
151159__global__ void __launch_bounds__ (constint, constchar, constchar_2) Kernel8_sm_90() {}
152160// CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8_sm_90{{.*}}, !"maxntidx", i32 100
153- // CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8_sm_90{{.*}}, !"minctasm", i32 12
154- // CHECK_MAX_BLOCKS: !{{[0-9]+}} = !{ptr @{{.*}}Kernel8_sm_90{{.*}}, !"maxclusterrank", i32 14
155161#endif // USE_MAX_BLOCKS
0 commit comments