@@ -23,11 +23,15 @@ module attributes {gpu.container_module, dlti.dl_spec = #dlti.dl_spec<#dlti.dl_e
2323 // CHECK: %[[ALLOCA:.*]] = fir.alloca f32
2424 %c1 = arith.constant 1 : index
2525 %c11_i32 = arith.constant 11 : i32
26+ %c1024_i32 = arith.constant 1024 : i32
2627 %c6_i32 = arith.constant 6 : i32
2728 %c1_i32 = arith.constant 1 : i32
2829 // CHECK: gpu.launch_func @cuda_device_mod::@_QPsub_device1 blocks in (%{{.*}}, %{{.*}}, %{{.*}}) threads in (%{{.*}}, %{{.*}}, %{{.*}}) dynamic_shared_memory_size %c0{{.*}}
2930 cuf.kernel_launch @cuda_device_mod::@_QPsub_device1<<<%c1_i32, %c1_i32, %c1_i32, %c1_i32, %c1_i32, %c1_i32>>>()
3031
32+ // CHECK: gpu.launch_func @cuda_device_mod::@_QPsub_device1 blocks in (%{{.*}}, %{{.*}}, %{{.*}}) threads in (%{{.*}}, %{{.*}}, %{{.*}}) dynamic_shared_memory_size %c1024{{.*}}
33+ cuf.kernel_launch @cuda_device_mod::@_QPsub_device1<<<%c1_i32, %c1_i32, %c1_i32, %c1_i32, %c1_i32, %c1_i32, %c1024_i32>>>()
34+
3135 // CHECK: gpu.launch_func @cuda_device_mod::@_QPsub_device2 blocks in (%{{.*}}, %{{.*}}, %{{.*}}) threads in (%{{.*}}, %{{.*}}, %{{.*}}) dynamic_shared_memory_size %c0{{.*}} args(%[[ALLOCA]] : !fir.ref<f32>)
3236 cuf.kernel_launch @cuda_device_mod::@_QPsub_device2<<<%c1_i32, %c1_i32, %c1_i32, %c1_i32, %c1_i32, %c1_i32>>>(%0) : (!fir.ref<f32>)
3337 return
0 commit comments