Skip to content

Commit c391f89

Browse files
sifakis matthewdcong
authored and committed
Adding kernel launch helpers with dynamic shared memory support to nanovdb::util
* adding launcher helpers for dynamic shared memory * adding stream parameter * adding code example in doxygen comments Signed-off-by: Matthew Cong <[email protected]>
1 parent c4a3cb3 commit c391f89

File tree

1 file changed

+40
-0
lines changed
  • nanovdb/nanovdb/util/cuda

1 file changed

+40
-0
lines changed

nanovdb/nanovdb/util/cuda/Util.h

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,46 @@ void operatorKernel(
233233
op( args... );
234234
}
235235

236+
/// @brief Cuda kernel that default-constructs a device operator functor and invokes it with the
///        kernel arguments followed by a pointer to the dynamic shared memory buffer
/// @tparam Operator Functor type; its static members MaxThreadsPerBlock and MinBlocksPerMultiprocessor
///         are passed to __launch_bounds__, and its call operator receives (args..., char*)
/// @tparam Args Types of the arguments that are passed by value to the kernel
/// @param args Arguments handed to the functor ahead of the shared memory pointer
template<class Operator, typename... Args>
__global__
__launch_bounds__(Operator::MaxThreadsPerBlock, Operator::MinBlocksPerMultiprocessor)
void operatorKernelDynamic(Args... args)
{
    // Unsized extern array: it refers to the dynamic shared memory requested in the launch configuration
    extern __shared__ char dynamicSmem[];
    Operator functor;
    functor( args..., dynamicSmem );
}
246+
247+
/// @brief Wrapper for launching a device operator that leverages dynamic shared memory, with a specified size
/// @tparam Operator Functor type that provides the static constants MaxThreadsPerBlock and
///         MinBlocksPerMultiprocessor, and a __device__ call operator taking (Args..., char*)
/// @tparam Args Types of the arguments that are passed by value to the kernel
/// @param numItems Number of thread blocks in the launch grid; nothing is launched if it is zero
/// @param smem_size Size in bytes of the dynamic shared memory requested for each block
/// @param stream Cuda stream on which the kernel is launched
/// @param args Arguments passed to the call operator of Operator, ahead of the shared memory buffer
/// @code
/// struct MyFunctor
/// {
///     // These are passed to __launch_bounds__
///     static constexpr int MaxThreadsPerBlock = <nThreads>;
///     static constexpr int MinBlocksPerMultiprocessor = 1;
///
///     struct SharedStorage {
///         // Include whatever is needed in smem
///     };
///
///     __device__
///     void operator()(Args ... myArgs, char smem_buf[])
///     { ... }
/// };
///
/// dynamicSharedMemoryLauncher<MyFunctor>(nBlocks, sizeof(typename MyFunctor::SharedStorage), stream, myArgs...);
/// // smem_buf of size sizeof(MyFunctor::SharedStorage) will be automatically passed along
/// @endcode
template<class Operator, typename... Args>
void dynamicSharedMemoryLauncher(const size_t numItems, const size_t smem_size, cudaStream_t stream, Args... args)
{
    // A grid with zero blocks is an invalid launch configuration, so treat it as a no-op
    if (numItems == 0) return;

    // Name the instantiation once, so the attribute is set on exactly the kernel that gets launched
    // (re-deducing Args at the launch site could select a different instantiation if the caller
    // specified Args explicitly, e.g. as reference types)
    auto* kernel = &operatorKernelDynamic<Operator, Args...>;

    // Opt in to the requested amount of dynamic shared memory (the attribute value is an int)
    cudaCheck(cudaFuncSetAttribute(kernel,
        cudaFuncAttributeMaxDynamicSharedMemorySize, static_cast<int>(smem_size)));

    kernel<<<numItems, Operator::MaxThreadsPerBlock, smem_size, stream>>>( args ... );

    // Kernel launches do not return a status; surface launch-configuration errors here
    cudaCheck(cudaGetLastError());
}
275+
236276
#endif// __CUDACC__
237277

238278
}// namespace util::cuda ============================================================

0 commit comments

Comments
 (0)