@@ -149,8 +149,7 @@ struct CUDAKernelTy : public GenericKernelTy {
149149 // The maximum number of threads cannot exceed the maximum of the kernel.
150150 MaxNumThreads = std::min (MaxNumThreads, (uint32_t )MaxThreads);
151151
152- // Retrieve the size of the arguments.
153- return initArgsSize ();
152+ return Plugin::success ();
154153 }
155154
156155 /// Launch the CUDA kernel function.
@@ -174,32 +173,11 @@ struct CUDAKernelTy : public GenericKernelTy {
174173 }
175174
176175private:
177- /// Initialize the size of the arguments.
178- Error initArgsSize () {
179- CUresult Res;
180- size_t ArgOffset, ArgSize;
181- size_t Arg = 0 ;
182-
183- ArgsSize = 0 ;
184-
185- // Find the last argument to know the total size of the arguments.
186- while ((Res = cuFuncGetParamInfo (Func, Arg++, &ArgOffset, &ArgSize)) ==
187- CUDA_SUCCESS)
188- ArgsSize = ArgOffset + ArgSize;
189-
190- if (Res != CUDA_ERROR_INVALID_VALUE)
191- return Plugin::check (Res, " error in cuFuncGetParamInfo: %s" );
192- return Plugin::success ();
193- }
194-
195176 /// The CUDA kernel function to execute.
196177 CUfunction Func;
197178 /// The maximum amount of dynamic shared memory per thread group. By default,
198179 /// this is set to 48 KB.
199180 mutable uint32_t MaxDynCGroupMemLimit = 49152 ;
200-
201- /// The size of the kernel arguments.
202- size_t ArgsSize;
203181};
204182
205183/// Class wrapping a CUDA stream reference. These are the objects handled by the
@@ -1452,23 +1430,16 @@ Error CUDAKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
14521430 AsyncInfoWrapperTy &AsyncInfoWrapper) const {
14531431 CUDADeviceTy &CUDADevice = static_cast <CUDADeviceTy &>(GenericDevice);
14541432
1455- // The args size passed in LaunchParams may have tail padding, which is not
1456- // accepted by the CUDA driver.
1457- if (ArgsSize > LaunchParams.Size )
1458- return Plugin::error (ErrorCode::INVALID_ARGUMENT,
1459- " mismatch in kernel arguments" );
1460-
14611433 CUstream Stream;
14621434 if (auto Err = CUDADevice.getStream (AsyncInfoWrapper, Stream))
14631435 return Err;
14641436
14651437 uint32_t MaxDynCGroupMem =
14661438 std::max (KernelArgs.DynCGroupMem , GenericDevice.getDynamicMemorySize ());
14671439
1468- size_t ConfigArgsSize = ArgsSize;
14691440 void *Config[] = {CU_LAUNCH_PARAM_BUFFER_POINTER, LaunchParams.Data ,
14701441 CU_LAUNCH_PARAM_BUFFER_SIZE,
1471- reinterpret_cast <void *>(&ConfigArgsSize ),
1442+ reinterpret_cast <void *>(&LaunchParams. Size ),
14721443 CU_LAUNCH_PARAM_END};
14731444
14741445 // If we are running an RPC server we want to wake up the server thread
0 commit comments