Skip to content

Commit 39ea262

Browse files
committed
Remove deprecated cudaLimitDevRuntimeSyncDepth functionality
cudaLimitDevRuntimeSyncDepth is the maximum grid depth at which a thread can issue the device runtime call cudaDeviceSynchronize() to wait for child grid launches to complete. Use of cudaDeviceSynchronize() in device code was deprecated in CUDA 11.6 and has been removed for devices with compute capability 9.0 or higher; on other devices it requires explicit opt-in via the compile-time flag -DCUDA_FORCE_CDP1_IF_SUPPORTED. As a result, the current code fails at runtime on NVIDIA H100 and newer GPUs.
1 parent 547258c commit 39ea262

File tree

1 file changed

+0
-10
lines changed

1 file changed

+0
-10
lines changed

HeterogeneousCore/CUDAServices/plugins/CUDAService.cc

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,6 @@ CUDAService::CUDAService(edm::ParameterSet const& config) : verbose_(config.getU
221221
auto printfFifoSize = limits.getUntrackedParameter<int>("cudaLimitPrintfFifoSize");
222222
auto stackSize = limits.getUntrackedParameter<int>("cudaLimitStackSize");
223223
auto mallocHeapSize = limits.getUntrackedParameter<int>("cudaLimitMallocHeapSize");
224-
auto devRuntimeSyncDepth = limits.getUntrackedParameter<int>("cudaLimitDevRuntimeSyncDepth");
225224
auto devRuntimePendingLaunchCount = limits.getUntrackedParameter<int>("cudaLimitDevRuntimePendingLaunchCount");
226225

227226
std::set<std::string> models;
@@ -367,11 +366,6 @@ CUDAService::CUDAService(edm::ParameterSet const& config) : verbose_(config.getU
367366
setCudaLimit(cudaLimitMallocHeapSize, "cudaLimitMallocHeapSize", mallocHeapSize);
368367
}
369368
if ((properties.major > 3) or (properties.major == 3 and properties.minor >= 5)) {
370-
// cudaLimitDevRuntimeSyncDepth controls the maximum nesting depth of a grid at which
371-
// a thread can safely call cudaDeviceSynchronize().
372-
if (devRuntimeSyncDepth >= 0) {
373-
setCudaLimit(cudaLimitDevRuntimeSyncDepth, "cudaLimitDevRuntimeSyncDepth", devRuntimeSyncDepth);
374-
}
375369
// cudaLimitDevRuntimePendingLaunchCount controls the maximum number of outstanding
376370
// device runtime launches that can be made from the current device.
377371
if (devRuntimePendingLaunchCount >= 0) {
@@ -391,8 +385,6 @@ CUDAService::CUDAService(edm::ParameterSet const& config) : verbose_(config.getU
391385
cudaCheck(cudaDeviceGetLimit(&value, cudaLimitMallocHeapSize));
392386
log << " malloc heap size: " << std::setw(10) << value / (1 << 20) << " MB\n";
393387
if ((properties.major > 3) or (properties.major == 3 and properties.minor >= 5)) {
394-
cudaCheck(cudaDeviceGetLimit(&value, cudaLimitDevRuntimeSyncDepth));
395-
log << " runtime sync depth: " << std::setw(10) << value << '\n';
396388
cudaCheck(cudaDeviceGetLimit(&value, cudaLimitDevRuntimePendingLaunchCount));
397389
log << " runtime pending launch count: " << std::setw(10) << value << '\n';
398390
}
@@ -458,8 +450,6 @@ void CUDAService::fillDescriptions(edm::ConfigurationDescriptions& descriptions)
458450
limits.addUntracked<int>("cudaLimitStackSize", -1)->setComment("Stack size in bytes of each GPU thread.");
459451
limits.addUntracked<int>("cudaLimitMallocHeapSize", -1)
460452
->setComment("Size in bytes of the heap used by the malloc() and free() device system calls.");
461-
limits.addUntracked<int>("cudaLimitDevRuntimeSyncDepth", -1)
462-
->setComment("Maximum nesting depth of a grid at which a thread can safely call cudaDeviceSynchronize().");
463453
limits.addUntracked<int>("cudaLimitDevRuntimePendingLaunchCount", -1)
464454
->setComment("Maximum number of outstanding device runtime launches that can be made from the current device.");
465455
desc.addUntracked<edm::ParameterSetDescription>("limits", limits)

0 commit comments

Comments
 (0)