Skip to content

Commit 5ebe8d8

Browse files
committed
Add abstract interfaces for the CUDA and ROCm services
Split the CUDA and ROCm services into an interface and a concrete implementation. Split the CUDAService into CUDAInterface and CUDAService. The former implements the CUDAService interface, while the latter implements the concrete functionality, and is used only if: - CUDA is available on the current platform, OS and compiler; - the current system has at least one available CUDA GPU; - gpu-nvidia is among the process.options.accelerators. Split the ROCmService into ROCmInterface and ROCmService. The former implements the ROCmService interface, while the latter implements the concrete functionality, and is used only if: - ROCm is available on the current platform, OS and compiler; - the current system has at least one available ROCm GPU; - gpu-amd is among the process.options.accelerators. Update all uses of the edm::Service<CUDAService> and edm::Service<ROCmService> to the interface classes, and check explicitly that a concrete implementation of the service is available.
1 parent a2fe732 commit 5ebe8d8

File tree

59 files changed

+557
-405
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

59 files changed

+557
-405
lines changed

EventFilter/HcalRawToDigi/plugins/HcalDigisProducerGPU.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
#include "FWCore/ParameterSet/interface/ParameterSet.h"
1010
#include "FWCore/ServiceRegistry/interface/Service.h"
1111
#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"
12-
#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
12+
#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h"
1313
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
1414

1515
class HcalDigisProducerGPU : public edm::stream::EDProducer<edm::ExternalWork> {
@@ -97,8 +97,8 @@ HcalDigisProducerGPU::HcalDigisProducerGPU(const edm::ParameterSet& ps)
9797
hf3_.stride = hcal::compute_stride<hcal::Flavor3>(QIE11DigiCollection::MAXSAMPLES);
9898

9999
// preallocate pinned host memory only if CUDA is available
100-
edm::Service<CUDAService> cs;
101-
if (cs and cs->enabled()) {
100+
edm::Service<CUDAInterface> cuda;
101+
if (cuda and cuda->enabled()) {
102102
hf01_.reserve(config_.maxChannelsF01HE);
103103
hf5_.reserve(config_.maxChannelsF5HB);
104104
hf3_.reserve(config_.maxChannelsF3HB);

HeterogeneousCore/AlpakaCore/python/ProcessAcceleratorAlpaka.py

Lines changed: 51 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
import FWCore.ParameterSet.Config as cms
22

3+
import os
4+
5+
from HeterogeneousCore.Common.PlatformStatus import PlatformStatus
6+
37
class ModuleTypeResolverAlpaka:
48
def __init__(self, accelerators, backend):
59
# first element is used as the default if nothing is set
@@ -47,30 +51,64 @@ class ProcessAcceleratorAlpaka(cms.ProcessAccelerator):
4751
ProcessAcceleratorCUDA) define.
4852
"""
4953
def __init__(self):
50-
super(ProcessAcceleratorAlpaka,self).__init__()
54+
super(ProcessAcceleratorAlpaka, self).__init__()
5155
self._backend = None
56+
5257
# User-facing interface
5358
def setBackend(self, backend):
5459
self._backend = backend
60+
5561
# Framework-facing interface
5662
def moduleTypeResolver(self, accelerators):
5763
return ModuleTypeResolverAlpaka(accelerators, self._backend)
64+
5865
def apply(self, process, accelerators):
59-
if not hasattr(process, "AlpakaServiceSerialSync"):
66+
# Propagate the AlpakaService messages through the MessageLogger
67+
if not hasattr(process.MessageLogger, "AlpakaService"):
68+
process.MessageLogger.AlpakaService = cms.untracked.PSet()
69+
70+
# Check if the CPU backend is available
71+
try:
72+
if not "cpu" in accelerators:
73+
raise False
6074
from HeterogeneousCore.AlpakaServices.AlpakaServiceSerialSync_cfi import AlpakaServiceSerialSync
61-
process.add_(AlpakaServiceSerialSync)
62-
if not hasattr(process, "AlpakaServiceCudaAsync"):
75+
except:
76+
# the CPU backend is not available, do not load the AlpakaServiceSerialSync
77+
if hasattr(process, "AlpakaServiceSerialSync"):
78+
del process.AlpakaServiceSerialSync
79+
else:
80+
# the CPU backend is available, ensure the AlpakaServiceSerialSync is loaded
81+
if not hasattr(process, "AlpakaServiceSerialSync"):
82+
process.add_(AlpakaServiceSerialSync)
83+
84+
# Check if CUDA is available, and if the system has at least one usable NVIDIA GPU
85+
try:
86+
if not "gpu-nvidia" in accelerators:
87+
raise False
6388
from HeterogeneousCore.AlpakaServices.AlpakaServiceCudaAsync_cfi import AlpakaServiceCudaAsync
64-
process.add_(AlpakaServiceCudaAsync)
65-
if not hasattr(process, "AlpakaServiceROCmAsync"):
66-
from HeterogeneousCore.AlpakaServices.AlpakaServiceROCmAsync_cfi import AlpakaServiceROCmAsync
67-
process.add_(AlpakaServiceROCmAsync)
89+
except:
90+
# CUDA is not available, do not load the AlpakaServiceCudaAsync
91+
if hasattr(process, "AlpakaServiceCudaAsync"):
92+
del process.AlpakaServiceCudaAsync
93+
else:
94+
# CUDA is available, ensure the AlpakaServiceCudaAsync is loaded
95+
if not hasattr(process, "AlpakaServiceCudaAsync"):
96+
process.add_(AlpakaServiceCudaAsync)
6897

69-
if not hasattr(process.MessageLogger, "AlpakaService"):
70-
process.MessageLogger.AlpakaService = cms.untracked.PSet()
98+
# Check if ROCm is available, and if the system has at least one usable AMD GPU
99+
try:
100+
if not "gpu-amd" in accelerators:
101+
raise False
102+
from HeterogeneousCore.AlpakaServices.AlpakaServiceROCmAsync_cfi import AlpakaServiceROCmAsync
103+
except:
104+
# ROCm is not available, do not load the AlpakaServiceROCmAsync
105+
if hasattr(process, "AlpakaServiceROCmAsync"):
106+
del process.AlpakaServiceROCmAsync
107+
else:
108+
# ROCm is available, ensure the AlpakaServiceROCmAsync is loaded
109+
if not hasattr(process, "AlpakaServiceROCmAsync"):
110+
process.add_(AlpakaServiceROCmAsync)
71111

72-
process.AlpakaServiceSerialSync.enabled = "cpu" in accelerators
73-
process.AlpakaServiceCudaAsync.enabled = "gpu-nvidia" in accelerators
74-
process.AlpakaServiceROCmAsync.enabled = "gpu-amd" in accelerators
75112

113+
# Ensure this module is kept in the configuration when dumping it
76114
cms.specialImportRegistry.registerSpecialImportForType(ProcessAcceleratorAlpaka, "from HeterogeneousCore.AlpakaCore.ProcessAcceleratorAlpaka import ProcessAcceleratorAlpaka")

HeterogeneousCore/AlpakaServices/src/alpaka/AlpakaService.cc

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,12 @@
1616

1717
#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
1818
#include "FWCore/ServiceRegistry/interface/Service.h"
19-
#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
19+
#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h"
2020
#endif // ALPAKA_ACC_GPU_CUDA_ENABLED
2121

2222
#ifdef ALPAKA_ACC_GPU_HIP_ENABLED
2323
#include "FWCore/ServiceRegistry/interface/Service.h"
24-
#include "HeterogeneousCore/ROCmServices/interface/ROCmService.h"
24+
#include "HeterogeneousCore/ROCmServices/interface/ROCmInterface.h"
2525
#endif // ALPAKA_ACC_GPU_HIP_ENABLED
2626

2727
namespace ALPAKA_ACCELERATOR_NAMESPACE {
@@ -31,11 +31,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
3131
verbose_(config.getUntrackedParameter<bool>("verbose")) {
3232
#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
3333
// rely on the CUDAService to initialise the CUDA devices
34-
edm::Service<CUDAService> cudaService;
34+
edm::Service<CUDAInterface> cuda;
3535
#endif // ALPAKA_ACC_GPU_CUDA_ENABLED
3636
#ifdef ALPAKA_ACC_GPU_HIP_ENABLED
3737
// rely on the ROCmService to initialise the ROCm devices
38-
edm::Service<ROCmService> rocmService;
38+
edm::Service<ROCmInterface> rocm;
3939
#endif // ALPAKA_ACC_GPU_HIP_ENABLED
4040

4141
// TODO from Andrea Bocci:
@@ -48,14 +48,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
4848
}
4949

5050
#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
51-
if (not cudaService->enabled()) {
51+
if (not cuda or not cuda->enabled()) {
5252
enabled_ = false;
5353
edm::LogInfo("AlpakaService") << ALPAKA_TYPE_ALIAS_NAME(AlpakaService) << " disabled by CUDAService";
5454
return;
5555
}
5656
#endif // ALPAKA_ACC_GPU_CUDA_ENABLED
5757
#ifdef ALPAKA_ACC_GPU_HIP_ENABLED
58-
if (not rocmService->enabled()) {
58+
if (not rocm or not rocm->enabled()) {
5959
enabled_ = false;
6060
edm::LogInfo("AlpakaService") << ALPAKA_TYPE_ALIAS_NAME(AlpakaService) << " disabled by ROCmService";
6161
return;

HeterogeneousCore/CUDACore/README.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,14 @@ This page documents the CUDA integration within CMSSW
8383
stream must synchronize with the work queued on other CUDA
8484
streams (with CUDA events and `cudaStreamWaitEvent()`)
8585
4. Outside of `acquire()`/`produce()`, CUDA API functions may be
86-
called only if `CUDAService::enabled()` returns `true`.
86+
called only if the `CUDAService` implementation of the `CUDAInterface`
87+
is available and `CUDAService::enabled()` returns `true`:
88+
```c++
89+
edm::Service<CUDAInterface> cuda;
90+
if (cuda and cuda->enabled()) {
91+
// CUDA calls ca be made here
92+
}
93+
```
8794
* With point 3 it follows that in these cases multiple devices have
8895
to be dealt with explicitly, as well as CUDA streams
8996

HeterogeneousCore/CUDACore/python/ProcessAcceleratorCUDA.py

Lines changed: 31 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,29 +2,44 @@
22

33
import os
44

5+
from HeterogeneousCore.Common.PlatformStatus import PlatformStatus
6+
57
class ProcessAcceleratorCUDA(cms.ProcessAccelerator):
68
def __init__(self):
7-
super(ProcessAcceleratorCUDA,self).__init__()
9+
super(ProcessAcceleratorCUDA, self).__init__()
810
self._label = "gpu-nvidia"
11+
912
def labels(self):
10-
return [self._label]
13+
return [ self._label ]
14+
1115
def enabledLabels(self):
12-
enabled = (os.system("cudaIsEnabled") == 0)
13-
if enabled:
14-
return self.labels()
15-
else:
16-
return []
17-
def apply(self, process, accelerators):
18-
if not hasattr(process, "CUDAService"):
19-
from HeterogeneousCore.CUDAServices.CUDAService_cfi import CUDAService
20-
process.add_(CUDAService)
16+
# Check if CUDA is available, and if the system has at least one usable device.
17+
# These should be checked on each worker node, because it depends both
18+
# on the architecture and on the actual hardware present in the machine.
19+
status = PlatformStatus(os.waitstatus_to_exitcode(os.system("cudaIsEnabled")))
20+
return self.labels() if status == PlatformStatus.Success else []
2121

22-
if not hasattr(process.MessageLogger, "CUDAService"):
23-
process.MessageLogger.CUDAService = cms.untracked.PSet()
22+
def apply(self, process, accelerators):
2423

2524
if self._label in accelerators:
26-
process.CUDAService.enabled = True
25+
# Ensure that the CUDAService is loaded
26+
if not hasattr(process, "CUDAService"):
27+
from HeterogeneousCore.CUDAServices.CUDAService_cfi import CUDAService
28+
process.add_(CUDAService)
29+
30+
# Propagate the CUDAService messages through the MessageLogger
31+
if not hasattr(process.MessageLogger, "CUDAService"):
32+
process.MessageLogger.CUDAService = cms.untracked.PSet()
33+
2734
else:
28-
process.CUDAService.enabled = False
29-
35+
# Make sure the CUDAService is not loaded
36+
if hasattr(process, "CUDAService"):
37+
del process.CUDAService
38+
39+
# Drop the CUDAService messages from the MessageLogger
40+
if hasattr(process.MessageLogger, "CUDAService"):
41+
del process.MessageLogger.CUDAService
42+
43+
44+
# Ensure this module is kept in the configuration when dumping it
3045
cms.specialImportRegistry.registerSpecialImportForType(ProcessAcceleratorCUDA, "from HeterogeneousCore.CUDACore.ProcessAcceleratorCUDA import ProcessAcceleratorCUDA")
Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,18 @@
11
#include "FWCore/ServiceRegistry/interface/Service.h"
22
#include "FWCore/Utilities/interface/Exception.h"
3-
#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
3+
#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h"
44

55
#include "chooseDevice.h"
66

77
namespace cms::cuda {
88
int chooseDevice(edm::StreamID id) {
9-
edm::Service<CUDAService> cudaService;
10-
if (not cudaService->enabled()) {
9+
edm::Service<CUDAInterface> cuda;
10+
if (not cuda or not cuda->enabled()) {
1111
cms::Exception ex("CUDAError");
12-
ex << "Unable to choose current device because CUDAService is disabled. If CUDAService was not explicitly\n"
13-
"disabled in the configuration, the probable cause is that there is no GPU or there is some problem\n"
14-
"in the CUDA runtime or drivers.";
12+
ex << "Unable to choose current device because CUDAService is not present or disabled.\n"
13+
<< "If CUDAService was not explicitly disabled in the configuration, the probable\n"
14+
<< "cause is that there is no GPU or there is some problem in the CUDA runtime or\n"
15+
<< "drivers.";
1516
ex.addContext("Calling cms::cuda::chooseDevice()");
1617
throw ex;
1718
}
@@ -22,6 +23,6 @@ namespace cms::cuda {
2223
// (and even then there is no load balancing).
2324
//
2425
// TODO: improve the "assignment" logic
25-
return id % cudaService->numberOfDevices();
26+
return id % cuda->numberOfDevices();
2627
}
2728
} // namespace cms::cuda
Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,4 @@
1-
<iftool name="cuda">
2-
<use name="FWCore/ServiceRegistry"/>
3-
<use name="FWCore/ParameterSet"/>
4-
<use name="FWCore/MessageLogger"/>
5-
<use name="FWCore/Utilities"/>
6-
<use name="HeterogeneousCore/CUDAUtilities"/>
7-
<use name="cuda"/>
8-
<use name="cuda-nvml"/>
9-
<export>
10-
<lib name="1"/>
11-
</export>
12-
</iftool>
1+
<use name="FWCore/ServiceRegistry"/>
2+
<export>
3+
<lib name="1"/>
4+
</export>
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#ifndef HeterogeneousCore_CUDAServices_interface_CUDAInterface
2+
#define HeterogeneousCore_CUDAServices_interface_CUDAInterface
3+
4+
#include <utility>
5+
6+
class CUDAInterface {
7+
public:
8+
CUDAInterface() = default;
9+
virtual ~CUDAInterface() = default;
10+
11+
virtual bool enabled() const = 0;
12+
13+
virtual int numberOfDevices() const = 0;
14+
15+
// Returns the (major, minor) CUDA compute capability of the given device.
16+
virtual std::pair<int, int> computeCapability(int device) const = 0;
17+
};
18+
19+
#endif // HeterogeneousCore_CUDAServices_interface_CUDAInterface

HeterogeneousCore/CUDAServices/interface/CUDAService.h

Lines changed: 0 additions & 42 deletions
This file was deleted.

HeterogeneousCore/CUDAServices/plugins/BuildFile.xml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@
99
<use name="FWCore/Utilities"/>
1010
<use name="HLTrigger/Timer"/>
1111
<use name="HeterogeneousCore/CUDAServices"/>
12-
<library file="*.cc" name="HeterogeneousCoreCUDAServicesPlugins">
12+
<use name="HeterogeneousCore/CUDAUtilities"/>
13+
<library file="CUDAMonitoringService.cc CUDAService.cc NVProfilerService.cc" name="HeterogeneousCoreCUDAServicesPlugins">
1314
<flags EDM_PLUGIN="1"/>
1415
</library>
1516
</iftool>

0 commit comments

Comments
 (0)