Skip to content

Commit 3d761d8

Browse files
authored
Merge pull request cms-sw#40832 from fwyzard/more_Alpaka_updates_131x
CUDA, ROCm and Alpaka-related updates
2 parents ff2213a + 1c30d1c commit 3d761d8

File tree

81 files changed

+761
-629
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

81 files changed

+761
-629
lines changed

Alignment/CommonAlignment/python/tools/trackselectionRefitting.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -363,10 +363,6 @@ def getSequence(process, collection,
363363
## put the sequence together ##
364364
###############################
365365

366-
if "Fast" in TTRHBuilder:
367-
print("PixelCPEFast has been chosen, here we must include CUDAService first")
368-
process.load('HeterogeneousCore.CUDAServices.CUDAService_cfi')
369-
370366
modules = []
371367
src = collection
372368
prevsrc = None

Configuration/StandardSequences/python/Accelerators_cff.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@
44
# used in production
55

66
from HeterogeneousCore.CUDACore.ProcessAcceleratorCUDA_cfi import ProcessAcceleratorCUDA
7+
from HeterogeneousCore.ROCmCore.ProcessAcceleratorROCm_cfi import ProcessAcceleratorROCm
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
#include "DataFormats/Common/interface/DeviceProduct.h"
2+
#include "DataFormats/Common/interface/Wrapper.h"
3+
#include "DataFormats/Portable/interface/Product.h"
4+
#include "DataFormats/PortableTestObjects/interface/TestSoA.h"
5+
#include "DataFormats/PortableTestObjects/interface/alpaka/TestDeviceCollection.h"
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
<lcgdict>
2+
<class name="alpaka_rocm_async::portabletest::TestDeviceCollection" persistent="false"/>
3+
<class name="edm::DeviceProduct<alpaka_rocm_async::portabletest::TestDeviceCollection>" persistent="false"/>
4+
<class name="edm::Wrapper<edm::DeviceProduct<alpaka_rocm_async::portabletest::TestDeviceCollection>>" persistent="false"/>
5+
</lcgdict>

EventFilter/HcalRawToDigi/plugins/HcalDigisProducerGPU.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
#include "FWCore/ParameterSet/interface/ParameterSet.h"
1010
#include "FWCore/ServiceRegistry/interface/Service.h"
1111
#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"
12-
#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
12+
#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h"
1313
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
1414

1515
class HcalDigisProducerGPU : public edm::stream::EDProducer<edm::ExternalWork> {
@@ -97,8 +97,8 @@ HcalDigisProducerGPU::HcalDigisProducerGPU(const edm::ParameterSet& ps)
9797
hf3_.stride = hcal::compute_stride<hcal::Flavor3>(QIE11DigiCollection::MAXSAMPLES);
9898

9999
// preallocate pinned host memory only if CUDA is available
100-
edm::Service<CUDAService> cs;
101-
if (cs and cs->enabled()) {
100+
edm::Service<CUDAInterface> cuda;
101+
if (cuda and cuda->enabled()) {
102102
hf01_.reserve(config_.maxChannelsF01HE);
103103
hf5_.reserve(config_.maxChannelsF5HB);
104104
hf3_.reserve(config_.maxChannelsF3HB);

EventFilter/HcalRawToDigi/plugins/HcalRawToDigiGPU.cc

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
#include <iostream>
2-
1+
#include "CUDADataFormats/Common/interface/Product.h"
32
#include "CondFormats/DataRecord/interface/HcalElectronicsMapRcd.h"
43
#include "DataFormats/FEDRawData/interface/FEDNumbering.h"
54
#include "DataFormats/FEDRawData/interface/FEDRawDataCollection.h"
@@ -8,11 +7,7 @@
87
#include "FWCore/Framework/interface/MakerMacros.h"
98
#include "FWCore/Framework/interface/stream/EDProducer.h"
109
#include "FWCore/ParameterSet/interface/ParameterSet.h"
11-
#include "FWCore/ServiceRegistry/interface/Service.h"
1210
#include "HeterogeneousCore/CUDACore/interface/ScopedContext.h"
13-
#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
14-
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
15-
#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
1611

1712
#include "DeclsForKernels.h"
1813
#include "DecodeGPU.h"

HeterogeneousCore/AlpakaCore/python/ProcessAcceleratorAlpaka.py

Lines changed: 54 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,17 @@
11
import FWCore.ParameterSet.Config as cms
22

3+
import os
4+
5+
from HeterogeneousCore.Common.PlatformStatus import PlatformStatus
6+
37
class ModuleTypeResolverAlpaka:
48
def __init__(self, accelerators, backend):
5-
# first element is used as the default is nothing is set
9+
# first element is used as the default if nothing is set
610
self._valid_backends = []
711
if "gpu-nvidia" in accelerators:
812
self._valid_backends.append("cuda_async")
13+
if "gpu-amd" in accelerators:
14+
self._valid_backends.append("rocm_async")
915
if "cpu" in accelerators:
1016
self._valid_backends.append("serial_sync")
1117
if len(self._valid_backends) == 0:
@@ -45,26 +51,64 @@ class ProcessAcceleratorAlpaka(cms.ProcessAccelerator):
4551
ProcessAcceleratorCUDA) define.
4652
"""
4753
def __init__(self):
48-
super(ProcessAcceleratorAlpaka,self).__init__()
54+
super(ProcessAcceleratorAlpaka, self).__init__()
4955
self._backend = None
56+
5057
# User-facing interface
5158
def setBackend(self, backend):
5259
self._backend = backend
60+
5361
# Framework-facing interface
5462
def moduleTypeResolver(self, accelerators):
5563
return ModuleTypeResolverAlpaka(accelerators, self._backend)
64+
5665
def apply(self, process, accelerators):
57-
if not hasattr(process, "AlpakaServiceSerialSync"):
66+
# Propagate the AlpakaService messages through the MessageLogger
67+
if not hasattr(process.MessageLogger, "AlpakaService"):
68+
process.MessageLogger.AlpakaService = cms.untracked.PSet()
69+
70+
# Check if the CPU backend is available
71+
try:
72+
if not "cpu" in accelerators:
73+
raise False
5874
from HeterogeneousCore.AlpakaServices.AlpakaServiceSerialSync_cfi import AlpakaServiceSerialSync
59-
process.add_(AlpakaServiceSerialSync)
60-
if not hasattr(process, "AlpakaServiceCudaAsync"):
75+
except:
76+
# the CPU backend is not available, do not load the AlpakaServiceSerialSync
77+
if hasattr(process, "AlpakaServiceSerialSync"):
78+
del process.AlpakaServiceSerialSync
79+
else:
80+
# the CPU backend is available, ensure the AlpakaServiceSerialSync is loaded
81+
if not hasattr(process, "AlpakaServiceSerialSync"):
82+
process.add_(AlpakaServiceSerialSync)
83+
84+
# Check if CUDA is available, and if the system has at least one usable NVIDIA GPU
85+
try:
86+
if not "gpu-nvidia" in accelerators:
87+
raise False
6188
from HeterogeneousCore.AlpakaServices.AlpakaServiceCudaAsync_cfi import AlpakaServiceCudaAsync
62-
process.add_(AlpakaServiceCudaAsync)
89+
except:
90+
# CUDA is not available, do not load the AlpakaServiceCudaAsync
91+
if hasattr(process, "AlpakaServiceCudaAsync"):
92+
del process.AlpakaServiceCudaAsync
93+
else:
94+
# CUDA is available, ensure the AlpakaServiceCudaAsync is loaded
95+
if not hasattr(process, "AlpakaServiceCudaAsync"):
96+
process.add_(AlpakaServiceCudaAsync)
6397

64-
if not hasattr(process.MessageLogger, "AlpakaService"):
65-
process.MessageLogger.AlpakaService = cms.untracked.PSet()
98+
# Check if ROCm is available, and if the system has at least one usable AMD GPU
99+
try:
100+
if not "gpu-amd" in accelerators:
101+
raise False
102+
from HeterogeneousCore.AlpakaServices.AlpakaServiceROCmAsync_cfi import AlpakaServiceROCmAsync
103+
except:
104+
# ROCm is not available, do not load the AlpakaServiceROCmAsync
105+
if hasattr(process, "AlpakaServiceROCmAsync"):
106+
del process.AlpakaServiceROCmAsync
107+
else:
108+
# ROCm is available, ensure the AlpakaServiceROCmAsync is loaded
109+
if not hasattr(process, "AlpakaServiceROCmAsync"):
110+
process.add_(AlpakaServiceROCmAsync)
66111

67-
process.AlpakaServiceSerialSync.enabled = "cpu" in accelerators
68-
process.AlpakaServiceCudaAsync.enabled = "gpu-nvidia" in accelerators
69112

113+
# Ensure this module is kept in the configuration when dumping it
70114
cms.specialImportRegistry.registerSpecialImportForType(ProcessAcceleratorAlpaka, "from HeterogeneousCore.AlpakaCore.ProcessAcceleratorAlpaka import ProcessAcceleratorAlpaka")

HeterogeneousCore/AlpakaCore/src/module_backend_config.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ namespace cms::alpakatools {
1717
descAlpaka.addUntracked<std::string>("backend", "")
1818
->setComment(
1919
"Alpaka backend for this module. Can be empty string (for the global default), 'serial_sync', or "
20-
"'cuda_async'");
20+
" - depending on the architecture and available hardware - 'cuda_async', 'rocm_async'");
2121

2222
if (iDesc.defaultDescription()) {
2323
if (iDesc.defaultDescription()->isLabelUnused(kPSetName)) {

HeterogeneousCore/AlpakaServices/src/alpaka/AlpakaService.cc

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,12 @@
1616

1717
#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
1818
#include "FWCore/ServiceRegistry/interface/Service.h"
19-
#include "HeterogeneousCore/CUDAServices/interface/CUDAService.h"
19+
#include "HeterogeneousCore/CUDAServices/interface/CUDAInterface.h"
2020
#endif // ALPAKA_ACC_GPU_CUDA_ENABLED
2121

2222
#ifdef ALPAKA_ACC_GPU_HIP_ENABLED
2323
#include "FWCore/ServiceRegistry/interface/Service.h"
24-
#include "HeterogeneousCore/ROCmServices/interface/ROCmService.h"
24+
#include "HeterogeneousCore/ROCmServices/interface/ROCmInterface.h"
2525
#endif // ALPAKA_ACC_GPU_HIP_ENABLED
2626

2727
namespace ALPAKA_ACCELERATOR_NAMESPACE {
@@ -31,11 +31,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
3131
verbose_(config.getUntrackedParameter<bool>("verbose")) {
3232
#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
3333
// rely on the CUDAService to initialise the CUDA devices
34-
edm::Service<CUDAService> cudaService;
34+
edm::Service<CUDAInterface> cuda;
3535
#endif // ALPAKA_ACC_GPU_CUDA_ENABLED
3636
#ifdef ALPAKA_ACC_GPU_HIP_ENABLED
3737
// rely on the ROCmService to initialise the ROCm devices
38-
edm::Service<ROCmService> rocmService;
38+
edm::Service<ROCmInterface> rocm;
3939
#endif // ALPAKA_ACC_GPU_HIP_ENABLED
4040

4141
// TODO from Andrea Bocci:
@@ -48,14 +48,14 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
4848
}
4949

5050
#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
51-
if (not cudaService->enabled()) {
51+
if (not cuda or not cuda->enabled()) {
5252
enabled_ = false;
5353
edm::LogInfo("AlpakaService") << ALPAKA_TYPE_ALIAS_NAME(AlpakaService) << " disabled by CUDAService";
5454
return;
5555
}
5656
#endif // ALPAKA_ACC_GPU_CUDA_ENABLED
5757
#ifdef ALPAKA_ACC_GPU_HIP_ENABLED
58-
if (not rocmService->enabled()) {
58+
if (not rocm or not rocm->enabled()) {
5959
enabled_ = false;
6060
edm::LogInfo("AlpakaService") << ALPAKA_TYPE_ALIAS_NAME(AlpakaService) << " disabled by ROCmService";
6161
return;

HeterogeneousCore/CUDACore/README.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,14 @@ This page documents the CUDA integration within CMSSW
8383
stream must synchronize with the work queued on other CUDA
8484
streams (with CUDA events and `cudaStreamWaitEvent()`)
8585
4. Outside of `acquire()`/`produce()`, CUDA API functions may be
86-
called only if `CUDAService::enabled()` returns `true`.
86+
called only if the `CUDAService` implementation of the `CUDAInterface`
87+
is available and `CUDAService::enabled()` returns `true`:
88+
```c++
89+
edm::Service<CUDAInterface> cuda;
90+
if (cuda and cuda->enabled()) {
91+
// CUDA calls can be made here
92+
}
93+
```
8794
* With point 3 it follows that in these cases multiple devices have
8895
to be dealt with explicitly, as well as CUDA streams
8996

0 commit comments

Comments
 (0)