Skip to content

Commit 247f6db

Browse files
authored
Merge pull request #45368 from fwyzard/Alpaka_CachingAllocator_debug_141x
Fix CachingAllocator debug for non-async operations
2 parents 70af9f3 + 70f422b commit 247f6db

File tree

4 files changed

+89
-21
lines changed

4 files changed

+89
-21
lines changed

Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1102,10 +1102,12 @@ def setup_(self, step, stepName, stepDict, k, properties):
11021102
digi = {
11031103
# customize the ECAL Local Reco part of the HLT menu for Alpaka
11041104
'--procModifiers': 'alpaka', # alpaka modifier activates customiseHLTForAlpaka
1105+
'--customise' : 'HeterogeneousCore/AlpakaServices/customiseAlpakaServiceMemoryFilling.customiseAlpakaServiceMemoryFilling',
11051106
},
11061107
reco = {
11071108
'-s': 'RAW2DIGI:RawToDigi_ecalOnly,RECO:reconstruction_ecalOnly,VALIDATION:@ecalOnlyValidation,DQM:@ecalOnly',
1108-
'--procModifiers': 'alpaka'
1109+
'--procModifiers': 'alpaka',
1110+
'--customise' : 'HeterogeneousCore/AlpakaServices/customiseAlpakaServiceMemoryFilling.customiseAlpakaServiceMemoryFilling',
11091111
},
11101112
harvest = {
11111113
'-s': 'HARVESTING:@ecalOnlyValidation+@ecal'
@@ -1276,10 +1278,12 @@ def setup_(self, step, stepName, stepDict, k, properties):
12761278
upgradeWFs['PatatrackHCALOnlyAlpakaValidation'] = PatatrackWorkflow(
12771279
digi = {
12781280
'--procModifiers': 'alpaka', # alpaka modifier activates customiseHLTForAlpaka
1281+
'--customise' : 'HeterogeneousCore/AlpakaServices/customiseAlpakaServiceMemoryFilling.customiseAlpakaServiceMemoryFilling',
12791282
},
12801283
reco = {
12811284
'-s': 'RAW2DIGI:RawToDigi_hcalOnly,RECO:reconstruction_hcalOnly,VALIDATION:@hcalOnlyValidation,DQM:@hcalOnly+@hcal2Only',
1282-
'--procModifiers': 'alpaka'
1285+
'--procModifiers': 'alpaka',
1286+
'--customise' : 'HeterogeneousCore/AlpakaServices/customiseAlpakaServiceMemoryFilling.customiseAlpakaServiceMemoryFilling',
12831287
},
12841288
harvest = {
12851289
'-s': 'HARVESTING:@hcalOnlyValidation'
@@ -1294,10 +1298,12 @@ def setup_(self, step, stepName, stepDict, k, properties):
12941298
upgradeWFs['PatatrackHCALOnlyGPUandAlpakaValidation'] = PatatrackWorkflow(
12951299
digi = {
12961300
'--procModifiers': 'alpaka', # alpaka modifier activates customiseHLTForAlpaka
1301+
'--customise' : 'HeterogeneousCore/AlpakaServices/customiseAlpakaServiceMemoryFilling.customiseAlpakaServiceMemoryFilling',
12971302
},
12981303
reco = {
1299-
'-s' : 'RAW2DIGI:RawToDigi_hcalOnly,RECO:reconstruction_hcalOnlyLegacy+reconstruction_hcalOnly,VALIDATION:@hcalOnlyValidation+pfClusterHBHEOnlyAlpakaComparisonSequence,DQM:@hcalOnly+@hcal2Only+hcalOnlyOfflineSourceSequenceAlpaka',
1300-
'--procModifiers': 'alpaka'
1304+
'-s': 'RAW2DIGI:RawToDigi_hcalOnly,RECO:reconstruction_hcalOnlyLegacy+reconstruction_hcalOnly,VALIDATION:@hcalOnlyValidation+pfClusterHBHEOnlyAlpakaComparisonSequence,DQM:@hcalOnly+@hcal2Only+hcalOnlyOfflineSourceSequenceAlpaka',
1305+
'--procModifiers': 'alpaka',
1306+
'--customise' : 'HeterogeneousCore/AlpakaServices/customiseAlpakaServiceMemoryFilling.customiseAlpakaServiceMemoryFilling',
13011307
},
13021308
harvest = {
13031309
'-s': 'HARVESTING:@hcalOnlyValidation'
@@ -1329,11 +1335,13 @@ def setup_(self, step, stepName, stepDict, k, properties):
13291335
upgradeWFs['PatatrackFullRecoAlpaka'] = PatatrackWorkflow(
13301336
digi = {
13311337
'--procModifiers': 'alpaka', # alpaka modifier activates customiseHLTForAlpaka
1338+
'--customise' : 'HeterogeneousCore/AlpakaServices/customiseAlpakaServiceMemoryFilling.customiseAlpakaServiceMemoryFilling',
13321339
},
13331340
reco = {
13341341
# skip the @pixelTrackingOnlyValidation which cannot run together with the full reconstruction
13351342
'-s': 'RAW2DIGI:RawToDigi+RawToDigi_pixelOnly,L1Reco,RECO:reconstruction+reconstruction_pixelTrackingOnly,RECOSIM,PAT,VALIDATION:@standardValidation+@miniAODValidation,DQM:@standardDQM+@ExtraHLT+@miniAODDQM+@pixelTrackingOnlyDQM',
1336-
'--procModifiers': 'alpaka,pixelNtupletFit'
1343+
'--procModifiers': 'alpaka,pixelNtupletFit',
1344+
'--customise' : 'HeterogeneousCore/AlpakaServices/customiseAlpakaServiceMemoryFilling.customiseAlpakaServiceMemoryFilling',
13371345
},
13381346
harvest = {
13391347
# skip the @pixelTrackingOnlyDQM harvesting
@@ -1646,16 +1654,15 @@ def setup_(self, step, stepName, stepDict, k, properties):
16461654
offset = 0.597,
16471655
)
16481656

1649-
1650-
# Alpaka workflows
1651-
16521657
upgradeWFs['PatatrackPixelOnlyAlpaka'] = PatatrackWorkflow(
16531658
digi = {
16541659
'--procModifiers': 'alpaka', # alpaka modifier activates customiseHLTForAlpaka
1660+
'--customise' : 'HeterogeneousCore/AlpakaServices/customiseAlpakaServiceMemoryFilling.customiseAlpakaServiceMemoryFilling',
16551661
},
16561662
reco = {
16571663
'-s': 'RAW2DIGI:RawToDigi_pixelOnly,RECO:reconstruction_pixelTrackingOnly,VALIDATION:@pixelTrackingOnlyValidation,DQM:@pixelTrackingOnlyDQM',
1658-
'--procModifiers': 'alpaka'
1664+
'--procModifiers': 'alpaka',
1665+
'--customise' : 'HeterogeneousCore/AlpakaServices/customiseAlpakaServiceMemoryFilling.customiseAlpakaServiceMemoryFilling',
16591666
},
16601667
harvest = {
16611668
'-s': 'HARVESTING:@trackingOnlyValidation+@pixelTrackingOnlyDQM'
@@ -1667,10 +1674,12 @@ def setup_(self, step, stepName, stepDict, k, properties):
16671674
upgradeWFs['PatatrackPixelOnlyAlpakaValidation'] = PatatrackWorkflow(
16681675
digi = {
16691676
'--procModifiers': 'alpaka', # alpaka modifier activates customiseHLTForAlpaka
1677+
'--customise' : 'HeterogeneousCore/AlpakaServices/customiseAlpakaServiceMemoryFilling.customiseAlpakaServiceMemoryFilling',
16701678
},
16711679
reco = {
16721680
'-s': 'RAW2DIGI:RawToDigi_pixelOnly,RECO:reconstruction_pixelTrackingOnly,VALIDATION:@pixelTrackingOnlyValidation,DQM:@pixelTrackingOnlyDQM',
1673-
'--procModifiers': 'alpakaValidation'
1681+
'--procModifiers': 'alpakaValidation',
1682+
'--customise' : 'HeterogeneousCore/AlpakaServices/customiseAlpakaServiceMemoryFilling.customiseAlpakaServiceMemoryFilling',
16741683
},
16751684
harvest = {
16761685
'-s': 'HARVESTING:@trackingOnlyValidation+@pixelTrackingOnlyDQM'

HeterogeneousCore/AlpakaCore/README.md

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -358,13 +358,8 @@ There are a few different options for using Alpaka-based modules in the CMSSW co
358358
In all cases the configuration must load the necessary `ProcessAccelerator` objects (see below). For accelerators used in production, these are aggregated in `Configuration.StandardSequences.Accelerators_cff`. The `runTheMatrix.py` handles the loading of this `Accelerators_cff` automatically. The HLT menus also load the necessary `ProcessAccelerator`s.
359359
```python
360360
## Load explicitly
361-
# One ProcessAccelerator for each accelerator technology
361+
# One ProcessAccelerator for each accelerator technology, plus a generic one for Alpaka
362362
process.load("Configuration.StandardSequences.Accelerators_cff")
363-
364-
# And one ProcessAccelerator for Alpaka
365-
# (eventually to be absorbed to Accelerators_cff)
366-
process.load("HeterogeneousCore.AlpakaCore.ProcessAcceleratorAlpaka_cfi")
367-
368363
```
369364

370365
### Explicit module type (non-portable)

HeterogeneousCore/AlpakaInterface/interface/CachingAllocator.h

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -122,8 +122,10 @@ namespace cms::alpakatools {
122122
explicit CachingAllocator(
123123
Device const& device,
124124
AllocatorConfig const& config,
125-
bool reuseSameQueueAllocations, // reuse non-ready allocations if they are in the same queue as the new one;
126-
// this is safe only if all memory operations are scheduled in the same queue
125+
bool reuseSameQueueAllocations, // Reuse non-ready allocations if they are in the same queue as the new one;
126+
// this is safe only if all memory operations are scheduled in the same queue.
127+
// In particular, this is not safe if the memory will be accessed without using
128+
// any queue, like host memory accessed directly or with immediate operations.
127129
bool debug = false)
128130
: device_(device),
129131
binGrowth_(config.binGrowth),
@@ -175,6 +177,22 @@ namespace cms::alpakatools {
175177
return cachedBytes_;
176178
}
177179

180+
// Fill a memory buffer with the specified byte value.
181+
// If the underlying device is the host and the allocator is configured to support immediate
182+
// (non queue-ordered) operations, fill the memory synchronously using std::memset.
183+
// Otherwise, let the alpaka queue schedule the operation.
184+
//
185+
// This is not used for deallocation/caching, because the memory may still be in use until the
186+
// corresponding event is reached.
187+
void immediateOrAsyncMemset(Queue queue, Buffer buffer, uint8_t value) {
188+
// host device, and the allocator does not reuse same-queue allocations: fill synchronously
189+
if (std::is_same_v<Device, alpaka::DevCpu> and not reuseSameQueueAllocations_) {
190+
std::memset(buffer.data(), value, alpaka::getExtentProduct(buffer) * sizeof(alpaka::Elem<Buffer>));
191+
} else {
192+
alpaka::memset(queue, buffer, value);
193+
}
194+
}
195+
178196
// Allocate given number of bytes on the current device associated to given queue
179197
void* allocate(size_t bytes, Queue queue) {
180198
// create a block descriptor for the requested allocation
@@ -187,15 +205,15 @@ namespace cms::alpakatools {
187205
if (tryReuseCachedBlock(block)) {
188206
// fill the re-used memory block with a pattern
189207
if (fillReallocations_) {
190-
alpaka::memset(*block.queue, *block.buffer, fillReallocationValue_);
208+
immediateOrAsyncMemset(*block.queue, *block.buffer, fillReallocationValue_);
191209
} else if (fillAllocations_) {
192-
alpaka::memset(*block.queue, *block.buffer, fillAllocationValue_);
210+
immediateOrAsyncMemset(*block.queue, *block.buffer, fillAllocationValue_);
193211
}
194212
} else {
195213
allocateNewBlock(block);
196214
// fill the newly allocated memory block with a pattern
197215
if (fillAllocations_) {
198-
alpaka::memset(*block.queue, *block.buffer, fillAllocationValue_);
216+
immediateOrAsyncMemset(*block.queue, *block.buffer, fillAllocationValue_);
199217
}
200218
}
201219

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
import FWCore.ParameterSet.Config as cms
2+
3+
def customiseAlpakaServiceMemoryFilling(process):
    """Enable junk-memory filling in every AlpakaService of *process*.

    Loads the serial AlpakaService configuration, then tries to load the
    CUDA and ROCm services together with their AlpakaService variants.
    Every service whose name starts with ``AlpakaService`` then gets its
    host and device allocators configured to fill memory with a distinct
    byte pattern on allocation, reallocation, deallocation and caching.

    Args:
        process: the process object to customise; it must provide
            ``load()`` and ``services_()`` and expose its services as
            attributes.

    Returns:
        The same ``process`` object, modified in place.
    """
    # load all variants of the AlpakaService
    # ProcessAcceleratorAlpaka will take care of removing the unused ones
    process.load('HeterogeneousCore.AlpakaServices.AlpakaServiceSerialSync_cfi')

    # backend service and the matching AlpakaService, loaded only if available
    optional_backends = (
        # CUDA backend
        ('HeterogeneousCore.CUDAServices.CUDAService_cfi',
         'HeterogeneousCore.AlpakaServices.AlpakaServiceCudaAsync_cfi'),
        # ROCm backend
        ('HeterogeneousCore.ROCmServices.ROCmService_cfi',
         'HeterogeneousCore.AlpakaServices.AlpakaServiceROCmAsync_cfi'),
    )
    for backend_cfi, alpaka_cfi in optional_backends:
        try:
            process.load(backend_cfi)
            process.load(alpaka_cfi)
        except ImportError:
            # NOTE(review): assumes process.load() reports a missing
            # configuration fragment as ImportError - confirm. Anything else
            # (including KeyboardInterrupt) is no longer silently discarded.
            pass

    # byte patterns, per allocator; each stem maps to the boolean switch
    # "<stem>s" and to the pattern parameter "<stem>Value"
    fill_patterns = (
        ('hostAllocator', (
            ('fillAllocation', 0xB4),
            ('fillReallocation', 0x78),
            ('fillDeallocation', 0x4B),
            ('fillCache', 0x87),
        )),
        ('deviceAllocator', (
            ('fillAllocation', 0xA5),
            ('fillReallocation', 0x69),
            ('fillDeallocation', 0x5A),
            ('fillCache', 0x96),
        )),
    )

    # enable junk memory filling for all AlpakaServices
    for name in process.services_():
        if not name.startswith('AlpakaService'):
            continue
        service = getattr(process, name)
        for allocator_name, patterns in fill_patterns:
            allocator = getattr(service, allocator_name)
            for stem, value in patterns:
                setattr(allocator, stem + 's', True)
                setattr(allocator, stem + 'Value', value)

    return process

0 commit comments

Comments
 (0)