Skip to content

Commit 34eb6f4

Browse files
authored
ITS GPU: Make threads and blocks configurable from CLI (AliceO2Group#13596)
1 parent 415a7b5 commit 34eb6f4

File tree

11 files changed

+53
-64
lines changed

11 files changed

+53
-64
lines changed

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,6 @@ class TimeFrameGPU : public TimeFrame
184184
void registerHostMemory(const int);
185185
void unregisterHostMemory(const int);
186186
void initialise(const int, const TrackingParameters&, const int, IndexTableUtils* utils = nullptr, const TimeFrameGPUParameters* pars = nullptr);
187-
void initialiseHybrid(const int, const TrackingParameters&, const int, IndexTableUtils* utils = nullptr, const TimeFrameGPUParameters* pars = nullptr);
188187
void initDevice(IndexTableUtils*, const TrackingParameters& trkParam, const TimeFrameGPUParameters&, const int, const int);
189188
void initDeviceSAFitting();
190189
void loadTrackingFrameInfoDevice(const int);

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ void trackSeedHandler(CellSeed* trackSeeds,
5959
float maxChi2ClusterAttachment,
6060
float maxChi2NDF,
6161
const o2::base::Propagator* propagator,
62-
const o2::base::PropagatorF::MatCorrType matCorrType);
62+
const o2::base::PropagatorF::MatCorrType matCorrType,
63+
const int nBlocks,
64+
const int nThreads);
6365
} // namespace o2::its
6466
#endif // ITSTRACKINGGPU_TRACKINGKERNELS_H_

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -362,27 +362,6 @@ void TimeFrameGPU<nLayers>::initialise(const int iteration,
362362
const int maxLayers,
363363
IndexTableUtils* utils,
364364
const TimeFrameGPUParameters* gpuParam)
365-
{
366-
mGpuStreams.resize(mGpuParams.nTimeFrameChunks);
367-
mHostNTracklets.resize((nLayers - 1) * mGpuParams.nTimeFrameChunks, 0);
368-
mHostNCells.resize((nLayers - 2) * mGpuParams.nTimeFrameChunks, 0);
369-
370-
auto init = [&]() -> void {
371-
this->initDevice(utils, trkParam, *gpuParam, maxLayers, iteration);
372-
};
373-
std::thread t1{init};
374-
RANGE("tf_cpu_initialisation", 1);
375-
o2::its::TimeFrame::initialise(iteration, trkParam, maxLayers);
376-
// registerHostMemory(maxLayers);
377-
t1.join();
378-
}
379-
380-
template <int nLayers>
381-
void TimeFrameGPU<nLayers>::initialiseHybrid(const int iteration,
382-
const TrackingParameters& trkParam,
383-
const int maxLayers,
384-
IndexTableUtils* utils,
385-
const TimeFrameGPUParameters* gpuParam)
386365
{
387366
mGpuStreams.resize(mGpuParams.nTimeFrameChunks);
388367
o2::its::TimeFrame::initialise(iteration, trkParam, maxLayers);

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
#include "ITStrackingGPU/TrackerTraitsGPU.h"
2222
#include "ITStrackingGPU/TrackingKernels.h"
23+
#include "ITStracking/TrackingConfigParam.h"
2324

2425
namespace o2::its
2526
{
@@ -28,7 +29,7 @@ constexpr int UnusedIndex{-1};
2829
template <int nLayers>
2930
void TrackerTraitsGPU<nLayers>::initialiseTimeFrame(const int iteration)
3031
{
31-
mTimeFrameGPU->initialiseHybrid(iteration, mTrkParams[iteration], nLayers);
32+
mTimeFrameGPU->initialise(iteration, mTrkParams[iteration], nLayers);
3233
mTimeFrameGPU->loadTrackingFrameInfoDevice(iteration);
3334
}
3435

@@ -397,7 +398,7 @@ void TrackerTraitsGPU<nLayers>::findRoads(const int iteration)
397398
}
398399
mTimeFrameGPU->createTrackITSExtDevice(trackSeeds);
399400
mTimeFrameGPU->loadTrackSeedsDevice(trackSeeds);
400-
401+
auto& conf = o2::its::ITSGpuTrackingParamConfig::Instance();
401402
trackSeedHandler(
402403
mTimeFrameGPU->getDeviceTrackSeeds(), // CellSeed* trackSeeds,
403404
mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(), // TrackingFrameInfo** foundTrackingFrameInfo,
@@ -408,7 +409,9 @@ void TrackerTraitsGPU<nLayers>::findRoads(const int iteration)
408409
mTrkParams[0].MaxChi2ClusterAttachment, // float maxChi2ClusterAttachment,
409410
mTrkParams[0].MaxChi2NDF, // float maxChi2NDF,
410411
mTimeFrameGPU->getDevicePropagator(), // const o2::base::Propagator* propagator
411-
mCorrType); // o2::base::PropagatorImpl<float>::MatCorrType
412+
mCorrType, // o2::base::PropagatorImpl<float>::MatCorrType
413+
conf.nBlocks,
414+
conf.nThreads);
412415

413416
mTimeFrameGPU->downloadTrackITSExtDevice(trackSeeds);
414417

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -717,9 +717,11 @@ void trackSeedHandler(CellSeed* trackSeeds,
717717
float maxChi2ClusterAttachment,
718718
float maxChi2NDF,
719719
const o2::base::Propagator* propagator,
720-
const o2::base::PropagatorF::MatCorrType matCorrType)
720+
const o2::base::PropagatorF::MatCorrType matCorrType,
721+
const int nBlocks,
722+
const int nThreads)
721723
{
722-
gpu::fitTrackSeedsKernel<<<20, 256>>>(
724+
gpu::fitTrackSeedsKernel<<<nBlocks, nThreads>>>(
723725
trackSeeds, // CellSeed* trackSeeds,
724726
foundTrackingFrameInfo, // TrackingFrameInfo** foundTrackingFrameInfo,
725727
tracks, // o2::its::TrackITSExt* tracks,
@@ -734,4 +736,4 @@ void trackSeedHandler(CellSeed* trackSeeds,
734736
gpuCheckError(cudaPeekAtLastError());
735737
gpuCheckError(cudaDeviceSynchronize());
736738
}
737-
} // namespace o2::its
739+
} // namespace o2::its

Detectors/ITSMFT/ITS/tracking/GPU/cuda/VertexerTraitsGPU.cxx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ void VertexerTraitsGPU::initialise(const TrackingParameters& trackingParams, con
3737
{
3838
mTimeFrameGPU->initialise(0, trackingParams, 3, &mIndexTableUtils, &mTfGPUParams);
3939
}
40+
4041
void VertexerTraitsGPU::updateVertexingParameters(const std::vector<VertexingParameters>& vrtPar, const TimeFrameGPUParameters& tfPar)
4142
{
4243
mVrtParams = vrtPar;

Detectors/ITSMFT/ITS/tracking/GPU/cuda/VertexingKernels.cu

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,23 @@ void trackletFinderHandler(const Cluster* clustersNextLayer, // 0 2
3939
const unsigned int startRofId,
4040
const unsigned int rofSize,
4141
const float phiCut,
42-
const size_t maxTrackletsPerCluster)
42+
const unsigned int maxTrackletsPerCluster,
43+
const int nBlocks,
44+
const int nThreads)
4345
{
46+
gpu::trackleterKernelMultipleRof<Mode><<<nBlocks, nThreads>>>(
47+
clustersNextLayer, // const Cluster* clustersNextLayer, // 0 2
48+
clustersCurrentLayer, // const Cluster* clustersCurrentLayer, // 1 1
49+
sizeNextLClusters, // const int* sizeNextLClusters,
50+
sizeCurrentLClusters, // const int* sizeCurrentLClusters,
51+
nextIndexTables, // const int* nextIndexTables,
52+
Tracklets, // Tracklet* Tracklets,
53+
foundTracklets, // int* foundTracklets,
54+
utils, // const IndexTableUtils* utils,
55+
startRofId, // const unsigned int startRofId,
56+
rofSize, // const unsigned int rofSize,
57+
phiCut, // const float phiCut,
58+
maxTrackletsPerCluster); // const unsigned int maxTrackletsPerCluster = 1e2
4459
}
4560
/*
4661
GPUd() float smallestAngleDifference(float a, float b)
@@ -96,7 +111,7 @@ GPUd() void printOnBlock(const unsigned int bId, const char* str, Args... args)
96111
}
97112
}
98113
99-
GPUg() void printBufferOnThread(const int* v, size_t size, const int len = 150, const unsigned int tId = 0)
114+
GPUg() void printBufferOnThread(const int* v, unsigned int size, const int len = 150, const unsigned int tId = 0)
100115
{
101116
if (blockIdx.x * blockDim.x + threadIdx.x == tId) {
102117
for (int i{0}; i < size; ++i) {
@@ -109,7 +124,7 @@ GPUg() void printBufferOnThread(const int* v, size_t size, const int len = 150,
109124
}
110125
}
111126
112-
GPUg() void printBufferOnThreadF(const float* v, size_t size, const unsigned int tId = 0)
127+
GPUg() void printBufferOnThreadF(const float* v, unsigned int size, const unsigned int tId = 0)
113128
{
114129
if (blockIdx.x * blockDim.x + threadIdx.x == tId) {
115130
printf("vector :");
@@ -127,7 +142,7 @@ GPUg() void resetTrackletsKernel(Tracklet* tracklets, const int nTracklets)
127142
}
128143
}
129144
130-
GPUg() void dumpFoundTrackletsKernel(const Tracklet* tracklets, const int* nTracklet, const size_t nClustersMiddleLayer, const int maxTrackletsPerCluster)
145+
GPUg() void dumpFoundTrackletsKernel(const Tracklet* tracklets, const int* nTracklet, const unsigned int nClustersMiddleLayer, const int maxTrackletsPerCluster)
131146
{
132147
for (int iCurrentLayerClusterIndex = blockIdx.x * blockDim.x + threadIdx.x; iCurrentLayerClusterIndex < nClustersMiddleLayer; iCurrentLayerClusterIndex += blockDim.x * gridDim.x) {
133148
const int stride{iCurrentLayerClusterIndex * maxTrackletsPerCluster};
@@ -160,15 +175,15 @@ GPUg() void trackleterKernelSingleRof(
160175
int* foundTracklets,
161176
const IndexTableUtils* utils,
162177
const short rofId,
163-
const size_t maxTrackletsPerCluster = 1e2)
178+
const unsigned int maxTrackletsPerCluster = 1e2)
164179
{
165180
const int phiBins{utils->getNphiBins()};
166181
const int zBins{utils->getNzBins()};
167182
// loop on layer1 clusters
168183
for (int iCurrentLayerClusterIndex = blockIdx.x * blockDim.x + threadIdx.x; iCurrentLayerClusterIndex < sizeCurrentLClusters; iCurrentLayerClusterIndex += blockDim.x * gridDim.x) {
169184
if (iCurrentLayerClusterIndex < sizeCurrentLClusters) {
170185
unsigned int storedTracklets{0};
171-
const size_t stride{iCurrentLayerClusterIndex * maxTrackletsPerCluster};
186+
const unsigned int stride{iCurrentLayerClusterIndex * maxTrackletsPerCluster};
172187
const Cluster& currentCluster = clustersCurrentLayer[iCurrentLayerClusterIndex];
173188
const int4 selectedBinsRect{VertexerTraits::getBinsRect(currentCluster, (int)Mode, 0.f, 50.f, phiCut / 2, *utils)};
174189
if (selectedBinsRect.x != 0 || selectedBinsRect.y != 0 || selectedBinsRect.z != 0 || selectedBinsRect.w != 0) {
@@ -218,7 +233,7 @@ GPUg() void trackleterKernelMultipleRof(
218233
const short startRofId,
219234
const short rofSize,
220235
const float phiCut,
221-
const size_t maxTrackletsPerCluster = 1e2)
236+
const unsigned int maxTrackletsPerCluster = 1e2)
222237
{
223238
const int phiBins{utils->getNphiBins()};
224239
const int zBins{utils->getNzBins()};
@@ -235,7 +250,7 @@ GPUg() void trackleterKernelMultipleRof(
235250
// single rof loop on layer1 clusters
236251
for (int iCurrentLayerClusterIndex = threadIdx.x; iCurrentLayerClusterIndex < nClustersCurrentLayerRof; iCurrentLayerClusterIndex += blockDim.x) {
237252
unsigned int storedTracklets{0};
238-
const size_t stride{iCurrentLayerClusterIndex * maxTrackletsPerCluster};
253+
const unsigned int stride{iCurrentLayerClusterIndex * maxTrackletsPerCluster};
239254
const Cluster& currentCluster = clustersCurrentLayerRof[iCurrentLayerClusterIndex];
240255
const int4 selectedBinsRect{VertexerTraits::getBinsRect(currentCluster, (int)Mode, 0.f, 50.f, phiCut / 2, *utils)};
241256
if (selectedBinsRect.x != 0 || selectedBinsRect.y != 0 || selectedBinsRect.z != 0 || selectedBinsRect.w != 0) {
@@ -276,7 +291,7 @@ template <bool initRun>
276291
GPUg() void trackletSelectionKernelSingleRof(
277292
const Cluster* clusters0,
278293
const Cluster* clusters1,
279-
const size_t nClustersMiddleLayer,
294+
const unsigned int nClustersMiddleLayer,
280295
Tracklet* tracklets01,
281296
Tracklet* tracklets12,
282297
const int* nFoundTracklet01,
@@ -436,7 +451,7 @@ GPUg() void computeCentroidsKernel(
436451
Line* lines,
437452
int* nFoundLines,
438453
int* nExclusiveFoundLines,
439-
const size_t nClustersMiddleLayer,
454+
const unsigned int nClustersMiddleLayer,
440455
float* centroids,
441456
const float lowHistX,
442457
const float highHistX,
@@ -446,7 +461,7 @@ GPUg() void computeCentroidsKernel(
446461
{
447462
const int nLines = nExclusiveFoundLines[nClustersMiddleLayer - 1] + nFoundLines[nClustersMiddleLayer - 1];
448463
const int maxIterations{nLines * (nLines - 1) / 2};
449-
for (size_t currentThreadIndex = blockIdx.x * blockDim.x + threadIdx.x; currentThreadIndex < maxIterations; currentThreadIndex += blockDim.x * gridDim.x) {
464+
for (unsigned int currentThreadIndex = blockIdx.x * blockDim.x + threadIdx.x; currentThreadIndex < maxIterations; currentThreadIndex += blockDim.x * gridDim.x) {
450465
int iFirstLine = currentThreadIndex / nLines;
451466
int iSecondLine = currentThreadIndex % nLines;
452467
// All unique pairs
@@ -496,7 +511,7 @@ GPUg() void computeZCentroidsKernel(
496511
const int binOpeningX,
497512
const int binOpeningY)
498513
{
499-
for (size_t currentThreadIndex = blockIdx.x * blockDim.x + threadIdx.x; currentThreadIndex < nLines; currentThreadIndex += blockDim.x * gridDim.x) {
514+
for (unsigned int currentThreadIndex = blockIdx.x * blockDim.x + threadIdx.x; currentThreadIndex < nLines; currentThreadIndex += blockDim.x * gridDim.x) {
500515
if (tmpVtX[0].value || tmpVtX[1].value) {
501516
float tmpX{lowHistX + tmpVtX[0].key * binSizeHistX + binSizeHistX / 2};
502517
int sumWX{tmpVtX[0].value};
@@ -543,7 +558,7 @@ GPUg() void computeVertexKernel(
543558
const int minContributors,
544559
const int binOpeningZ)
545560
{
546-
for (size_t currentThreadIndex = blockIdx.x * blockDim.x + threadIdx.x; currentThreadIndex < binOpeningZ; currentThreadIndex += blockDim.x * gridDim.x) {
561+
for (unsigned int currentThreadIndex = blockIdx.x * blockDim.x + threadIdx.x; currentThreadIndex < binOpeningZ; currentThreadIndex += blockDim.x * gridDim.x) {
547562
if (currentThreadIndex == 0) {
548563
if (tmpVertexBins[2].value > 1 && (tmpVertexBins[0].value || tmpVertexBins[1].value)) {
549564
float z{lowHistZ + tmpVertexBins[2].key * binSizeHistZ + binSizeHistZ / 2};

Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h

Lines changed: 5 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,6 @@ struct VertexerParamConfig : public o2::conf::ConfigurableParamHelper<VertexerPa
5555
};
5656

5757
struct TrackerParamConfig : public o2::conf::ConfigurableParamHelper<TrackerParamConfig> {
58-
5958
// Use TGeo for mat. budget
6059
bool useMatCorrTGeo = false;
6160
bool useFastMaterial = false;
@@ -89,24 +88,13 @@ struct TrackerParamConfig : public o2::conf::ConfigurableParamHelper<TrackerPara
8988
O2ParamDef(TrackerParamConfig, "ITSCATrackerParam");
9089
};
9190

92-
struct GpuRecoParamConfig : public o2::conf::ConfigurableParamHelper<GpuRecoParamConfig> {
91+
struct ITSGpuTrackingParamConfig : public o2::conf::ConfigurableParamHelper<ITSGpuTrackingParamConfig> {
9392
// GPU-specific parameters
94-
size_t tmpCUBBufferSize = 1e5; // In average in pp events there are required 4096 bytes
95-
size_t maxTrackletsPerCluster = 1e2;
96-
size_t clustersPerLayerCapacity = 2.5e5;
97-
size_t clustersPerROfCapacity = 1.5e3;
98-
// size_t trackletsCapacity = maxTrackletsPerCluster * clustersPerLayerCapacity;
99-
size_t validatedTrackletsCapacity = 1e5;
100-
size_t cellsLUTsize = validatedTrackletsCapacity;
101-
size_t maxNeighboursSize = 1e4;
102-
size_t neighboursLUTsize = maxNeighboursSize;
103-
size_t maxRoadPerRofSize = 5e2; // pp!
104-
size_t maxLinesCapacity = 1e2;
105-
size_t maxVerticesCapacity = 5e4;
106-
size_t nTimeFramePartitions = 3;
107-
int maxGPUMemoryGB = -1;
93+
unsigned int tmpCUBBufferSize = 1e5; // In average in pp events there are required 4096 bytes
94+
int nBlocks = 20;
95+
int nThreads = 256;
10896

109-
O2ParamDef(GpuRecoParamConfig, "ITSGpuRecoParam");
97+
O2ParamDef(ITSGpuTrackingParamConfig, "ITSGpuTrackingParam");
11098
};
11199

112100
} // namespace its

Detectors/ITSMFT/ITS/tracking/src/TrackingConfigParam.cxx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,10 @@ namespace its
1818
{
1919
static auto& sVertexerParamITS = o2::its::VertexerParamConfig::Instance();
2020
static auto& sCATrackerParamITS = o2::its::TrackerParamConfig::Instance();
21-
static auto& sGpuRecoParamITS = o2::its::GpuRecoParamConfig::Instance();
21+
static auto& sGpuRecoParamITS = o2::its::ITSGpuTrackingParamConfig::Instance();
2222

2323
O2ParamImpl(o2::its::VertexerParamConfig);
2424
O2ParamImpl(o2::its::TrackerParamConfig);
25-
O2ParamImpl(o2::its::GpuRecoParamConfig);
25+
O2ParamImpl(o2::its::ITSGpuTrackingParamConfig);
2626
} // namespace its
2727
} // namespace o2

Detectors/ITSMFT/ITS/tracking/src/TrackingLinkDef.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
#pragma link C++ class o2::its::TrackerParamConfig + ;
3131
#pragma link C++ class o2::conf::ConfigurableParamHelper < o2::its::TrackerParamConfig> + ;
3232

33-
#pragma link C++ class o2::its::GpuRecoParamConfig + ;
34-
#pragma link C++ class o2::conf::ConfigurableParamHelper < o2::its::GpuRecoParamConfig> + ;
33+
#pragma link C++ class o2::its::ITSGpuTrackingParamConfig + ;
34+
#pragma link C++ class o2::conf::ConfigurableParamHelper < o2::its::ITSGpuTrackingParamConfig> + ;
3535

3636
#endif

0 commit comments

Comments
 (0)