Skip to content

Commit 33b4212

Browse files
authored
ITS-GPU: Move Tracklet finder on GPU (AliceO2Group#13737)
* Fix hybrid vertexer printouts
* Move multiplicity mask to a vector<uint8_t>
* Add gpuSpan
* Debugging getSpan
* Checkpointing
* Fix access in tracklet finding
* Fix tracklet LUTs issue
* Debugging small discrepancies
* Fix bad PhiBins pick
* Add tracklet counting
* Fix indices for used clusters
* Add tracklet writing on the buffer
* tracklets on gpu
* Tracklet finder on GPU
1 parent 4bffbfa commit 33b4212

File tree

13 files changed

+848
-607
lines changed

13 files changed

+848
-607
lines changed

Detectors/ITSMFT/ITS/reconstruction/include/ITSReconstruction/FastMultEst.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ struct FastMultEst {
4545

4646
static uint32_t getCurrentRandomSeed();
4747
int selectROFs(const gsl::span<const o2::itsmft::ROFRecord> rofs, const gsl::span<const o2::itsmft::CompClusterExt> clus,
48-
const gsl::span<const o2::itsmft::PhysTrigger> trig, std::vector<bool>& sel);
48+
const gsl::span<const o2::itsmft::PhysTrigger> trig, std::vector<uint8_t>& sel);
4949

5050
void fillNClPerLayer(const gsl::span<const o2::itsmft::CompClusterExt>& clusters);
5151
float process(const std::array<int, NLayers> ncl)

Detectors/ITSMFT/ITS/reconstruction/src/FastMultEst.cxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ float FastMultEst::processNoiseImposed(const std::array<int, NLayers> ncl)
125125
}
126126

127127
int FastMultEst::selectROFs(const gsl::span<const o2::itsmft::ROFRecord> rofs, const gsl::span<const o2::itsmft::CompClusterExt> clus,
128-
const gsl::span<const o2::itsmft::PhysTrigger> trig, std::vector<bool>& sel)
128+
const gsl::span<const o2::itsmft::PhysTrigger> trig, std::vector<uint8_t>& sel)
129129
{
130130
int nrof = rofs.size(), nsel = 0;
131131
const auto& multEstConf = FastMultEstConfig::Instance(); // parameters for mult estimation and cuts

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h

Lines changed: 46 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,19 @@ class TimeFrameGPU : public TimeFrame
5151
void initialise(const int, const TrackingParameters&, const int, IndexTableUtils* utils = nullptr, const TimeFrameGPUParameters* pars = nullptr);
5252
void initDevice(IndexTableUtils*, const TrackingParameters& trkParam, const TimeFrameGPUParameters&, const int, const int);
5353
void initDeviceSAFitting();
54+
void loadIndexTableUtils(const int);
5455
void loadTrackingFrameInfoDevice(const int);
5556
void loadUnsortedClustersDevice(const int);
5657
void loadClustersDevice(const int);
58+
void loadClustersIndexTables(const int iteration);
59+
void createUsedClustersDevice(const int);
60+
void loadUsedClustersDevice();
61+
void loadROframeClustersDevice(const int);
62+
void loadMultiplicityCutMask(const int);
63+
void loadVertices(const int);
64+
65+
///
66+
void createTrackletsLUTDevice(const int);
5767
void loadTrackletsDevice();
5868
void loadTrackletsLUTDevice();
5969
void loadCellsDevice();
@@ -62,6 +72,7 @@ class TimeFrameGPU : public TimeFrame
6272
void loadTrackSeedsChi2Device();
6373
void loadRoadsDevice();
6474
void loadTrackSeedsDevice(std::vector<CellSeed>&);
75+
void createTrackletsBuffers();
6576
void createCellsBuffers(const int);
6677
void createCellsDevice();
6778
void createCellsLUTDevice();
@@ -93,7 +104,7 @@ class TimeFrameGPU : public TimeFrame
93104
std::vector<std::vector<o2::MCCompLabel>>& getLabelsInChunks() { return mLabelsInChunks; }
94105
int getNAllocatedROFs() const { return mNrof; } // Allocated means maximum nROF for each chunk while populated is the number of loaded ones.
95106
StaticTrackingParameters<nLayers>* getDeviceTrackingParameters() { return mTrackingParamsDevice; }
96-
Vertex* getDeviceVertices() { return mVerticesDevice; }
107+
Vertex* getDeviceVertices() { return mPrimaryVerticesDevice; }
97108
int* getDeviceROFramesPV() { return mROFramesPVDevice; }
98109
unsigned char* getDeviceUsedClusters(const int);
99110
const o2::base::Propagator* getChainPropagator();
@@ -107,26 +118,32 @@ class TimeFrameGPU : public TimeFrame
107118
const TrackingFrameInfo** getDeviceArrayTrackingFrameInfo() const { return mTrackingFrameInfoDeviceArray; }
108119
const Cluster** getDeviceArrayClusters() const { return mClustersDeviceArray; }
109120
const Cluster** getDeviceArrayUnsortedClusters() const { return mUnsortedClustersDeviceArray; }
110-
const Tracklet** getDeviceArrayTracklets() const { return mTrackletsDeviceArray; }
111-
const int** getDeviceArrayTrackletsLUT() const { return mTrackletsLUTDeviceArray; }
121+
const int** getDeviceArrayClustersIndexTables() const { return mClustersIndexTablesDeviceArray; }
122+
std::vector<unsigned int> getClusterSizes();
123+
const unsigned char** getDeviceArrayUsedClusters() const { return mUsedClustersDeviceArray; }
124+
const int** getDeviceROframeClusters() const { return mROFrameClustersDeviceArray; }
125+
Tracklet** getDeviceArrayTracklets() { return mTrackletsDeviceArray; }
126+
int** getDeviceArrayTrackletsLUT() const { return mTrackletsLUTDeviceArray; }
112127
int** getDeviceArrayCellsLUT() const { return mCellsLUTDeviceArray; }
113128
int** getDeviceArrayNeighboursCellLUT() const { return mNeighboursCellLUTDeviceArray; }
114129
CellSeed** getDeviceArrayCells() const { return mCellsDeviceArray; }
115130
CellSeed* getDeviceTrackSeeds() { return mTrackSeedsDevice; }
116131
o2::track::TrackParCovF** getDeviceArrayTrackSeeds() { return mCellSeedsDeviceArray; }
117132
float** getDeviceArrayTrackSeedsChi2() { return mCellSeedsChi2DeviceArray; }
118133
int* getDeviceNeighboursIndexTables(const int layer) { return mNeighboursIndexTablesDevice[layer]; }
134+
uint8_t* getDeviceMultCutMask() { return mMultMaskDevice; }
119135

120136
void setDevicePropagator(const o2::base::PropagatorImpl<float>*) override;
121137

122138
// Host-specific getters
123-
gsl::span<int> getHostNTracklets(const int chunkId);
124-
gsl::span<int> getHostNCells(const int chunkId);
139+
gsl::span<int, nLayers - 1> getNTracklets() { return mNTracklets; }
140+
gsl::span<int, nLayers - 2> getNCells() { return mNCells; }
125141

126142
// Host-available device getters
143+
gsl::span<int*> getDeviceTrackletsLUTs() { return mTrackletsLUTDevice; }
127144
gsl::span<int*> getDeviceCellLUTs() { return mCellsLUTDevice; }
145+
gsl::span<Tracklet*> getDeviceTracklet() { return mTrackletsDevice; }
128146
gsl::span<CellSeed*> getDeviceCells() { return mCellsDevice; }
129-
gsl::span<int, nLayers - 2> getNCellsDevice() { return mNCells; }
130147

131148
private:
132149
void allocMemAsync(void**, size_t, Stream*, bool); // Abstract owned and unowned memory allocations
@@ -136,31 +153,37 @@ class TimeFrameGPU : public TimeFrame
136153
StaticTrackingParameters<nLayers> mStaticTrackingParams;
137154

138155
// Host-available device buffer sizes
156+
std::array<int, nLayers - 1> mNTracklets;
139157
std::array<int, nLayers - 2> mNCells;
140158

141159
// Device pointers
142160
StaticTrackingParameters<nLayers>* mTrackingParamsDevice;
143161
IndexTableUtils* mIndexTableUtilsDevice;
144-
std::array<int*, nLayers> mROFramesClustersDevice;
145-
std::array<unsigned char*, nLayers> mUsedClustersDevice;
146-
Vertex* mVerticesDevice;
147-
int* mROFramesPVDevice;
148162

149163
// Hybrid pref
164+
uint8_t* mMultMaskDevice;
165+
Vertex* mPrimaryVerticesDevice;
166+
int* mROFramesPVDevice;
150167
std::array<Cluster*, nLayers> mClustersDevice;
151168
std::array<Cluster*, nLayers> mUnsortedClustersDevice;
169+
std::array<int*, nLayers> mClustersIndexTablesDevice;
170+
std::array<unsigned char*, nLayers> mUsedClustersDevice;
171+
std::array<int*, nLayers> mROFramesClustersDevice;
152172
const Cluster** mClustersDeviceArray;
153173
const Cluster** mUnsortedClustersDeviceArray;
174+
const int** mClustersIndexTablesDeviceArray;
175+
const unsigned char** mUsedClustersDeviceArray;
176+
const int** mROFrameClustersDeviceArray;
154177
std::array<Tracklet*, nLayers - 1> mTrackletsDevice;
155-
const Tracklet** mTrackletsDeviceArray;
156-
const int** mTrackletsLUTDeviceArray;
157-
std::array<int*, nLayers - 2> mTrackletsLUTDevice;
178+
Tracklet** mTrackletsDeviceArray;
179+
std::array<int*, nLayers - 1> mTrackletsLUTDevice;
158180
std::array<int*, nLayers - 2> mCellsLUTDevice;
159181
std::array<int*, nLayers - 3> mNeighboursLUTDevice;
160182

161183
int** mCellsLUTDeviceArray;
162184
int** mNeighboursCellDeviceArray;
163185
int** mNeighboursCellLUTDeviceArray;
186+
int** mTrackletsLUTDeviceArray;
164187
std::array<CellSeed*, nLayers - 2> mCellsDevice;
165188
std::array<int*, nLayers - 2> mNeighboursIndexTablesDevice;
166189
CellSeed* mTrackSeedsDevice;
@@ -186,10 +209,6 @@ class TimeFrameGPU : public TimeFrame
186209
std::vector<std::vector<int>> mNVerticesInChunks;
187210
std::vector<std::vector<o2::MCCompLabel>> mLabelsInChunks;
188211

189-
// Host memory used only in GPU tracking
190-
std::vector<int> mHostNTracklets;
191-
std::vector<int> mHostNCells;
192-
193212
// Temporary buffer for storing output tracks from GPU tracking
194213
std::vector<TrackITSExt> mTrackITSExt;
195214
};
@@ -215,6 +234,16 @@ inline int TimeFrameGPU<nLayers>::getNClustersInRofSpan(const int rofIdstart, co
215234
{
216235
return static_cast<int>(mROFramesClusters[layerId][(rofIdstart + rofSpanSize) < mROFramesClusters.size() ? rofIdstart + rofSpanSize : mROFramesClusters.size() - 1] - mROFramesClusters[layerId][rofIdstart]);
217236
}
237+
238+
template <int nLayers>
239+
inline std::vector<unsigned int> TimeFrameGPU<nLayers>::getClusterSizes()
240+
{
241+
std::vector<unsigned int> sizes(mUnsortedClusters.size());
242+
std::transform(mUnsortedClusters.begin(), mUnsortedClusters.end(), sizes.begin(),
243+
[](const auto& v) { return static_cast<unsigned int>(v.size()); });
244+
return sizes;
245+
}
246+
218247
} // namespace gpu
219248
} // namespace its
220249
} // namespace o2

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h

Lines changed: 67 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,11 +50,74 @@ GPUg() void fitTrackSeedsKernel(
5050
#endif
5151
} // namespace gpu
5252

53+
template <int nLayers = 7>
54+
void countTrackletsInROFsHandler(const IndexTableUtils* utils,
55+
const uint8_t* multMask,
56+
const int startROF,
57+
const int endROF,
58+
const int maxROF,
59+
const int deltaROF,
60+
const int vertexId,
61+
const Vertex* vertices,
62+
const int* rofPV,
63+
const int nVertices,
64+
const Cluster** clusters,
65+
std::vector<unsigned int> nClusters,
66+
const int** ROFClusters,
67+
const unsigned char** usedClusters,
68+
const int** clustersIndexTables,
69+
int** trackletsLUTs,
70+
gsl::span<int*> trackletsLUTsHost,
71+
const int iteration,
72+
const float NSigmaCut,
73+
std::vector<float>& phiCuts,
74+
const float resolutionPV,
75+
std::vector<float>& minR,
76+
std::vector<float>& maxR,
77+
std::vector<float>& resolutions,
78+
std::vector<float>& radii,
79+
std::vector<float>& mulScatAng,
80+
const int nBlocks,
81+
const int nThreads);
82+
83+
template <int nLayers = 7>
84+
void computeTrackletsInROFsHandler(const IndexTableUtils* utils,
85+
const uint8_t* multMask,
86+
const int startROF,
87+
const int endROF,
88+
const int maxROF,
89+
const int deltaROF,
90+
const int vertexId,
91+
const Vertex* vertices,
92+
const int* rofPV,
93+
const int nVertices,
94+
const Cluster** clusters,
95+
std::vector<unsigned int> nClusters,
96+
const int** ROFClusters,
97+
const unsigned char** usedClusters,
98+
const int** clustersIndexTables,
99+
Tracklet** tracklets,
100+
gsl::span<Tracklet*> spanTracklets,
101+
gsl::span<int> nTracklets,
102+
int** trackletsLUTs,
103+
gsl::span<int*> trackletsLUTsHost,
104+
const int iteration,
105+
const float NSigmaCut,
106+
std::vector<float>& phiCuts,
107+
const float resolutionPV,
108+
std::vector<float>& minR,
109+
std::vector<float>& maxR,
110+
std::vector<float>& resolutions,
111+
std::vector<float>& radii,
112+
std::vector<float>& mulScatAng,
113+
const int nBlocks,
114+
const int nThreads);
115+
53116
void countCellsHandler(const Cluster** sortedClusters,
54117
const Cluster** unsortedClusters,
55118
const TrackingFrameInfo** tfInfo,
56-
const Tracklet** tracklets,
57-
const int** trackletsLUT,
119+
Tracklet** tracklets,
120+
int** trackletsLUT,
58121
const int nTracklets,
59122
const int layer,
60123
CellSeed* cells,
@@ -70,8 +133,8 @@ void countCellsHandler(const Cluster** sortedClusters,
70133
void computeCellsHandler(const Cluster** sortedClusters,
71134
const Cluster** unsortedClusters,
72135
const TrackingFrameInfo** tfInfo,
73-
const Tracklet** tracklets,
74-
const int** trackletsLUT,
136+
Tracklet** tracklets,
137+
int** trackletsLUT,
75138
const int nTracklets,
76139
const int layer,
77140
CellSeed* cells,

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/Utils.h

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,49 @@ struct gpuPair {
3131

3232
namespace gpu
3333
{
34+
// Poor man's implementation of a span-like struct. It is very limited.
35+
template <typename T>
36+
struct gpuSpan {
37+
using value_type = T;
38+
using ptr = T*;
39+
using ref = T&;
40+
41+
GPUd() gpuSpan() : _data(nullptr), _size(0) {}
42+
GPUd() gpuSpan(ptr data, unsigned int dim) : _data(data), _size(dim) {}
43+
GPUd() ref operator[](unsigned int idx) const { return _data[idx]; }
44+
GPUd() unsigned int size() const { return _size; }
45+
GPUd() bool empty() const { return _size == 0; }
46+
GPUd() ref front() const { return _data[0]; }
47+
GPUd() ref back() const { return _data[_size - 1]; }
48+
GPUd() ptr begin() const { return _data; }
49+
GPUd() ptr end() const { return _data + _size; }
50+
51+
protected:
52+
ptr _data;
53+
unsigned int _size;
54+
};
55+
56+
template <typename T>
57+
struct gpuSpan<const T> {
58+
using value_type = T;
59+
using ptr = const T*;
60+
using ref = const T&;
61+
62+
GPUd() gpuSpan() : _data(nullptr), _size(0) {}
63+
GPUd() gpuSpan(ptr data, unsigned int dim) : _data(data), _size(dim) {}
64+
// Converting constructor from a mutable span. gpuSpan<T> is an unrelated type, so its
// protected _data/_size are not accessible here; go through its public accessors instead.
GPUd() gpuSpan(const gpuSpan<T>& other) : _data(other.begin()), _size(other.size()) {}
65+
GPUd() ref operator[](unsigned int idx) const { return _data[idx]; }
66+
GPUd() unsigned int size() const { return _size; }
67+
GPUd() bool empty() const { return _size == 0; }
68+
GPUd() ref front() const { return _data[0]; }
69+
GPUd() ref back() const { return _data[_size - 1]; }
70+
GPUd() ptr begin() const { return _data; }
71+
GPUd() ptr end() const { return _data + _size; }
72+
73+
protected:
74+
ptr _data;
75+
unsigned int _size;
76+
};
3477

3578
enum class Task {
3679
Tracker = 0,

0 commit comments

Comments
 (0)