Skip to content

Commit 24361a9

Browse files
authored
Merge ba3d196 into sapling-pr-archive-ktf
2 parents 3a59e44 + ba3d196 commit 24361a9

File tree

11 files changed

+360
-375
lines changed

11 files changed

+360
-375
lines changed

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h

Lines changed: 9 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -62,23 +62,18 @@ class TimeFrameGPU : public TimeFrame<nLayers>
6262
void createCellsDevice();
6363
void createCellsLUTDevice();
6464
void createNeighboursIndexTablesDevice();
65-
void createNeighboursDevice(const unsigned int layer, const unsigned int nNeighbours);
66-
void createNeighboursDevice(const unsigned int layer, std::vector<std::pair<int, int>>& neighbours);
65+
void createNeighboursDevice(const unsigned int layer);
6766
void createNeighboursLUTDevice(const int, const unsigned int);
68-
void createNeighboursDeviceArray();
6967
void createTrackITSExtDevice(bounded_vector<CellSeed>&);
7068
void downloadTrackITSExtDevice(bounded_vector<CellSeed>&);
7169
void downloadCellsNeighboursDevice(std::vector<bounded_vector<std::pair<int, int>>>&, const int);
7270
void downloadNeighboursLUTDevice(bounded_vector<int>&, const int);
7371
void downloadCellsDevice();
7472
void downloadCellsLUTDevice();
75-
void unregisterRest();
76-
template <Task task>
77-
auto& getStream(const size_t stream)
78-
{
79-
return mGpuStreams[stream];
80-
}
73+
auto& getStream(const size_t stream) { return mGpuStreams[stream]; }
8174
auto& getStreams() { return mGpuStreams; }
75+
void syncStream(const size_t stream);
76+
void syncStreams();
8277
virtual void wipe() final;
8378

8479
/// interface
@@ -99,7 +94,7 @@ class TimeFrameGPU : public TimeFrame<nLayers>
9994
gpuPair<int, int>* getDeviceNeighbourPairs(const int layer) { return mNeighbourPairsDevice[layer]; }
10095
std::array<int*, nLayers - 2>& getDeviceNeighboursAll() { return mNeighboursDevice; }
10196
int* getDeviceNeighbours(const int layer) { return mNeighboursDevice[layer]; }
102-
int** getDeviceNeighboursArray() { return mNeighboursDeviceArray; }
97+
int** getDeviceNeighboursArray() { return mNeighboursDevice.data(); }
10398
TrackingFrameInfo* getDeviceTrackingFrameInfo(const int);
10499
const TrackingFrameInfo** getDeviceArrayTrackingFrameInfo() const { return mTrackingFrameInfoDeviceArray; }
105100
const Cluster** getDeviceArrayClusters() const { return mClustersDeviceArray; }
@@ -108,11 +103,11 @@ class TimeFrameGPU : public TimeFrame<nLayers>
108103
std::vector<unsigned int> getClusterSizes();
109104
const unsigned char** getDeviceArrayUsedClusters() const { return mUsedClustersDeviceArray; }
110105
const int** getDeviceROframeClusters() const { return mROFrameClustersDeviceArray; }
111-
Tracklet** getDeviceArrayTracklets() { return mTrackletsDeviceArray; }
106+
Tracklet** getDeviceArrayTracklets() { return mTrackletsDevice.data(); }
112107
int** getDeviceArrayTrackletsLUT() const { return mTrackletsLUTDeviceArray; }
113108
int** getDeviceArrayCellsLUT() const { return mCellsLUTDeviceArray; }
114109
int** getDeviceArrayNeighboursCellLUT() const { return mNeighboursCellLUTDeviceArray; }
115-
CellSeed** getDeviceArrayCells() const { return mCellsDeviceArray; }
110+
CellSeed** getDeviceArrayCells() { return mCellsDevice.data(); }
116111
CellSeed* getDeviceTrackSeeds() { return mTrackSeedsDevice; }
117112
o2::track::TrackParCovF** getDeviceArrayTrackSeeds() { return mCellSeedsDeviceArray; }
118113
float** getDeviceArrayTrackSeedsChi2() { return mCellSeedsChi2DeviceArray; }
@@ -140,7 +135,8 @@ class TimeFrameGPU : public TimeFrame<nLayers>
140135
int getNumberOfNeighbours() const final;
141136

142137
private:
143-
void allocMemAsync(void**, size_t, Stream&, bool); // Abstract owned and unowned memory allocations
138+
void allocMemAsync(void**, size_t, Stream&, bool); // Abstract owned and unowned memory allocations on specific stream
139+
void allocMem(void**, size_t, bool); // Abstract owned and unowned memory allocations on default stream
144140
bool mHostRegistered = false;
145141
TimeFrameGPUParameters mGpuParams;
146142

@@ -167,7 +163,6 @@ class TimeFrameGPU : public TimeFrame<nLayers>
167163
const unsigned char** mUsedClustersDeviceArray;
168164
const int** mROFrameClustersDeviceArray;
169165
std::array<Tracklet*, nLayers - 1> mTrackletsDevice;
170-
Tracklet** mTrackletsDeviceArray;
171166
std::array<int*, nLayers - 1> mTrackletsLUTDevice;
172167
std::array<int*, nLayers - 2> mCellsLUTDevice;
173168
std::array<int*, nLayers - 3> mNeighboursLUTDevice;
@@ -179,7 +174,6 @@ class TimeFrameGPU : public TimeFrame<nLayers>
179174
std::array<CellSeed*, nLayers - 2> mCellsDevice;
180175
std::array<int*, nLayers - 2> mNeighboursIndexTablesDevice;
181176
CellSeed* mTrackSeedsDevice;
182-
CellSeed** mCellsDeviceArray;
183177
std::array<o2::track::TrackParCovF*, nLayers - 2> mCellSeedsDevice;
184178
o2::track::TrackParCovF** mCellSeedsDeviceArray;
185179
std::array<float*, nLayers - 2> mCellSeedsChi2Device;
@@ -189,14 +183,11 @@ class TimeFrameGPU : public TimeFrame<nLayers>
189183
TrackITSExt* mTrackITSExtDevice;
190184
std::array<gpuPair<int, int>*, nLayers - 2> mNeighbourPairsDevice;
191185
std::array<int*, nLayers - 2> mNeighboursDevice;
192-
int** mNeighboursDeviceArray;
193186
std::array<TrackingFrameInfo*, nLayers> mTrackingFrameInfoDevice;
194187
const TrackingFrameInfo** mTrackingFrameInfoDeviceArray;
195188

196189
// State
197190
Streams mGpuStreams;
198-
size_t mAvailMemGB;
199-
bool mFirstInit = true;
200191

201192
// Temporary buffer for storing output tracks from GPU tracking
202193
bounded_vector<TrackITSExt> mTrackITSExt;

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h

Lines changed: 33 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,10 @@ namespace gpu
2525

2626
#ifdef GPUCA_GPUCODE // GPUg() global kernels must only be visible when compiled by GPU compiler
2727

28-
GPUdi() int4 getEmptyBinsRect() { return int4{0, 0, 0, 0}; }
28+
GPUdi() int4 getEmptyBinsRect()
29+
{
30+
return int4{0, 0, 0, 0};
31+
}
2932

3033
GPUd() bool fitTrack(TrackITSExt& track,
3134
int start,
@@ -83,6 +86,7 @@ void countTrackletsInROFsHandler(const IndexTableUtils* utils,
8386
bounded_vector<float>& resolutions,
8487
std::vector<float>& radii,
8588
bounded_vector<float>& mulScatAng,
89+
o2::its::ExternalAllocator* alloc,
8690
const int nBlocks,
8791
const int nThreads,
8892
gpu::Streams& streams);
@@ -117,6 +121,7 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils,
117121
bounded_vector<float>& resolutions,
118122
std::vector<float>& radii,
119123
bounded_vector<float>& mulScatAng,
124+
o2::its::ExternalAllocator* alloc,
120125
const int nBlocks,
121126
const int nThreads,
122127
gpu::Streams& streams);
@@ -136,8 +141,10 @@ void countCellsHandler(const Cluster** sortedClusters,
136141
const float maxChi2ClusterAttachment,
137142
const float cellDeltaTanLambdaSigma,
138143
const float nSigmaCut,
144+
o2::its::ExternalAllocator* alloc,
139145
const int nBlocks,
140-
const int nThreads);
146+
const int nThreads,
147+
gpu::Streams& streams);
141148

142149
void computeCellsHandler(const Cluster** sortedClusters,
143150
const Cluster** unsortedClusters,
@@ -155,23 +162,26 @@ void computeCellsHandler(const Cluster** sortedClusters,
155162
const float cellDeltaTanLambdaSigma,
156163
const float nSigmaCut,
157164
const int nBlocks,
158-
const int nThreads);
159-
160-
unsigned int countCellNeighboursHandler(CellSeed** cellsLayersDevice,
161-
int* neighboursLUTs,
162-
int** cellsLUTs,
163-
gpuPair<int, int>* cellNeighbours,
164-
int* neighboursIndexTable,
165-
const Tracklet** tracklets,
166-
const int deltaROF,
167-
const float maxChi2ClusterAttachment,
168-
const float bz,
169-
const int layerIndex,
170-
const unsigned int nCells,
171-
const unsigned int nCellsNext,
172-
const int maxCellNeighbours,
173-
const int nBlocks,
174-
const int nThreads);
165+
const int nThreads,
166+
gpu::Streams& streams);
167+
168+
void countCellNeighboursHandler(CellSeed** cellsLayersDevice,
169+
int* neighboursLUTs,
170+
int** cellsLUTs,
171+
gpuPair<int, int>* cellNeighbours,
172+
int* neighboursIndexTable,
173+
const Tracklet** tracklets,
174+
const int deltaROF,
175+
const float maxChi2ClusterAttachment,
176+
const float bz,
177+
const int layerIndex,
178+
const unsigned int nCells,
179+
const unsigned int nCellsNext,
180+
const int maxCellNeighbours,
181+
o2::its::ExternalAllocator* alloc,
182+
const int nBlocks,
183+
const int nThreads,
184+
gpu::Stream& stream);
175185

176186
void computeCellNeighboursHandler(CellSeed** cellsLayersDevice,
177187
int* neighboursLUTs,
@@ -187,11 +197,13 @@ void computeCellNeighboursHandler(CellSeed** cellsLayersDevice,
187197
const unsigned int nCellsNext,
188198
const int maxCellNeighbours,
189199
const int nBlocks,
190-
const int nThreads);
200+
const int nThreads,
201+
gpu::Stream& stream);
191202

192203
int filterCellNeighboursHandler(gpuPair<int, int>*,
193204
int*,
194205
unsigned int,
206+
gpu::Stream&,
195207
o2::its::ExternalAllocator* = nullptr);
196208

197209
template <int nLayers = 7>
@@ -205,12 +217,12 @@ void processNeighboursHandler(const int startLayer,
205217
gsl::span<int*> neighboursDeviceLUTs,
206218
const TrackingFrameInfo** foundTrackingFrameInfo,
207219
bounded_vector<CellSeed>& seedsHost,
208-
o2::its::ExternalAllocator*,
209220
const float bz,
210221
const float MaxChi2ClusterAttachment,
211222
const float maxChi2NDF,
212223
const o2::base::Propagator* propagator,
213224
const o2::base::PropagatorF::MatCorrType matCorrType,
225+
o2::its::ExternalAllocator* alloc,
214226
const int nBlocks,
215227
const int nThreads);
216228

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/Utils.h

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -94,16 +94,19 @@ class Stream
9494
public:
9595
#if defined(__HIPCC__)
9696
using Handle = hipStream_t;
97-
static constexpr Handle Default = 0;
97+
static constexpr Handle DefaultStream = 0;
98+
static constexpr unsigned int DefaultFlag = hipStreamNonBlocking;
9899
#elif defined(__CUDACC__)
99100
using Handle = cudaStream_t;
100-
static constexpr Handle Default = 0;
101+
static constexpr Handle DefaultStream = 0;
102+
static constexpr unsigned int DefaultFlag = cudaStreamNonBlocking;
101103
#else
102104
using Handle = void*;
103-
static constexpr Handle Default = nullptr;
105+
static constexpr Handle DefaultStream = nullptr;
106+
static constexpr unsigned int DefaultFlag = 0;
104107
#endif
105108

106-
Stream(unsigned int flags = 0)
109+
Stream(unsigned int flags = DefaultFlag)
107110
{
108111
#if defined(__HIPCC__)
109112
GPUChkErrS(hipStreamCreateWithFlags(&mHandle, flags));
@@ -115,7 +118,7 @@ class Stream
115118
Stream(Handle h) : mHandle(h) {}
116119
~Stream()
117120
{
118-
if (mHandle != Default) {
121+
if (mHandle != DefaultStream) {
119122
#if defined(__HIPCC__)
120123
GPUChkErrS(hipStreamDestroy(mHandle));
121124
#elif defined(__CUDACC__)
@@ -124,7 +127,7 @@ class Stream
124127
}
125128
}
126129

127-
operator bool() const { return mHandle != Default; }
130+
operator bool() const { return mHandle != DefaultStream; }
128131
const Handle& get() { return mHandle; }
129132
void sync() const
130133
{
@@ -136,7 +139,7 @@ class Stream
136139
}
137140

138141
private:
139-
Handle mHandle{Default};
142+
Handle mHandle{DefaultStream};
140143
};
141144
static_assert(sizeof(Stream) == sizeof(void*), "Stream type must match pointer type!");
142145

@@ -150,6 +153,12 @@ class Streams
150153
void clear() { mStreams.clear(); }
151154
auto& operator[](size_t i) { return mStreams[i % mStreams.size()]; }
152155
void push_back(const Stream& stream) { mStreams.push_back(stream); }
156+
void sync()
157+
{
158+
for (auto& s : mStreams) {
159+
s.sync();
160+
}
161+
}
153162

154163
private:
155164
std::vector<Stream> mStreams;

0 commit comments

Comments
 (0)