Skip to content

Commit 44d79b3

Browse files
authored
Merge 016da34 into sapling-pr-archive-ktf
2 parents 9309786 + 016da34 commit 44d79b3

File tree

30 files changed

+186
-241
lines changed

30 files changed

+186
-241
lines changed

DataFormats/Headers/include/Headers/Stack.h

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,7 @@
1414
#include "MemoryResources/MemoryResources.h"
1515
#include "Headers/DataHeader.h"
1616

17-
namespace o2
18-
{
19-
20-
namespace header
17+
namespace o2::header
2118
{
2219
//__________________________________________________________________________________________________
2320
/// @struct Stack
@@ -45,20 +42,20 @@ struct Stack {
4542
};
4643

4744
public:
48-
using allocator_type = boost::container::pmr::polymorphic_allocator<std::byte>;
45+
using allocator_type = fair::mq::pmr::polymorphic_allocator<std::byte>;
4946
using value_type = std::byte;
5047
using BufferType = std::unique_ptr<value_type[], freeobj>; //this gives us proper default move semantics for free
5148

5249
Stack() = default;
5350
Stack(Stack&&) = default;
5451
Stack(Stack&) = delete;
5552
Stack& operator=(Stack&) = delete;
56-
Stack& operator=(Stack&&) = default;
53+
Stack& operator=(Stack&&) = delete;
5754

58-
value_type* data() const { return buffer.get(); }
59-
size_t size() const { return bufferSize; }
55+
[[nodiscard]] value_type* data() const { return buffer.get(); }
56+
[[nodiscard]] size_t size() const { return bufferSize; }
6057
allocator_type get_allocator() const { return allocator; }
61-
const BaseHeader* first() const { return reinterpret_cast<const BaseHeader*>(this->data()); }
58+
[[nodiscard]] const BaseHeader* first() const { return reinterpret_cast<const BaseHeader*>(this->data()); }
6259
static const BaseHeader* firstHeader(std::byte const* buf) { return BaseHeader::get(buf); }
6360
static const BaseHeader* lastHeader(std::byte const* buf)
6461
{
@@ -90,9 +87,9 @@ struct Stack {
9087
/// all headers must derive from BaseHeader, in addition also other stacks can be passed to ctor.
9188
template <typename FirstArgType, typename... Headers,
9289
typename std::enable_if_t<
93-
!std::is_convertible<FirstArgType, boost::container::pmr::polymorphic_allocator<std::byte>>::value, int> = 0>
90+
!std::is_convertible<FirstArgType, fair::mq::pmr::polymorphic_allocator<std::byte>>::value, int> = 0>
9491
Stack(FirstArgType&& firstHeader, Headers&&... headers)
95-
: Stack(boost::container::pmr::new_delete_resource(), std::forward<FirstArgType>(firstHeader),
92+
: Stack(fair::mq::pmr::new_delete_resource(), std::forward<FirstArgType>(firstHeader),
9693
std::forward<Headers>(headers)...)
9794
{
9895
}
@@ -143,7 +140,7 @@ struct Stack {
143140
constexpr static size_t calculateSize() { return 0; }
144141

145142
private:
146-
allocator_type allocator{boost::container::pmr::new_delete_resource()};
143+
allocator_type allocator{fair::mq::pmr::new_delete_resource()};
147144
size_t bufferSize{0};
148145
BufferType buffer{nullptr, freeobj{allocator.resource()}};
149146

@@ -231,7 +228,7 @@ struct Stack {
231228
}
232229
};
233230

234-
} // namespace header
235-
} // namespace o2
231+
} // namespace o2::header
232+
236233

237234
#endif // HEADERS_STACK_H

DataFormats/Headers/test/testDataHeader.cxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,7 @@ BOOST_AUTO_TEST_CASE(headerStack_test)
314314
BOOST_CHECK(h3->secret == 42);
315315

316316
//test constructing from a buffer and an additional header
317-
using namespace boost::container::pmr;
317+
using namespace fair::mq::pmr;
318318
Stack s5(new_delete_resource(), s1.data(), Stack{}, meta);
319319
BOOST_CHECK(s5.size() == s1.size() + sizeof(meta));
320320
// check if we can find the header even though there was an empty stack in the middle

DataFormats/MemoryResources/include/MemoryResources/MemoryResources.h

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ class MessageResource : public FairMQMemoryResource
115115
// A spectator pmr memory resource which only watches the memory of the underlying buffer, does not
116116
// carry out real allocation. It owns the underlying buffer which is destroyed on deallocation.
117117
template <typename BufferType>
118-
class SpectatorMemoryResource : public boost::container::pmr::memory_resource
118+
class SpectatorMemoryResource : public fair::mq::pmr::memory_resource
119119
{
120120
public:
121121
using buffer_type = BufferType;
@@ -183,10 +183,10 @@ class SpectatorMemoryResource : public boost::container::pmr::memory_resource
183183
// This in general (as in STL) is a bad idea, but here it is safe to inherit from an allocator since we
184184
// have no additional data and only override some methods so we don't get into slicing and other problems.
185185
template <typename T>
186-
class SpectatorAllocator : public boost::container::pmr::polymorphic_allocator<T>
186+
class SpectatorAllocator : public fair::mq::pmr::polymorphic_allocator<T>
187187
{
188188
public:
189-
using boost::container::pmr::polymorphic_allocator<T>::polymorphic_allocator;
189+
using fair::mq::pmr::polymorphic_allocator<T>::polymorphic_allocator;
190190
using propagate_on_container_move_assignment = std::true_type;
191191

192192
// skip default construction of empty elements
@@ -243,7 +243,7 @@ class OwningMessageSpectatorAllocator
243243
return OwningMessageSpectatorAllocator();
244244
}
245245

246-
boost::container::pmr::memory_resource* resource() { return &mResource; }
246+
fair::mq::pmr::memory_resource* resource() { return &mResource; }
247247

248248
// skip default construction of empty elements
249249
// this is important for two reasons: one: it allows us to adopt an existing buffer (e.g. incoming message) and
@@ -269,14 +269,14 @@ class OwningMessageSpectatorAllocator
269269

270270
// The NoConstructAllocator behaves like the normal pmr vector but does not call constructors / destructors
271271
template <typename T>
272-
class NoConstructAllocator : public boost::container::pmr::polymorphic_allocator<T>
272+
class NoConstructAllocator : public fair::mq::pmr::polymorphic_allocator<T>
273273
{
274274
public:
275-
using boost::container::pmr::polymorphic_allocator<T>::polymorphic_allocator;
275+
using fair::mq::pmr::polymorphic_allocator<T>::polymorphic_allocator;
276276
using propagate_on_container_move_assignment = std::true_type;
277277

278278
template <typename... Args>
279-
NoConstructAllocator(Args&&... args) : boost::container::pmr::polymorphic_allocator<T>(std::forward<Args>(args)...)
279+
NoConstructAllocator(Args&&... args) : fair::mq::pmr::polymorphic_allocator<T>(std::forward<Args>(args)...)
280280
{
281281
}
282282

@@ -302,9 +302,9 @@ class NoConstructAllocator : public boost::container::pmr::polymorphic_allocator
302302
//__________________________________________________________________________________________________
303303

304304
using ByteSpectatorAllocator = SpectatorAllocator<std::byte>;
305-
using BytePmrAllocator = boost::container::pmr::polymorphic_allocator<std::byte>;
305+
using BytePmrAllocator = fair::mq::pmr::polymorphic_allocator<std::byte>;
306306
template <class T>
307-
using vector = std::vector<T, o2::pmr::polymorphic_allocator<T>>;
307+
using vector = std::vector<T, fair::mq::pmr::polymorphic_allocator<T>>;
308308

309309
//__________________________________________________________________________________________________
310310
/// Return a std::vector spanned over the contents of the message, takes ownership of the message

DataFormats/MemoryResources/test/testMemoryResources.cxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ BOOST_AUTO_TEST_CASE(transportallocatormap_test)
6060
BOOST_CHECK(_tmp == allocZMQ);
6161
}
6262

63-
using namespace boost::container::pmr;
63+
using namespace fair::mq::pmr;
6464

6565
BOOST_AUTO_TEST_CASE(allocator_test)
6666
{

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
namespace o2::its
2020
{
2121
class CellSeed;
22+
class ExternalAllocator;
2223
namespace gpu
2324
{
2425
#ifdef GPUCA_GPUCODE // GPUg() global kernels must only when compiled by GPU compiler
@@ -178,7 +179,8 @@ void computeCellNeighboursHandler(CellSeed** cellsLayersDevice,
178179

179180
int filterCellNeighboursHandler(gpuPair<int, int>*,
180181
int*,
181-
unsigned int);
182+
unsigned int,
183+
o2::its::ExternalAllocator* = nullptr);
182184

183185
template <int nLayers = 7>
184186
void processNeighboursHandler(const int startLayer,
@@ -191,6 +193,7 @@ void processNeighboursHandler(const int startLayer,
191193
gsl::span<int*> neighboursDeviceLUTs,
192194
const TrackingFrameInfo** foundTrackingFrameInfo,
193195
bounded_vector<CellSeed>& seedsHost,
196+
o2::its::ExternalAllocator*,
194197
const float bz,
195198
const float MaxChi2ClusterAttachment,
196199
const float maxChi2NDF,

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "ITStrackingGPU/TrackerTraitsGPU.h"
1919
#include "ITStrackingGPU/TrackingKernels.h"
2020
#include "ITStracking/TrackingConfigParam.h"
21+
2122
namespace o2::its
2223
{
2324
constexpr int UnusedIndex{-1};
@@ -209,7 +210,8 @@ void TrackerTraitsGPU<nLayers>::findCellsNeighbours(const int iteration)
209210

210211
filterCellNeighboursHandler(mTimeFrameGPU->getDeviceNeighbourPairs(iLayer),
211212
mTimeFrameGPU->getDeviceNeighbours(iLayer),
212-
nNeigh);
213+
nNeigh,
214+
mTimeFrameGPU->getExternalAllocator());
213215
}
214216
mTimeFrameGPU->createNeighboursDeviceArray();
215217
mTimeFrameGPU->unregisterRest();
@@ -236,6 +238,7 @@ void TrackerTraitsGPU<nLayers>::findRoads(const int iteration)
236238
mTimeFrameGPU->getDeviceNeighboursLUTs(),
237239
mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(),
238240
trackSeeds,
241+
mTimeFrameGPU->getExternalAllocator(),
239242
this->mBz,
240243
this->mTrkParams[0].MaxChi2ClusterAttachment,
241244
this->mTrkParams[0].MaxChi2NDF,

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu

Lines changed: 47 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -28,15 +28,14 @@
2828
#include "ITStracking/Constants.h"
2929
#include "ITStracking/IndexTableUtils.h"
3030
#include "ITStracking/MathUtils.h"
31+
#include "ITStracking/ExternalAllocator.h"
3132
#include "DataFormatsITS/TrackITS.h"
3233
#include "ReconstructionDataFormats/Vertex.h"
3334

3435
#include "ITStrackingGPU/TrackerTraitsGPU.h"
3536
#include "ITStrackingGPU/TrackingKernels.h"
3637
#include "ITStrackingGPU/Utils.h"
3738

38-
#include "GPUCommonHelpers.h"
39-
4039
#ifndef __HIPCC__
4140
#define THRUST_NAMESPACE thrust::cuda
4241
#else
@@ -64,6 +63,37 @@ GPUdii() float Sq(float v)
6463
namespace gpu
6564
{
6665

66+
template <typename T>
67+
class TypedAllocator : public thrust::device_allocator<T>
68+
{
69+
public:
70+
using value_type = T;
71+
using pointer = T*;
72+
73+
template <typename U>
74+
struct rebind {
75+
using other = TypedAllocator<U>;
76+
};
77+
78+
explicit TypedAllocator(ExternalAllocator* allocPtr)
79+
: mInternalAllocator(allocPtr) {}
80+
81+
T* allocate(size_t n)
82+
{
83+
return reinterpret_cast<T*>(mInternalAllocator->allocate(n * sizeof(T)));
84+
}
85+
86+
void deallocate(T* p, size_t n)
87+
{
88+
char* raw_ptr = reinterpret_cast<char*>(p);
89+
size_t bytes = n * sizeof(T);
90+
mInternalAllocator->deallocate(raw_ptr, bytes); // redundant as internal dealloc is no-op.
91+
}
92+
93+
private:
94+
ExternalAllocator* mInternalAllocator;
95+
};
96+
6797
GPUd() const int4 getBinsRect(const Cluster& currentCluster, const int layerIndex,
6898
const o2::its::IndexTableUtils& utils,
6999
const float z1, const float z2, float maxdeltaz, float maxdeltaphi)
@@ -1117,7 +1147,8 @@ void computeCellNeighboursHandler(CellSeed** cellsLayersDevice,
11171147
11181148
int filterCellNeighboursHandler(gpuPair<int, int>* cellNeighbourPairs,
11191149
int* cellNeighbours,
1120-
unsigned int nNeigh)
1150+
unsigned int nNeigh,
1151+
o2::its::ExternalAllocator* allocator)
11211152
{
11221153
thrust::device_ptr<gpuPair<int, int>> neighVectorPairs(cellNeighbourPairs);
11231154
thrust::device_ptr<int> validNeighs(cellNeighbours);
@@ -1140,6 +1171,7 @@ void processNeighboursHandler(const int startLayer,
11401171
gsl::span<int*> neighboursDeviceLUTs,
11411172
const TrackingFrameInfo** foundTrackingFrameInfo,
11421173
bounded_vector<CellSeed>& seedsHost,
1174+
o2::its::ExternalAllocator* allocator,
11431175
const float bz,
11441176
const float maxChi2ClusterAttachment,
11451177
const float maxChi2NDF,
@@ -1148,8 +1180,10 @@ void processNeighboursHandler(const int startLayer,
11481180
const int nBlocks,
11491181
const int nThreads)
11501182
{
1151-
thrust::device_vector<int> foundSeedsTable(nCells[startLayer] + 1); // Shortcut: device_vector skips central memory management, we are relying on the contingency.
1152-
// TODO: fix this.
1183+
auto allocInt = gpu::TypedAllocator<int>(allocator);
1184+
auto allocCellSeed = gpu::TypedAllocator<CellSeed>(allocator);
1185+
thrust::device_vector<int, gpu::TypedAllocator<int>> foundSeedsTable(nCells[startLayer] + 1, 0, allocInt); // Shortcut: device_vector skips central memory management, we are relying on the contingency.
1186+
// TODO: fix this.
11531187
11541188
gpu::processNeighboursKernel<true><<<o2::gpu::CAMath::Min(nBlocks, GPU_BLOCKS),
11551189
o2::gpu::CAMath::Min(nThreads, GPU_THREADS)>>>(
@@ -1172,8 +1206,8 @@ void processNeighboursHandler(const int startLayer,
11721206
matCorrType);
11731207
gpu::cubExclusiveScanInPlace(foundSeedsTable, nCells[startLayer] + 1);
11741208
1175-
thrust::device_vector<int> updatedCellId(foundSeedsTable.back());
1176-
thrust::device_vector<CellSeed> updatedCellSeed(foundSeedsTable.back());
1209+
thrust::device_vector<int, gpu::TypedAllocator<int>> updatedCellId(foundSeedsTable.back(), 0, allocInt);
1210+
thrust::device_vector<CellSeed, gpu::TypedAllocator<CellSeed>> updatedCellSeed(foundSeedsTable.back(), allocCellSeed);
11771211
gpu::processNeighboursKernel<false><<<o2::gpu::CAMath::Min(nBlocks, GPU_BLOCKS),
11781212
o2::gpu::CAMath::Min(nThreads, GPU_THREADS)>>>(
11791213
startLayer,
@@ -1195,13 +1229,13 @@ void processNeighboursHandler(const int startLayer,
11951229
matCorrType);
11961230
11971231
int level = startLevel;
1198-
thrust::device_vector<int> lastCellId;
1199-
thrust::device_vector<CellSeed> lastCellSeed;
1232+
thrust::device_vector<int, gpu::TypedAllocator<int>> lastCellId(allocInt);
1233+
thrust::device_vector<CellSeed, gpu::TypedAllocator<CellSeed>> lastCellSeed(allocCellSeed);
12001234
for (int iLayer{startLayer - 1}; iLayer > 0 && level > 2; --iLayer) {
12011235
lastCellSeed.swap(updatedCellSeed);
12021236
lastCellId.swap(updatedCellId);
1203-
thrust::device_vector<CellSeed>().swap(updatedCellSeed);
1204-
thrust::device_vector<int>().swap(updatedCellId);
1237+
thrust::device_vector<CellSeed, gpu::TypedAllocator<CellSeed>>(allocCellSeed).swap(updatedCellSeed);
1238+
thrust::device_vector<int, gpu::TypedAllocator<int>>(allocInt).swap(updatedCellId);
12051239
auto lastCellSeedSize{lastCellSeed.size()};
12061240
foundSeedsTable.resize(lastCellSeedSize + 1);
12071241
thrust::fill(foundSeedsTable.begin(), foundSeedsTable.end(), 0);
@@ -1253,8 +1287,7 @@ void processNeighboursHandler(const int startLayer,
12531287
propagator,
12541288
matCorrType);
12551289
}
1256-
1257-
thrust::device_vector<CellSeed> outSeeds(updatedCellSeed.size());
1290+
thrust::device_vector<CellSeed, gpu::TypedAllocator<CellSeed>> outSeeds(updatedCellSeed.size(), allocCellSeed);
12581291
auto end = thrust::copy_if(updatedCellSeed.begin(), updatedCellSeed.end(), outSeeds.begin(), gpu::seed_selector(1.e3, maxChi2NDF * ((startLevel + 2) * 2 - 5)));
12591292
auto s{end - outSeeds.begin()};
12601293
seedsHost.reserve(seedsHost.size() + s);
@@ -1367,6 +1400,7 @@ template void processNeighboursHandler<7>(const int startLayer,
13671400
gsl::span<int*> neighboursDeviceLUTs,
13681401
const TrackingFrameInfo** foundTrackingFrameInfo,
13691402
bounded_vector<CellSeed>& seedsHost,
1403+
o2::its::ExternalAllocator*,
13701404
const float bz,
13711405
const float maxChi2ClusterAttachment,
13721406
const float maxChi2NDF,

Detectors/ITSMFT/ITS/tracking/include/ITStracking/Cell.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ class CellSeed final : public o2::track::TrackParCovF
6767
{
6868
public:
6969
GPUhdDefault() CellSeed() = default;
70-
GPUhd() CellSeed(int innerL, int cl0, int cl1, int cl2, int trkl0, int trkl1, o2::track::TrackParCovF& tpc, float chi2) : o2::track::TrackParCovF{tpc}, mLevel{1}, mChi2{chi2}
70+
GPUhd() CellSeed(int innerL, int cl0, int cl1, int cl2, int trkl0, int trkl1, o2::track::TrackParCovF& tpc, float chi2) : o2::track::TrackParCovF(tpc), mChi2(chi2), mLevel(1)
7171
{
7272
setUserField(innerL);
7373
mClusters[innerL + 0] = cl0;

Detectors/ITSMFT/ITS/tracking/include/ITStracking/ExternalAllocator.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ class ExternalAllocator
2323
{
2424
public:
2525
virtual void* allocate(size_t) = 0;
26+
virtual void deallocate(char*, size_t) = 0;
2627
};
27-
2828
} // namespace o2::its
2929

3030
#endif

Detectors/ITSMFT/ITS/tracking/include/ITStracking/IOUtils.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,10 +72,6 @@ inline static const o2::itsmft::ChipMappingITS& getChipMappingITS()
7272
return MP;
7373
}
7474

75-
std::vector<std::unordered_map<int, Label>> loadLabels(const int, const std::string&);
76-
void writeRoadsReport(std::ofstream&, std::ofstream&, std::ofstream&, const std::vector<std::vector<Road<5>>>&,
77-
const std::unordered_map<int, Label>&);
78-
7975
template <class iterator, typename T>
8076
o2::math_utils::Point3D<T> extractClusterData(const itsmft::CompClusterExt& c, iterator& iter, const itsmft::TopologyDictionary* dict, T& sig2y, T& sig2z)
8177
{

0 commit comments

Comments
 (0)