Skip to content

Commit b072fc8

Browse files
authored
Merge 97bb4f5 into sapling-pr-archive-ktf
2 parents c4285c8 + 97bb4f5 commit b072fc8

File tree

69 files changed

+1787
-724
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

69 files changed

+1787
-724
lines changed

DataFormats/Detectors/TPC/src/DCS.cxx

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -329,12 +329,27 @@ void fillBuffer(std::pair<std::vector<float>, std::vector<TimeStampType>>& buffe
329329
}
330330
}
331331

332-
std::pair<std::vector<float>, std::vector<TimeStampType>> buffTmp{
333-
std::vector<float>(buffer.first.begin() + idxStartBuffer, buffer.first.end()),
334-
std::vector<TimeStampType>(buffer.second.begin() + idxStartBuffer, buffer.second.end())};
335-
336-
buffTmp.first.insert(buffTmp.first.end(), values.first.begin(), values.first.end());
337-
buffTmp.second.insert(buffTmp.second.end(), values.second.begin(), values.second.end());
332+
std::pair<std::vector<float>, std::vector<TimeStampType>> buffTmp;
333+
auto& [buffVals, buffTimes] = buffTmp;
334+
335+
// Preallocate enough capacity to avoid reallocations
336+
buffVals.reserve(buffer.first.size() - idxStartBuffer + values.first.size());
337+
buffTimes.reserve(buffer.second.size() - idxStartBuffer + values.second.size());
338+
// Insert the kept part of the old buffer
339+
buffVals.insert(buffVals.end(), buffer.first.begin() + idxStartBuffer, buffer.first.end());
340+
buffTimes.insert(buffTimes.end(), buffer.second.begin() + idxStartBuffer, buffer.second.end());
341+
// Insert the new values
342+
buffVals.insert(buffVals.end(), values.first.begin(), values.first.end());
343+
buffTimes.insert(buffTimes.end(), values.second.begin(), values.second.end());
344+
345+
// this should not happen
346+
if (!std::is_sorted(buffTimes.begin(), buffTimes.end())) {
347+
LOGP(info, "Pressure buffer not sorted after filling - sorting it");
348+
std::vector<size_t> idx(buffTimes.size());
349+
o2::math_utils::SortData(buffTimes, idx);
350+
o2::math_utils::Reorder(buffVals, idx);
351+
o2::math_utils::Reorder(buffTimes, idx);
352+
}
338353

339354
buffer = std::move(buffTmp);
340355
}

DataFormats/Headers/include/Headers/Stack.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,10 @@ struct Stack {
3636

3737
private:
3838
struct freeobj {
39-
freeobj(memory_resource* mr) : resource(mr) {}
39+
freeobj(memory_resource* mr, size_t s) : resource(mr), size(s) {}
4040
memory_resource* resource{nullptr};
41-
void operator()(std::byte* ptr) { resource->deallocate(ptr, 0, 0); }
41+
size_t size{0};
42+
void operator()(std::byte* ptr) { resource->deallocate(ptr, size, alignof(std::max_align_t)); }
4243
};
4344

4445
public:
@@ -99,7 +100,7 @@ struct Stack {
99100
Stack(const allocator_type allocatorArg, Headers&&... headers)
100101
: allocator{allocatorArg},
101102
bufferSize{calculateSize(std::forward<Headers>(headers)...)},
102-
buffer{static_cast<std::byte*>(allocator.resource()->allocate(bufferSize, alignof(std::max_align_t))), freeobj{allocator.resource()}}
103+
buffer{static_cast<std::byte*>(allocator.resource()->allocate(bufferSize, alignof(std::max_align_t))), freeobj{allocator.resource(), bufferSize}}
103104
{
104105
if constexpr (sizeof...(headers) > 1) {
105106
injectAll(buffer.get(), std::forward<Headers>(headers)...);
@@ -142,7 +143,7 @@ struct Stack {
142143
private:
143144
allocator_type allocator{fair::mq::pmr::new_delete_resource()};
144145
size_t bufferSize{0};
145-
BufferType buffer{nullptr, freeobj{allocator.resource()}};
146+
BufferType buffer{nullptr, freeobj{allocator.resource(), 0}};
146147

147148
//______________________________________________________________________________________________
148149
template <typename T>

DataFormats/common/include/CommonDataFormat/InteractionRecord.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,7 @@ struct InteractionRecord {
281281
return tmp;
282282
}
283283

284-
#ifndef GPUCA_ALIGPUCODE
284+
#if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE)
285285
void print() const;
286286
std::string asString() const;
287287
friend std::ostream& operator<<(std::ostream& stream, InteractionRecord const& ir);
@@ -359,7 +359,7 @@ struct InteractionTimeRecord : public InteractionRecord {
359359
return !((*this) > other);
360360
}
361361

362-
#ifndef GPUCA_ALIGPUCODE
362+
#if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE)
363363
void print() const;
364364
std::string asString() const;
365365
friend std::ostream& operator<<(std::ostream& stream, InteractionTimeRecord const& ir);

Detectors/GlobalTrackingWorkflow/tpcinterpolationworkflow/src/tpc-residual-aggregator.cxx

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,17 @@
1414
#include "TPCInterpolationWorkflow/TPCResidualAggregatorSpec.h"
1515
#include "TPCInterpolationWorkflow/TPCUnbinnedResidualReaderSpec.h"
1616
#include "GlobalTrackingWorkflowHelpers/InputHelper.h"
17+
#include "DetectorsRaw/HBFUtilsInitializer.h"
18+
#include "Framework/CallbacksPolicy.h"
1719

1820
using namespace o2::framework;
1921
using GID = o2::dataformats::GlobalTrackID;
2022

23+
void customize(std::vector<o2::framework::CallbacksPolicy>& policies)
24+
{
25+
o2::raw::HBFUtilsInitializer::addNewTimeSliceCallback(policies);
26+
}
27+
2128
// we need to add workflow options before including Framework/runDataProcessing
2229
void customize(std::vector<o2::framework::ConfigParamSpec>& workflowOptions)
2330
{
@@ -27,6 +34,7 @@ void customize(std::vector<o2::framework::ConfigParamSpec>& workflowOptions)
2734
{"enable-ctp", VariantType::Bool, false, {"Subscribe to lumi info from CTP"}},
2835
{"disable-root-input", VariantType::Bool, false, {"disable root-files input readers"}},
2936
{"configKeyValues", VariantType::String, "", {"Semicolon separated key=value strings ..."}}};
37+
o2::raw::HBFUtilsInitializer::addConfigOption(options);
3038
std::swap(workflowOptions, options);
3139
}
3240

@@ -79,5 +87,8 @@ WorkflowSpec defineDataProcessing(ConfigContext const& configcontext)
7987
o2::globaltracking::InputHelper::addInputSpecs(configcontext, specs, maskClusters, maskNone, maskNone, false);
8088
}
8189

90+
// configure dpl timer to inject correct firstTForbit: start from the 1st orbit of TF containing 1st sampled orbit
91+
o2::raw::HBFUtilsInitializer hbfIni(configcontext, specs);
92+
8293
return specs;
8394
}

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ template <int nLayers = 7>
2828
class TimeFrameGPU : public TimeFrame<nLayers>
2929
{
3030
using typename TimeFrame<nLayers>::CellSeedN;
31+
using typename TimeFrame<nLayers>::IndexTableUtilsN;
3132

3233
public:
3334
TimeFrameGPU();
@@ -36,8 +37,8 @@ class TimeFrameGPU : public TimeFrame<nLayers>
3637
/// Most relevant operations
3738
void registerHostMemory(const int);
3839
void unregisterHostMemory(const int);
39-
void initialise(const int, const TrackingParameters&, const int, IndexTableUtils* utils = nullptr, const TimeFrameGPUParameters* pars = nullptr);
40-
void initDevice(IndexTableUtils*, const TrackingParameters& trkParam, const TimeFrameGPUParameters&, const int, const int);
40+
void initialise(const int, const TrackingParameters&, const int, IndexTableUtilsN* utils = nullptr, const TimeFrameGPUParameters* pars = nullptr);
41+
void initDevice(IndexTableUtilsN*, const TrackingParameters& trkParam, const TimeFrameGPUParameters&, const int, const int);
4142
void initDeviceSAFitting();
4243
void loadIndexTableUtils(const int);
4344
void loadTrackingFrameInfoDevice(const int, const int);
@@ -98,7 +99,7 @@ class TimeFrameGPU : public TimeFrame<nLayers>
9899

99100
/// interface
100101
int getNClustersInRofSpan(const int, const int, const int) const;
101-
IndexTableUtils* getDeviceIndexTableUtils() { return mIndexTableUtilsDevice; }
102+
IndexTableUtilsN* getDeviceIndexTableUtils() { return mIndexTableUtilsDevice; }
102103
int* getDeviceROFramesClusters(const int layer) { return mROFramesClustersDevice[layer]; }
103104
auto& getTrackITSExt() { return mTrackITSExt; }
104105
Vertex* getDeviceVertices() { return mPrimaryVerticesDevice; }
@@ -165,7 +166,7 @@ class TimeFrameGPU : public TimeFrame<nLayers>
165166
std::array<int, nLayers - 3> mNNeighbours;
166167

167168
// Device pointers
168-
IndexTableUtils* mIndexTableUtilsDevice;
169+
IndexTableUtilsN* mIndexTableUtilsDevice;
169170

170171
// Hybrid pref
171172
uint8_t* mMultMaskDevice;

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackerTraitsGPU.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ namespace o2::its
2222
template <int nLayers = 7>
2323
class TrackerTraitsGPU final : public TrackerTraits<nLayers>
2424
{
25+
using typename TrackerTraits<nLayers>::IndexTableUtilsN;
26+
2527
public:
2628
TrackerTraitsGPU() = default;
2729
~TrackerTraitsGPU() final = default;
@@ -48,7 +50,7 @@ class TrackerTraitsGPU final : public TrackerTraits<nLayers>
4850
int getTFNumberOfCells() const override;
4951

5052
private:
51-
IndexTableUtils* mDeviceIndexTableUtils;
53+
IndexTableUtilsN* mDeviceIndexTableUtils;
5254
gpu::TimeFrameGPU<nLayers>* mTimeFrameGPU;
5355
};
5456

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,14 @@ template <int>
2727
class CellSeed;
2828
class TrackingFrameInfo;
2929
class Tracklet;
30+
template <int>
3031
class IndexTableUtils;
3132
class Cluster;
3233
class TrackITSExt;
3334
class ExternalAllocator;
3435

3536
template <int nLayers = 7>
36-
void countTrackletsInROFsHandler(const IndexTableUtils* utils,
37+
void countTrackletsInROFsHandler(const IndexTableUtils<nLayers>* utils,
3738
const uint8_t* multMask,
3839
const int layer,
3940
const int startROF,
@@ -66,7 +67,7 @@ void countTrackletsInROFsHandler(const IndexTableUtils* utils,
6667
gpu::Streams& streams);
6768

6869
template <int nLayers = 7>
69-
void computeTrackletsInROFsHandler(const IndexTableUtils* utils,
70+
void computeTrackletsInROFsHandler(const IndexTableUtils<nLayers>* utils,
7071
const uint8_t* multMask,
7172
const int layer,
7273
const int startROF,

Detectors/ITSMFT/ITS/tracking/GPU/cuda/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@ if(CUDA_ENABLED)
2222
TimeFrameGPU.cu
2323
TracerGPU.cu
2424
TrackingKernels.cu
25-
VertexingKernels.cu
26-
VertexerTraitsGPU.cxx
25+
# VertexingKernels.cu
26+
# VertexerTraitsGPU.cxx
2727
PUBLIC_INCLUDE_DIRECTORIES ../
2828
PUBLIC_LINK_LIBRARIES O2::ITStracking
2929
O2::SimConfig

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -61,11 +61,11 @@ void TimeFrameGPU<nLayers>::loadIndexTableUtils(const int iteration)
6161
{
6262
GPUTimer timer("loading indextable utils");
6363
if (!iteration) {
64-
GPULog("gpu-allocation: allocating IndexTableUtils buffer, for {:.2f} MB.", sizeof(IndexTableUtils) / constants::MB);
65-
allocMem(reinterpret_cast<void**>(&mIndexTableUtilsDevice), sizeof(IndexTableUtils), this->getExtAllocator());
64+
GPULog("gpu-allocation: allocating IndexTableUtils buffer, for {:.2f} MB.", sizeof(IndexTableUtilsN) / constants::MB);
65+
allocMem(reinterpret_cast<void**>(&mIndexTableUtilsDevice), sizeof(IndexTableUtilsN), this->getExtAllocator());
6666
}
67-
GPULog("gpu-transfer: loading IndexTableUtils object, for {:.2f} MB.", sizeof(IndexTableUtils) / constants::MB);
68-
GPUChkErrS(cudaMemcpy(mIndexTableUtilsDevice, &(this->mIndexTableUtils), sizeof(IndexTableUtils), cudaMemcpyHostToDevice));
67+
GPULog("gpu-transfer: loading IndexTableUtils object, for {:.2f} MB.", sizeof(IndexTableUtilsN) / constants::MB);
68+
GPUChkErrS(cudaMemcpy(mIndexTableUtilsDevice, &(this->mIndexTableUtils), sizeof(IndexTableUtilsN), cudaMemcpyHostToDevice));
6969
}
7070

7171
template <int nLayers>
@@ -547,7 +547,7 @@ template <int nLayers>
547547
void TimeFrameGPU<nLayers>::initialise(const int iteration,
548548
const TrackingParameters& trkParam,
549549
const int maxLayers,
550-
IndexTableUtils* utils,
550+
IndexTableUtilsN* utils,
551551
const TimeFrameGPUParameters* gpuParam)
552552
{
553553
mGpuStreams.resize(nLayers);

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -95,8 +95,9 @@ GPUdii() int4 getEmptyBinsRect()
9595
return int4{0, 0, 0, 0};
9696
}
9797

98+
template <int nLayers>
9899
GPUdii() const int4 getBinsRect(const Cluster& currentCluster, const int layerIndex,
99-
const o2::its::IndexTableUtils& utils,
100+
const IndexTableUtils<nLayers>& utils,
100101
const float z1, const float z2, float maxdeltaz, float maxdeltaphi)
101102
{
102103
const float zRangeMin = o2::gpu::CAMath::Min(z1, z2) - maxdeltaz;
@@ -331,7 +332,7 @@ GPUg() void fitTrackSeedsKernel(
331332
temporaryTrack.resetCovariance();
332333
temporaryTrack.setChi2(0);
333334
auto& clusters = seed.getClusters();
334-
for (int iL{0}; iL < 7; ++iL) {
335+
for (int iL{0}; iL < nLayers; ++iL) {
335336
temporaryTrack.setExternalClusterIndex(iL, clusters[iL], clusters[iL] != constants::UnusedIndex);
336337
}
337338
bool fitSuccess = fitTrack(temporaryTrack, // TrackITSExt& track,
@@ -523,9 +524,9 @@ GPUg() void computeLayerCellsKernel(
523524
}
524525
}
525526

526-
template <bool initRun>
527+
template <bool initRun, int nLayers>
527528
GPUg() void computeLayerTrackletsMultiROFKernel(
528-
const IndexTableUtils* utils,
529+
const IndexTableUtils<nLayers>* utils,
529530
const uint8_t* multMask,
530531
const int layerIndex,
531532
const int startROF,
@@ -601,7 +602,7 @@ GPUg() void computeLayerTrackletsMultiROFKernel(
601602
const float zAtRmax{tanLambda * (maxR - currentCluster.radius) + currentCluster.zCoordinate};
602603
const float sqInverseDeltaZ0{1.f / (math_utils::Sq(currentCluster.zCoordinate - primaryVertex.getZ()) + constants::Tolerance)}; /// protecting from overflows adding the detector resolution
603604
const float sigmaZ{o2::gpu::CAMath::Sqrt(math_utils::Sq(resolution) * math_utils::Sq(tanLambda) * ((math_utils::Sq(inverseR0) + sqInverseDeltaZ0) * math_utils::Sq(meanDeltaR) + 1.f) + math_utils::Sq(meanDeltaR * MSAngle))};
604-
const int4 selectedBinsRect{getBinsRect(currentCluster, layerIndex + 1, *utils, zAtRmin, zAtRmax, sigmaZ * NSigmaCut, phiCut)};
605+
const int4 selectedBinsRect{getBinsRect<nLayers>(currentCluster, layerIndex + 1, *utils, zAtRmin, zAtRmax, sigmaZ * NSigmaCut, phiCut)};
605606
if (selectedBinsRect.x == 0 && selectedBinsRect.y == 0 && selectedBinsRect.z == 0 && selectedBinsRect.w == 0) {
606607
continue;
607608
}
@@ -769,7 +770,7 @@ GPUhi() void deallocateMemory(void* p, size_t bytes, cudaStream_t stream = nullp
769770
} // namespace gpu
770771

771772
template <int nLayers>
772-
void countTrackletsInROFsHandler(const IndexTableUtils* utils,
773+
void countTrackletsInROFsHandler(const IndexTableUtils<nLayers>* utils,
773774
const uint8_t* multMask,
774775
const int layer,
775776
const int startROF,
@@ -833,7 +834,7 @@ void countTrackletsInROFsHandler(const IndexTableUtils* utils,
833834
}
834835

835836
template <int nLayers>
836-
void computeTrackletsInROFsHandler(const IndexTableUtils* utils,
837+
void computeTrackletsInROFsHandler(const IndexTableUtils<nLayers>* utils,
837838
const uint8_t* multMask,
838839
const int layer,
839840
const int startROF,
@@ -1241,7 +1242,7 @@ void trackSeedHandler(CellSeed<nLayers>* trackSeeds,
12411242
}
12421243

12431244
/// Explicit instantiation of ITS2 handlers
1244-
template void countTrackletsInROFsHandler<7>(const IndexTableUtils* utils,
1245+
template void countTrackletsInROFsHandler<7>(const IndexTableUtils<7>* utils,
12451246
const uint8_t* multMask,
12461247
const int layer,
12471248
const int startROF,
@@ -1273,7 +1274,7 @@ template void countTrackletsInROFsHandler<7>(const IndexTableUtils* utils,
12731274
const int nThreads,
12741275
gpu::Streams& streams);
12751276

1276-
template void computeTrackletsInROFsHandler<7>(const IndexTableUtils* utils,
1277+
template void computeTrackletsInROFsHandler<7>(const IndexTableUtils<7>* utils,
12771278
const uint8_t* multMask,
12781279
const int layer,
12791280
const int startROF,

0 commit comments

Comments
 (0)