Skip to content

Commit 4d0047c

Browse files
committed
GPU TPC: Fix deterministic mode in combination with propagation of MC labels
1 parent 7eb731a commit 4d0047c

File tree

2 files changed

+72
-21
lines changed

2 files changed

+72
-21
lines changed

GPU/GPUTracking/Global/GPUChainTracking.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,7 @@ class GPUChainTracking : public GPUChain
306306
void RunTPCClusterFilter(o2::tpc::ClusterNativeAccess* clusters, std::function<o2::tpc::ClusterNative*(size_t)> allocator, bool applyClusterCuts);
307307
bool NeedTPCClustersOnGPU();
308308
void WriteReducedClusters();
309+
void SortClusters(bool buildNativeGPU, bool propagateMCLabels, o2::tpc::ClusterNativeAccess* clusterAccess, o2::tpc::ClusterNative* clusters);
309310
template <int32_t I>
310311
int32_t RunTRDTrackingInternal();
311312
uint32_t StreamForSector(uint32_t sector) const;

GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx

Lines changed: 71 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@
5757
#include "utils/VcShim.h"
5858
#include "utils/strtag.h"
5959
#include <fstream>
60+
#include <numeric>
61+
#include <vector>
6062

6163
using namespace o2::gpu;
6264
using namespace o2::tpc;
@@ -762,14 +764,13 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
762764
ClusterNative* tmpNativeClusters = nullptr;
763765
std::unique_ptr<ClusterNative[]> tmpNativeClusterBuffer;
764766

765-
// setup MC Labels
766-
bool propagateMCLabels = GetProcessingSettings().runMC && processors()->ioPtrs.tpcPackedDigits && processors()->ioPtrs.tpcPackedDigits->tpcDigitsMC;
767+
const bool buildNativeGPU = doGPU && NeedTPCClustersOnGPU();
768+
const bool buildNativeHost = (mRec->GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCClusters) || GetProcessingSettings().deterministicGPUReconstruction; // TODO: Should do this also when clusters are needed for later steps on the host but not requested as output
769+
const bool propagateMCLabels = buildNativeHost && GetProcessingSettings().runMC && processors()->ioPtrs.tpcPackedDigits && processors()->ioPtrs.tpcPackedDigits->tpcDigitsMC;
770+
const bool sortClusters = buildNativeHost && (GetProcessingSettings().deterministicGPUReconstruction || GetProcessingSettings().debugLevel >= 4);
767771

768772
auto* digitsMC = propagateMCLabels ? processors()->ioPtrs.tpcPackedDigits->tpcDigitsMC : nullptr;
769773

770-
bool buildNativeGPU = doGPU && NeedTPCClustersOnGPU();
771-
bool buildNativeHost = (mRec->GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCClusters) || GetProcessingSettings().deterministicGPUReconstruction; // TODO: Should do this also when clusters are needed for later steps on the host but not requested as output
772-
773774
mInputsHost->mNClusterNative = mInputsShadow->mNClusterNative = mRec->MemoryScalers()->nTPCHits * tpcHitLowOccupancyScalingFactor;
774775
if (buildNativeGPU) {
775776
AllocateRegisteredMemory(mInputsHost->mResourceClusterNativeBuffer);
@@ -1281,21 +1282,20 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
12811282
}
12821283

12831284
ClusterNativeAccess::ConstMCLabelContainerView* mcLabelsConstView = nullptr;
1284-
if (propagateMCLabels) {
1285-
// TODO: write to buffer directly
1285+
if (propagateMCLabels) { // TODO: write to buffer directly
12861286
o2::dataformats::MCTruthContainer<o2::MCCompLabel> mcLabels;
12871287
std::pair<ConstMCLabelContainer*, ConstMCLabelContainerView*> buffer;
1288-
if (!GetProcessingSettings().tpcWriteClustersAfterRejection && mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clusterLabels)] && mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clusterLabels)]->useExternal()) {
1289-
if (!mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clusterLabels)]->allocator) {
1288+
auto& labelOutputControl = mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clusterLabels)];
1289+
if (!GetProcessingSettings().tpcWriteClustersAfterRejection && !sortClusters && labelOutputControl && labelOutputControl->useExternal()) {
1290+
if (!labelOutputControl->allocator) {
12901291
throw std::runtime_error("Cluster MC Label buffer missing");
12911292
}
1292-
ClusterNativeAccess::ConstMCLabelContainerViewWithBuffer* container = reinterpret_cast<ClusterNativeAccess::ConstMCLabelContainerViewWithBuffer*>(mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clusterLabels)]->allocator(0));
1293+
ClusterNativeAccess::ConstMCLabelContainerViewWithBuffer* container = reinterpret_cast<ClusterNativeAccess::ConstMCLabelContainerViewWithBuffer*>(labelOutputControl->allocator(0));
12931294
buffer = {&container->first, &container->second};
12941295
} else {
12951296
mIOMem.clusterNativeMCView = std::make_unique<ConstMCLabelContainerView>();
12961297
mIOMem.clusterNativeMCBuffer = std::make_unique<ConstMCLabelContainer>();
1297-
buffer.first = mIOMem.clusterNativeMCBuffer.get();
1298-
buffer.second = mIOMem.clusterNativeMCView.get();
1298+
buffer = {mIOMem.clusterNativeMCBuffer.get(), mIOMem.clusterNativeMCView.get()};
12991299
}
13001300

13011301
assert(propagateMCLabels ? mcLinearLabels.header.size() == nClsTotal : true);
@@ -1350,15 +1350,8 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
13501350
if (doGPU && synchronizeCalibUpdate) {
13511351
SynchronizeStream(0);
13521352
}
1353-
if (buildNativeHost && (GetProcessingSettings().deterministicGPUReconstruction || GetProcessingSettings().debugLevel >= 4)) {
1354-
for (uint32_t i = 0; i < NSECTORS; i++) {
1355-
for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) {
1356-
std::sort(&tmpNativeClusters[tmpNativeAccess->clusterOffset[i][j]], &tmpNativeClusters[tmpNativeAccess->clusterOffset[i][j] + tmpNativeAccess->nClusters[i][j]]);
1357-
}
1358-
}
1359-
if (buildNativeGPU) {
1360-
GPUMemCpy(RecoStep::TPCClusterFinding, (void*)mInputsShadow->mPclusterNativeBuffer, (const void*)tmpNativeClusters, nClsTotal * sizeof(tmpNativeClusters[0]), -1, true);
1361-
}
1353+
if (sortClusters) {
1354+
SortClusters(buildNativeGPU, propagateMCLabels, tmpNativeAccess, tmpNativeClusters);
13621355
}
13631356
mRec->MemoryScalers()->nTPCHits = nClsTotal;
13641357
mRec->PopNonPersistentMemory(RecoStep::TPCClusterFinding, qStr2Tag("TPCCLUST"));
@@ -1374,3 +1367,60 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
13741367
#endif
13751368
return 0;
13761369
}
1370+
1371+
void GPUChainTracking::SortClusters(bool buildNativeGPU, bool propagateMCLabels, ClusterNativeAccess* clusterAccess, ClusterNative* clusters)
1372+
{
1373+
if (propagateMCLabels) {
1374+
std::vector<uint32_t> clsOrder(clusterAccess->nClustersTotal);
1375+
std::iota(clsOrder.begin(), clsOrder.end(), 0);
1376+
std::vector<ClusterNative> tmpClusters;
1377+
for (uint32_t i = 0; i < NSECTORS; i++) {
1378+
for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) {
1379+
const uint32_t offset = clusterAccess->clusterOffset[i][j];
1380+
std::sort(&clsOrder[offset], &clsOrder[offset + clusterAccess->nClusters[i][j]], [&clusters](const uint32_t a, const uint32_t b) {
1381+
return clusters[a] < clusters[b];
1382+
});
1383+
tmpClusters.resize(clusterAccess->nClusters[i][j]);
1384+
memcpy(tmpClusters.data(), &clusters[offset], clusterAccess->nClusters[i][j] * sizeof(tmpClusters[0]));
1385+
for (uint32_t k = 0; k < tmpClusters.size(); k++) {
1386+
clusters[offset + k] = tmpClusters[clsOrder[offset + k] - offset];
1387+
}
1388+
}
1389+
}
1390+
tmpClusters.clear();
1391+
1392+
std::pair<o2::dataformats::ConstMCLabelContainer*, o2::dataformats::ConstMCLabelContainerView*> labelBuffer;
1393+
GPUOutputControl* labelOutput = mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clusterLabels)];
1394+
std::unique_ptr<ConstMCLabelContainerView> tmpUniqueContainerView;
1395+
std::unique_ptr<ConstMCLabelContainer> tmpUniqueContainerBuffer;
1396+
if (labelOutput && labelOutput->allocator) {
1397+
ClusterNativeAccess::ConstMCLabelContainerViewWithBuffer* labelContainer = reinterpret_cast<ClusterNativeAccess::ConstMCLabelContainerViewWithBuffer*>(labelOutput->allocator(0));
1398+
labelBuffer = {&labelContainer->first, &labelContainer->second};
1399+
} else {
1400+
tmpUniqueContainerView = std::move(mIOMem.clusterNativeMCView);
1401+
tmpUniqueContainerBuffer = std::move(mIOMem.clusterNativeMCBuffer);
1402+
mIOMem.clusterNativeMCView = std::make_unique<ConstMCLabelContainerView>();
1403+
mIOMem.clusterNativeMCBuffer = std::make_unique<ConstMCLabelContainer>();
1404+
labelBuffer = {mIOMem.clusterNativeMCBuffer.get(), mIOMem.clusterNativeMCView.get()};
1405+
}
1406+
1407+
o2::dataformats::MCLabelContainer tmpContainer;
1408+
for (uint32_t i = 0; i < clusterAccess->nClustersTotal; i++) {
1409+
for (const auto& element : clusterAccess->clustersMCTruth->getLabels(clsOrder[i])) {
1410+
tmpContainer.addElement(i, element);
1411+
}
1412+
}
1413+
tmpContainer.flatten_to(*labelBuffer.first);
1414+
*labelBuffer.second = *labelBuffer.first;
1415+
clusterAccess->clustersMCTruth = labelBuffer.second;
1416+
} else {
1417+
for (uint32_t i = 0; i < NSECTORS; i++) {
1418+
for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) {
1419+
std::sort(&clusters[clusterAccess->clusterOffset[i][j]], &clusters[clusterAccess->clusterOffset[i][j] + clusterAccess->nClusters[i][j]]);
1420+
}
1421+
}
1422+
}
1423+
if (buildNativeGPU) {
1424+
GPUMemCpy(RecoStep::TPCClusterFinding, (void*)mInputsShadow->mPclusterNativeBuffer, (const void*)clusters, clusterAccess->nClustersTotal * sizeof(clusters[0]), -1, true);
1425+
}
1426+
}

0 commit comments

Comments
 (0)