5757#include " utils/VcShim.h"
5858#include " utils/strtag.h"
5959#include < fstream>
60+ #include < numeric>
61+ #include < vector>
6062
6163using namespace o2 ::gpu;
6264using namespace o2 ::tpc;
@@ -762,14 +764,13 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
762764 ClusterNative* tmpNativeClusters = nullptr ;
763765 std::unique_ptr<ClusterNative[]> tmpNativeClusterBuffer;
764766
765- // setup MC Labels
766- bool propagateMCLabels = GetProcessingSettings ().runMC && processors ()->ioPtrs .tpcPackedDigits && processors ()->ioPtrs .tpcPackedDigits ->tpcDigitsMC ;
767+ const bool buildNativeGPU = doGPU && NeedTPCClustersOnGPU ();
768+ const bool buildNativeHost = (mRec ->GetRecoStepsOutputs () & GPUDataTypes::InOutType::TPCClusters) || GetProcessingSettings ().deterministicGPUReconstruction ; // TODO: Should do this also when clusters are needed for later steps on the host but not requested as output
769+ const bool propagateMCLabels = buildNativeHost && GetProcessingSettings ().runMC && processors ()->ioPtrs .tpcPackedDigits && processors ()->ioPtrs .tpcPackedDigits ->tpcDigitsMC ;
770+ const bool sortClusters = buildNativeHost && (GetProcessingSettings ().deterministicGPUReconstruction || GetProcessingSettings ().debugLevel >= 4 );
767771
768772 auto * digitsMC = propagateMCLabels ? processors ()->ioPtrs .tpcPackedDigits ->tpcDigitsMC : nullptr ;
769773
770- bool buildNativeGPU = doGPU && NeedTPCClustersOnGPU ();
771- bool buildNativeHost = (mRec ->GetRecoStepsOutputs () & GPUDataTypes::InOutType::TPCClusters) || GetProcessingSettings ().deterministicGPUReconstruction ; // TODO: Should do this also when clusters are needed for later steps on the host but not requested as output
772-
773774 mInputsHost ->mNClusterNative = mInputsShadow ->mNClusterNative = mRec ->MemoryScalers ()->nTPCHits * tpcHitLowOccupancyScalingFactor;
774775 if (buildNativeGPU) {
775776 AllocateRegisteredMemory (mInputsHost ->mResourceClusterNativeBuffer );
@@ -1281,21 +1282,20 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
12811282 }
12821283
12831284 ClusterNativeAccess::ConstMCLabelContainerView* mcLabelsConstView = nullptr ;
1284- if (propagateMCLabels) {
1285- // TODO: write to buffer directly
1285+ if (propagateMCLabels) { // TODO: write to buffer directly
12861286 o2::dataformats::MCTruthContainer<o2::MCCompLabel> mcLabels;
12871287 std::pair<ConstMCLabelContainer*, ConstMCLabelContainerView*> buffer;
1288- if (!GetProcessingSettings ().tpcWriteClustersAfterRejection && mSubOutputControls [GPUTrackingOutputs::getIndex (&GPUTrackingOutputs::clusterLabels)] && mSubOutputControls [GPUTrackingOutputs::getIndex (&GPUTrackingOutputs::clusterLabels)]->useExternal ()) {
1289- if (!mSubOutputControls [GPUTrackingOutputs::getIndex (&GPUTrackingOutputs::clusterLabels)]->allocator ) {
1288+ auto & labelOutputControl = mSubOutputControls [GPUTrackingOutputs::getIndex (&GPUTrackingOutputs::clusterLabels)];
1289+ if (!GetProcessingSettings ().tpcWriteClustersAfterRejection && !sortClusters && labelOutputControl && labelOutputControl->useExternal ()) {
1290+ if (!labelOutputControl->allocator ) {
12901291 throw std::runtime_error (" Cluster MC Label buffer missing" );
12911292 }
1292- ClusterNativeAccess::ConstMCLabelContainerViewWithBuffer* container = reinterpret_cast <ClusterNativeAccess::ConstMCLabelContainerViewWithBuffer*>(mSubOutputControls [ GPUTrackingOutputs::getIndex (&GPUTrackingOutputs::clusterLabels)] ->allocator (0 ));
1293+ ClusterNativeAccess::ConstMCLabelContainerViewWithBuffer* container = reinterpret_cast <ClusterNativeAccess::ConstMCLabelContainerViewWithBuffer*>(labelOutputControl ->allocator (0 ));
12931294 buffer = {&container->first , &container->second };
12941295 } else {
12951296 mIOMem .clusterNativeMCView = std::make_unique<ConstMCLabelContainerView>();
12961297 mIOMem .clusterNativeMCBuffer = std::make_unique<ConstMCLabelContainer>();
1297- buffer.first = mIOMem .clusterNativeMCBuffer .get ();
1298- buffer.second = mIOMem .clusterNativeMCView .get ();
1298+ buffer = {mIOMem .clusterNativeMCBuffer .get (), mIOMem .clusterNativeMCView .get ()};
12991299 }
13001300
13011301 assert (propagateMCLabels ? mcLinearLabels.header .size () == nClsTotal : true );
@@ -1350,15 +1350,8 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
13501350 if (doGPU && synchronizeCalibUpdate) {
13511351 SynchronizeStream (0 );
13521352 }
1353- if (buildNativeHost && (GetProcessingSettings ().deterministicGPUReconstruction || GetProcessingSettings ().debugLevel >= 4 )) {
1354- for (uint32_t i = 0 ; i < NSECTORS; i++) {
1355- for (uint32_t j = 0 ; j < GPUCA_ROW_COUNT; j++) {
1356- std::sort (&tmpNativeClusters[tmpNativeAccess->clusterOffset [i][j]], &tmpNativeClusters[tmpNativeAccess->clusterOffset [i][j] + tmpNativeAccess->nClusters [i][j]]);
1357- }
1358- }
1359- if (buildNativeGPU) {
1360- GPUMemCpy (RecoStep::TPCClusterFinding, (void *)mInputsShadow ->mPclusterNativeBuffer , (const void *)tmpNativeClusters, nClsTotal * sizeof (tmpNativeClusters[0 ]), -1 , true );
1361- }
1353+ if (sortClusters) {
1354+ SortClusters (buildNativeGPU, propagateMCLabels, tmpNativeAccess, tmpNativeClusters);
13621355 }
13631356 mRec ->MemoryScalers ()->nTPCHits = nClsTotal;
13641357 mRec ->PopNonPersistentMemory (RecoStep::TPCClusterFinding, qStr2Tag (" TPCCLUST" ));
@@ -1374,3 +1367,60 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
13741367#endif
13751368 return 0 ;
13761369}
1370+
1371+ void GPUChainTracking::SortClusters (bool buildNativeGPU, bool propagateMCLabels, ClusterNativeAccess* clusterAccess, ClusterNative* clusters)
1372+ {
1373+ if (propagateMCLabels) {
1374+ std::vector<uint32_t > clsOrder (clusterAccess->nClustersTotal );
1375+ std::iota (clsOrder.begin (), clsOrder.end (), 0 );
1376+ std::vector<ClusterNative> tmpClusters;
1377+ for (uint32_t i = 0 ; i < NSECTORS; i++) {
1378+ for (uint32_t j = 0 ; j < GPUCA_ROW_COUNT; j++) {
1379+ const uint32_t offset = clusterAccess->clusterOffset [i][j];
1380+ std::sort (&clsOrder[offset], &clsOrder[offset + clusterAccess->nClusters [i][j]], [&clusters](const uint32_t a, const uint32_t b) {
1381+ return clusters[a] < clusters[b];
1382+ });
1383+ tmpClusters.resize (clusterAccess->nClusters [i][j]);
1384+ memcpy (tmpClusters.data (), &clusters[offset], clusterAccess->nClusters [i][j] * sizeof (tmpClusters[0 ]));
1385+ for (uint32_t k = 0 ; k < tmpClusters.size (); k++) {
1386+ clusters[offset + k] = tmpClusters[clsOrder[offset + k] - offset];
1387+ }
1388+ }
1389+ }
1390+ tmpClusters.clear ();
1391+
1392+ std::pair<o2::dataformats::ConstMCLabelContainer*, o2::dataformats::ConstMCLabelContainerView*> labelBuffer;
1393+ GPUOutputControl* labelOutput = mSubOutputControls [GPUTrackingOutputs::getIndex (&GPUTrackingOutputs::clusterLabels)];
1394+ std::unique_ptr<ConstMCLabelContainerView> tmpUniqueContainerView;
1395+ std::unique_ptr<ConstMCLabelContainer> tmpUniqueContainerBuffer;
1396+ if (labelOutput && labelOutput->allocator ) {
1397+ ClusterNativeAccess::ConstMCLabelContainerViewWithBuffer* labelContainer = reinterpret_cast <ClusterNativeAccess::ConstMCLabelContainerViewWithBuffer*>(labelOutput->allocator (0 ));
1398+ labelBuffer = {&labelContainer->first , &labelContainer->second };
1399+ } else {
1400+ tmpUniqueContainerView = std::move (mIOMem .clusterNativeMCView );
1401+ tmpUniqueContainerBuffer = std::move (mIOMem .clusterNativeMCBuffer );
1402+ mIOMem .clusterNativeMCView = std::make_unique<ConstMCLabelContainerView>();
1403+ mIOMem .clusterNativeMCBuffer = std::make_unique<ConstMCLabelContainer>();
1404+ labelBuffer = {mIOMem .clusterNativeMCBuffer .get (), mIOMem .clusterNativeMCView .get ()};
1405+ }
1406+
1407+ o2::dataformats::MCLabelContainer tmpContainer;
1408+ for (uint32_t i = 0 ; i < clusterAccess->nClustersTotal ; i++) {
1409+ for (const auto & element : clusterAccess->clustersMCTruth ->getLabels (clsOrder[i])) {
1410+ tmpContainer.addElement (i, element);
1411+ }
1412+ }
1413+ tmpContainer.flatten_to (*labelBuffer.first );
1414+ *labelBuffer.second = *labelBuffer.first ;
1415+ clusterAccess->clustersMCTruth = labelBuffer.second ;
1416+ } else {
1417+ for (uint32_t i = 0 ; i < NSECTORS; i++) {
1418+ for (uint32_t j = 0 ; j < GPUCA_ROW_COUNT; j++) {
1419+ std::sort (&clusters[clusterAccess->clusterOffset [i][j]], &clusters[clusterAccess->clusterOffset [i][j] + clusterAccess->nClusters [i][j]]);
1420+ }
1421+ }
1422+ }
1423+ if (buildNativeGPU) {
1424+ GPUMemCpy (RecoStep::TPCClusterFinding, (void *)mInputsShadow ->mPclusterNativeBuffer , (const void *)clusters, clusterAccess->nClustersTotal * sizeof (clusters[0 ]), -1 , true );
1425+ }
1426+ }
0 commit comments