
Commit 8722184

GPU: Fix copying of cluster data to GPU when not all processing steps are running on GPU
1 parent: 5269f06

3 files changed: +18 -14 lines
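In brief: the clusterizer previously decided whether to build the GPU copy of the ClusterNative data with an inline four-way check of the GPU reco-step mask, while ConvertNativeToClusterData uploaded the clusters whenever a GPU was in use, even if no GPU step consumed them. This commit factors the check into a shared NeedTPCClustersOnGPU() helper and applies it in both places, so the device buffer is only allocated and copied when a downstream GPU step (conversion, slice tracking, merging, or compression) actually needs it. The standalone sketch below illustrates the bitmask pattern; the enum values and functions are simplified stand-ins, not the real GPUDataTypes definitions.

// Standalone sketch of the reco-step bitmask check that NeedTPCClustersOnGPU()
// encapsulates. RecoStep values here are simplified stand-ins, not the actual
// O2 GPUDataTypes definitions.
#include <cstdint>
#include <cstdio>

enum class RecoStep : std::uint32_t {
  TPCClusterFinding = 1u << 0,
  TPCConversion = 1u << 1,
  TPCSliceTracking = 1u << 2,
  TPCMerging = 1u << 3,
  TPCCompression = 1u << 4,
};

// Bit test between a step mask and a single step.
constexpr bool HasStep(std::uint32_t mask, RecoStep s) { return (mask & static_cast<std::uint32_t>(s)) != 0; }

// Analogue of GPUChainTracking::NeedTPCClustersOnGPU(): true if any step that
// consumes the ClusterNative buffer is scheduled to run on the GPU.
bool NeedTPCClustersOnGPU(std::uint32_t gpuSteps)
{
  return HasStep(gpuSteps, RecoStep::TPCConversion) || HasStep(gpuSteps, RecoStep::TPCSliceTracking) ||
         HasStep(gpuSteps, RecoStep::TPCMerging) || HasStep(gpuSteps, RecoStep::TPCCompression);
}

int main()
{
  // Only cluster finding runs on the GPU: no later GPU step reads the cluster
  // buffer, so the device copy can be skipped (the case this commit fixes).
  std::uint32_t gpuSteps = static_cast<std::uint32_t>(RecoStep::TPCClusterFinding);
  std::printf("need clusters on GPU: %d\n", NeedTPCClustersOnGPU(gpuSteps)); // prints 0

  // Adding a consumer (e.g. the merger) flips the decision.
  gpuSteps |= static_cast<std::uint32_t>(RecoStep::TPCMerging);
  std::printf("need clusters on GPU: %d\n", NeedTPCClustersOnGPU(gpuSteps)); // prints 1
  return 0;
}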

GPU/GPUTracking/Global/GPUChainTracking.h

Lines changed: 1 addition & 0 deletions
@@ -313,6 +313,7 @@ class GPUChainTracking : public GPUChain, GPUReconstructionHelpers::helperDelega
   void RunTPCTrackingMerger_MergeBorderTracks(int8_t withinSlice, int8_t mergeMode, GPUReconstruction::krnlDeviceType deviceType);
   void RunTPCTrackingMerger_Resolve(int8_t useOrigTrackParam, int8_t mergeAll, GPUReconstruction::krnlDeviceType deviceType);
   void RunTPCClusterFilter(o2::tpc::ClusterNativeAccess* clusters, std::function<o2::tpc::ClusterNative*(size_t)> allocator, bool applyClusterCuts);
+  bool NeedTPCClustersOnGPU();
 
   std::atomic_flag mLockAtomicOutputBuffer = ATOMIC_FLAG_INIT;
   std::mutex mMutexUpdateCalib;

GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx

Lines changed: 1 addition & 1 deletion
@@ -629,7 +629,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
 
   auto* digitsMC = propagateMCLabels ? processors()->ioPtrs.tpcPackedDigits->tpcDigitsMC : nullptr;
 
-  bool buildNativeGPU = (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCConversion) || (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCSliceTracking) || (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCMerging) || (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression);
+  bool buildNativeGPU = doGPU && NeedTPCClustersOnGPU();
   bool buildNativeHost = (mRec->GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCClusters) || GetProcessingSettings().deterministicGPUReconstruction; // TODO: Should do this also when clusters are needed for later steps on the host but not requested as output
 
   mInputsHost->mNClusterNative = mInputsShadow->mNClusterNative = mRec->MemoryScalers()->nTPCHits * tpcHitLowOccupancyScalingFactor;
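With this one-line change, building the device-side ClusterNative buffer is gated on the new helper: running only the clusterizer on the GPU no longer forces the buffer to be built there. A minimal sketch of the two build decisions around this hunk, with the framework objects replaced by plain parameters (the struct and function names are hypothetical, not part of the O2 API):

// Sketch of the two build decisions around this hunk; all inputs are plain booleans here.
struct ClusterBuildDecision {
  bool buildNativeGPU;  // allocate and fill the ClusterNative buffer on the device
  bool buildNativeHost; // keep a host-side copy (requested as output or deterministic mode)
};

ClusterBuildDecision DecideClusterBuffers(bool doGPU, bool needClustersOnGPU,
                                          bool clustersRequestedAsOutput, bool deterministicReco)
{
  ClusterBuildDecision d{};
  // New logic from this commit: the device buffer is built only when running on the GPU
  // *and* a later GPU step (conversion, slice tracking, merging, compression) consumes it.
  d.buildNativeGPU = doGPU && needClustersOnGPU;
  d.buildNativeHost = clustersRequestedAsOutput || deterministicReco;
  return d;
}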

GPU/GPUTracking/Global/GPUChainTrackingTransformation.cxx

Lines changed: 16 additions & 13 deletions
@@ -32,6 +32,11 @@
 using namespace GPUCA_NAMESPACE::gpu;
 using namespace o2::tpc;
 
+bool GPUChainTracking::NeedTPCClustersOnGPU()
+{
+  return (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCConversion) || (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCSliceTracking) || (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCMerging) || (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression);
+}
+
 int32_t GPUChainTracking::ConvertNativeToClusterData()
 {
 #ifdef GPUCA_HAVE_O2HEADERS
@@ -42,19 +47,17 @@ int32_t GPUChainTracking::ConvertNativeToClusterData()
   GPUTPCConvert& convertShadow = doGPU ? processorsShadow()->tpcConverter : convert;
 
   bool transferClusters = false;
-  if (doGPU) {
-    if (!(mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding)) {
-      mInputsHost->mNClusterNative = mInputsShadow->mNClusterNative = mIOPtrs.clustersNative->nClustersTotal;
-      AllocateRegisteredMemory(mInputsHost->mResourceClusterNativeBuffer);
-      processorsShadow()->ioPtrs.clustersNative = mInputsShadow->mPclusterNativeAccess;
-      WriteToConstantMemory(RecoStep::TPCConversion, (char*)&processors()->ioPtrs - (char*)processors(), &processorsShadow()->ioPtrs, sizeof(processorsShadow()->ioPtrs), 0);
-      *mInputsHost->mPclusterNativeAccess = *mIOPtrs.clustersNative;
-      mInputsHost->mPclusterNativeAccess->clustersLinear = mInputsShadow->mPclusterNativeBuffer;
-      mInputsHost->mPclusterNativeAccess->setOffsetPtrs();
-      GPUMemCpy(RecoStep::TPCConversion, mInputsShadow->mPclusterNativeBuffer, mIOPtrs.clustersNative->clustersLinear, sizeof(mIOPtrs.clustersNative->clustersLinear[0]) * mIOPtrs.clustersNative->nClustersTotal, 0, true);
-      TransferMemoryResourceLinkToGPU(RecoStep::TPCConversion, mInputsHost->mResourceClusterNativeAccess, 0);
-      transferClusters = true;
-    }
+  if (mRec->IsGPU() && !(mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding) && NeedTPCClustersOnGPU()) {
+    mInputsHost->mNClusterNative = mInputsShadow->mNClusterNative = mIOPtrs.clustersNative->nClustersTotal;
+    AllocateRegisteredMemory(mInputsHost->mResourceClusterNativeBuffer);
+    processorsShadow()->ioPtrs.clustersNative = mInputsShadow->mPclusterNativeAccess;
+    WriteToConstantMemory(RecoStep::TPCConversion, (char*)&processors()->ioPtrs - (char*)processors(), &processorsShadow()->ioPtrs, sizeof(processorsShadow()->ioPtrs), 0);
+    *mInputsHost->mPclusterNativeAccess = *mIOPtrs.clustersNative;
+    mInputsHost->mPclusterNativeAccess->clustersLinear = mInputsShadow->mPclusterNativeBuffer;
+    mInputsHost->mPclusterNativeAccess->setOffsetPtrs();
+    GPUMemCpy(RecoStep::TPCConversion, mInputsShadow->mPclusterNativeBuffer, mIOPtrs.clustersNative->clustersLinear, sizeof(mIOPtrs.clustersNative->clustersLinear[0]) * mIOPtrs.clustersNative->nClustersTotal, 0, true);
+    TransferMemoryResourceLinkToGPU(RecoStep::TPCConversion, mInputsHost->mResourceClusterNativeAccess, 0);
+    transferClusters = true;
   }
   if (!param().par.earlyTpcTransform) {
     if (GetProcessingSettings().debugLevel >= 3) {
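The refactored condition flattens the two nested ifs and additionally requires NeedTPCClustersOnGPU(), so when the chain runs on a GPU but none of the cluster-consuming steps do, the allocation, GPUMemCpy and access-struct transfer are skipped entirely. The sketch below shows the general upload pattern this branch performs, using the plain CUDA runtime API as a stand-in for the framework's GPUMemCpy / TransferMemoryResourceLinkToGPU wrappers; the struct layout is a simplified assumption, not the real ClusterNativeAccess.

// Simplified sketch of the upload in the branch above: copy the flat cluster
// array to the device, then upload an access struct whose clustersLinear
// pointer already refers to the device buffer. ClusterNative/ClusterAccess are
// hypothetical stand-ins for the O2 types.
#include <cstddef>
#include <cstdint>
#include <cuda_runtime.h>

struct ClusterNative { std::uint32_t packed[2]; }; // placeholder for the packed cluster fields

struct ClusterAccess {
  const ClusterNative* clustersLinear; // base pointer of the flat cluster array
  std::size_t nClustersTotal;          // total number of clusters in that array
};

// Returns a device-resident ClusterAccess, or nullptr if allocation fails.
ClusterAccess* UploadClusters(const ClusterAccess& host)
{
  // Device buffer for the linear cluster array (the GPUMemCpy in the diff).
  ClusterNative* dClusters = nullptr;
  if (cudaMalloc(reinterpret_cast<void**>(&dClusters), host.nClustersTotal * sizeof(ClusterNative)) != cudaSuccess) {
    return nullptr;
  }
  cudaMemcpy(dClusters, host.clustersLinear, host.nClustersTotal * sizeof(ClusterNative), cudaMemcpyHostToDevice);

  // Mirror of "*mPclusterNativeAccess = *clustersNative; ...->clustersLinear = device buffer;
  // setOffsetPtrs(); TransferMemoryResourceLinkToGPU(...)": patch the host copy of the access
  // struct to point at device memory, then ship the struct itself to the device.
  ClusterAccess patched = host;
  patched.clustersLinear = dClusters;

  ClusterAccess* dAccess = nullptr;
  if (cudaMalloc(reinterpret_cast<void**>(&dAccess), sizeof(ClusterAccess)) != cudaSuccess) {
    cudaFree(dClusters);
    return nullptr;
  }
  cudaMemcpy(dAccess, &patched, sizeof(ClusterAccess), cudaMemcpyHostToDevice);
  return dAccess;
}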
