Skip to content

Commit 0d4226a

Browse files
authored
Merge cb1651e into sapling-pr-archive-ktf
2 parents 164bef8 + cb1651e commit 0d4226a

File tree

10 files changed

+315
-181
lines changed

10 files changed

+315
-181
lines changed

Framework/Core/src/ComputingQuotaEvaluator.cxx

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,7 @@ void ComputingQuotaEvaluator::updateOffers(std::vector<ComputingQuotaOffer>& pen
247247
{
248248
O2_SIGNPOST_ID_GENERATE(oid, quota);
249249
O2_SIGNPOST_START(quota, oid, "updateOffers", "Starting to processe received offers");
250+
int lastValid = -1;
250251
for (size_t oi = 0; oi < mOffers.size(); oi++) {
251252
auto& storeOffer = mOffers[oi];
252253
auto& info = mInfos[oi];
@@ -256,6 +257,9 @@ void ComputingQuotaEvaluator::updateOffers(std::vector<ComputingQuotaOffer>& pen
256257
}
257258
if (storeOffer.valid == true) {
258259
O2_SIGNPOST_EVENT_EMIT(quota, oid, "updateOffers", "Skipping update of offer %zu because it's still valid", oi);
260+
// In general we want to fill an invalid offer. If we do not find any,
261+
// we add the remaining pending offers to the last valid offer we found.
262+
lastValid = oi;
259263
continue;
260264
}
261265
info.received = now;
@@ -266,7 +270,20 @@ void ComputingQuotaEvaluator::updateOffers(std::vector<ComputingQuotaOffer>& pen
266270
storeOffer.valid = true;
267271
pending.pop_back();
268272
}
269-
O2_SIGNPOST_END_WITH_ERROR(quota, oid, "updateOffers", "Some of the pending offers were not treated");
273+
if (lastValid == -1) {
274+
O2_SIGNPOST_END_WITH_ERROR(quota, oid, "updateOffers", "ComputingQuotaOffer losts. This should never happen.");
275+
return;
276+
}
277+
auto& lastValidOffer = mOffers[lastValid];
278+
for (auto& stillPending : pending) {
279+
lastValidOffer.cpu += stillPending.cpu;
280+
lastValidOffer.memory += stillPending.memory;
281+
lastValidOffer.sharedMemory += stillPending.sharedMemory;
282+
lastValidOffer.timeslices += stillPending.timeslices;
283+
lastValidOffer.runtime = std::max(lastValidOffer.runtime, stillPending.runtime);
284+
}
285+
pending.clear();
286+
O2_SIGNPOST_END(quota, oid, "updateOffers", "Remaining offers cohalesced to %d", lastValid);
270287
}
271288

272289
void ComputingQuotaEvaluator::handleExpired(std::function<void(ComputingQuotaOffer const&, ComputingQuotaStats const& stats)> expirator)

GPU/GPUTracking/DataCompression/GPUTPCClusterRejection.h

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,19 @@
1616
#define GPUTPCCLUSTERREJECTION_H
1717

1818
#include "GPUTPCGMMergerTypes.h"
19+
#include "GPUCommonMath.h"
1920

2021
namespace o2::gpu
2122
{
2223
struct GPUTPCClusterRejection {
24+
template <class T, class S>
25+
GPUdi() static bool IsTrackRejected(const T& trk, const S& param)
26+
{
27+
return CAMath::Abs(trk.GetParam().GetQPt() * param.qptB5Scaler) > param.rec.tpc.rejectQPtB5 || trk.MergedLooper();
28+
}
29+
2330
template <bool C, class T = void, class S = void>
24-
static constexpr inline bool GetProtectionStatus(int32_t attach, bool& physics, bool& protect, T* counts = nullptr, S* mev200 = nullptr)
31+
GPUdi() static constexpr bool GetRejectionStatus(int32_t attach, bool& physics, T* counts = nullptr, S* mev200 = nullptr)
2532
{
2633
(void)counts; // FIXME: Avoid incorrect -Wunused-but-set-parameter warning
2734
(void)mev200;
@@ -39,7 +46,6 @@ struct GPUTPCClusterRejection {
3946
}
4047
retVal = true;
4148
} else if (attach & gputpcgmmergertypes::attachTube) {
42-
protect = true;
4349
if constexpr (C) {
4450
if (*mev200) {
4551
counts->nTube200++;
@@ -49,7 +55,6 @@ struct GPUTPCClusterRejection {
4955
}
5056
retVal = false;
5157
} else if ((attach & gputpcgmmergertypes::attachGood) == 0) {
52-
protect = true;
5358
if constexpr (C) {
5459
counts->nRejected++;
5560
}
@@ -60,16 +65,15 @@ struct GPUTPCClusterRejection {
6065
}
6166

6267
if (attach & gputpcgmmergertypes::attachProtect) {
63-
protect = true;
6468
retVal = false;
6569
}
6670
return retVal;
6771
}
6872

69-
static constexpr inline bool GetIsRejected(int32_t attach)
73+
GPUdi() static constexpr bool GetIsRejected(int32_t attach)
7074
{
71-
bool physics = false, protect = false;
72-
return GetProtectionStatus<false>(attach, physics, protect);
75+
bool physics = false;
76+
return GetRejectionStatus<false>(attach, physics);
7377
}
7478
};
7579
} // namespace o2::gpu

GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ void GPUTPCCompression::RegisterMemoryAllocation()
111111
if (gatherMode == 3) {
112112
mMemoryResOutputGPU = mRec->RegisterMemoryAllocation(this, &GPUTPCCompression::SetPointersOutputGPU, GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_GPU | GPUMemoryResource::MEMORY_CUSTOM | GPUMemoryResource::MEMORY_STACK, "TPCCompressionOutputGPU");
113113
}
114-
uint32_t stackScratch = (gatherMode != 3) ? GPUMemoryResource::MEMORY_STACK : 0;
114+
uint32_t stackScratch = (gatherMode != 3) ? GPUMemoryResource::MEMORY_STACK : 0; // TODO: Can we use stacked memory also with gather mode 3?
115115
if (gatherMode < 2) {
116116
mRec->RegisterMemoryAllocation(this, &GPUTPCCompression::SetPointersOutput, GPUMemoryResource::MEMORY_OUTPUT | stackScratch, "TPCCompressionOutput");
117117
}

GPU/GPUTracking/DataCompression/GPUTPCCompression.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ class GPUTPCCompression : public GPUProcessor
6060
#ifndef GPUCA_GPUCODE
6161
void DumpCompressedClusters(std::ostream& out);
6262
#endif
63+
GPUd() bool rejectCluster(int32_t idx, const GPUParam& param, const GPUTrackingInOutPointers& ioPtrs) const;
6364

6465
protected:
6566
struct memory {
@@ -89,7 +90,6 @@ class GPUTPCCompression : public GPUProcessor
8990
void SetPointersCompressedClusters(void*& mem, T& c, uint32_t nClA, uint32_t nTr, uint32_t nClU, bool reducedClA);
9091
template <class T>
9192
GPUd() static void truncateSignificantBits(T& val, uint32_t nBits, uint32_t max);
92-
GPUd() bool rejectCluster(int32_t idx, GPUParam& param, const GPUTrackingInOutPointers& ioPtrs);
9393

9494
int16_t mMemoryResOutputHost = -1;
9595
int16_t mMemoryResOutputGPU = -1;

GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ GPUdii() void GPUTPCCompressionKernels::Thread<GPUTPCCompressionKernels::step0at
3939
if (!trk.OK()) {
4040
continue;
4141
}
42-
bool rejectTrk = CAMath::Abs(trk.GetParam().GetQPt() * processors.param.qptB5Scaler) > processors.param.rec.tpc.rejectQPtB5 || trk.MergedLooper();
42+
bool rejectTrk = GPUTPCClusterRejection::IsTrackRejected(trk, param);
4343
uint32_t nClustersStored = 0;
4444
CompressedClustersPtrs& GPUrestrict() c = compressor.mPtrs;
4545
uint8_t lastRow = 0, lastSector = 0;
@@ -185,7 +185,7 @@ GPUd() bool GPUTPCCompressionKernels::GPUTPCCompressionKernels_Compare<4>::opera
185185
return mClsPtr[a].qTot < mClsPtr[b].qTot;
186186
}
187187

188-
GPUd() bool GPUTPCCompression::rejectCluster(int32_t idx, GPUParam& GPUrestrict() param, const GPUTrackingInOutPointers& GPUrestrict() ioPtrs)
188+
GPUd() bool GPUTPCCompression::rejectCluster(int32_t idx, const GPUParam& GPUrestrict() param, const GPUTrackingInOutPointers& GPUrestrict() ioPtrs) const
189189
{
190190
if (mClusterStatus[idx]) {
191191
return true;
@@ -206,7 +206,7 @@ GPUd() bool GPUTPCCompression::rejectCluster(int32_t idx, GPUParam& GPUrestrict(
206206
}
207207
int32_t id = attach & gputpcgmmergertypes::attachTrackMask;
208208
auto& trk = ioPtrs.mergedTracks[id];
209-
if (CAMath::Abs(trk.GetParam().GetQPt() * param.qptB5Scaler) > param.rec.tpc.rejectQPtB5 || trk.MergedLooper()) {
209+
if (GPUTPCClusterRejection::IsTrackRejected(trk, param)) {
210210
return true;
211211
}
212212
}

GPU/GPUTracking/Definitions/GPUSettingsList.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -525,6 +525,7 @@ AddOption(histMaxNClusters, uint32_t, 500000000, "", 0, "Maximum number of clust
525525
AddOption(minNClFindable, uint32_t, 70, "", 0, "Minimum number of (weighted) MC clusters for a track to count as findable")
526526
AddOption(minNClEff, uint32_t, 10, "", 0, "Minimum number of (weighted) MC clusters for a track to contribute to all-tracks efficiency histogramm")
527527
AddOption(minNClRes, uint32_t, 40, "", 0, "Minimum number of (weighted) MC clusters for a track to contribute to resolution histogram")
528+
AddOption(perfFigure, int32_t, 0, "", 0, "Show as performance figure, positive value for MC, negative value for data")
528529
AddShortcut("compare", 0, "--QAinput", "Compare QA histograms", "--qa", "--QAinputHistogramsOnly")
529530
AddHelp("help", 'h')
530531
EndConfig()
@@ -595,6 +596,7 @@ AddOption(stripDumpedEvents, bool, false, "", 0, "Remove redundant inputs (e.g.
595596
AddOption(printSettings, int32_t, 0, "", 0, "Print all settings", def(1))
596597
AddOption(testSyncAsync, bool, false, "syncAsync", 0, "Test first synchronous and then asynchronous processing")
597598
AddOption(testSync, bool, false, "sync", 0, "Test settings for synchronous phase")
599+
AddOption(testSyncAsyncQcInSync, bool, false, "syncAsyncSyncQC", 0, "Run QC in sync phase of testSyncAsync")
598600
AddOption(timeFrameTime, bool, false, "tfTime", 0, "Print some debug information about time frame processing time")
599601
AddOption(controlProfiler, bool, false, "", 0, "Issues GPU profiler stop and start commands to profile only the relevant processing part")
600602
AddOption(preloadEvents, bool, false, "", 0, "Preload events into host memory before start processing")

GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -587,7 +587,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
587587
return ForwardTPCDigits();
588588
}
589589
#ifdef GPUCA_TPC_GEOMETRY_O2
590-
[[maybe_unused]] int32_t tpcTimeBinCut = mUpdateNewCalibObjects && mNewCalibValues->newTPCTimeBinCut ? mNewCalibValues->tpcTimeBinCut : param().tpcCutTimeBin;
590+
[[maybe_unused]] int32_t tpcTimeBinCut = (mUpdateNewCalibObjects && mNewCalibValues->newTPCTimeBinCut) ? mNewCalibValues->tpcTimeBinCut : param().tpcCutTimeBin; // TODO: Implement time bin cut filtering
591591
mRec->PushNonPersistentMemory(qStr2Tag("TPCCLUST"));
592592
const auto& threadContext = GetThreadContext();
593593
const bool doGPU = GetRecoStepsGPU() & RecoStep::TPCClusterFinding;

GPU/GPUTracking/Standalone/Benchmark/standalone.cxx

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ extern GPUSettingsStandalone configStandalone;
7272
}
7373

7474
GPUReconstruction *rec, *recAsync, *recPipeline;
75+
uint32_t syncAsyncDecodedClusters = 0;
7576
GPUChainTracking *chainTracking, *chainTrackingAsync, *chainTrackingPipeline;
7677
GPUChainITS *chainITS, *chainITSAsync, *chainITSPipeline;
7778
std::string eventsDir;
@@ -430,7 +431,7 @@ int32_t SetupReconstruction()
430431
}
431432
}
432433

433-
bool runAsyncQA = procSet.runQA;
434+
bool runAsyncQA = procSet.runQA && !configStandalone.testSyncAsyncQcInSync;
434435
if (configStandalone.testSyncAsync || configStandalone.testSync) {
435436
// Set settings for synchronous
436437
if (configStandalone.rundEdx == -1) {
@@ -439,7 +440,9 @@ int32_t SetupReconstruction()
439440
recSet.useMatLUT = false;
440441
if (configStandalone.testSyncAsync) {
441442
procSet.eventDisplay = nullptr;
442-
procSet.runQA = false;
443+
if (!configStandalone.testSyncAsyncQcInSync) {
444+
procSet.runQA = false;
445+
}
443446
}
444447
}
445448
if (configStandalone.proc.rtc.optSpecialCode == -1) {
@@ -664,12 +667,12 @@ int32_t RunBenchmark(GPUReconstruction* recUse, GPUChainTracking* chainTrackingU
664667
}
665668

666669
if (tmpRetVal == 0 && configStandalone.testSyncAsync) {
667-
if (configStandalone.testSyncAsync) {
668-
printf("Running asynchronous phase\n");
669-
}
670670

671671
vecpod<char> compressedTmpMem(chainTracking->mIOPtrs.tpcCompressedClusters->totalDataSize);
672672
memcpy(compressedTmpMem.data(), (const void*)chainTracking->mIOPtrs.tpcCompressedClusters, chainTracking->mIOPtrs.tpcCompressedClusters->totalDataSize);
673+
o2::tpc::CompressedClusters tmp(*chainTracking->mIOPtrs.tpcCompressedClusters);
674+
syncAsyncDecodedClusters = tmp.nAttachedClusters + tmp.nUnattachedClusters;
675+
printf("Running asynchronous phase from %'u compressed clusters\n", syncAsyncDecodedClusters);
673676

674677
chainTrackingAsync->mIOPtrs = ioPtrs;
675678
chainTrackingAsync->mIOPtrs.tpcCompressedClusters = (o2::tpc::CompressedClustersFlat*)compressedTmpMem.data();
@@ -937,6 +940,11 @@ int32_t main(int argc, char** argv)
937940
printf("%s (Measured %s time - Extrapolated from %d clusters to %d)\n", stat, configStandalone.proc.debugLevel ? "kernel" : "wall", (int32_t)nClusters, (int32_t)nClsPerTF);
938941
}
939942
}
943+
if (configStandalone.testSyncAsync && chainTracking->mIOPtrs.clustersNative && chainTrackingAsync->mIOPtrs.clustersNative) {
944+
uint32_t rejected = chainTracking->mIOPtrs.clustersNative->nClustersTotal - syncAsyncDecodedClusters;
945+
float rejectionPercentage = (rejected) * 100.f / chainTracking->mIOPtrs.clustersNative->nClustersTotal;
946+
printf("Cluster Rejection: Sync: %'u, Compressed %'u, Async %'u, Rejected %'u (%7.2f%%)\n", chainTracking->mIOPtrs.clustersNative->nClustersTotal, syncAsyncDecodedClusters, chainTrackingAsync->mIOPtrs.clustersNative->nClustersTotal, rejected, rejectionPercentage);
947+
}
940948

941949
if (configStandalone.preloadEvents && configStandalone.proc.doublePipeline) {
942950
break;

0 commit comments

Comments
 (0)