Skip to content

Commit ea4a3b8

Browse files
authored
Merge 49ab21f into sapling-pr-archive-ktf
2 parents 05161ce + 49ab21f commit ea4a3b8

27 files changed

+327
-518
lines changed

Detectors/CTP/reconstruction/src/RawDataDecoder.cxx

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -615,8 +615,9 @@ int RawDataDecoder::checkReadoutConsistentncy(o2::pmr::vector<CTPDigit>& digits,
615615
continue;
616616
}
617617
mClassCountersA[i]++;
618-
if (cls->descriptor == nullptr)
618+
if (cls->descriptor == nullptr) {
619619
continue;
620+
}
620621
uint64_t clsinpmask = cls->descriptor->getInputsMask();
621622
uint64_t diginpmask = digit.CTPInputMask.to_ullong();
622623
if (!((clsinpmask & diginpmask) == clsinpmask)) {
@@ -632,8 +633,9 @@ int RawDataDecoder::checkReadoutConsistentncy(o2::pmr::vector<CTPDigit>& digits,
632633
// if inps => class mask
633634
for (auto const& cls : mCTPConfig.getCTPClasses()) {
634635
// cls.printStream(std::cout);
635-
if (cls.descriptor == nullptr)
636+
if (cls.descriptor == nullptr) {
636637
continue;
638+
}
637639
uint64_t clsinpmask = cls.descriptor->getInputsMask(); // class definition
638640
uint64_t diginpmask = digit.CTPInputMask.to_ullong();
639641
uint64_t digclsmask = digit.CTPClassMask.to_ullong();

Detectors/CTP/workflow/src/RawDecoderSpec.cxx

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,17 +71,19 @@ void RawDecoderSpec::endOfStream(framework::EndOfStreamContext& ec)
7171
std::cout << std::endl;
7272
LOG(info) << " Lost due to the shift:" << mDecoder.getLostDueToShift();
7373
LOG(info) << "Number of missing TF:" << nmiss << std::endl;
74-
if (mDecoder.getErrorIR() || mDecoder.getErrorTCR())
74+
if (mDecoder.getErrorIR() || mDecoder.getErrorTCR()) {
7575
LOG(error) << "# of IR errors:" << mDecoder.getErrorIR() << " TCR errors:" << mDecoder.getErrorTCR() << std::endl;
76+
}
7677
std::array<uint64_t, o2::ctp::CTP_NCLASSES> clsA = mDecoder.getClassCountersA();
7778
std::array<uint64_t, o2::ctp::CTP_NCLASSES> clsB = mDecoder.getClassCountersB();
7879
std::array<uint64_t, o2::ctp::CTP_NCLASSES> clsEA = mDecoder.getClassErrorsA();
7980
std::array<uint64_t, o2::ctp::CTP_NCLASSES> clsEB = mDecoder.getClassErrorsB();
8081

8182
for (int i = 0; i < o2::ctp::CTP_NCLASSES; i++) {
8283
bool print = clsA[i] > 0 || clsB[i] > 0 || clsEA[i] > 0 || clsEB[i] > 0;
83-
if (clsEA[i])
84+
if (clsEA[i]) {
8485
LOG(error) << " Class without inputs:";
86+
}
8587
LOG(important) << "CLASS:" << i << " Cls=>Inp:" << clsA[i] << " Inp=>Cls:" << clsB[i] << " ErrorsCls=>Inps:" << clsEA[i] << " MissingInps=>Cls:" << clsEB[i];
8688
}
8789
}

GPU/Common/GPUCommonDef.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,10 @@
7272
#define GPUCA_RTC_SPECIAL_CODE(...)
7373
#endif
7474

75+
#ifndef GPUCA_RTC_CONSTEXPR
76+
#define GPUCA_RTC_CONSTEXPR
77+
#endif
78+
7579
#ifndef GPUCA_DETERMINISTIC_CODE
7680
#ifdef GPUCA_DETERMINISTIC_MODE
7781
#define GPUCA_DETERMINISTIC_CODE(det, indet) det // In deterministic mode, take deterministic code path

GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,11 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile)
3838
{
3939
std::string rtcparam = std::string("#define GPUCA_RTC_CODE\n") +
4040
std::string(GetProcessingSettings().rtc.optSpecialCode ? "#define GPUCA_RTC_SPECIAL_CODE(...) __VA_ARGS__\n" : "#define GPUCA_RTC_SPECIAL_CODE(...)\n") +
41+
#ifndef GPUCA_HIP_WORKAROUND_CONSTEXPR // TODO: Fixme, once we have C++ P2280R4 in Clang
42+
std::string(GetProcessingSettings().rtc.optConstexpr ? "#define GPUCA_RTC_CONSTEXPR constexpr\n" : "#define GPUCA_RTC_CONSTEXPR\n") +
43+
#else
44+
std::string("#define GPUCA_RTC_CONSTEXPR\n") +
45+
#endif
4146
GPUParamRTC::generateRTCCode(param(), GetProcessingSettings().rtc.optConstexpr);
4247
if (filename == "") {
4348
filename = "/tmp/o2cagpu_rtc_";

GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc

Lines changed: 2 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -14,88 +14,6 @@
1414

1515
#if defined(GPUCA_SPECIALIZE_THRUST_SORTS) && !defined(GPUCA_GPUCODE_COMPILEKERNELS)
1616

17-
namespace o2::gpu::internal
18-
{
19-
namespace // anonymous
20-
{
21-
struct MergeBorderTracks_compMax {
22-
GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b)
23-
{
24-
return GPUCA_DETERMINISTIC_CODE((a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId), a.fMax < b.fMax);
25-
}
26-
};
27-
struct MergeBorderTracks_compMin {
28-
GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b)
29-
{
30-
return GPUCA_DETERMINISTIC_CODE((a.fMin != b.fMin) ? (a.fMin < b.fMin) : (a.fId < b.fId), a.fMin < b.fMin);
31-
}
32-
};
33-
34-
struct GPUTPCGMMergerSortTracks_comp {
35-
const GPUTPCGMMergedTrack* const mCmp;
36-
GPUhd() GPUTPCGMMergerSortTracks_comp(GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {}
37-
GPUd() bool operator()(const int32_t aa, const int32_t bb)
38-
{
39-
const GPUTPCGMMergedTrack& GPUrestrict() a = mCmp[aa];
40-
const GPUTPCGMMergedTrack& GPUrestrict() b = mCmp[bb];
41-
if (a.CCE() != b.CCE()) {
42-
return a.CCE() > b.CCE();
43-
}
44-
if (a.Legs() != b.Legs()) {
45-
return a.Legs() > b.Legs();
46-
}
47-
GPUCA_DETERMINISTIC_CODE( // clang-format off
48-
if (a.NClusters() != b.NClusters()) {
49-
return a.NClusters() > b.NClusters();
50-
} if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) {
51-
return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
52-
} if (a.GetParam().GetY() != b.GetParam().GetY()) {
53-
return a.GetParam().GetY() > b.GetParam().GetY();
54-
}
55-
return aa > bb;
56-
, // !GPUCA_DETERMINISTIC_CODE
57-
return a.NClusters() > b.NClusters();
58-
) // clang-format on
59-
}
60-
};
61-
62-
struct GPUTPCGMMergerSortTracksQPt_comp {
63-
const GPUTPCGMMergedTrack* const mCmp;
64-
GPUhd() GPUTPCGMMergerSortTracksQPt_comp(GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {}
65-
GPUd() bool operator()(const int32_t aa, const int32_t bb)
66-
{
67-
const GPUTPCGMMergedTrack& GPUrestrict() a = mCmp[aa];
68-
const GPUTPCGMMergedTrack& GPUrestrict() b = mCmp[bb];
69-
GPUCA_DETERMINISTIC_CODE( // clang-format off
70-
if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) {
71-
return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
72-
} if (a.GetParam().GetY() != b.GetParam().GetY()) {
73-
return a.GetParam().GetY() > b.GetParam().GetY();
74-
}
75-
return a.GetParam().GetZ() > b.GetParam().GetZ();
76-
, // !GPUCA_DETERMINISTIC_CODE
77-
return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt());
78-
) // clang-format on
79-
}
80-
};
81-
82-
struct GPUTPCGMMergerMergeLoopers_comp {
83-
GPUd() bool operator()(const MergeLooperParam& a, const MergeLooperParam& b)
84-
{
85-
return CAMath::Abs(a.refz) < CAMath::Abs(b.refz);
86-
}
87-
};
88-
89-
struct GPUTPCGMO2OutputSort_comp {
90-
GPUd() bool operator()(const GPUTPCGMMerger::tmpSort& a, const GPUTPCGMMerger::tmpSort& b)
91-
{
92-
return (a.y > b.y);
93-
}
94-
};
95-
96-
} // anonymous namespace
97-
} // namespace o2::gpu::internal
98-
9917
template <>
10018
inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendTimed<GPUTPCGMMergerMergeBorders, 3>(const krnlSetupTime& _xyz, GPUTPCGMBorderRange* const& range, int32_t const& N, int32_t const& cmpMax)
10119
{
@@ -109,13 +27,13 @@ inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendTimed
10927
template <>
11028
inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendTimed<GPUTPCGMMergerSortTracks, 0>(const krnlSetupTime& _xyz)
11129
{
112-
GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackOrderProcess(), processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.OutputTracks()));
30+
GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackOrderProcess(), processors()->tpcMerger.NMergedTracks(), GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.MergedTracks()));
11331
}
11432

11533
template <>
11634
inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendTimed<GPUTPCGMMergerSortTracksQPt, 0>(const krnlSetupTime& _xyz)
11735
{
118-
GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackSort(), processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracksQPt_comp(mProcessorsShadow->tpcMerger.OutputTracks()));
36+
GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackSort(), processors()->tpcMerger.NMergedTracks(), GPUTPCGMMergerSortTracksQPt_comp(mProcessorsShadow->tpcMerger.MergedTracks()));
11937
}
12038

12139
template <>

GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
// Keep some preprocessor calls unprocessed
1919
#define GPUCA_RTC_SPECIAL_CODE(...) GPUCA_RTC_SPECIAL_CODE(__VA_ARGS__)
2020
#define GPUCA_DETERMINISTIC_CODE(...) GPUCA_DETERMINISTIC_CODE(__VA_ARGS__)
21+
#define GPUCA_RTC_CONSTEXPR GPUCA_RTC_CONSTEXPR
2122

2223
// GPUReconstructionCUDAIncludesSystem.h prepended by CMake without preprocessor running
2324
#include "GPUReconstructionCUDADef.h"

GPU/GPUTracking/Base/hip/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,3 +270,8 @@ add_dependencies(GPUTrackingHIPExternalProvider O2::GPUTracking) # must not depe
270270
if(NOT DEFINED GPUCA_HIP_HIPIFY_FROM_CUDA OR "${GPUCA_HIP_HIPIFY_FROM_CUDA}")
271271
add_dependencies(GPUTrackingHIPExternalProvider ${MODULE}_HIPIFIED)
272272
endif()
273+
274+
set_source_files_properties("${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPGenRTC.cxx"
275+
TARGET_DIRECTORY O2::GPUTrackingHIP
276+
PROPERTIES
277+
COMPILE_DEFINITIONS "GPUCA_HIP_WORKAROUND_CONSTEXPR")

GPU/GPUTracking/CMakeLists.txt

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,12 @@ set(MODULE GPUTracking)
1616

1717
if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH})
1818
set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CXX_NO_FAST_MATH_FLAGS}")
19+
elseif(NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG")
1920
if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_OPTO2})
2021
set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O2")
22+
else()
23+
set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O3 -ffast-math")
2124
endif()
22-
elseif(NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG")
23-
set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O3 -ffast-math")
2425
endif()
2526
set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CXX_DENORMALS_FLAGS}")
2627

GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ void GPUTPCCompression::SetMaxData(const GPUTrackingInOutPointers& io)
125125
mMaxClusterFactorBase1024 = mMaxClusters > 100000000 ? mRec->MemoryScalers()->NTPCUnattachedHitsBase1024(mRec->GetParam().rec.tpc.rejectionStrategy) : 1024;
126126
mMaxClustersInCache = mMaxClusters * mMaxClusterFactorBase1024 / 1024;
127127
mMaxTrackClusters = mRec->GetConstantMem().tpcMerger.NOutputTrackClusters(); // TODO: Why is this not using ioPtrs? Could remove GPUConstantMem.h include
128-
mMaxTracks = mRec->GetConstantMem().tpcMerger.NOutputTracks();
128+
mMaxTracks = mRec->GetConstantMem().tpcMerger.NMergedTracks();
129129
if (mMaxClusters % 16) {
130130
mMaxClusters += 16 - (mMaxClusters % 16);
131131
}

GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,7 @@ GPUdii() void GPUTPCCompressionKernels::Thread<GPUTPCCompressionKernels::step1un
201201
const uint32_t iSector = iSectorRow / GPUCA_ROW_COUNT;
202202
const uint32_t iRow = iSectorRow % GPUCA_ROW_COUNT;
203203
const uint32_t idOffset = clusters->clusterOffset[iSector][iRow];
204-
const uint32_t idOffsetOut = clusters->clusterOffset[iSector][iRow] * compressor.mMaxClusterFactorBase1024 / 1024;
204+
const uint32_t idOffsetOut = clusters->clusterOffset[iSector][iRow] * compressor.mMaxClusterFactorBase1024 / 1024; // 32 bit enough for number of clusters per row * 1024
205205
const uint32_t idOffsetOutMax = ((const uint32_t*)clusters->clusterOffset[iSector])[iRow + 1] * compressor.mMaxClusterFactorBase1024 / 1024; // Array out of bounds access is ok, since it goes to the correct nClustersTotal
206206
if (iThread == nThreads - 1) {
207207
smem.nCount = 0;
@@ -214,7 +214,7 @@ GPUdii() void GPUTPCCompressionKernels::Thread<GPUTPCCompressionKernels::step1un
214214
const uint32_t nn = CAMath::nextMultipleOf<GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionKernels_step1unattached)>(clusters->nClusters[iSector][iRow]);
215215
for (uint32_t i = iThread; i < nn + nThreads; i += nThreads) {
216216
const int32_t idx = idOffset + i;
217-
int32_t cidx = 0;
217+
int32_t storeCluster = 0;
218218
do {
219219
if (i >= clusters->nClusters[iSector][iRow]) {
220220
break;
@@ -239,13 +239,13 @@ GPUdii() void GPUTPCCompressionKernels::Thread<GPUTPCCompressionKernels::step1un
239239
break;
240240
}
241241
}
242-
cidx = 1;
242+
storeCluster = 1;
243243
} while (false);
244244

245245
GPUbarrier();
246-
int32_t myIndex = work_group_scan_inclusive_add(cidx);
246+
int32_t myIndex = work_group_scan_inclusive_add(storeCluster);
247247
int32_t storeLater = -1;
248-
if (cidx) {
248+
if (storeCluster) {
249249
if (smem.nCount + myIndex <= GPUCA_TPC_COMP_CHUNK_SIZE) {
250250
sortBuffer[smem.nCount + myIndex - 1] = i;
251251
} else {

0 commit comments

Comments
 (0)